Skip to content

Commit 763a7ad

Browse files
Fix flaky cert tests (#3572)
1 parent 9952c11 commit 763a7ad

File tree

1 file changed

+83
-42
lines changed

1 file changed

+83
-42
lines changed

internal/pkg/api/server_test.go

+83-42
Original file line numberDiff line numberDiff line change
@@ -24,11 +24,8 @@ import (
2424
"github.com/stretchr/testify/require"
2525

2626
fbuild "github.com/elastic/fleet-server/v7/internal/pkg/build"
27-
"github.com/elastic/fleet-server/v7/internal/pkg/cache"
28-
"github.com/elastic/fleet-server/v7/internal/pkg/checkin"
2927
"github.com/elastic/fleet-server/v7/internal/pkg/config"
3028
"github.com/elastic/fleet-server/v7/internal/pkg/monitor/mock"
31-
"github.com/elastic/fleet-server/v7/internal/pkg/policy"
3229
ftesting "github.com/elastic/fleet-server/v7/internal/pkg/testing"
3330
"github.com/elastic/fleet-server/v7/internal/pkg/testing/certs"
3431
testlog "github.com/elastic/fleet-server/v7/internal/pkg/testing/log"
@@ -46,40 +43,38 @@ func Test_server_Run(t *testing.T) {
4643
cfg.Port = port
4744
addr := cfg.BindEndpoints()[0]
4845

49-
verCon := mustBuildConstraints("8.0.0")
50-
c, err := cache.New(config.Cache{NumCounters: 100, MaxCost: 100000})
51-
require.NoError(t, err)
52-
bulker := ftesting.NewMockBulk()
53-
pim := mock.NewMockMonitor()
54-
pm := policy.NewMonitor(bulker, pim, config.ServerLimits{PolicyLimit: config.Limit{Interval: 5 * time.Millisecond, Burst: 1}})
55-
bc := checkin.NewBulk(nil)
56-
ct := NewCheckinT(verCon, cfg, c, bc, pm, nil, nil, nil, nil)
57-
et, err := NewEnrollerT(verCon, cfg, nil, c)
58-
require.NoError(t, err)
59-
60-
srv := NewServer(addr, cfg, ct, et, nil, nil, nil, nil, fbuild.Info{}, nil, nil, nil, nil, nil)
61-
errCh := make(chan error)
46+
srv := NewServer(addr, cfg, nil, nil, nil, nil, nil, nil, fbuild.Info{}, nil, nil, nil, nil, nil)
6247

48+
started := make(chan struct{}, 1)
49+
errCh := make(chan error, 1)
6350
var wg sync.WaitGroup
6451
wg.Add(1)
6552
go func() {
66-
if err := srv.Run(ctx); err != nil {
53+
started <- struct{}{}
54+
if err := srv.Run(ctx); err != nil && !errors.Is(err, context.Canceled) {
6755
errCh <- err
6856
}
6957
wg.Done()
7058
}()
71-
var errFromChan error
72-
select {
59+
60+
select { // if the goroutine has started within 500ms something is wrong, test has timed out
61+
case <-started:
62+
case <-time.After(500 * time.Millisecond):
63+
require.Fail(t, "timed out waiting for server to start")
64+
}
65+
select { // check if there is an error in the 1st 500ms of the server running
7366
case err := <-errCh:
74-
errFromChan = err
67+
require.NoError(t, err, "error during startup")
7568
case <-time.After(500 * time.Millisecond):
7669
break
7770
}
71+
7872
cancel()
7973
wg.Wait()
80-
require.NoError(t, errFromChan)
81-
if !errors.Is(err, http.ErrServerClosed) {
74+
select {
75+
case err := <-errCh:
8276
require.NoError(t, err)
77+
default:
8378
}
8479
}
8580

@@ -130,7 +125,6 @@ func Test_server_ClientCert(t *testing.T) {
130125

131126
st := NewStatusT(cfg, nil, nil)
132127
srv := NewServer(addr, cfg, nil, nil, nil, nil, st, sm, fbuild.Info{}, nil, nil, nil, nil, nil)
133-
errCh := make(chan error)
134128

135129
// make http client with no client certs
136130
certPool := x509.NewCertPool()
@@ -144,17 +138,29 @@ func Test_server_ClientCert(t *testing.T) {
144138
}
145139

146140
started := make(chan struct{}, 1)
141+
errCh := make(chan error, 1)
147142
var wg sync.WaitGroup
148143
wg.Add(1)
149144
go func() {
150145
started <- struct{}{}
151-
if err := srv.Run(ctx); err != nil {
146+
if err := srv.Run(ctx); err != nil && !errors.Is(err, context.Canceled) {
152147
errCh <- err
153148
}
154149
wg.Done()
155150
}()
156151

157-
<-started
152+
select { // make sure goroutine starts within 500ms
153+
case <-started:
154+
case <-time.After(500 * time.Millisecond):
155+
require.Fail(t, "timed out waiting for server to start")
156+
}
157+
select { // make sure there are no errors within 500ms of api server running
158+
case err := <-errCh:
159+
require.NoError(t, err, "error during startup")
160+
case <-time.After(500 * time.Millisecond):
161+
break
162+
}
163+
158164
rCtx, rCancel := context.WithTimeout(ctx, time.Second)
159165
defer rCancel()
160166
req, err := http.NewRequestWithContext(rCtx, "GET", "https://"+addr+"/api/status", nil)
@@ -164,13 +170,13 @@ func Test_server_ClientCert(t *testing.T) {
164170
resp.Body.Close()
165171
require.Equal(t, http.StatusOK, resp.StatusCode)
166172

173+
cancel()
174+
wg.Wait()
167175
select {
168176
case err := <-errCh:
169177
require.NoError(t, err)
170178
default:
171179
}
172-
cancel()
173-
wg.Wait()
174180
})
175181

176182
t.Run("valid client certs", func(t *testing.T) {
@@ -189,7 +195,6 @@ func Test_server_ClientCert(t *testing.T) {
189195

190196
st := NewStatusT(cfg, nil, nil)
191197
srv := NewServer(addr, cfg, nil, nil, nil, nil, st, sm, fbuild.Info{}, nil, nil, nil, nil, nil)
192-
errCh := make(chan error)
193198

194199
// make http client with valid client certs
195200
clientCert := certs.GenCert(t, ca)
@@ -205,17 +210,29 @@ func Test_server_ClientCert(t *testing.T) {
205210
}
206211

207212
started := make(chan struct{}, 1)
213+
errCh := make(chan error, 1)
208214
var wg sync.WaitGroup
209215
wg.Add(1)
210216
go func() {
211217
started <- struct{}{}
212-
if err := srv.Run(ctx); err != nil {
218+
if err := srv.Run(ctx); err != nil && !errors.Is(err, context.Canceled) {
213219
errCh <- err
214220
}
215221
wg.Done()
216222
}()
217223

218-
<-started
224+
select {
225+
case <-started:
226+
case <-time.After(500 * time.Millisecond):
227+
require.Fail(t, "timed out waiting for server to start")
228+
}
229+
select {
230+
case err := <-errCh:
231+
require.NoError(t, err, "error during startup")
232+
case <-time.After(500 * time.Millisecond):
233+
break
234+
}
235+
219236
rCtx, rCancel := context.WithTimeout(ctx, time.Second)
220237
defer rCancel()
221238
req, err := http.NewRequestWithContext(rCtx, "GET", "https://"+addr+"/api/status", nil)
@@ -225,13 +242,13 @@ func Test_server_ClientCert(t *testing.T) {
225242
resp.Body.Close()
226243
require.Equal(t, http.StatusOK, resp.StatusCode)
227244

245+
cancel()
246+
wg.Wait()
228247
select {
229248
case err := <-errCh:
230249
require.NoError(t, err)
231250
default:
232251
}
233-
cancel()
234-
wg.Wait()
235252
})
236253

237254
t.Run("invalid client certs", func(t *testing.T) {
@@ -250,7 +267,6 @@ func Test_server_ClientCert(t *testing.T) {
250267

251268
st := NewStatusT(cfg, nil, nil)
252269
srv := NewServer(addr, cfg, nil, nil, nil, nil, st, sm, fbuild.Info{}, nil, nil, nil, nil, nil)
253-
errCh := make(chan error)
254270

255271
// make http client with invalid client certs
256272
clientCA := certs.GenCA(t)
@@ -267,35 +283,46 @@ func Test_server_ClientCert(t *testing.T) {
267283
}
268284

269285
started := make(chan struct{}, 1)
286+
errCh := make(chan error, 1)
270287
var wg sync.WaitGroup
271288
wg.Add(1)
272289
go func() {
273290
started <- struct{}{}
274-
if err := srv.Run(ctx); err != nil {
291+
if err := srv.Run(ctx); err != nil && !errors.Is(err, context.Canceled) {
275292
errCh <- err
276293
}
277294
wg.Done()
278295
}()
279296

280-
<-started
297+
select {
298+
case <-started:
299+
case <-time.After(500 * time.Millisecond):
300+
require.Fail(t, "timed out waiting for server to start")
301+
}
302+
select {
303+
case err := <-errCh:
304+
require.NoError(t, err, "error during startup")
305+
case <-time.After(500 * time.Millisecond):
306+
break
307+
}
308+
281309
rCtx, rCancel := context.WithTimeout(ctx, time.Second)
282310
defer rCancel()
283311
req, err := http.NewRequestWithContext(rCtx, "GET", "https://"+addr+"/api/status", nil)
284312
require.NoError(t, err)
285313
_, err = httpClient.Do(req)
286314
require.Error(t, err)
287315

316+
cancel()
317+
wg.Wait()
288318
select {
289319
case err := <-errCh:
290320
require.NoError(t, err)
291321
default:
292322
}
293-
cancel()
294-
wg.Wait()
295323
})
296324

297325
t.Run("valid client certs no certs requested", func(t *testing.T) {
298-
t.Skip("test is flakey see fleet-server/issue/3266")
299326
ctx, cancel := context.WithCancel(context.Background())
300327
defer cancel()
301328
ctx = testlog.SetLogger(t).WithContext(ctx)
@@ -323,7 +350,6 @@ key: %s`,
323350

324351
st := NewStatusT(cfg, nil, nil)
325352
srv := NewServer(addr, cfg, nil, nil, nil, nil, st, sm, fbuild.Info{}, nil, nil, nil, nil, nil)
326-
errCh := make(chan error)
327353

328354
// make http client with valid client certs
329355
clientCert := certs.GenCert(t, ca)
@@ -338,15 +364,30 @@ key: %s`,
338364
},
339365
}
340366

367+
started := make(chan struct{}, 1)
368+
errCh := make(chan error, 1)
341369
var wg sync.WaitGroup
342370
wg.Add(1)
343371
go func() {
344-
if err := srv.Run(ctx); err != nil {
372+
started <- struct{}{}
373+
if err := srv.Run(ctx); err != nil && !errors.Is(err, context.Canceled) {
345374
errCh <- err
346375
}
347376
wg.Done()
348377
}()
349378

379+
select {
380+
case <-started:
381+
case <-time.After(500 * time.Millisecond):
382+
require.Fail(t, "timed out waiting for server to start")
383+
}
384+
select {
385+
case err := <-errCh:
386+
require.NoError(t, err, "error during startup")
387+
case <-time.After(500 * time.Millisecond):
388+
break
389+
}
390+
350391
rCtx, rCancel := context.WithTimeout(ctx, time.Second)
351392
defer rCancel()
352393
req, err := http.NewRequestWithContext(rCtx, "GET", "https://"+addr+"/api/status", nil)
@@ -356,12 +397,12 @@ key: %s`,
356397
resp.Body.Close()
357398
require.Equal(t, http.StatusOK, resp.StatusCode)
358399

400+
cancel()
401+
wg.Wait()
359402
select {
360403
case err := <-errCh:
361404
require.NoError(t, err)
362405
default:
363406
}
364-
cancel()
365-
wg.Wait()
366407
})
367408
}

0 commit comments

Comments
 (0)