Skip to content

Commit 4315960

Browse files
committed
[Integration Test Framework] fix createTempDir and flaky tests (#5409)
createTempDir register a test cleanup function to remove the folder it created, however, on Windows, this folder sometimes fails to be removed because there are still open file handlers for the files within the folder. We fix this problem calling install.RemovePath that will retry removing the folder for about 2s. This is a very similar approach to what Go's t.TempDir does. Fix the flakiness from TestUpgradeHandler* tests by re-working the mockUpgradeManager, now it accepts a function for its Upgrade method and their implementation is goroutine safe (cherry picked from commit 1242e71)
1 parent 00d5d84 commit 4315960

File tree

3 files changed

+126
-26
lines changed

3 files changed

+126
-26
lines changed

internal/pkg/agent/application/actions/handlers/handler_action_upgrade_test.go

+121-25
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,8 @@ package handlers
66

77
import (
88
"context"
9+
"errors"
10+
"sync/atomic"
911
"testing"
1012
"time"
1113

@@ -26,7 +28,15 @@ import (
2628
)
2729

2830
type mockUpgradeManager struct {
29-
msgChan chan string
31+
UpgradeFn func(
32+
ctx context.Context,
33+
version string,
34+
sourceURI string,
35+
action *fleetapi.ActionUpgrade,
36+
details *details.Details,
37+
skipVerifyOverride bool,
38+
skipDefaultPgp bool,
39+
pgpBytes ...string) (reexec.ShutdownCallbackFn, error)
3040
}
3141

3242
func (u *mockUpgradeManager) Upgradeable() bool {
@@ -37,15 +47,25 @@ func (u *mockUpgradeManager) Reload(rawConfig *config.Config) error {
3747
return nil
3848
}
3949

40-
func (u *mockUpgradeManager) Upgrade(ctx context.Context, version string, sourceURI string, action *fleetapi.ActionUpgrade, details *details.Details, skipVerifyOverride bool, skipDefaultPgp bool, pgpBytes ...string) (_ reexec.ShutdownCallbackFn, err error) {
41-
select {
42-
case <-time.After(2 * time.Second):
43-
u.msgChan <- "completed " + version
44-
return nil, nil
45-
case <-ctx.Done():
46-
u.msgChan <- "canceled " + version
47-
return nil, ctx.Err()
48-
}
50+
func (u *mockUpgradeManager) Upgrade(
51+
ctx context.Context,
52+
version string,
53+
sourceURI string,
54+
action *fleetapi.ActionUpgrade,
55+
details *details.Details,
56+
skipVerifyOverride bool,
57+
skipDefaultPgp bool,
58+
pgpBytes ...string) (reexec.ShutdownCallbackFn, error) {
59+
60+
return u.UpgradeFn(
61+
ctx,
62+
version,
63+
sourceURI,
64+
action,
65+
details,
66+
skipVerifyOverride,
67+
skipDefaultPgp,
68+
pgpBytes...)
4969
}
5070

5171
func (u *mockUpgradeManager) Ack(ctx context.Context, acker acker.Acker) error {
@@ -65,7 +85,7 @@ func TestUpgradeHandler(t *testing.T) {
6585
log, _ := logger.New("", false)
6686

6787
agentInfo := &info.AgentInfo{}
68-
msgChan := make(chan string)
88+
upgradeCalledChan := make(chan struct{})
6989

7090
// Create and start the coordinator
7191
c := coordinator.New(
@@ -75,7 +95,21 @@ func TestUpgradeHandler(t *testing.T) {
7595
agentInfo,
7696
component.RuntimeSpecs{},
7797
nil,
78-
&mockUpgradeManager{msgChan: msgChan},
98+
&mockUpgradeManager{
99+
UpgradeFn: func(
100+
ctx context.Context,
101+
version string,
102+
sourceURI string,
103+
action *fleetapi.ActionUpgrade,
104+
details *details.Details,
105+
skipVerifyOverride bool,
106+
skipDefaultPgp bool,
107+
pgpBytes ...string) (reexec.ShutdownCallbackFn, error) {
108+
109+
upgradeCalledChan <- struct{}{}
110+
return nil, nil
111+
},
112+
},
79113
nil, nil, nil, nil, nil, false)
80114
//nolint:errcheck // We don't need the termination state of the Coordinator
81115
go c.Run(ctx)
@@ -86,8 +120,13 @@ func TestUpgradeHandler(t *testing.T) {
86120
ack := noopacker.New()
87121
err := u.Handle(ctx, &a, ack)
88122
require.NoError(t, err)
89-
msg := <-msgChan
90-
require.Equal(t, "completed 8.3.0", msg)
123+
124+
// Make sure this test does not dead lock or wait for too long
125+
select {
126+
case <-time.Tick(50 * time.Millisecond):
127+
t.Fatal("mockUpgradeManager.Upgrade was not called")
128+
case <-upgradeCalledChan:
129+
}
91130
}
92131

93132
func TestUpgradeHandlerSameVersion(t *testing.T) {
@@ -99,17 +138,37 @@ func TestUpgradeHandlerSameVersion(t *testing.T) {
99138
log, _ := logger.New("", false)
100139

101140
agentInfo := &info.AgentInfo{}
102-
msgChan := make(chan string)
141+
upgradeCalledChan := make(chan struct{})
103142

104143
// Create and start the Coordinator
144+
upgradeCalled := atomic.Bool{}
105145
c := coordinator.New(
106146
log,
107147
configuration.DefaultConfiguration(),
108148
logger.DefaultLogLevel,
109149
agentInfo,
110150
component.RuntimeSpecs{},
111151
nil,
112-
&mockUpgradeManager{msgChan: msgChan},
152+
&mockUpgradeManager{
153+
UpgradeFn: func(
154+
ctx context.Context,
155+
version string,
156+
sourceURI string,
157+
action *fleetapi.ActionUpgrade,
158+
details *details.Details,
159+
skipVerifyOverride bool,
160+
skipDefaultPgp bool,
161+
pgpBytes ...string) (reexec.ShutdownCallbackFn, error) {
162+
163+
if upgradeCalled.CompareAndSwap(false, true) {
164+
upgradeCalledChan <- struct{}{}
165+
return nil, nil
166+
}
167+
err := errors.New("mockUpgradeManager.Upgrade called more than once")
168+
t.Error(err.Error())
169+
return nil, err
170+
},
171+
},
113172
nil, nil, nil, nil, nil, false)
114173
//nolint:errcheck // We don't need the termination state of the Coordinator
115174
go c.Run(ctx)
@@ -122,8 +181,13 @@ func TestUpgradeHandlerSameVersion(t *testing.T) {
122181
err2 := u.Handle(ctx, &a, ack)
123182
require.NoError(t, err1)
124183
require.NoError(t, err2)
125-
msg := <-msgChan
126-
require.Equal(t, "completed 8.3.0", msg)
184+
185+
// Make sure this test does not dead lock or wait for too long
186+
select {
187+
case <-time.Tick(50 * time.Millisecond):
188+
t.Fatal("mockUpgradeManager.Upgrade was not called")
189+
case <-upgradeCalledChan:
190+
}
127191
}
128192

129193
func TestUpgradeHandlerNewVersion(t *testing.T) {
@@ -133,9 +197,9 @@ func TestUpgradeHandlerNewVersion(t *testing.T) {
133197
defer cancel()
134198

135199
log, _ := logger.New("", false)
200+
upgradeCalledChan := make(chan string)
136201

137202
agentInfo := &info.AgentInfo{}
138-
msgChan := make(chan string)
139203

140204
// Create and start the Coordinator
141205
c := coordinator.New(
@@ -145,7 +209,27 @@ func TestUpgradeHandlerNewVersion(t *testing.T) {
145209
agentInfo,
146210
component.RuntimeSpecs{},
147211
nil,
148-
&mockUpgradeManager{msgChan: msgChan},
212+
&mockUpgradeManager{
213+
UpgradeFn: func(
214+
ctx context.Context,
215+
version string,
216+
sourceURI string,
217+
action *fleetapi.ActionUpgrade,
218+
details *details.Details,
219+
skipVerifyOverride bool,
220+
skipDefaultPgp bool,
221+
pgpBytes ...string) (reexec.ShutdownCallbackFn, error) {
222+
223+
defer func() {
224+
upgradeCalledChan <- version
225+
}()
226+
if version == "8.2.0" {
227+
return nil, errors.New("upgrade to 8.2.0 will always fail")
228+
}
229+
230+
return nil, nil
231+
},
232+
},
149233
nil, nil, nil, nil, nil, false)
150234
//nolint:errcheck // We don't need the termination state of the Coordinator
151235
go c.Run(ctx)
@@ -156,13 +240,25 @@ func TestUpgradeHandlerNewVersion(t *testing.T) {
156240
a2 := fleetapi.ActionUpgrade{Data: fleetapi.ActionUpgradeData{
157241
Version: "8.5.0", SourceURI: "http://localhost"}}
158242
ack := noopacker.New()
243+
244+
checkMsg := func(c <-chan string, expected, errMsg string) {
245+
t.Helper()
246+
// Make sure this test does not dead lock or wait for too long
247+
// For some reason < 1s sometimes makes the test fail.
248+
select {
249+
case <-time.Tick(1300 * time.Millisecond):
250+
t.Fatal("timed out waiting for Upgrade to return")
251+
case msg := <-c:
252+
require.Equal(t, expected, msg, errMsg)
253+
}
254+
}
255+
256+
// Send both upgrade actions, a1 will error before a2 succeeds
159257
err1 := u.Handle(ctx, &a1, ack)
160258
require.NoError(t, err1)
161-
time.Sleep(1 * time.Second)
259+
checkMsg(upgradeCalledChan, "8.2.0", "first call must be with version 8.2.0")
260+
162261
err2 := u.Handle(ctx, &a2, ack)
163262
require.NoError(t, err2)
164-
msg1 := <-msgChan
165-
require.Equal(t, "canceled 8.2.0", msg1)
166-
msg2 := <-msgChan
167-
require.Equal(t, "completed 8.5.0", msg2)
263+
checkMsg(upgradeCalledChan, "8.5.0", "second call to Upgrade must be with version 8.5.0")
168264
}

pkg/testing/fixture.go

+2-1
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ import (
2626

2727
"github.com/elastic/elastic-agent/internal/pkg/agent/application/paths"
2828
"github.com/elastic/elastic-agent/internal/pkg/agent/application/upgrade/details"
29+
"github.com/elastic/elastic-agent/internal/pkg/agent/install"
2930
"github.com/elastic/elastic-agent/pkg/component"
3031
"github.com/elastic/elastic-agent/pkg/control"
3132
"github.com/elastic/elastic-agent/pkg/control/v2/client"
@@ -1209,7 +1210,7 @@ func createTempDir(t *testing.T) string {
12091210

12101211
cleanup := func() {
12111212
if !t.Failed() {
1212-
if err := os.RemoveAll(tempDir); err != nil {
1213+
if err := install.RemovePath(tempDir); err != nil {
12131214
t.Errorf("could not remove temp dir '%s': %s", tempDir, err)
12141215
}
12151216
} else {

testing/integration/event_logging_test.go

+3
Original file line numberDiff line numberDiff line change
@@ -75,7 +75,10 @@ func TestEventLogFile(t *testing.T) {
7575
Local: true,
7676
Sudo: false,
7777
})
78+
<<<<<<< HEAD
7879

80+
=======
81+
>>>>>>> 1242e7186a ([Integration Test Framework] fix createTempDir and flaky tests (#5409))
7982
ctx, cancel := testcontext.WithDeadline(
8083
t,
8184
context.Background(),

0 commit comments

Comments
 (0)