Skip to content

Commit 1242e71

Browse files
authoredSep 5, 2024
[Integration Test Framework] fix createTempDir and flaky tests (#5409)
createTempDir register a test cleanup function to remove the folder it created, however, on Windows, this folder sometimes fails to be removed because there are still open file handlers for the files within the folder. We fix this problem calling install.RemovePath that will retry removing the folder for about 2s. This is a very similar approach to what Go's t.TempDir does. Fix the flakiness from TestUpgradeHandler* tests by re-working the mockUpgradeManager, now it accepts a function for its Upgrade method and their implementation is goroutine safe
1 parent 65e7913 commit 1242e71

File tree

3 files changed

+124
-36
lines changed

3 files changed

+124
-36
lines changed
 

‎internal/pkg/agent/application/actions/handlers/handler_action_upgrade_test.go

+122-34
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,10 @@ package handlers
66

77
import (
88
"context"
9+
"errors"
10+
"sync/atomic"
911
"testing"
12+
"time"
1013

1114
"github.com/stretchr/testify/require"
1215

@@ -25,8 +28,15 @@ import (
2528
)
2629

2730
type mockUpgradeManager struct {
28-
msgChan chan string
29-
completedChan chan struct{}
31+
UpgradeFn func(
32+
ctx context.Context,
33+
version string,
34+
sourceURI string,
35+
action *fleetapi.ActionUpgrade,
36+
details *details.Details,
37+
skipVerifyOverride bool,
38+
skipDefaultPgp bool,
39+
pgpBytes ...string) (reexec.ShutdownCallbackFn, error)
3040
}
3141

3242
func (u *mockUpgradeManager) Upgradeable() bool {
@@ -37,15 +47,25 @@ func (u *mockUpgradeManager) Reload(rawConfig *config.Config) error {
3747
return nil
3848
}
3949

40-
func (u *mockUpgradeManager) Upgrade(ctx context.Context, version string, sourceURI string, action *fleetapi.ActionUpgrade, details *details.Details, skipVerifyOverride bool, skipDefaultPgp bool, pgpBytes ...string) (_ reexec.ShutdownCallbackFn, err error) {
41-
select {
42-
case <-u.completedChan:
43-
u.msgChan <- "completed " + version
44-
return nil, nil
45-
case <-ctx.Done():
46-
u.msgChan <- "canceled " + version
47-
return nil, ctx.Err()
48-
}
50+
func (u *mockUpgradeManager) Upgrade(
51+
ctx context.Context,
52+
version string,
53+
sourceURI string,
54+
action *fleetapi.ActionUpgrade,
55+
details *details.Details,
56+
skipVerifyOverride bool,
57+
skipDefaultPgp bool,
58+
pgpBytes ...string) (reexec.ShutdownCallbackFn, error) {
59+
60+
return u.UpgradeFn(
61+
ctx,
62+
version,
63+
sourceURI,
64+
action,
65+
details,
66+
skipVerifyOverride,
67+
skipDefaultPgp,
68+
pgpBytes...)
4969
}
5070

5171
func (u *mockUpgradeManager) Ack(ctx context.Context, acker acker.Acker) error {
@@ -65,8 +85,7 @@ func TestUpgradeHandler(t *testing.T) {
6585
log, _ := logger.New("", false)
6686

6787
agentInfo := &info.AgentInfo{}
68-
msgChan := make(chan string)
69-
completedChan := make(chan struct{})
88+
upgradeCalledChan := make(chan struct{})
7089

7190
// Create and start the coordinator
7291
c := coordinator.New(
@@ -76,7 +95,21 @@ func TestUpgradeHandler(t *testing.T) {
7695
agentInfo,
7796
component.RuntimeSpecs{},
7897
nil,
79-
&mockUpgradeManager{msgChan: msgChan, completedChan: completedChan},
98+
&mockUpgradeManager{
99+
UpgradeFn: func(
100+
ctx context.Context,
101+
version string,
102+
sourceURI string,
103+
action *fleetapi.ActionUpgrade,
104+
details *details.Details,
105+
skipVerifyOverride bool,
106+
skipDefaultPgp bool,
107+
pgpBytes ...string) (reexec.ShutdownCallbackFn, error) {
108+
109+
upgradeCalledChan <- struct{}{}
110+
return nil, nil
111+
},
112+
},
80113
nil, nil, nil, nil, nil, false)
81114
//nolint:errcheck // We don't need the termination state of the Coordinator
82115
go c.Run(ctx)
@@ -86,11 +119,14 @@ func TestUpgradeHandler(t *testing.T) {
86119
Version: "8.3.0", SourceURI: "http://localhost"}}
87120
ack := noopacker.New()
88121
err := u.Handle(ctx, &a, ack)
89-
// indicate that upgrade is completed
90-
close(completedChan)
91122
require.NoError(t, err)
92-
msg := <-msgChan
93-
require.Equal(t, "completed 8.3.0", msg)
123+
124+
// Make sure this test does not dead lock or wait for too long
125+
select {
126+
case <-time.Tick(50 * time.Millisecond):
127+
t.Fatal("mockUpgradeManager.Upgrade was not called")
128+
case <-upgradeCalledChan:
129+
}
94130
}
95131

96132
func TestUpgradeHandlerSameVersion(t *testing.T) {
@@ -102,18 +138,37 @@ func TestUpgradeHandlerSameVersion(t *testing.T) {
102138
log, _ := logger.New("", false)
103139

104140
agentInfo := &info.AgentInfo{}
105-
msgChan := make(chan string)
106-
completedChan := make(chan struct{})
141+
upgradeCalledChan := make(chan struct{})
107142

108143
// Create and start the Coordinator
144+
upgradeCalled := atomic.Bool{}
109145
c := coordinator.New(
110146
log,
111147
configuration.DefaultConfiguration(),
112148
logger.DefaultLogLevel,
113149
agentInfo,
114150
component.RuntimeSpecs{},
115151
nil,
116-
&mockUpgradeManager{msgChan: msgChan, completedChan: completedChan},
152+
&mockUpgradeManager{
153+
UpgradeFn: func(
154+
ctx context.Context,
155+
version string,
156+
sourceURI string,
157+
action *fleetapi.ActionUpgrade,
158+
details *details.Details,
159+
skipVerifyOverride bool,
160+
skipDefaultPgp bool,
161+
pgpBytes ...string) (reexec.ShutdownCallbackFn, error) {
162+
163+
if upgradeCalled.CompareAndSwap(false, true) {
164+
upgradeCalledChan <- struct{}{}
165+
return nil, nil
166+
}
167+
err := errors.New("mockUpgradeManager.Upgrade called more than once")
168+
t.Error(err.Error())
169+
return nil, err
170+
},
171+
},
117172
nil, nil, nil, nil, nil, false)
118173
//nolint:errcheck // We don't need the termination state of the Coordinator
119174
go c.Run(ctx)
@@ -126,10 +181,13 @@ func TestUpgradeHandlerSameVersion(t *testing.T) {
126181
err2 := u.Handle(ctx, &a, ack)
127182
require.NoError(t, err1)
128183
require.NoError(t, err2)
129-
// indicate that upgrade is completed
130-
close(completedChan)
131-
msg := <-msgChan
132-
require.Equal(t, "completed 8.3.0", msg)
184+
185+
// Make sure this test does not dead lock or wait for too long
186+
select {
187+
case <-time.Tick(50 * time.Millisecond):
188+
t.Fatal("mockUpgradeManager.Upgrade was not called")
189+
case <-upgradeCalledChan:
190+
}
133191
}
134192

135193
func TestUpgradeHandlerNewVersion(t *testing.T) {
@@ -139,10 +197,9 @@ func TestUpgradeHandlerNewVersion(t *testing.T) {
139197
defer cancel()
140198

141199
log, _ := logger.New("", false)
200+
upgradeCalledChan := make(chan string)
142201

143202
agentInfo := &info.AgentInfo{}
144-
msgChan := make(chan string)
145-
completedChan := make(chan struct{})
146203

147204
// Create and start the Coordinator
148205
c := coordinator.New(
@@ -152,7 +209,27 @@ func TestUpgradeHandlerNewVersion(t *testing.T) {
152209
agentInfo,
153210
component.RuntimeSpecs{},
154211
nil,
155-
&mockUpgradeManager{msgChan: msgChan, completedChan: completedChan},
212+
&mockUpgradeManager{
213+
UpgradeFn: func(
214+
ctx context.Context,
215+
version string,
216+
sourceURI string,
217+
action *fleetapi.ActionUpgrade,
218+
details *details.Details,
219+
skipVerifyOverride bool,
220+
skipDefaultPgp bool,
221+
pgpBytes ...string) (reexec.ShutdownCallbackFn, error) {
222+
223+
defer func() {
224+
upgradeCalledChan <- version
225+
}()
226+
if version == "8.2.0" {
227+
return nil, errors.New("upgrade to 8.2.0 will always fail")
228+
}
229+
230+
return nil, nil
231+
},
232+
},
156233
nil, nil, nil, nil, nil, false)
157234
//nolint:errcheck // We don't need the termination state of the Coordinator
158235
go c.Run(ctx)
@@ -163,14 +240,25 @@ func TestUpgradeHandlerNewVersion(t *testing.T) {
163240
a2 := fleetapi.ActionUpgrade{Data: fleetapi.ActionUpgradeData{
164241
Version: "8.5.0", SourceURI: "http://localhost"}}
165242
ack := noopacker.New()
243+
244+
checkMsg := func(c <-chan string, expected, errMsg string) {
245+
t.Helper()
246+
// Make sure this test does not dead lock or wait for too long
247+
// For some reason < 1s sometimes makes the test fail.
248+
select {
249+
case <-time.Tick(1300 * time.Millisecond):
250+
t.Fatal("timed out waiting for Upgrade to return")
251+
case msg := <-c:
252+
require.Equal(t, expected, msg, errMsg)
253+
}
254+
}
255+
256+
// Send both upgrade actions, a1 will error before a2 succeeds
166257
err1 := u.Handle(ctx, &a1, ack)
167258
require.NoError(t, err1)
259+
checkMsg(upgradeCalledChan, "8.2.0", "first call must be with version 8.2.0")
260+
168261
err2 := u.Handle(ctx, &a2, ack)
169262
require.NoError(t, err2)
170-
msg1 := <-msgChan
171-
require.Equal(t, "canceled 8.2.0", msg1)
172-
// indicate that upgrade is completed
173-
close(completedChan)
174-
msg2 := <-msgChan
175-
require.Equal(t, "completed 8.5.0", msg2)
263+
checkMsg(upgradeCalledChan, "8.5.0", "second call to Upgrade must be with version 8.5.0")
176264
}

‎pkg/testing/fixture.go

+2-1
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ import (
2626

2727
"github.com/elastic/elastic-agent/internal/pkg/agent/application/paths"
2828
"github.com/elastic/elastic-agent/internal/pkg/agent/application/upgrade/details"
29+
"github.com/elastic/elastic-agent/internal/pkg/agent/install"
2930
"github.com/elastic/elastic-agent/pkg/component"
3031
"github.com/elastic/elastic-agent/pkg/control"
3132
"github.com/elastic/elastic-agent/pkg/control/v2/client"
@@ -1209,7 +1210,7 @@ func createTempDir(t *testing.T) string {
12091210

12101211
cleanup := func() {
12111212
if !t.Failed() {
1212-
if err := os.RemoveAll(tempDir); err != nil {
1213+
if err := install.RemovePath(tempDir); err != nil {
12131214
t.Errorf("could not remove temp dir '%s': %s", tempDir, err)
12141215
}
12151216
} else {

‎testing/integration/event_logging_test.go

-1
Original file line numberDiff line numberDiff line change
@@ -75,7 +75,6 @@ func TestEventLogFile(t *testing.T) {
7575
Local: true,
7676
Sudo: false,
7777
})
78-
t.Skip("Flaky test: https://github.com/elastic/elastic-agent/issues/5397")
7978
ctx, cancel := testcontext.WithDeadline(
8079
t,
8180
context.Background(),

0 commit comments

Comments
 (0)