Skip to content

Commit 6695324

Browse files
belimawrpchilajlind23
authored
Fix TestLogIngestionFleetManaged, TestDebLogIngestFleetManaged (#5375)
* [integration tests] Keep work directory if test fails This commit enables the work directory used by the integration tests framework to be kept in the filesystem if the test fails. The full path of the test directory is printed when the test fails. * Update pkg/testing/fixture.go Co-authored-by: Paolo Chilà <paolo.chila@elastic.co> * Update pkg/testing/fixture.go Co-authored-by: Paolo Chilà <paolo.chila@elastic.co> * Remove log in error branch * [Integration Tests] Generate namespace based on UUIDv4 The namespace generated by the integration tests framework was not unique among different tests and test runs, so sometimes collisions would occur, causing some tests to be flaky. * Add debug logs * run mage fmt * Fix TestDebLogIngestFleetManaged - Remove debug logs - Make the deb respect the AGENT_KEEP_INSTALLED env var - Add errors that only happen on deb to allow list * Improve logs and test error message --------- Co-authored-by: Paolo Chilà <paolo.chila@elastic.co> Co-authored-by: Julien Lind <julien.lind@elastic.co>
1 parent 9e2031f commit 6695324

File tree

7 files changed

+49
-20
lines changed

7 files changed

+49
-20
lines changed

internal/pkg/agent/application/upgrade/upgrade.go

+2-2
Original file line numberDiff line numberDiff line change
@@ -353,14 +353,14 @@ func waitForWatcherWithTimeoutCreationFunc(ctx context.Context, log *logger.Logg
353353
return fmt.Errorf("error starting update marker watcher: %w", err)
354354
}
355355

356-
log.Info("waiting up to %s for upgrade watcher to set %s state in upgrade marker", waitTime, details.StateWatching)
356+
log.Infof("waiting up to %s for upgrade watcher to set %s state in upgrade marker", waitTime, details.StateWatching)
357357

358358
for {
359359
select {
360360
case updMarker := <-markerWatcher.Watch():
361361
if updMarker.Details != nil && updMarker.Details.State == details.StateWatching {
362362
// watcher started and it is watching, all good
363-
log.Info("upgrade watcher set %s state in upgrade marker: exiting wait loop", details.StateWatching)
363+
log.Infof("upgrade watcher set %s state in upgrade marker: exiting wait loop", details.StateWatching)
364364
return nil
365365
}
366366

pkg/testing/define/define.go

+7-12
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,8 @@ import (
1717
"sync"
1818
"testing"
1919

20+
"github.com/gofrs/uuid/v5"
21+
2022
"github.com/elastic/elastic-agent-libs/kibana"
2123
"github.com/elastic/go-elasticsearch/v8"
2224
"github.com/elastic/go-sysinfo"
@@ -216,28 +218,21 @@ func getOSInfo() (*types.OSInfo, error) {
216218
// getNamespace is a general namespace that the test can use that will ensure that it
217219
// is unique and won't collide with other tests (even the same test from a different batch).
218220
//
219-
// this function uses a sha256 of the prefix, package and test name, to ensure that the
221+
// This function uses a sha256 of a UUIDv4 to ensure that the
220222
// length of the namespace is not over the 100 byte limit from Fleet
221223
// see: https://www.elastic.co/guide/en/fleet/current/data-streams.html#data-streams-naming-scheme
222224
func getNamespace(t *testing.T, local bool) (string, error) {
223-
prefix := os.Getenv("TEST_DEFINE_PREFIX")
224-
if prefix == "" {
225-
if local {
226-
prefix = "local"
227-
}
228-
if prefix == "" {
229-
return "", errors.New("TEST_DEFINE_PREFIX must be defined by the test runner")
230-
}
225+
nsUUID, err := uuid.NewV4()
226+
if err != nil {
227+
return "", fmt.Errorf("cannot generate UUID V4: %w", err)
231228
}
232-
name := fmt.Sprintf("%s-%s", prefix, t.Name())
233229
hasher := sha256.New()
234-
hasher.Write([]byte(name))
230+
hasher.Write([]byte(nsUUID.String()))
235231

236232
// Fleet API requires the namespace to be lowercased and not contain
237233
// special characters.
238234
namespace := strings.ToLower(base64.URLEncoding.EncodeToString(hasher.Sum(nil)))
239235
namespace = noSpecialCharsRegexp.ReplaceAllString(namespace, "")
240-
241236
return namespace, nil
242237
}
243238

pkg/testing/fixture.go

+26-1
Original file line numberDiff line numberDiff line change
@@ -206,7 +206,7 @@ func (f *Fixture) Prepare(ctx context.Context, components ...UsableComponent) er
206206
if err != nil {
207207
return err
208208
}
209-
workDir := f.t.TempDir()
209+
workDir := createTempDir(f.t)
210210
finalDir := filepath.Join(workDir, name)
211211
err = ExtractArtifact(f.t, src, workDir)
212212
if err != nil {
@@ -1196,6 +1196,31 @@ func performConfigure(ctx context.Context, c client.Client, cfg string, timeout
11961196
return nil
11971197
}
11981198

1199+
// createTempDir creates a temporary directory that will be
1200+
// removed after the test passes. If the test fails, the
1201+
// directory is kept for further investigation.
1202+
//
1203+
// If the test is run with -v and fails the temporary directory is logged
1204+
func createTempDir(t *testing.T) string {
1205+
tempDir, err := os.MkdirTemp("", strings.ReplaceAll(t.Name(), "/", "-"))
1206+
if err != nil {
1207+
t.Fatalf("failed to make temp directory: %s", err)
1208+
}
1209+
1210+
cleanup := func() {
1211+
if !t.Failed() {
1212+
if err := os.RemoveAll(tempDir); err != nil {
1213+
t.Errorf("could not remove temp dir '%s': %s", tempDir, err)
1214+
}
1215+
} else {
1216+
t.Logf("Temporary directory %q preserved for investigation/debugging", tempDir)
1217+
}
1218+
}
1219+
t.Cleanup(cleanup)
1220+
1221+
return tempDir
1222+
}
1223+
11991224
type AgentStatusOutput struct {
12001225
Info struct {
12011226
ID string `json:"id"`

pkg/testing/fixture_install.go

+7
Original file line numberDiff line numberDiff line change
@@ -416,6 +416,7 @@ func (f *Fixture) installDeb(ctx context.Context, installOpts *InstallOpts, opts
416416

417417
f.t.Cleanup(func() {
418418
f.t.Logf("[test %s] Inside fixture installDeb cleanup function", f.t.Name())
419+
419420
uninstallCtx, uninstallCancel := context.WithTimeout(context.Background(), 5*time.Minute)
420421
defer uninstallCancel()
421422
// stop elastic-agent, non fatal if error, might have been stopped before this.
@@ -424,6 +425,12 @@ func (f *Fixture) installDeb(ctx context.Context, installOpts *InstallOpts, opts
424425
if err != nil {
425426
f.t.Logf("error systemctl stop elastic-agent: %s, output: %s", err, string(out))
426427
}
428+
429+
if keepInstalledFlag() {
430+
f.t.Logf("skipping uninstall; test failed and AGENT_KEEP_INSTALLED=true")
431+
return
432+
}
433+
427434
// apt-get purge elastic-agent
428435
f.t.Logf("running 'sudo apt-get -y -q purge elastic-agent'")
429436
out, err = exec.CommandContext(uninstallCtx, "sudo", "apt-get", "-y", "-q", "purge", "elastic-agent").CombinedOutput()

pkg/testing/tools/estools/elasticsearch.go

+1
Original file line numberDiff line numberDiff line change
@@ -593,6 +593,7 @@ func PerformQueryForRawQuery(ctx context.Context, queryRaw map[string]interface{
593593
es.Search.WithContext(ctx),
594594
es.Search.WithSize(300),
595595
)
596+
596597
if err != nil {
597598
return Documents{}, fmt.Errorf("error performing ES search: %w", err)
598599
}

testing/integration/logs_ingestion_test.go

+5-4
Original file line numberDiff line numberDiff line change
@@ -160,14 +160,15 @@ func testMonitoringLogsAreShipped(
160160
return estools.CheckForErrorsInLogs(ctx, info.ESClient, info.Namespace, []string{
161161
// acceptable error messages (include reason)
162162
"Error dialing dial tcp 127.0.0.1:9200: connect: connection refused", // beat is running default config before its config gets updated
163-
"Global configuration artifact is not available", // Endpoint: failed to load user artifact due to connectivity issues
163+
"Failed to apply initial policy from on disk configuration",
164+
"Failed to connect to backoff(elasticsearch(http://127.0.0.1:9200)): Get \"http://127.0.0.1:9200\": dial tcp 127.0.0.1:9200: connect: connection refused", // Deb test
164165
"Failed to download artifact",
165166
"Failed to initialize artifact",
166-
"Failed to apply initial policy from on disk configuration",
167-
"elastic-agent-client error: rpc error: code = Canceled desc = context canceled", // can happen on restart
167+
"Global configuration artifact is not available", // Endpoint: failed to load user artifact due to connectivity issues
168+
"add_cloud_metadata: received error failed fetching EC2 Identity Document", // okay for the cloud metadata to not work
168169
"add_cloud_metadata: received error failed requesting openstack metadata", // okay for the cloud metadata to not work
169170
"add_cloud_metadata: received error failed with http status code 404", // okay for the cloud metadata to not work
170-
"add_cloud_metadata: received error failed fetching EC2 Identity Document", // okay for the cloud metadata to not work
171+
"elastic-agent-client error: rpc error: code = Canceled desc = context canceled", // can happen on restart
171172
"failed to invoke rollback watcher: failed to start Upgrade Watcher", // on debian this happens probably need to fix.
172173
"falling back to IMDSv1: operation error ec2imds: getToken", // okay for the cloud metadata to not work
173174
})

testing/integration/upgrade_rollback_test.go

+1-1
Original file line numberDiff line numberDiff line change
@@ -135,7 +135,7 @@ inputs:
135135
state, err := client.State(ctx)
136136
require.NoError(t, err)
137137

138-
require.NotNil(t, state.UpgradeDetails)
138+
require.NotNil(t, state.UpgradeDetails, "upgrade details in the state cannot be nil")
139139
require.Equal(t, details.StateRollback, details.State(state.UpgradeDetails.State))
140140
}
141141

0 commit comments

Comments
 (0)