Skip to content

Commit b19e950

Browse files
authored
Fix TestInstallAndCLIUninstallWithEndpointSecurity integration test flakiness (#3410)
* Return error if more than one Agent with same hostname is found in Fleet
* Fix make call
* Un-enroll Agent on test cleanup
* Use policy ID and hostname to uniquely find Agent
* Passing policy ID in more calls
* Fix implicit memory aliasing
* [Testing] Increasing timeout for status check to 10m
* Get Agent by ID
* Revert "Get Agent by ID"

This reverts commit 1c7da6e.
1 parent 23fe2ef commit b19e950

File tree

6 files changed

+62
-45
lines changed

6 files changed

+62
-45
lines changed

pkg/testing/tools/agents.go

+27-16
Original file line numberDiff line numberDiff line change
@@ -19,57 +19,68 @@ import (
1919
"github.com/elastic/elastic-agent/pkg/control/v2/cproto"
2020
)
2121

22-
// GetAgentByHostnameFromList get an agent by the local_metadata.host.name property, reading from the agents list
23-
func GetAgentByHostnameFromList(client *kibana.Client, hostname string) (*kibana.AgentExisting, error) {
22+
// GetAgentByPolicyIDAndHostnameFromList gets an agent by its policy ID and its local_metadata.host.name property, reading from the agents list. It returns an error if no agent, or more than one agent, matches.
23+
func GetAgentByPolicyIDAndHostnameFromList(client *kibana.Client, policyID, hostname string) (*kibana.AgentExisting, error) {
2424
listAgentsResp, err := client.ListAgents(context.Background(), kibana.ListAgentsRequest{})
2525
if err != nil {
2626
return nil, err
2727
}
2828

29-
for _, item := range listAgentsResp.Items {
29+
hostnameAgents := make([]*kibana.AgentExisting, 0)
30+
for i, item := range listAgentsResp.Items {
3031
agentHostname := item.LocalMetadata.Host.Hostname
31-
if agentHostname == hostname {
32-
return &item, nil
32+
agentPolicyID := item.PolicyID
33+
34+
if agentHostname == hostname && agentPolicyID == policyID {
35+
hostnameAgents = append(hostnameAgents, &listAgentsResp.Items[i])
3336
}
3437
}
3538

36-
return nil, fmt.Errorf("unable to find agent with hostname [%s]", hostname)
39+
if len(hostnameAgents) == 0 {
40+
return nil, fmt.Errorf("unable to find agent with hostname [%s]", hostname)
41+
}
42+
43+
if len(hostnameAgents) > 1 {
44+
return nil, fmt.Errorf("found %d agents with hostname [%s]; expected to find only one", len(hostnameAgents), hostname)
45+
}
46+
47+
return hostnameAgents[0], nil
3748
}
3849

39-
func GetAgentStatus(client *kibana.Client) (string, error) {
50+
func GetAgentStatus(client *kibana.Client, policyID string) (string, error) {
4051
hostname, err := os.Hostname()
4152
if err != nil {
4253
return "", err
4354
}
4455

45-
agent, err := GetAgentByHostnameFromList(client, hostname)
56+
agent, err := GetAgentByPolicyIDAndHostnameFromList(client, policyID, hostname)
4657
if err != nil {
4758
return "", err
4859
}
4960

5061
return agent.Status, nil
5162
}
5263

53-
func GetAgentVersion(client *kibana.Client) (string, error) {
64+
func GetAgentVersion(client *kibana.Client, policyID string) (string, error) {
5465
hostname, err := os.Hostname()
5566
if err != nil {
5667
return "", err
5768
}
5869

59-
agent, err := GetAgentByHostnameFromList(client, hostname)
70+
agent, err := GetAgentByPolicyIDAndHostnameFromList(client, policyID, hostname)
6071
if err != nil {
6172
return "", err
6273
}
6374

6475
return agent.Agent.Version, err
6576
}
6677

67-
func UnEnrollAgent(client *kibana.Client) error {
78+
func UnEnrollAgent(client *kibana.Client, policyID string) error {
6879
hostname, err := os.Hostname()
6980
if err != nil {
7081
return err
7182
}
72-
agentID, err := GetAgentIDByHostname(client, hostname)
83+
agentID, err := GetAgentIDByHostname(client, policyID, hostname)
7384
if err != nil {
7485
return err
7586
}
@@ -86,20 +97,20 @@ func UnEnrollAgent(client *kibana.Client) error {
8697
return nil
8798
}
8899

89-
func GetAgentIDByHostname(client *kibana.Client, hostname string) (string, error) {
90-
agent, err := GetAgentByHostnameFromList(client, hostname)
100+
func GetAgentIDByHostname(client *kibana.Client, policyID, hostname string) (string, error) {
101+
agent, err := GetAgentByPolicyIDAndHostnameFromList(client, policyID, hostname)
91102
if err != nil {
92103
return "", err
93104
}
94105
return agent.Agent.ID, nil
95106
}
96107

97-
func UpgradeAgent(client *kibana.Client, version string) error {
108+
func UpgradeAgent(client *kibana.Client, policyID, version string) error {
98109
hostname, err := os.Hostname()
99110
if err != nil {
100111
return err
101112
}
102-
agentID, err := GetAgentIDByHostname(client, hostname)
113+
agentID, err := GetAgentIDByHostname(client, policyID, hostname)
103114
if err != nil {
104115
return err
105116
}

pkg/testing/tools/tools.go

+4-4
Original file line numberDiff line numberDiff line change
@@ -19,9 +19,9 @@ import (
1919
// WaitForAgentStatus returns a niladic function that returns true if the agent
2020
// has reached expectedStatus; false otherwise. The returned function is intended
2121
// for use with assert.Eventually or require.Eventually.
22-
func WaitForAgentStatus(t *testing.T, client *kibana.Client, expectedStatus string) func() bool {
22+
func WaitForAgentStatus(t *testing.T, client *kibana.Client, policyID string, expectedStatus string) func() bool {
2323
return func() bool {
24-
currentStatus, err := GetAgentStatus(client)
24+
currentStatus, err := GetAgentStatus(client, policyID)
2525
if err != nil {
2626
t.Errorf("unable to determine agent status: %s", err.Error())
2727
return false
@@ -128,14 +128,14 @@ func InstallAgentForPolicy(t *testing.T, ctx context.Context,
128128
}
129129
t.Logf(">>> Ran Enroll. Output: %s", output)
130130

131-
timeout := 5 * time.Minute
131+
timeout := 10 * time.Minute
132132
if deadline, ok := ctx.Deadline(); ok {
133133
timeout = time.Until(deadline)
134134
}
135135
// Wait for Agent to be healthy
136136
require.Eventually(
137137
t,
138-
WaitForAgentStatus(t, kibClient, "online"),
138+
WaitForAgentStatus(t, kibClient, policyID, "online"),
139139
timeout,
140140
10*time.Second,
141141
"Elastic Agent status is not online",

testing/integration/endpoint_security_test.go

+7-1
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ import (
1919
"text/template"
2020
"time"
2121

22+
"github.com/stretchr/testify/assert"
2223
"github.com/stretchr/testify/require"
2324

2425
"github.com/google/uuid"
@@ -135,6 +136,11 @@ func testInstallAndCLIUninstallWithEndpointSecurity(t *testing.T, info *define.I
135136
installOpts, fixture, info.KibanaClient, createPolicyReq)
136137
require.NoError(t, err, "failed to install agent with policy")
137138

139+
t.Cleanup(func() {
140+
t.Log("Un-enrolling Elastic Agent...")
141+
assert.NoError(t, tools.UnEnrollAgent(info.KibanaClient, policy.ID))
142+
})
143+
138144
t.Log("Installing Elastic Defend")
139145
pkgPolicyResp, err := installElasticDefendPackage(t, info, policy.ID)
140146
require.NoErrorf(t, err, "Policy Response was: %v", pkgPolicyResp)
@@ -238,7 +244,7 @@ func testInstallAndUnenrollWithEndpointSecurity(t *testing.T, info *define.Info,
238244
hostname, err := os.Hostname()
239245
require.NoError(t, err)
240246

241-
agentID, err := tools.GetAgentIDByHostname(info.KibanaClient, hostname)
247+
agentID, err := tools.GetAgentIDByHostname(info.KibanaClient, policy.ID, hostname)
242248
require.NoError(t, err)
243249

244250
_, err = info.KibanaClient.UnEnrollAgent(ctx, kibana.UnEnrollAgentRequest{ID: agentID})

testing/integration/fqdn_test.go

+18-18
Original file line numberDiff line numberDiff line change
@@ -56,19 +56,6 @@ func TestFQDN(t *testing.T) {
5656
origHostname, err := getHostname(context.Background())
5757
require.NoError(t, err)
5858

59-
t.Cleanup(func() {
60-
t.Log("Un-enrolling Elastic Agent...")
61-
assert.NoError(t, tools.UnEnrollAgent(info.KibanaClient))
62-
63-
t.Log("Restoring hostname...")
64-
err := setHostname(context.Background(), origHostname, t.Log)
65-
require.NoError(t, err)
66-
67-
t.Log("Restoring original /etc/hosts...")
68-
err = setEtcHosts(origEtcHosts)
69-
require.NoError(t, err)
70-
})
71-
7259
ctx := context.Background()
7360
kibClient := info.KibanaClient
7461

@@ -101,8 +88,21 @@ func TestFQDN(t *testing.T) {
10188
policy, err := tools.InstallAgentWithPolicy(t, ctx, installOpts, agentFixture, kibClient, createPolicyReq)
10289
require.NoError(t, err)
10390

91+
t.Cleanup(func() {
92+
t.Log("Un-enrolling Elastic Agent...")
93+
assert.NoError(t, tools.UnEnrollAgent(info.KibanaClient, policy.ID))
94+
95+
t.Log("Restoring hostname...")
96+
err := setHostname(context.Background(), origHostname, t.Log)
97+
require.NoError(t, err)
98+
99+
t.Log("Restoring original /etc/hosts...")
100+
err = setEtcHosts(origEtcHosts)
101+
require.NoError(t, err)
102+
})
103+
104104
t.Log("Verify that agent name is short hostname")
105-
agent := verifyAgentName(t, shortName, info.KibanaClient)
105+
agent := verifyAgentName(t, policy.ID, shortName, info.KibanaClient)
106106

107107
t.Log("Verify that hostname in `logs-*` and `metrics-*` is short hostname")
108108
verifyHostNameInIndices(t, "logs-*", shortName, info.Namespace, info.ESClient)
@@ -133,7 +133,7 @@ func TestFQDN(t *testing.T) {
133133
)
134134

135135
t.Log("Verify that agent name is FQDN")
136-
verifyAgentName(t, fqdn, info.KibanaClient)
136+
verifyAgentName(t, policy.ID, fqdn, info.KibanaClient)
137137

138138
t.Log("Verify that hostname in `logs-*` and `metrics-*` is FQDN")
139139
verifyHostNameInIndices(t, "logs-*", fqdn, info.Namespace, info.ESClient)
@@ -164,7 +164,7 @@ func TestFQDN(t *testing.T) {
164164
)
165165

166166
t.Log("Verify that agent name is short hostname again")
167-
verifyAgentName(t, shortName, info.KibanaClient)
167+
verifyAgentName(t, policy.ID, shortName, info.KibanaClient)
168168

169169
// TODO: Re-enable assertion once https://github.com/elastic/elastic-agent/issues/3078 is
170170
// investigated for root cause and resolved.
@@ -173,7 +173,7 @@ func TestFQDN(t *testing.T) {
173173
// verifyHostNameInIndices(t, "metrics-*", shortName, info.ESClient)
174174
}
175175

176-
func verifyAgentName(t *testing.T, hostname string, kibClient *kibana.Client) *kibana.AgentExisting {
176+
func verifyAgentName(t *testing.T, policyID, hostname string, kibClient *kibana.Client) *kibana.AgentExisting {
177177
t.Helper()
178178

179179
var agent *kibana.AgentExisting
@@ -182,7 +182,7 @@ func verifyAgentName(t *testing.T, hostname string, kibClient *kibana.Client) *k
182182
require.Eventually(
183183
t,
184184
func() bool {
185-
agent, err = tools.GetAgentByHostnameFromList(kibClient, hostname)
185+
agent, err = tools.GetAgentByPolicyIDAndHostnameFromList(kibClient, policyID, hostname)
186186
return err == nil && agent != nil
187187
},
188188
5*time.Minute,

testing/integration/monitoring_logs_test.go

+1-1
Original file line numberDiff line numberDiff line change
@@ -114,7 +114,7 @@ func TestMonitoringLogsShipped(t *testing.T) {
114114
t.Fatalf("could not get hostname to filter Agent: %s", err)
115115
}
116116

117-
agentID, err := tools.GetAgentIDByHostname(info.KibanaClient, hostname)
117+
agentID, err := tools.GetAgentIDByHostname(info.KibanaClient, policy.ID, hostname)
118118
require.NoError(t, err, "could not get Agent ID by hostname")
119119
t.Logf("Agent ID: %q", agentID)
120120

testing/integration/upgrade_test.go

+5-5
Original file line numberDiff line numberDiff line change
@@ -158,25 +158,25 @@ func testUpgradeFleetManagedElasticAgent(t *testing.T, ctx context.Context, info
158158
require.NoError(t, err)
159159
t.Cleanup(func() {
160160
t.Log("Un-enrolling Elastic Agent...")
161-
assert.NoError(t, tools.UnEnrollAgent(info.KibanaClient))
161+
assert.NoError(t, tools.UnEnrollAgent(info.KibanaClient, policy.ID))
162162
})
163163

164164
t.Log(`Waiting for enrolled Agent status to be "online"...`)
165-
require.Eventually(t, tools.WaitForAgentStatus(t, kibClient, "online"), 2*time.Minute, 10*time.Second, "Agent status is not online")
165+
require.Eventually(t, tools.WaitForAgentStatus(t, kibClient, policy.ID, "online"), 2*time.Minute, 10*time.Second, "Agent status is not online")
166166

167167
t.Logf("Upgrade Elastic Agent to version %s...", toVersion)
168-
err = tools.UpgradeAgent(kibClient, toVersion)
168+
err = tools.UpgradeAgent(kibClient, policy.ID, toVersion)
169169
require.NoError(t, err)
170170

171171
t.Log(`Waiting for enrolled Agent status to be "online"...`)
172-
require.Eventually(t, tools.WaitForAgentStatus(t, kibClient, "online"), 10*time.Minute, 15*time.Second, "Agent status is not online")
172+
require.Eventually(t, tools.WaitForAgentStatus(t, kibClient, policy.ID, "online"), 10*time.Minute, 15*time.Second, "Agent status is not online")
173173

174174
// We remove the `-SNAPSHOT` suffix because, post-upgrade, the version reported
175175
// by the Agent will not contain this suffix, even if a `-SNAPSHOT`-suffixed
176176
// version was used as the target version for the upgrade.
177177
require.Eventually(t, func() bool {
178178
t.Log("Getting Agent version...")
179-
newVersion, err := tools.GetAgentVersion(kibClient)
179+
newVersion, err := tools.GetAgentVersion(kibClient, policy.ID)
180180
if err != nil {
181181
t.Logf("error getting agent version: %v", err)
182182
return false

0 commit comments

Comments
 (0)