Skip to content

Commit b832c15

Browse files
cmacknz and rdner authored
Relax leak test condition from Healthy to not Failed. (#5301)
* Relax leak test condition from Healthy to not Failed. * Invert condition Co-authored-by: Denis <denis@rdner.de> * Explicitly allow only healthy or degraded. Avoids the check succeeding immediately once starting is reported. * Actually fix condition. * Fix the health check while the tests are running. --------- Co-authored-by: Denis <denis@rdner.de>
1 parent ef69b58 commit b832c15

File tree

1 file changed

+34
-2
lines changed

1 file changed

+34
-2
lines changed

testing/integration/agent_long_running_leak_test.go

+34-2
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ package integration
99
import (
1010
"context"
1111
"encoding/json"
12+
"fmt"
1213
"io"
1314
"net"
1415
"net/http"
@@ -29,7 +30,9 @@ import (
2930
"github.com/elastic/elastic-agent-libs/api/npipe"
3031
"github.com/elastic/elastic-agent-libs/kibana"
3132
"github.com/elastic/elastic-agent/internal/pkg/agent/application/paths"
33+
"github.com/elastic/elastic-agent/pkg/control/v2/client"
3234
"github.com/elastic/elastic-agent/pkg/control/v2/cproto"
35+
"github.com/elastic/elastic-agent/pkg/core/process"
3336
atesting "github.com/elastic/elastic-agent/pkg/testing"
3437
"github.com/elastic/elastic-agent/pkg/testing/define"
3538
"github.com/elastic/elastic-agent/pkg/testing/tools"
@@ -160,7 +163,10 @@ func (runner *ExtendedRunner) TestHandleLeak() {
160163
case <-timer.C:
161164
done = true
162165
case <-ticker.C:
163-
err := runner.agentFixture.IsHealthy(ctx)
166+
// https://github.com/elastic/elastic-agent/issues/5300
167+
// Ideally we would require healthy but we currently report as DEGRADED due to unexpected permissions errors
168+
// accessing some process metrics. Ensure the leak tests still run as long as this is the case.
169+
err := runner.IsHealthyOrDegraded(ctx)
164170
require.NoError(runner.T(), err)
165171
// iterate through our watchers, update them
166172
for _, mon := range runner.resourceWatchers {
@@ -205,6 +211,8 @@ func (runner *ExtendedRunner) TestHandleLeak() {
205211

206212
// CheckHealthAtStartup ensures all the beats and agent are healthy and working before we continue
207213
func (runner *ExtendedRunner) CheckHealthAtStartup(ctx context.Context) {
214+
runner.T().Helper()
215+
208216
// because we need to separately fetch the PIDs, wait until everything is healthy before we look for running beats
209217
compDebugName := ""
210218
require.Eventually(runner.T(), func() bool {
@@ -233,7 +241,11 @@ func (runner *ExtendedRunner) CheckHealthAtStartup(ctx context.Context) {
233241
}
234242
}
235243
runner.T().Logf("component state: %s", comp.Message)
236-
if comp.State != int(cproto.State_HEALTHY) {
244+
245+
// https://github.com/elastic/elastic-agent/issues/5300
246+
// Ideally we would require healthy but we currently report as DEGRADED due to unexpected permissions errors
247+
// accessing some process metrics. Ensure the leak tests still run as long as this is the case.
248+
if !isHealthyOrDegraded(comp.State) {
237249
compDebugName = comp.Name
238250
allHealthy = false
239251
}
@@ -242,6 +254,26 @@ func (runner *ExtendedRunner) CheckHealthAtStartup(ctx context.Context) {
242254
}, runner.healthCheckTime, runner.healthCheckRefreshTime, "install never became healthy: components did not return a healthy state: %s", compDebugName)
243255
}
244256

257+
// IsHealthyOrDegraded runs the agent fixture's ExecStatus with the given
// context and command options and checks the reported agent state.
//
// It returns a wrapped error when ExecStatus itself fails, and a descriptive
// error when the reported state is neither cproto.State_HEALTHY nor
// cproto.State_DEGRADED. It returns nil otherwise.
//
// The method marks itself as a test helper via runner.T().Helper().
func (runner *ExtendedRunner) IsHealthyOrDegraded(ctx context.Context, opts ...process.CmdOption) error {
258+
runner.T().Helper()
259+
260+
status, err := runner.agentFixture.ExecStatus(ctx, opts...)
261+
if err != nil {
262+
return fmt.Errorf("agent status returned an error: %w", err)
263+
}
264+
265+
if !isHealthyOrDegraded(status.State) {
266+
return fmt.Errorf("agent isn't healthy or degraded, current status: %s",
267+
client.State(status.State))
268+
}
269+
270+
return nil
271+
}
272+
273+
// isHealthyOrDegraded reports whether state equals cproto.State_HEALTHY or
// cproto.State_DEGRADED (compared as int). Every other state yields false.
func isHealthyOrDegraded(state int) bool {
274+
return state == int(cproto.State_HEALTHY) || state == int(cproto.State_DEGRADED)
275+
}
276+
245277
/*
246278
=============================================================================
247279
Watchers for checking resource usage

0 commit comments

Comments
 (0)