@@ -9,6 +9,7 @@ package integration
import (
	"context"
	"encoding/json"
+	"fmt"
	"io"
	"net"
	"net/http"
@@ -29,7 +30,9 @@ import (
	"github.com/elastic/elastic-agent-libs/api/npipe"
	"github.com/elastic/elastic-agent-libs/kibana"
	"github.com/elastic/elastic-agent/internal/pkg/agent/application/paths"
+	"github.com/elastic/elastic-agent/pkg/control/v2/client"
	"github.com/elastic/elastic-agent/pkg/control/v2/cproto"
+	"github.com/elastic/elastic-agent/pkg/core/process"
	atesting "github.com/elastic/elastic-agent/pkg/testing"
	"github.com/elastic/elastic-agent/pkg/testing/define"
	"github.com/elastic/elastic-agent/pkg/testing/tools"
@@ -160,7 +163,10 @@ func (runner *ExtendedRunner) TestHandleLeak() {
		case <-timer.C:
			done = true
		case <-ticker.C:
-			err := runner.agentFixture.IsHealthy(ctx)
+			// https://github.com/elastic/elastic-agent/issues/5300
+			// Ideally we would require healthy, but we currently report as DEGRADED due to unexpected permissions errors
+			// accessing some process metrics. Ensure the leak tests still run as long as this is the case.
+			err := runner.IsHealthyOrDegraded(ctx)
			require.NoError(runner.T(), err)
			// iterate through our watchers, update them
			for _, mon := range runner.resourceWatchers {
@@ -205,6 +211,8 @@ func (runner *ExtendedRunner) TestHandleLeak() {

// CheckHealthAtStartup ensures all the beats and agent are healthy and working before we continue
func (runner *ExtendedRunner) CheckHealthAtStartup(ctx context.Context) {
+	runner.T().Helper()
+
	// because we need to separately fetch the PIDs, wait until everything is healthy before we look for running beats
	compDebugName := ""
	require.Eventually(runner.T(), func() bool {
@@ -233,7 +241,11 @@ func (runner *ExtendedRunner) CheckHealthAtStartup(ctx context.Context) {
				}
			}
			runner.T().Logf("component state: %s", comp.Message)
-			if comp.State != int(cproto.State_HEALTHY) {
+
+			// https://github.com/elastic/elastic-agent/issues/5300
+			// Ideally we would require healthy, but we currently report as DEGRADED due to unexpected permissions errors
+			// accessing some process metrics. Ensure the leak tests still run as long as this is the case.
+			if !isHealthyOrDegraded(comp.State) {
				compDebugName = comp.Name
				allHealthy = false
			}
@@ -242,6 +254,26 @@ func (runner *ExtendedRunner) CheckHealthAtStartup(ctx context.Context) {
	}, runner.healthCheckTime, runner.healthCheckRefreshTime, "install never became healthy: components did not return a healthy state: %s", compDebugName)
}

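+// IsHealthyOrDegraded returns an error if the agent status is neither HEALTHY nor DEGRADED.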
+func (runner *ExtendedRunner) IsHealthyOrDegraded(ctx context.Context, opts ...process.CmdOption) error {
+	runner.T().Helper()
+
+	status, err := runner.agentFixture.ExecStatus(ctx, opts...)
+	if err != nil {
+		return fmt.Errorf("agent status returned an error: %w", err)
+	}
+
+	if !isHealthyOrDegraded(status.State) {
+		return fmt.Errorf("agent isn't healthy or degraded, current status: %s",
+			client.State(status.State))
+	}
+
+	return nil
+}
+
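+// isHealthyOrDegraded reports whether the given component state is HEALTHY or DEGRADED.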
+func isHealthyOrDegraded(state int) bool {
+	return state == int(cproto.State_HEALTHY) || state == int(cproto.State_DEGRADED)
+}
+
/*
=============================================================================
Watchers for checking resource usage