Skip to content

Commit e9a0c2a

Browse files
committed
Fix log level value reported by Agent to Fleet
1 parent a4953a9 commit e9a0c2a

File tree

2 files changed

+134
-15
lines changed

2 files changed

+134
-15
lines changed

internal/pkg/agent/application/gateway/fleet/fleet_gateway.go

+10
Original file line numberDiff line numberDiff line change
@@ -329,6 +329,16 @@ func (f *FleetGateway) execute(ctx context.Context) (*fleetapi.CheckinResponse,
329329
// convert components into checkin components structure
330330
components := f.convertToCheckinComponents(state.Components)
331331

332+
if ecsMeta.Elastic == nil || ecsMeta.Elastic.Agent == nil {
333+
// ecsMeta struct is incomplete: log a warning
334+
f.log.Warnw("Agent ECSMetadata struct is missing/incomplete", "elastic_ecs_metadata", ecsMeta.Elastic)
335+
} else {
336+
// FIXME
337+
f.log.Errorf("correcting agent loglevel from %s to %s using coordinator state", ecsMeta.Elastic.Agent.LogLevel, state.LogLevel.String())
338+
// Fix loglevel with the current log level used by coordinator
339+
ecsMeta.Elastic.Agent.LogLevel = state.LogLevel.String()
340+
}
341+
332342
// checkin
333343
cmd := fleetapi.NewCheckinCmd(f.agentInfo, f.client)
334344
req := &fleetapi.CheckinRequest{

testing/integration/log_level_test.go

+124-15
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,9 @@ package integration
99
import (
1010
"bytes"
1111
"context"
12+
"encoding/json"
1213
"fmt"
14+
"io"
1315
"net/http"
1416
"testing"
1517
"text/template"
@@ -70,6 +72,10 @@ func createTestSetLogLevelFunction(ctx context.Context, t *testing.T, f *atestin
7072
})
7173

7274
// the actual test function is the one below
75+
return testLogLevelSetViaFleet(f, fleetServerURL, enrollmentTokenResp, t, info, policyResp)
76+
}
77+
78+
func testLogLevelSetViaFleet(f *atesting.Fixture, fleetServerURL string, enrollmentTokenResp kibana.CreateEnrollmentAPIKeyResponse, t *testing.T, info *define.Info, policyResp kibana.PolicyResponse) func(ctx context.Context) error {
7379
return func(ctx context.Context) error {
7480

7581
out, err := f.Exec(ctx, []string{"enroll", "--url", fleetServerURL, "--enrollment-token", enrollmentTokenResp.APIKey})
@@ -80,10 +86,12 @@ func createTestSetLogLevelFunction(ctx context.Context, t *testing.T, f *atestin
8086

8187
t.Cleanup(unenrollAgentFunction(ctx, t, info.KibanaClient, state.Info.ID))
8288

83-
actualLogLevel, err := getLogLevelForAgent(ctx, t, f)
89+
// Step 0: get the initial log level reported by agent
90+
initialLogLevel, err := getLogLevelForAgent(ctx, t, f)
8491
require.NoError(t, err, "error retrieving agent log level")
85-
assert.Equal(t, logger.DefaultLogLevel, actualLogLevel, "unexpected default log level at agent startup")
92+
assert.Equal(t, logger.DefaultLogLevel, initialLogLevel, "unexpected default log level at agent startup")
8693

94+
// Step 1: set a different log level in Fleet policy
8795
policyLogLevel := logp.ErrorLevel
8896

8997
// make sure we are changing something
@@ -93,6 +101,11 @@ func createTestSetLogLevelFunction(ctx context.Context, t *testing.T, f *atestin
93101
err = updatePolicyLogLevel(ctx, info.KibanaClient, policyResp.AgentPolicy, policyLogLevel.String())
94102
require.NoError(t, err, "error updating policy log level")
95103

104+
// get the agent ID
105+
agentID, err := getAgentID(ctx, t, f)
106+
require.NoError(t, err, "error getting the agent ID")
107+
108+
// assert `elastic-agent inspect` eventually reports the new log level
96109
assert.Eventuallyf(t, func() bool {
97110
agentLogLevel, err := getLogLevelForAgent(ctx, t, f)
98111
if err != nil {
@@ -103,9 +116,19 @@ func createTestSetLogLevelFunction(ctx context.Context, t *testing.T, f *atestin
103116
return agentLogLevel == policyLogLevel.String()
104117
}, 2*time.Minute, time.Second, "agent never received expected log level %q", policyLogLevel)
105118

119+
// assert Fleet eventually receives the new log level from agent through checkin
120+
assert.Eventuallyf(t, func() bool {
121+
fleetMetadataLogLevel, err := getLogLevelFromFleetMetadata(ctx, info.KibanaClient, agentID)
122+
if err != nil {
123+
t.Logf("error getting log level for agent %q from Fleet metadata: %v", agentID, err)
124+
return false
125+
}
126+
t.Logf("Fleet metadata log level for agent %q: %q policy log level: %q", agentID, fleetMetadataLogLevel, policyLogLevel)
127+
return fleetMetadataLogLevel == policyLogLevel.String()
128+
}, 2*time.Minute, time.Second, "agent never communicated policy log level %q to Fleet", policyLogLevel)
129+
130+
// Step 2: set a different log level for the specific agent using Settings action
106131
// set agent log level and verify that it takes precedence over the policy one
107-
agentID, err := getAgentID(ctx, t, f)
108-
require.NoError(t, err, "error getting the agent ID")
109132
agentLogLevel := logp.DebugLevel.String()
110133
err = updateAgentLogLevel(ctx, info.KibanaClient, agentID, agentLogLevel)
111134
require.NoError(t, err, "error updating agent log level")
@@ -118,9 +141,71 @@ func createTestSetLogLevelFunction(ctx context.Context, t *testing.T, f *atestin
118141
}
119142
t.Logf("Agent log level: %q, expected level: %q", actualAgentLogLevel, agentLogLevel)
120143
return actualAgentLogLevel == agentLogLevel
121-
}, 2*time.Minute, time.Second, "agent never received expected log level %q", agentLogLevel)
144+
}, 2*time.Minute, time.Second, "agent never received agent-specific log level %q", agentLogLevel)
145+
146+
// assert Fleet eventually receives the new log level from agent through checkin
147+
assert.Eventuallyf(t, func() bool {
148+
fleetMetadataLogLevel, err := getLogLevelFromFleetMetadata(ctx, info.KibanaClient, agentID)
149+
if err != nil {
150+
t.Logf("error getting log level for agent %q from Fleet metadata: %v", agentID, err)
151+
return false
152+
}
153+
t.Logf("Fleet metadata log level for agent %q: %q agent log level: %q", agentID, fleetMetadataLogLevel, policyLogLevel)
154+
return fleetMetadataLogLevel == agentLogLevel
155+
}, 2*time.Minute, time.Second, "agent never communicated agent-specific log level %q to Fleet", policyLogLevel)
156+
157+
// Step 3: Clear the agent-specific log level override, verify that we revert to policy log level
158+
err = updateAgentLogLevel(ctx, info.KibanaClient, agentID, "")
159+
require.NoError(t, err, "error clearing agent log level")
160+
161+
// assert `elastic-agent inspect` eventually reports the new log level
162+
assert.Eventuallyf(t, func() bool {
163+
actualAgentLogLevel, err := getLogLevelForAgent(ctx, t, f)
164+
if err != nil {
165+
t.Logf("error getting log level from agent: %v", err)
166+
return false
167+
}
168+
t.Logf("Agent log level: %q policy log level: %q", actualAgentLogLevel, policyLogLevel)
169+
return actualAgentLogLevel == policyLogLevel.String()
170+
}, 2*time.Minute, time.Second, "agent never reverted to policy log level %q", policyLogLevel)
171+
172+
// assert Fleet eventually receives the new log level from agent through checkin
173+
assert.Eventuallyf(t, func() bool {
174+
fleetMetadataLogLevel, err := getLogLevelFromFleetMetadata(ctx, info.KibanaClient, agentID)
175+
if err != nil {
176+
t.Logf("error getting log level for agent %q from Fleet metadata: %v", agentID, err)
177+
return false
178+
}
179+
t.Logf("Fleet metadata log level for agent %q: %q policy log level: %q", agentID, fleetMetadataLogLevel, policyLogLevel)
180+
return fleetMetadataLogLevel == policyLogLevel.String()
181+
}, 2*time.Minute, time.Second, "agent never communicated reverting to policy log level %q to Fleet", policyLogLevel)
182+
183+
// Step 4: Clear the log level in policy and verify that agent reverts to the initial log level
184+
err = updatePolicyLogLevel(ctx, info.KibanaClient, policyResp.AgentPolicy, "")
185+
require.NoError(t, err, "error clearing policy log level")
186+
187+
// assert `elastic-agent inspect` eventually reports the initial log level
188+
assert.Eventuallyf(t, func() bool {
189+
actualAgentLogLevel, err := getLogLevelForAgent(ctx, t, f)
190+
if err != nil {
191+
t.Logf("error getting log level from agent: %v", err)
192+
return false
193+
}
194+
t.Logf("Agent log level: %q initial log level: %q", actualAgentLogLevel, initialLogLevel)
195+
return actualAgentLogLevel == policyLogLevel.String()
196+
}, 2*time.Minute, time.Second, "agent never reverted to initial log level %q", policyLogLevel)
197+
198+
// assert Fleet eventually receives the new log level from agent through checkin
199+
assert.Eventuallyf(t, func() bool {
200+
fleetMetadataLogLevel, err := getLogLevelFromFleetMetadata(ctx, info.KibanaClient, agentID)
201+
if err != nil {
202+
t.Logf("error getting log level for agent %q from Fleet metadata: %v", agentID, err)
203+
return false
204+
}
205+
t.Logf("Fleet metadata log level for agent %q: %q initial log level: %q", agentID, fleetMetadataLogLevel, policyLogLevel)
206+
return fleetMetadataLogLevel == policyLogLevel.String()
207+
}, 2*time.Minute, time.Second, "agent never communicated initial log level %q to Fleet", policyLogLevel)
122208

123-
// TODO: We should clear the agent level log setting and check that agent reapplies the policy log level but it's not supported by fleet yet
124209
return nil
125210
}
126211
}
@@ -151,7 +236,7 @@ func updateAgentLogLevel(ctx context.Context, kibanaClient *kibana.Client, agent
151236
}
152237

153238
func updatePolicyLogLevel(ctx context.Context, kibanaClient *kibana.Client, policy kibana.AgentPolicy, newPolicyLogLevel string) error {
154-
// The request we would need is the one below, but at the time of writing there is no way to set overrides with fleet api 8.8.0, need to update
239+
// The request we would need is the one below, but at the time of writing there is no way to set overrides with fleet api definition in elastic-agent-libs, need to update
155240
// info.KibanaClient.UpdatePolicy(ctx, policyResp.ID, kibana.AgentPolicyUpdateRequest{})
156241
// Let's do a generic HTTP request
157242

@@ -179,14 +264,6 @@ func updatePolicyLogLevel(ctx context.Context, kibanaClient *kibana.Client, poli
179264

180265
_, err = kibanaClient.SendWithContext(ctx, http.MethodPut, "/api/fleet/agent_policies/"+policy.ID, nil, nil, buf)
181266

182-
//updateLogLevelReq, err := http.NewRequestWithContext(ctx, http.MethodPut, kibanaClient.URL+"/api/fleet/agent_policies/"+policy.ID, buf)
183-
//if err != nil {
184-
// return fmt.Errorf("error creating policy log level update request: %w", err)
185-
//}
186-
//_, err = kibanaClient.HTTP.Do(updateLogLevelReq)
187-
//if err != nil {
188-
// return fmt.Errorf("error executing policy log level update: %w", err)
189-
//}
190267
if err != nil {
191268
return fmt.Errorf("error executing fleet request: %w", err)
192269
}
@@ -229,6 +306,38 @@ func getLogLevelForAgent(ctx context.Context, t *testing.T, f *atesting.Fixture)
229306
return "", fmt.Errorf("loglevel from inspect output is not a string: %T", actualLogLevel)
230307
}
231308

309+
func getLogLevelFromFleetMetadata(ctx context.Context, kibanaClient *kibana.Client, agentID string) (string, error) {
310+
// The request we would need is kibanaClient.GetAgent(), but at the time of writing there is no way to get loglevel with fleet api definition in elastic-agent-libs, need to update
311+
// kibana.AgentCommon struct to pick up log level from `local_metadata`
312+
// Let's do a generic HTTP request
313+
314+
response, err := kibanaClient.SendWithContext(ctx, http.MethodGet, "/api/fleet/agents/"+agentID, nil, nil, nil)
315+
if err != nil {
316+
return "", fmt.Errorf("getting agent from Fleet: %w", err)
317+
}
318+
defer response.Body.Close()
319+
320+
responseBodyBytes, err := io.ReadAll(response.Body)
321+
if err != nil {
322+
return "", fmt.Errorf("reading response body from Fleet: %w", err)
323+
}
324+
325+
rawJson := map[string]any{}
326+
err = json.Unmarshal(responseBodyBytes, &rawJson)
327+
if err != nil {
328+
return "", fmt.Errorf("unmarshalling Fleet response: %w", err)
329+
}
330+
rawLogLevel, err := utils.GetNestedMap(rawJson, "item", "local_metadata", "elastic", "agent", "log_level")
331+
if err != nil {
332+
return "", fmt.Errorf("looking for item/local_metadata/elastic/agent/log_level key in Fleet response: %w", err)
333+
}
334+
335+
if logLevel, ok := rawLogLevel.(string); ok {
336+
return logLevel, nil
337+
}
338+
return "", fmt.Errorf("loglevel from Fleet output is not a string: %T", rawLogLevel)
339+
}
340+
232341
func agentInspect(ctx context.Context, t *testing.T, f *atesting.Fixture) (map[string]any, error) {
233342
inspectOutBytes, err := f.Exec(ctx, []string{"inspect"})
234343
t.Logf("inspect output:\n%s\n", string(inspectOutBytes))

0 commit comments

Comments
 (0)