Skip to content

Commit 5470f6b

Browse files
Alter checkin API to remove attributes set by audit/unenroll API (#3827)
Alter the checkin wrapper's flush method to include bulk update calls to remove the attributes for agents that have a non-empty audit_unenrolled_reason.
1 parent ef059bd commit 5470f6b

File tree

7 files changed

+220
-17
lines changed

7 files changed

+220
-17
lines changed
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
# Kind can be one of:
2+
# - breaking-change: a change to previously-documented behavior
3+
# - deprecation: functionality that is being removed in a later release
4+
# - bug-fix: fixes a problem in a previous version
5+
# - enhancement: extends functionality but does not break or fix existing behavior
6+
# - feature: new functionality
7+
# - known-issue: problems that we are aware of in a given version
8+
# - security: impacts on the security of a product or a user’s deployment.
9+
# - upgrade: important information for someone upgrading from a prior version
10+
# - other: does not fit into any of the other categories
11+
kind: enhancement
12+
13+
# Change summary; a 80ish characters long description of the change.
14+
summary: Checkin after audit/unenroll API
15+
16+
# Long description; in case the summary is not enough to describe the change
17+
# this field accommodate a description without length limits.
18+
# NOTE: This field will be rendered only for breaking-change and known-issue kinds at the moment.
19+
description: |
20+
Fix the behaviour of the checkin handler if the audit/unenroll endpoint was previously called.
21+
Calling the checkin handler will now result in the removal of the audit_unenroll and unenroll_time attributes from the agent doc.
22+
23+
# Affected component; a word indicating the component this changeset affects.
24+
component:
25+
26+
# PR URL; optional; the PR number that added the changeset.
27+
# If not present is automatically filled by the tooling finding the PR where this changelog fragment has been added.
28+
# NOTE: the tooling supports backports, so it's able to fill the original PR number instead of the backport PR number.
29+
# Please provide it if you are adding a fragment for a different PR.
30+
pr: https://github.com/elastic/fleet-server/pull/3827
31+
32+
# Issue URL; optional; the GitHub issue related to this changeset (either closes or is part of).
33+
# If not present is automatically filled by the tooling with the issue linked to the PR number.
34+
issue: https://github.com/elastic/elastic-agent/issues/484

internal/pkg/api/handleCheckin.go

+2-2
Original file line numberDiff line numberDiff line change
@@ -317,7 +317,7 @@ func (ct *CheckinT) ProcessRequest(zlog zerolog.Logger, w http.ResponseWriter, r
317317
defer longPoll.Stop()
318318

319319
// Initial update on checkin, and any user fields that might have changed
320-
err = ct.bc.CheckIn(agent.Id, string(req.Status), req.Message, rawMeta, rawComponents, seqno, ver, unhealthyReason)
320+
err = ct.bc.CheckIn(agent.Id, string(req.Status), req.Message, rawMeta, rawComponents, seqno, ver, unhealthyReason, agent.AuditUnenrolledReason != "")
321321
if err != nil {
322322
zlog.Error().Err(err).Str(logger.AgentID, agent.Id).Msg("checkin failed")
323323
}
@@ -371,7 +371,7 @@ func (ct *CheckinT) ProcessRequest(zlog zerolog.Logger, w http.ResponseWriter, r
371371
zlog.Trace().Msg("fire long poll")
372372
break LOOP
373373
case <-tick.C:
374-
err := ct.bc.CheckIn(agent.Id, string(req.Status), req.Message, nil, rawComponents, nil, ver, unhealthyReason)
374+
err := ct.bc.CheckIn(agent.Id, string(req.Status), req.Message, nil, rawComponents, nil, ver, unhealthyReason, false)
375375
if err != nil {
376376
zlog.Error().Err(err).Str(logger.AgentID, agent.Id).Msg("checkin failed")
377377
}

internal/pkg/checkin/bulk.go

+95-12
Original file line numberDiff line numberDiff line change
@@ -7,19 +7,27 @@ package checkin
77

88
import (
99
"context"
10+
_ "embed"
1011
"encoding/json"
12+
"errors"
13+
"fmt"
1114
"sync"
1215
"time"
1316

1417
"github.com/elastic/fleet-server/v7/internal/pkg/bulk"
1518
"github.com/elastic/fleet-server/v7/internal/pkg/dl"
1619
"github.com/elastic/fleet-server/v7/internal/pkg/sqn"
1720

21+
estypes "github.com/elastic/go-elasticsearch/v8/typedapi/types"
22+
"github.com/elastic/go-elasticsearch/v8/typedapi/types/enums/scriptlanguage"
1823
"github.com/rs/zerolog"
1924
)
2025

2126
const defaultFlushInterval = 10 * time.Second
2227

28+
//go:embed deleteAuditFieldsOnCheckin.painless
29+
var deleteAuditAttributesScript string
30+
2331
type optionsT struct {
2432
flushInterval time.Duration
2533
}
@@ -33,10 +41,11 @@ func WithFlushInterval(d time.Duration) Opt {
3341
}
3442

3543
type extraT struct {
36-
meta []byte
37-
seqNo sqn.SeqNo
38-
ver string
39-
components []byte
44+
meta []byte
45+
seqNo sqn.SeqNo
46+
ver string
47+
components []byte
48+
deleteAudit bool
4049
}
4150

4251
// Minimize the size of this structure.
@@ -102,17 +111,18 @@ func (bc *Bulk) timestamp() string {
102111
// The pending agents are sent to elasticsearch as a bulk update at each flush interval.
103112
// NOTE: If Checkin is called after Run has returned it will just add the entry to the pending map and not do any operations, this may occur when the fleet-server is shutting down.
104113
// WARNING: Bulk will take ownership of fields, so do not use after passing in.
105-
func (bc *Bulk) CheckIn(id string, status string, message string, meta []byte, components []byte, seqno sqn.SeqNo, newVer string, unhealthyReason *[]string) error {
114+
func (bc *Bulk) CheckIn(id string, status string, message string, meta []byte, components []byte, seqno sqn.SeqNo, newVer string, unhealthyReason *[]string, deleteAudit bool) error {
106115
// Separate out the extra data to minimize
107116
// the memory footprint of the 90% case of just
108117
// updating the timestamp.
109118
var extra *extraT
110-
if meta != nil || seqno.IsSet() || newVer != "" || components != nil {
119+
if meta != nil || seqno.IsSet() || newVer != "" || components != nil || deleteAudit {
111120
extra = &extraT{
112-
meta: meta,
113-
seqNo: seqno,
114-
ver: newVer,
115-
components: components,
121+
meta: meta,
122+
seqNo: seqno,
123+
ver: newVer,
124+
components: components,
125+
deleteAudit: deleteAudit,
116126
}
117127
}
118128

@@ -182,7 +192,6 @@ func (bc *Bulk) flush(ctx context.Context) error {
182192
// JSON body containing just the timestamp updates.
183193
var body []byte
184194
if pendingData.extra == nil {
185-
186195
var ok bool
187196
body, ok = simpleCache[pendingData]
188197
if !ok {
@@ -198,8 +207,27 @@ func (bc *Bulk) flush(ctx context.Context) error {
198207
}
199208
simpleCache[pendingData] = body
200209
}
210+
} else if pendingData.extra.deleteAudit {
211+
// Use a script instead of a partial doc to update if attributes need to be removed
212+
params, err := encodeParams(nowTimestamp, pendingData)
213+
if err != nil {
214+
return err
215+
}
216+
action := &estypes.UpdateAction{
217+
Script: &estypes.InlineScript{
218+
Lang: &scriptlanguage.Painless,
219+
Source: deleteAuditAttributesScript,
220+
Params: params,
221+
},
222+
}
223+
body, err = json.Marshal(&action)
224+
if err != nil {
225+
return fmt.Errorf("could not marshall script action: %w", err)
226+
}
227+
if pendingData.extra.seqNo.IsSet() {
228+
needRefresh = true
229+
}
201230
} else {
202-
203231
fields := bulk.UpdateFields{
204232
dl.FieldLastCheckin: pendingData.ts, // Set the checkin timestamp
205233
dl.FieldUpdatedAt: nowTimestamp, // Set "updated_at" to the current timestamp
@@ -265,3 +293,58 @@ func (bc *Bulk) flush(ctx context.Context) error {
265293

266294
return err
267295
}
296+
297+
func encodeParams(now string, data pendingT) (map[string]json.RawMessage, error) {
298+
var (
299+
tsNow json.RawMessage
300+
ts json.RawMessage
301+
status json.RawMessage
302+
message json.RawMessage
303+
reason json.RawMessage
304+
ver json.RawMessage
305+
meta json.RawMessage
306+
components json.RawMessage
307+
isSet json.RawMessage
308+
seqNo json.RawMessage
309+
err error
310+
)
311+
tsNow, err = json.Marshal(now)
312+
Err := errors.Join(err)
313+
ts, err = json.Marshal(data.ts)
314+
Err = errors.Join(Err, err)
315+
status, err = json.Marshal(data.status)
316+
Err = errors.Join(Err, err)
317+
message, err = json.Marshal(data.message)
318+
Err = errors.Join(Err, err)
319+
reason, err = json.Marshal(data.unhealthyReason)
320+
Err = errors.Join(Err, err)
321+
ver, err = json.Marshal(data.extra.ver)
322+
Err = errors.Join(Err, err)
323+
isSet, err = json.Marshal(data.extra.seqNo.IsSet())
324+
Err = errors.Join(Err, err)
325+
seqNo, err = json.Marshal(data.extra.seqNo)
326+
Err = errors.Join(Err, err)
327+
if data.extra.meta != nil {
328+
meta, err = json.Marshal(data.extra.meta)
329+
Err = errors.Join(Err, err)
330+
}
331+
if data.extra.components != nil {
332+
components, err = json.Marshal(data.extra.components)
333+
Err = errors.Join(Err, err)
334+
}
335+
if Err != nil {
336+
return nil, Err
337+
}
338+
return map[string]json.RawMessage{
339+
"Now": tsNow,
340+
"TS": ts,
341+
"Status": status,
342+
"Message": message,
343+
"UnhealthyReason": reason,
344+
"Ver": ver,
345+
"Meta": meta,
346+
"Components": components,
347+
"SeqNoSet": isSet,
348+
"SeqNo": seqNo,
349+
}, nil
350+
}

internal/pkg/checkin/bulk_test.go

+3-3
Original file line numberDiff line numberDiff line change
@@ -193,7 +193,7 @@ func TestBulkSimple(t *testing.T) {
193193
mockBulk.On("MUpdate", mock.Anything, mock.MatchedBy(matchOp(t, c, start)), mock.Anything).Return([]bulk.BulkIndexerResponseItem{}, nil).Once()
194194
bc := NewBulk(mockBulk)
195195

196-
if err := bc.CheckIn(c.id, c.status, c.message, c.meta, c.components, c.seqno, c.ver, c.unhealthyReason); err != nil {
196+
if err := bc.CheckIn(c.id, c.status, c.message, c.meta, c.components, c.seqno, c.ver, c.unhealthyReason, false); err != nil {
197197
t.Fatal(err)
198198
}
199199

@@ -228,7 +228,7 @@ func benchmarkBulk(n int, b *testing.B) {
228228
b.ReportAllocs()
229229
for i := 0; i < b.N; i++ {
230230
for _, id := range ids {
231-
err := bc.CheckIn(id, "", "", nil, nil, nil, "", nil)
231+
err := bc.CheckIn(id, "", "", nil, nil, nil, "", nil, false)
232232
if err != nil {
233233
b.Fatal(err)
234234
}
@@ -254,7 +254,7 @@ func benchmarkFlush(n int, b *testing.B) {
254254
for i := 0; i < b.N; i++ {
255255
b.StopTimer()
256256
for _, id := range ids {
257-
err := bc.CheckIn(id, "", "", nil, nil, nil, "", nil)
257+
err := bc.CheckIn(id, "", "", nil, nil, nil, "", nil, false)
258258
if err != nil {
259259
b.Fatal(err)
260260
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
ctx._source.last_checkin = params.TS;
2+
ctx._source.updated_at = params.Now;
3+
ctx._source.last_checkin_status = params.Status;
4+
ctx._source.last_checkin_message = params.Message;
5+
ctx._source.unhealthy_reason = params.UnhealthyReason;
6+
if (params.Ver != "") {
7+
ctx._source.agent.version = params.Ver;
8+
}
9+
if (params.Meta != null) {
10+
ctx._source.local_metadata = params.Meta;
11+
}
12+
if (params.Components != null) {
13+
ctx._source.components = params.Components;
14+
}
15+
if (params.SeqNoSet) {
16+
ctx._source.action_seq_no = params.SeqNo;
17+
}
18+
ctx._source.remove('audit_unenrolled_reason');
19+
ctx._source.remove('audit_unenrolled_time');
20+
ctx._source.remove('dl.unenrolled_at');

internal/pkg/server/fleet_integration_test.go

+42
Original file line numberDiff line numberDiff line change
@@ -1490,4 +1490,46 @@ func Test_SmokeTest_AuditUnenroll(t *testing.T) {
14901490
require.NoError(t, err)
14911491
require.Equal(t, http.StatusConflict, res.StatusCode)
14921492
res.Body.Close()
1493+
1494+
t.Logf("Fake a checkin for agent %s", id)
1495+
req, err = http.NewRequestWithContext(ctx, "POST", srv.baseURL()+"/api/fleet/agents/"+id+"/checkin", strings.NewReader(checkinBody))
1496+
require.NoError(t, err)
1497+
req.Header.Set("Authorization", "ApiKey "+key)
1498+
req.Header.Set("User-Agent", "elastic agent "+serverVersion)
1499+
req.Header.Set("Content-Type", "application/json")
1500+
res, err = cli.Do(req)
1501+
require.NoError(t, err)
1502+
1503+
require.Equal(t, http.StatusOK, res.StatusCode)
1504+
t.Log("Checkin successful, verify body")
1505+
p, _ := io.ReadAll(res.Body)
1506+
res.Body.Close()
1507+
var obj map[string]interface{}
1508+
err = json.Unmarshal(p, &obj)
1509+
require.NoError(t, err)
1510+
1511+
require.Eventually(t, func() bool {
1512+
req, err := http.NewRequestWithContext(ctx, http.MethodGet, "http://localhost:9200/.fleet-agents/_doc/"+id, nil)
1513+
require.NoError(t, err)
1514+
req.SetBasicAuth("elastic", "changeme")
1515+
res, err := cli.Do(req)
1516+
require.NoError(t, err)
1517+
defer res.Body.Close()
1518+
if res.StatusCode != http.StatusOK {
1519+
return false
1520+
}
1521+
p, err := io.ReadAll(res.Body)
1522+
require.NoError(t, err)
1523+
var tmp map[string]interface{}
1524+
err = json.Unmarshal(p, &tmp)
1525+
require.NoError(t, err)
1526+
o, ok := tmp["_source"]
1527+
require.Truef(t, ok, "expected to find _source in: %v", tmp)
1528+
obj, ok := o.(map[string]interface{})
1529+
require.Truef(t, ok, "expected _source to be an object, was: %T", o)
1530+
_, ok = obj["audit_unenrolled_reason"]
1531+
return !ok
1532+
}, time.Second*20, time.Second, "agent document should not have audit_unenrolled_reason attribute")
1533+
cancel()
1534+
srv.waitExit() //nolint:errcheck // test case
14931535
}

testing/e2e/api_version/client_api_current.go

+24
Original file line numberDiff line numberDiff line change
@@ -394,4 +394,28 @@ func (tester *ClientAPITester) TestEnrollAuditUnenroll() {
394394
tester.T().Logf("audit/unenroll endpoint for agent: %s should return conflict", agentID)
395395
status = tester.AuditUnenroll(ctx, agentKey, agentID, api.Uninstall, now)
396396
tester.Require().Equal(http.StatusConflict, status)
397+
398+
tester.T().Logf("test checkin agent: %s", agentID)
399+
_, _, statusCode := tester.Checkin(ctx, agentKey, agentID, nil, nil, nil)
400+
tester.Require().Equal(http.StatusOK, statusCode, "Expected status code 200 for successful checkin")
401+
402+
// verify that audit_unenrolled_reason attribute does not exist in agent doc
403+
tester.Require().Eventually(func() bool {
404+
req, err := http.NewRequestWithContext(ctx, http.MethodGet, "http://"+tester.ESHosts+"/.fleet-agents/_doc/"+agentID, nil)
405+
tester.Require().NoError(err)
406+
req.SetBasicAuth(tester.ElasticUser, tester.ElasticPass)
407+
res, err := tester.Client.Do(req)
408+
tester.Require().NoError(err)
409+
defer res.Body.Close()
410+
if res.StatusCode != http.StatusOK {
411+
return false
412+
}
413+
var obj struct {
414+
Source map[string]interface{} `json:"_source"`
415+
}
416+
err = json.NewDecoder(res.Body).Decode(&obj)
417+
tester.Require().NoError(err)
418+
_, ok := obj.Source["audit_unenrolled_reason"]
419+
return !ok
420+
}, time.Second*20, time.Second, "agent document in elasticsearch should not have audit_unenrolled_reason attribute")
397421
}

0 commit comments

Comments
 (0)