@@ -11,22 +11,23 @@ import (
11
11
"io"
12
12
"strconv"
13
13
"strings"
14
- "sync"
15
14
"time"
16
15
16
+ "github.com/rs/zerolog"
17
+
17
18
"github.com/elastic/fleet-server/v7/internal/pkg/build"
18
19
"github.com/elastic/fleet-server/v7/internal/pkg/config"
19
20
"github.com/elastic/fleet-server/v7/internal/pkg/es"
20
21
"github.com/elastic/fleet-server/v7/internal/pkg/reload"
21
22
"github.com/elastic/fleet-server/v7/internal/pkg/sleep"
22
23
"github.com/elastic/fleet-server/v7/internal/pkg/state"
23
24
"github.com/elastic/fleet-server/v7/internal/pkg/ver"
24
- "github.com/rs/zerolog"
25
+
26
+ "gopkg.in/yaml.v3"
25
27
26
28
"github.com/elastic/elastic-agent-client/v7/pkg/client"
27
29
"github.com/elastic/elastic-agent-client/v7/pkg/proto"
28
30
"github.com/elastic/go-ucfg"
29
- "gopkg.in/yaml.v3"
30
31
)
31
32
32
33
const (
@@ -65,7 +66,7 @@ type Agent struct {
65
66
srv * Fleet
66
67
srvCtx context.Context
67
68
srvCanceller context.CancelFunc
68
- srvDone chan bool
69
+ srvDone chan struct {}
69
70
70
71
outputCheckCanceller context.CancelFunc
71
72
chReconfigure chan struct {}
@@ -97,6 +98,7 @@ func NewAgent(cliCfg *ucfg.Config, reader io.Reader, bi build.Info, reloadables
97
98
98
99
// Run starts a Server instance using config from the configured client.
99
100
func (a * Agent ) Run (ctx context.Context ) error {
101
+ // ctx is cancelled when a SIGTERM or SIGINT is received.
100
102
log := zerolog .Ctx (ctx )
101
103
a .agent .RegisterDiagnosticHook ("fleet-server config" , "fleet-server's current configuration" , "fleet-server.yml" , "application/yml" , func () []byte {
102
104
if a .srv == nil {
@@ -150,24 +152,21 @@ func (a *Agent) Run(ctx context.Context) error {
150
152
log .Warn ().Msg ("Diagnostics hook failure config is nil." )
151
153
return []byte (`Diagnostics hook failure config is nil` )
152
154
}
153
- ctx , cancel := context .WithTimeout (ctx , time .Second * 30 ) // TODO(michel-laterman): duration/timeout should be part of the diagnostics action from fleet-server (https://github.com/elastic/fleet-server/issues/3648) and the control protocol (https://github.com/elastic/elastic-agent-client/issues/113)
155
+ ctx , cancel := context .WithTimeout (ctx , time .Second * 30 ) // diag specific context, has a timeout // TODO(michel-laterman): duration/timeout should be part of the diagnostics action from fleet-server (https://github.com/elastic/fleet-server/issues/3648) and the control protocol (https://github.com/elastic/elastic-agent-client/issues/113)
154
156
defer cancel ()
155
157
return cfg .Output .Elasticsearch .DiagRequests (ctx )
156
158
})
157
159
158
- subCtx , subCanceller := context .WithCancel (ctx )
159
- defer subCanceller ()
160
-
161
- var wg sync.WaitGroup
162
- wg .Add (1 )
160
+ // doneCh is used to track when agent wrapper run loop returns
161
+ doneCh := make (chan struct {})
163
162
go func () {
164
- defer wg . Done ( )
163
+ defer close ( doneCh )
165
164
166
165
t := time .NewTicker (1 * time .Second )
167
166
defer t .Stop ()
168
167
for {
169
168
select {
170
- case <- subCtx .Done ():
169
+ case <- ctx .Done ():
171
170
return
172
171
case err := <- a .agent .Errors ():
173
172
if err != nil && ! errors .Is (err , context .Canceled ) && ! errors .Is (err , io .EOF ) {
@@ -176,13 +175,13 @@ func (a *Agent) Run(ctx context.Context) error {
176
175
case change := <- a .agent .UnitChanges ():
177
176
switch change .Type {
178
177
case client .UnitChangedAdded :
179
- err := a .unitAdded (subCtx , change .Unit )
178
+ err := a .unitAdded (ctx , change .Unit )
180
179
if err != nil {
181
180
log .Error ().Str ("unit" , change .Unit .ID ()).Err (err )
182
181
_ = change .Unit .UpdateState (client .UnitStateFailed , err .Error (), nil )
183
182
}
184
183
case client .UnitChangedModified :
185
- err := a .unitModified (subCtx , change .Unit )
184
+ err := a .unitModified (ctx , change .Unit )
186
185
if err != nil {
187
186
log .Error ().Str ("unit" , change .Unit .ID ()).Err (err )
188
187
_ = change .Unit .UpdateState (client .UnitStateFailed , err .Error (), nil )
@@ -202,7 +201,7 @@ func (a *Agent) Run(ctx context.Context) error {
202
201
if agentInfo != nil && agentInfo .ID != "" {
203
202
// Agent ID is now set for the component; stop the ticker and reconfigure.
204
203
t .Stop ()
205
- err := a .reconfigure (subCtx )
204
+ err := a .reconfigure (ctx )
206
205
if err != nil && ! errors .Is (err , context .Canceled ) {
207
206
log .Error ().Err (err ).Msg ("Bootstrap error when reconfiguring" )
208
207
}
@@ -212,13 +211,13 @@ func (a *Agent) Run(ctx context.Context) error {
212
211
}()
213
212
214
213
log .Info ().Msg ("starting communication connection back to Elastic Agent" )
215
- err := a .agent .Start (subCtx )
216
- if err != nil {
214
+ err := a .agent .Start (ctx )
215
+ if err != nil && ! errors . Is ( err , context . Canceled ) {
217
216
return err
218
217
}
219
218
220
- <- subCtx .Done ()
221
- wg . Wait ()
219
+ <- ctx .Done () // wait for a termination signal
220
+ <- doneCh // wait for agent wrapper goroutine to terminate
222
221
223
222
return nil
224
223
}
@@ -355,7 +354,7 @@ func (a *Agent) start(ctx context.Context) error {
355
354
}
356
355
}
357
356
358
- srvDone := make (chan bool )
357
+ srvDone := make (chan struct {} )
359
358
srvCtx , srvCanceller := context .WithCancel (ctx )
360
359
srv , err := NewFleet (a .bi , state .NewChained (state .NewLog (), a ), false )
361
360
if err != nil {
@@ -394,6 +393,7 @@ func (a *Agent) reconfigure(ctx context.Context) error {
394
393
}
395
394
396
395
// reload the generic reloadables
396
+ // Currently logger is the only reloadable
397
397
for _ , r := range a .reloadables {
398
398
err = r .Reload (ctx , cfg )
399
399
if err != nil {
@@ -405,6 +405,7 @@ func (a *Agent) reconfigure(ctx context.Context) error {
405
405
}
406
406
407
407
func (a * Agent ) stop () {
408
+ // stop is called when expected config state indicates an input or output should stop
408
409
if a .srvCanceller == nil {
409
410
return
410
411
}
@@ -418,7 +419,7 @@ func (a *Agent) stop() {
418
419
a .srvCtx = nil
419
420
a .srv = nil
420
421
canceller ()
421
- <- a .srvDone
422
+ <- a .srvDone // wait for srv.Run loop to terminate either because root-context received a signal, or stop has been called
422
423
a .srvDone = nil
423
424
424
425
if a .inputUnit != nil {
@@ -460,6 +461,7 @@ func (a *Agent) configFromUnits(ctx context.Context) (*config.Config, error) {
460
461
461
462
// elastic-agent should be setting bootstrap with config provided through enrollment flags
462
463
if bootstrapCfg , ok := outMap ["bootstrap" ]; ok {
464
+ // Check if an output check loop is running, cancel if it is.
463
465
if a .outputCheckCanceller != nil {
464
466
a .outputCheckCanceller ()
465
467
a .outputCheckCanceller = nil
0 commit comments