Skip to content

Commit eca5bc7

Browse files
authored
Revert "Add /liveness endpoint to elastic-agent (#4499)" (#4583)
This reverts commit 29ce53e.
1 parent 29ce53e commit eca5bc7

22 files changed

+88
-975
lines changed

_meta/config/common.p2.yml.tmpl

-13
Original file line numberDiff line numberDiff line change
@@ -66,19 +66,6 @@ inputs:
6666
# # The name of the output to use for monitoring data.
6767
# use_output: monitoring
6868
# # exposes agent metrics using http, by default sockets and named pipes are used
69-
# #
70-
# # `http` Also exposes a /liveness endpoint that will return an HTTP code depending on agent status:
71-
# # 200: Agent is healthy
72-
# # 500: A component or unit is in a failed state
73-
# # 503: The agent coordinator is unresponsive
74-
# #
75-
# # You can pass a `failon` parameter to the /liveness endpoint to determine what component state will result in a 500.
76-
# # For example: `curl 'localhost:6792/liveness?failon=degraded'` will return 500 if a component is in a degraded state.
77-
# # The possible values for `failon` are:
78-
# # `degraded`: return an error if a component is in a degraded state or failed state, or if the agent coordinator is unresponsive.
79-
# # `failed`: return an error if a unit is in a failed state, or if the agent coordinator is unresponsive.
80-
# # `heartbeat`: return an error only if the agent coordinator is unresponsive.
81-
# # If no `failon` parameter is provided, the default behavior is `failon=heartbeat`
8269
# http:
8370
# # enables http endpoint
8471
# enabled: false

_meta/config/common.reference.p2.yml.tmpl

+1-14
Original file line numberDiff line numberDiff line change
@@ -144,20 +144,7 @@ inputs:
144144
# pprof.enabled: false
145145
# # The name of the output to use for monitoring data.
146146
# use_output: monitoring
147-
# # Exposes agent metrics using http, by default sockets and named pipes are used.
148-
# #
149-
# # `http` Also exposes a /liveness endpoint that will return an HTTP code depending on agent status:
150-
# # 200: Agent is healthy
151-
# # 500: A component or unit is in a failed state
152-
# # 503: The agent coordinator is unresponsive
153-
# #
154-
# # You can pass a `failon` parameter to the /liveness endpoint to determine what component state will result in a 500.
155-
# # For example: `curl 'localhost:6792/liveness?failon=degraded'` will return 500 if a component is in a degraded state.
156-
# # The possible values for `failon` are:
157-
# # `degraded`: return an error if a component is in a degraded state or failed state, or if the agent coordinator is unresponsive.
158-
# # `failed`: return an error if a unit is in a failed state, or if the agent coordinator is unresponsive.
159-
# # `heartbeat`: return an error only if the agent coordinator is unresponsive.
160-
# # If no `failon` parameter is provided, the default behavior is `failon=heartbeat`
147+
# # exposes agent metrics using http, by default sockets and named pipes are used
161148
# http:
162149
# # enables http endpoint
163150
# enabled: false

_meta/config/elastic-agent.docker.yml.tmpl

-13
Original file line numberDiff line numberDiff line change
@@ -116,19 +116,6 @@ inputs:
116116
# # recommended that these endpoints are only enabled if the monitoring endpoint is set to localhost
117117
# pprof.enabled: false
118118
# # exposes agent metrics using http, by default sockets and named pipes are used
119-
# #
120-
# # `http` Also exposes a /liveness endpoint that will return an HTTP code depending on agent status:
121-
# # 200: Agent is healthy
122-
# # 500: A component or unit is in a failed state
123-
# # 503: The agent coordinator is unresponsive
124-
# #
125-
# # You can pass a `failon` parameter to the /liveness endpoint to determine what component state will result in a 500.
126-
# # For example: `curl 'localhost:6792/liveness?failon=degraded'` will return 500 if a component is in a degraded state.
127-
# # The possible values for `failon` are:
128-
# # `degraded`: return an error if a component is in a degraded state or failed state, or if the agent coordinator is unresponsive.
129-
# # `failed`: return an error if a unit is in a failed state, or if the agent coordinator is unresponsive.
130-
# # `heartbeat`: return an error only if the agent coordinator is unresponsive.
131-
# # If no `failon` parameter is provided, the default behavior is `failon=heartbeat`
132119
# http:
133120
# # enables http endpoint
134121
# enabled: false

_meta/elastic-agent.yml

+1-9
Original file line numberDiff line numberDiff line change
@@ -103,15 +103,7 @@ inputs:
103103
# logs: false
104104
# # enables metrics monitoring
105105
# metrics: false
106-
# # Exposes agent metrics using http, by default sockets and named pipes are used.
107-
# # Also exposes a /liveness endpoint that will return an HTTP code depending on agent status:
108-
# # 200: Agent is healthy
109-
# # 500: A component or unit is in a failed state
110-
# # 503: The agent coordinator is unresponsive
111-
# # You can pass a `failon` parameter to the /liveness endpoint to determine what component state will result in a 500.
112-
# # For example: `curl 'localhost:6792/liveness?failon=degraded'` will return 500 if a component is in a degraded state.
113-
# # The two possible values for `failon` are `degraded` and `failed`. If no `failon` parameter is provided, the default
114-
# # behavior is `failon=failed`
106+
# # exposes agent metrics using http, by default sockets and named pipes are used
115107
# http:
116108
# # enables http endpoint
117109
# enabled: false

changelog/fragments/1711653910-add-liveness-endpoint.yaml

-32
This file was deleted.

elastic-agent.docker.yml

-13
Original file line numberDiff line numberDiff line change
@@ -116,19 +116,6 @@ inputs:
116116
# # recommended that these endpoints are only enabled if the monitoring endpoint is set to localhost
117117
# pprof.enabled: false
118118
# # exposes agent metrics using http, by default sockets and named pipes are used
119-
# #
120-
# # `http` Also exposes a /liveness endpoint that will return an HTTP code depending on agent status:
121-
# # 200: Agent is healthy
122-
# # 500: A component or unit is in a failed state
123-
# # 503: The agent coordinator is unresponsive
124-
# #
125-
# # You can pass a `failon` parameter to the /liveness endpoint to determine what component state will result in a 500.
126-
# # For example: `curl 'localhost:6792/liveness?failon=degraded'` will return 500 if a component is in a degraded state.
127-
# # The possible values for `failon` are:
128-
# # `degraded`: return an error if a component is in a degraded state or failed state, or if the agent coordinator is unresponsive.
129-
# # `failed`: return an error if a unit is in a failed state, or if the agent coordinator is unresponsive.
130-
# # `heartbeat`: return an error only if the agent coordinator is unresponsive.
131-
# # If no `failon` parameter is provided, the default behavior is `failon=heartbeat`
132119
# http:
133120
# # enables http endpoint
134121
# enabled: false

elastic-agent.reference.yml

+1-14
Original file line numberDiff line numberDiff line change
@@ -150,20 +150,7 @@ inputs:
150150
# pprof.enabled: false
151151
# # The name of the output to use for monitoring data.
152152
# use_output: monitoring
153-
# # Exposes agent metrics using http, by default sockets and named pipes are used.
154-
# #
155-
# # `http` Also exposes a /liveness endpoint that will return an HTTP code depending on agent status:
156-
# # 200: Agent is healthy
157-
# # 500: A component or unit is in a failed state
158-
# # 503: The agent coordinator is unresponsive
159-
# #
160-
# # You can pass a `failon` parameter to the /liveness endpoint to determine what component state will result in a 500.
161-
# # For example: `curl 'localhost:6792/liveness?failon=degraded'` will return 500 if a component is in a degraded state.
162-
# # The possible values for `failon` are:
163-
# # `degraded`: return an error if a component is in a degraded state or failed state, or if the agent coordinator is unresponsive.
164-
# # `failed`: return an error if a unit is in a failed state, or if the agent coordinator is unresponsive.
165-
# # `heartbeat`: return an error only if the agent coordinator is unresponsive.
166-
# # If no `failon` parameter is provided, the default behavior is `failon=heartbeat`
153+
# # exposes agent metrics using http, by default sockets and named pipes are used
167154
# http:
168155
# # enables http endpoint
169156
# enabled: false

elastic-agent.yml

-13
Original file line numberDiff line numberDiff line change
@@ -72,19 +72,6 @@ inputs:
7272
# # The name of the output to use for monitoring data.
7373
# use_output: monitoring
7474
# # exposes agent metrics using http, by default sockets and named pipes are used
75-
# #
76-
# # `http` Also exposes a /liveness endpoint that will return an HTTP code depending on agent status:
77-
# # 200: Agent is healthy
78-
# # 500: A component or unit is in a failed state
79-
# # 503: The agent coordinator is unresponsive
80-
# #
81-
# # You can pass a `failon` parameter to the /liveness endpoint to determine what component state will result in a 500.
82-
# # For example: `curl 'localhost:6792/liveness?failon=degraded'` will return 500 if a component is in a degraded state.
83-
# # The possible values for `failon` are:
84-
# # `degraded`: return an error if a component is in a degraded state or failed state, or if the agent coordinator is unresponsive.
85-
# # `failed`: return an error if a unit is in a failed state, or if the agent coordinator is unresponsive.
86-
# # `heartbeat`: return an error only if the agent coordinator is unresponsive.
87-
# # If no `failon` parameter is provided, the default behavior is `failon=heartbeat`
8875
# http:
8976
# # enables http endpoint
9077
# enabled: false

internal/pkg/agent/application/coordinator/coordinator.go

-24
Original file line numberDiff line numberDiff line change
@@ -279,11 +279,6 @@ type Coordinator struct {
279279

280280
// mx sync.RWMutex
281281
// protection protection.Config
282-
283-
// a sync channel that can be called by other components to check if the main coordinator
284-
// loop in runLoopIteration() is active and listening.
285-
// Should only be interacted with via CoordinatorActive() or runLoopIteration()
286-
heartbeatChan chan struct{}
287282
}
288283

289284
// The channels Coordinator reads to receive updates from the various managers.
@@ -377,7 +372,6 @@ func New(logger *logger.Logger, cfg *configuration.Configuration, logLevel logp.
377372
logLevelCh: make(chan logp.Level),
378373
overrideStateChan: make(chan *coordinatorOverrideState),
379374
upgradeDetailsChan: make(chan *details.Details),
380-
heartbeatChan: make(chan struct{}),
381375
}
382376
// Setup communication channels for any non-nil components. This pattern
383377
// lets us transparently accept nil managers / simulated events during
@@ -418,22 +412,6 @@ func (c *Coordinator) State() State {
418412
return c.stateBroadcaster.Get()
419413
}
420414

421-
// CoordinatorActive is a blocking method that waits for a channel response
422-
// from the coordinator loop. This can be used as a basic health check,
423-
// as we'll timeout and return false if the coordinator run loop doesn't
424-
// respond to our channel.
425-
func (c *Coordinator) CoordinatorActive(timeout time.Duration) bool {
426-
ctx, cancel := context.WithTimeout(context.Background(), timeout)
427-
defer cancel()
428-
429-
select {
430-
case <-c.heartbeatChan:
431-
return true
432-
case <-ctx.Done():
433-
return false
434-
}
435-
}
436-
437415
func (c *Coordinator) RegisterMonitoringServer(s configReloader) {
438416
c.monitoringServerReloader = s
439417
}
@@ -999,8 +977,6 @@ func (c *Coordinator) runLoopIteration(ctx context.Context) {
999977
case upgradeDetails := <-c.upgradeDetailsChan:
1000978
c.setUpgradeDetails(upgradeDetails)
1001979

1002-
case c.heartbeatChan <- struct{}{}:
1003-
1004980
case componentState := <-c.managerChans.runtimeManagerUpdate:
1005981
// New component change reported by the runtime manager via
1006982
// Coordinator.watchRuntimeComponents(), merge it with the

internal/pkg/agent/application/coordinator/coordinator_unit_test.go

+1-6
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,6 @@ import (
1414
"context"
1515
"errors"
1616
"fmt"
17-
"net"
1817
"testing"
1918
"time"
2019

@@ -571,7 +570,7 @@ func TestCoordinatorPolicyChangeUpdatesMonitorReloader(t *testing.T) {
571570
}
572571

573572
monitoringServer := &fakeMonitoringServer{}
574-
newServerFn := func(*monitoringCfg.MonitoringConfig) (reload.ServerController, error) {
573+
newServerFn := func() (reload.ServerController, error) {
575574
return monitoringServer, nil
576575
}
577576
monitoringReloader := reload.NewServerReloader(newServerFn, logger, monitoringCfg.DefaultConfig())
@@ -1055,7 +1054,3 @@ func (fs *fakeMonitoringServer) Reset() {
10551054
fs.stopTriggered = false
10561055
fs.startTriggered = false
10571056
}
1058-
1059-
func (fs *fakeMonitoringServer) Addr() net.Addr {
1060-
return nil
1061-
}

internal/pkg/agent/application/monitoring/handler.go

+1-11
Original file line numberDiff line numberDiff line change
@@ -8,9 +8,6 @@ import (
88
"encoding/json"
99
"fmt"
1010
"net/http"
11-
"time"
12-
13-
"github.com/elastic/elastic-agent/internal/pkg/agent/application/coordinator"
1411
)
1512

1613
const errTypeUnexpected = "UNEXPECTED"
@@ -19,13 +16,6 @@ type apiError interface {
1916
Status() int
2017
}
2118

22-
// CoordinatorState is used by the HTTP handlers that take a coordinator object.
23-
// This interface exists to help make testing easier.
24-
type CoordinatorState interface {
25-
State() coordinator.State
26-
CoordinatorActive(timeout time.Duration) bool
27-
}
28-
2919
func createHandler(fn func(w http.ResponseWriter, r *http.Request) error) *apiHandler {
3020
return &apiHandler{
3121
innerFn: fn,
@@ -40,7 +30,7 @@ type apiHandler struct {
4030
func (h *apiHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) {
4131
err := h.innerFn(w, r)
4232
if err != nil {
43-
switch e := err.(type) { //nolint:errorlint // Will need refactor.
33+
switch e := err.(type) { // nolint:errorlint // Will need refactor.
4434
case apiError:
4535
w.WriteHeader(e.Status())
4636
default:

internal/pkg/agent/application/monitoring/liveness.go

-88
This file was deleted.

0 commit comments

Comments
 (0)