From 7b81fea37ed79b26a641f9d2c28cfcd913cdb645 Mon Sep 17 00:00:00 2001 From: Craig MacKenzie Date: Fri, 31 May 2024 15:57:52 -0400 Subject: [PATCH 1/3] Wait on the watcher at startup instead of releasing. --- .../pkg/agent/application/upgrade/rollback.go | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/internal/pkg/agent/application/upgrade/rollback.go b/internal/pkg/agent/application/upgrade/rollback.go index 37b2f414717..3fa6895bc67 100644 --- a/internal/pkg/agent/application/upgrade/rollback.go +++ b/internal/pkg/agent/application/upgrade/rollback.go @@ -146,13 +146,6 @@ func InvokeWatcher(log *logger.Logger, agentExecutable string) (*exec.Cmd, error } cmd := invokeCmd(agentExecutable) - defer func() { - if cmd.Process != nil { - log.Infof("releasing watcher %v", cmd.Process.Pid) - _ = cmd.Process.Release() - } - }() - log.Infow("Starting upgrade watcher", "path", cmd.Path, "args", cmd.Args, "env", cmd.Env, "dir", cmd.Dir) if err := cmd.Start(); err != nil { return nil, fmt.Errorf("failed to start Upgrade Watcher: %w", err) @@ -160,9 +153,17 @@ func InvokeWatcher(log *logger.Logger, agentExecutable string) (*exec.Cmd, error upgradeWatcherPID := cmd.Process.Pid agentPID := os.Getpid() + + go func() { + if err := cmd.Wait(); err != nil { + log.Infow("Upgrade Watcher exited with error", "agent.upgrade.watcher.process.pid", upgradeWatcherPID, "agent.process.pid", agentPID, "error.message", err) + } + }() + log.Infow("Upgrade Watcher invoked", "agent.upgrade.watcher.process.pid", upgradeWatcherPID, "agent.process.pid", agentPID) return cmd, nil + } func restartAgent(ctx context.Context, log *logger.Logger, c client.Client) error { From 30c0422da3fcb3c6712e6f00d157736c3d8d7627 Mon Sep 17 00:00:00 2001 From: Craig MacKenzie Date: Fri, 31 May 2024 16:15:32 -0400 Subject: [PATCH 2/3] Add changelog. 
--- ...ting-a-zombie-process-on-each-restart.yaml | 32 +++++++++++++++++++ 1 file changed, 32 insertions(+) create mode 100644 changelog/fragments/1717185708-Stop-creating-a-zombie-process-on-each-restart.yaml diff --git a/changelog/fragments/1717185708-Stop-creating-a-zombie-process-on-each-restart.yaml b/changelog/fragments/1717185708-Stop-creating-a-zombie-process-on-each-restart.yaml new file mode 100644 index 00000000000..95a24b44fcf --- /dev/null +++ b/changelog/fragments/1717185708-Stop-creating-a-zombie-process-on-each-restart.yaml @@ -0,0 +1,32 @@ +# Kind can be one of: +# - breaking-change: a change to previously-documented behavior +# - deprecation: functionality that is being removed in a later release +# - bug-fix: fixes a problem in a previous version +# - enhancement: extends functionality but does not break or fix existing behavior +# - feature: new functionality +# - known-issue: problems that we are aware of in a given version +# - security: impacts on the security of a product or a user’s deployment. +# - upgrade: important information for someone upgrading from a prior version +# - other: does not fit into any of the other categories +kind: feature + +# Change summary; an 80ish-character description of the change. +summary: Stop creating a zombie process on each restart. + +# Long description; in case the summary is not enough to describe the change +# this field accommodates a description without length limits. +# NOTE: This field will be rendered only for breaking-change and known-issue kinds at the moment. +#description: + +# Affected component; usually one of "elastic-agent", "fleet-server", "filebeat", "metricbeat", "auditbeat", "all", etc. +component: "elastic-agent" + +# PR URL; optional; the PR number that added the changeset. +# If not present, it is automatically filled by the tooling finding the PR where this changelog fragment has been added. 
+# NOTE: the tooling supports backports, so it's able to fill the original PR number instead of the backport PR number. +# Please provide it if you are adding a fragment for a different PR. +pr: https://github.com/elastic/elastic-agent/pull/4834 + +# Issue URL; optional; the GitHub issue related to this changeset (either closes or is part of). +# If not present, it is automatically filled by the tooling with the issue linked to the PR number. +#issue: https://github.com/owner/repo/1234 From 52e910d9244b443ae7c8bdf574d6bb92a5f5b430 Mon Sep 17 00:00:00 2001 From: Craig MacKenzie Date: Fri, 7 Jun 2024 15:57:59 -0400 Subject: [PATCH 3/3] Update changelog/fragments/1717185708-Stop-creating-a-zombie-process-on-each-restart.yaml Co-authored-by: Blake Rouse --- ...17185708-Stop-creating-a-zombie-process-on-each-restart.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/changelog/fragments/1717185708-Stop-creating-a-zombie-process-on-each-restart.yaml b/changelog/fragments/1717185708-Stop-creating-a-zombie-process-on-each-restart.yaml index 95a24b44fcf..f0b79fbe097 100644 --- a/changelog/fragments/1717185708-Stop-creating-a-zombie-process-on-each-restart.yaml +++ b/changelog/fragments/1717185708-Stop-creating-a-zombie-process-on-each-restart.yaml @@ -8,7 +8,7 @@ # - security: impacts on the security of a product or a user’s deployment. # - upgrade: important information for someone upgrading from a prior version # - other: does not fit into any of the other categories -kind: feature +kind: bug-fix # Change summary; an 80ish-character description of the change. summary: Stop creating a zombie process on each restart.