diff --git a/changelog/fragments/1730284359-assume-installed-agent-on-upgrade.yaml b/changelog/fragments/1730284359-assume-installed-agent-on-upgrade.yaml new file mode 100644 index 00000000000..25a6db9a27a --- /dev/null +++ b/changelog/fragments/1730284359-assume-installed-agent-on-upgrade.yaml @@ -0,0 +1,32 @@ +# Kind can be one of: +# - breaking-change: a change to previously-documented behavior +# - deprecation: functionality that is being removed in a later release +# - bug-fix: fixes a problem in a previous version +# - enhancement: extends functionality but does not break or fix existing behavior +# - feature: new functionality +# - known-issue: problems that we are aware of in a given version +# - security: impacts on the security of a product or a user’s deployment. +# - upgrade: important information for someone upgrading from a prior version +# - other: does not fit into any of the other categories +kind: bug-fix + +# Change summary; a 80ish characters long description of the change. +summary: This PR assumes installed agent on upgrade in order to initialize control path properly. + +# Long description; in case the summary is not enough to describe the change +# this field accommodate a description without length limits. +# NOTE: This field will be rendered only for breaking-change and known-issue kinds at the moment. +#description: + +# Affected component; usually one of "elastic-agent", "fleet-server", "filebeat", "metricbeat", "auditbeat", "all", etc. +component: elastic-agent + +# PR URL; optional; the PR number that added the changeset. +# If not present is automatically filled by the tooling finding the PR where this changelog fragment has been added. +# NOTE: the tooling supports backports, so it's able to fill the original PR number instead of the backport PR number. +# Please provide it if you are adding a fragment for a different PR. +pr: https://github.com/owner/repo/5879 + +# Issue URL; optional; the GitHub issue related to this changeset (either closes or is part of). +# If not present is automatically filled by the tooling with the issue linked to the PR number. +issue: https://github.com/owner/repo/5872 diff --git a/internal/pkg/agent/application/paths/paths_test.go b/internal/pkg/agent/application/paths/paths_test.go index 62c49f65ef3..1da540530e8 100644 --- a/internal/pkg/agent/application/paths/paths_test.go +++ b/internal/pkg/agent/application/paths/paths_test.go @@ -5,10 +5,12 @@ package paths import ( + "fmt" "runtime" "testing" "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" ) func TestEqual(t *testing.T) { @@ -92,3 +94,123 @@ func TestHasPrefixWindows(t *testing.T) { }) } } + +func TestResolveControlSocketWithInstalledState(t *testing.T) { + testCases := []struct { + os string + controlSocket string + topPath string + runningInstalled bool + expectedSocket string + }{ + { + "darwin", + ControlSocketFromPath("darwin", "/top"), + "/top", + true, + ControlSocketFromPath("darwin", "/top"), + }, + { + "darwin", + ControlSocketFromPath("darwin", "/top"), + "/top", + false, + ControlSocketFromPath("darwin", "/top"), + }, + { + "darwin", + "/control/socket", + "/top", + true, + "/control/socket", + }, + { + "darwin", + "/control/socket", + "/top", + false, + "/control/socket", + }, + + { + "linux", + ControlSocketFromPath("linux", "/top"), + "/top", + true, + ControlSocketFromPath("linux", "/top"), + }, + { + "linux", + ControlSocketFromPath("linux", "/top"), + "/top", + false, + ControlSocketFromPath("linux", "/top"), + }, + { + "linux", + "/control/socket", + "/top", + true, + "/control/socket", + }, + { + "linux", + "/control/socket", + "/top", + false, + "/control/socket", + }, + + { + "windows", + ControlSocketFromPath("windows", "/top"), + "/top", + true, + WindowsControlSocketInstalledPath, + }, + { + "windows", + ControlSocketFromPath("windows", "/top"), + "/top", + false, + ControlSocketFromPath("windows", "/top"), + }, + + { + "windows", + "/control/socket", + "/top", + true, + "/control/socket", + }, + { + "windows", + "/control/socket", + "/top", + false, + "/control/socket", + }, + } + + for i, tc := range testCases { + if runtime.GOOS != tc.os { + continue + } + t.Run(fmt.Sprintf("test case %d", i), func(t *testing.T) { + prevControlSocketPath := controlSocketPath + prevTopPath := topPath + defer func() { + // just to be sure + controlSocketPath = prevControlSocketPath + topPath = prevTopPath + }() + controlSocketPath = tc.controlSocket + topPath = tc.topPath + + ResolveControlSocketWithInstalledState(tc.runningInstalled) + + require.Equal(t, tc.expectedSocket, controlSocketPath) + }) + + } +} diff --git a/internal/pkg/agent/application/paths/paths_unix.go b/internal/pkg/agent/application/paths/paths_unix.go index a667d725ffa..0bcf66ca290 100644 --- a/internal/pkg/agent/application/paths/paths_unix.go +++ b/internal/pkg/agent/application/paths/paths_unix.go @@ -35,6 +35,9 @@ func initialControlSocketPath(topPath string) string { // ResolveControlSocket does nothing on non-Windows hosts. func ResolveControlSocket() {} +// ResolveControlSocketWithInstalledState updates the control socket path. +func ResolveControlSocketWithInstalledState(_ bool) {} + // HasPrefix tests if the path starts with the prefix. func HasPrefix(path string, prefix string) bool { if path == "" || prefix == "" { diff --git a/internal/pkg/agent/application/paths/paths_windows.go b/internal/pkg/agent/application/paths/paths_windows.go index c252f311dba..516e44751ec 100644 --- a/internal/pkg/agent/application/paths/paths_windows.go +++ b/internal/pkg/agent/application/paths/paths_windows.go @@ -65,8 +65,13 @@ func initialControlSocketPath(topPath string) string { // RunningInstalled will always be false, even when it is an installed version. Once // that is fixed from the upgrade process the control socket path needs to be updated. func ResolveControlSocket() { + ResolveControlSocketWithInstalledState(RunningInstalled()) +} + +// ResolveControlSocketWithInstalledState updates the control socket path. +func ResolveControlSocketWithInstalledState(runningInstalled bool) { currentPath := ControlSocket() - if currentPath == ControlSocketFromPath(runtime.GOOS, topPath) && RunningInstalled() { + if currentPath == ControlSocketFromPath(runtime.GOOS, topPath) && runningInstalled { // path is not correct being that it's installed // reset the control socket path to be the installed path SetControlSocket(WindowsControlSocketInstalledPath) diff --git a/internal/pkg/agent/application/upgrade/watcher.go b/internal/pkg/agent/application/upgrade/watcher.go index 38089000d47..4eca42a96cc 100644 --- a/internal/pkg/agent/application/upgrade/watcher.go +++ b/internal/pkg/agent/application/upgrade/watcher.go @@ -12,6 +12,7 @@ import ( "google.golang.org/grpc" + "github.com/elastic/elastic-agent/internal/pkg/agent/application/paths" "github.com/elastic/elastic-agent/pkg/control/v2/client" "github.com/elastic/elastic-agent/pkg/core/logger" ) @@ -50,6 +51,11 @@ type AgentWatcher struct { // NewAgentWatcher creates a new agent watcher. func NewAgentWatcher(ch chan error, log *logger.Logger, checkInterval time.Duration) *AgentWatcher { + // when starting watcher from pre 8.8 version of agent control socket is evaluated incorrectly and upgrade fails. + // resolving control socket updates it to a proper value before client is initiated + // upgrade is only available for installed agent so we can assume + paths.ResolveControlSocketWithInstalledState(true) + c := client.New() ec := &AgentWatcher{ notifyChan: ch, diff --git a/testing/upgradetest/watcher.go b/testing/upgradetest/watcher.go index a2563400fd3..c5684382b3f 100644 --- a/testing/upgradetest/watcher.go +++ b/testing/upgradetest/watcher.go @@ -15,9 +15,10 @@ import ( ) // FastWatcherCfg is configuration that makes the watcher run faster. +// we need to set grace period to 100s to be able to detect 5 failures 15 seconds apart and have a little buffer. const FastWatcherCfg = ` agent.upgrade.watcher: - grace_period: 1m + grace_period: 100s error_check.interval: 15s crash_check.interval: 15s `