Skip to content

Commit

Permalink
Revert "fix install/enroll cmd to fail when agent restart fails (#3207)" (#3553)
Browse files Browse the repository at this point in the history
  • Loading branch information
AndersonQ authored Oct 6, 2023
1 parent 39f324f commit 99b14c8
Show file tree
Hide file tree
Showing 6 changed files with 52 additions and 151 deletions.

This file was deleted.

2 changes: 1 addition & 1 deletion internal/pkg/agent/cmd/enroll.go
Original file line number Diff line number Diff line change
Expand Up @@ -351,7 +351,7 @@ func enroll(streams *cli.IOStreams, cmd *cobra.Command) error {
// Error: failed to fix permissions: chown /Library/Elastic/Agent/data/elastic-agent-c13f91/elastic-agent.app: operation not permitted
// This is because we are fixing permissions twice, once during installation and again during the enrollment step.
// When we are enrolling as part of installation on MacOS, skip the second attempt to fix permissions.
fixPermissions := fromInstall
var fixPermissions bool = fromInstall
if runtime.GOOS == "darwin" {
fixPermissions = false
}
Expand Down
62 changes: 17 additions & 45 deletions internal/pkg/agent/cmd/enroll_cmd.go
Original file line number Diff line number Diff line change
Expand Up @@ -172,7 +172,7 @@ func newEnrollCmd(
)
}

// newEnrollCmdWithStore creates a new enrollment and accept a custom store.
// newEnrollCmdWithStore creates a new enrollment and accepts a custom store.
func newEnrollCmdWithStore(
log *logger.Logger,
options *enrollCmdOption,
Expand All @@ -187,11 +187,10 @@ func newEnrollCmdWithStore(
}, nil
}

// Execute enrolls the agent into Fleet.
// Execute tries to enroll the agent into Fleet.
func (c *enrollCmd) Execute(ctx context.Context, streams *cli.IOStreams) error {
var err error
defer c.stopAgent() // ensure its stopped no matter what

span, ctx := apm.StartSpan(ctx, "enroll", "app.internal")
defer func() {
apm.CaptureError(ctx, err).Send()
Expand Down Expand Up @@ -236,7 +235,7 @@ func (c *enrollCmd) Execute(ctx context.Context, streams *cli.IOStreams) error {
// Ensure that the agent does not use a proxy configuration
// when connecting to the local fleet server.
// Note that when running fleet-server the enroll request will be sent to :8220,
// however when the agent is running afterward requests will be sent to :8221
// however when the agent is running afterwards requests will be sent to :8221
c.remoteConfig.Transport.Proxy.Disable = true
}

Expand All @@ -257,7 +256,7 @@ func (c *enrollCmd) Execute(ctx context.Context, streams *cli.IOStreams) error {

err = c.enrollWithBackoff(ctx, persistentConfig)
if err != nil {
return fmt.Errorf("fail to enroll: %w", err)
return errors.New(err, "fail to enroll")
}

if c.options.FixPermissions {
Expand All @@ -268,23 +267,17 @@ func (c *enrollCmd) Execute(ctx context.Context, streams *cli.IOStreams) error {
}

defer func() {
if err != nil {
fmt.Fprintf(streams.Err, "Something went wrong while enrolling the Elastic Agent: %v\n", err)
} else {
fmt.Fprintln(streams.Out, "Successfully enrolled the Elastic Agent.")
}
fmt.Fprintln(streams.Out, "Successfully enrolled the Elastic Agent.")
}()

if c.agentProc == nil {
if err = c.daemonReloadWithBackoff(ctx); err != nil {
c.log.Errorf("Elastic Agent might not be running; unable to trigger restart: %v", err)
return fmt.Errorf("could not reload agent daemon, unable to trigger restart: %w", err)
if err := c.daemonReload(ctx); err != nil {
c.log.Infow("Elastic Agent might not be running; unable to trigger restart", "error", err)
} else {
c.log.Info("Successfully triggered restart on running Elastic Agent.")
}

c.log.Info("Successfully triggered restart on running Elastic Agent.")
return nil
}

c.log.Info("Elastic Agent has been enrolled; start Elastic Agent")
return nil
}
Expand Down Expand Up @@ -450,32 +443,24 @@ func (c *enrollCmd) prepareFleetTLS() error {

func (c *enrollCmd) daemonReloadWithBackoff(ctx context.Context) error {
err := c.daemonReload(ctx)
if err != nil &&
(errors.Is(err, context.DeadlineExceeded) ||
errors.Is(err, context.Canceled)) {
return fmt.Errorf("could not reload deamon: %w", err)
}
if err == nil {
return nil
}

signal := make(chan struct{})
backExp := backoff.NewExpBackoff(signal, 10*time.Second, 1*time.Minute)

var i int
for ; i < 5; i++ {
for i := 5; i >= 0; i-- {
backExp.Wait()
c.log.Info("Retrying to restart...")
err = c.daemonReload(ctx)
if err == nil ||
errors.Is(err, context.DeadlineExceeded) ||
errors.Is(err, context.Canceled) {
if err == nil {
break
}
}

close(signal)
return fmt.Errorf("could not reload deamon after %d retries: %w", i+1, err)
return err
}

func (c *enrollCmd) daemonReload(ctx context.Context) error {
Expand All @@ -493,20 +478,8 @@ func (c *enrollCmd) enrollWithBackoff(ctx context.Context, persistentConfig map[

c.log.Infof("Starting enrollment to URL: %s", c.client.URI())
err := c.enroll(ctx, persistentConfig)
if err == nil {
return nil
}

const deadline = 10 * time.Minute
const frequency = 60 * time.Second

c.log.Infof("1st enrollment attempt failed, retrying for %s, every %s enrolling to URL: %s",
deadline,
frequency,
c.client.URI())
signal := make(chan struct{})
defer close(signal)
backExp := backoff.NewExpBackoff(signal, frequency, deadline)
backExp := backoff.NewExpBackoff(signal, 60*time.Second, 10*time.Minute)

for {
retry := false
Expand All @@ -525,6 +498,7 @@ func (c *enrollCmd) enrollWithBackoff(ctx context.Context, persistentConfig map[
err = c.enroll(ctx, persistentConfig)
}

close(signal)
return err
}

Expand Down Expand Up @@ -573,10 +547,8 @@ func (c *enrollCmd) enroll(ctx context.Context, persistentConfig map[string]inte
c.options.FleetServer.ElasticsearchInsecure,
)
if err != nil {
return fmt.Errorf(
"failed creating fleet-server bootstrap config: %w", err)
return err
}

// no longer need bootstrap at this point
serverConfig.Server.Bootstrap = false
fleetConfig.Server = serverConfig.Server
Expand All @@ -596,11 +568,11 @@ func (c *enrollCmd) enroll(ctx context.Context, persistentConfig map[string]inte

reader, err := yamlToReader(configToStore)
if err != nil {
return fmt.Errorf("yamlToReader failed: %w", err)
return err
}

if err := safelyStoreAgentInfo(c.configStore, reader); err != nil {
return fmt.Errorf("failed to store agent config: %w", err)
return err
}

// clear action store
Expand Down
71 changes: 21 additions & 50 deletions internal/pkg/agent/cmd/enroll_cmd_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,11 +16,8 @@ import (
"os"
"runtime"
"strconv"
"strings"
"testing"
"time"

"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"

"github.com/elastic/elastic-agent/internal/pkg/agent/configuration"
Expand Down Expand Up @@ -162,24 +159,14 @@ func TestEnroll(t *testing.T) {
require.NoError(t, err)

streams, _, _, _ := cli.NewTestingIOStreams()
ctx, cancel := context.WithTimeout(context.Background(), 1*time.Minute)
defer cancel()
err = cmd.Execute(ctx, streams)

if err != nil &&
// There is no agent running, therefore nothing to be restarted.
// However, this will cause the Enroll command to return an error
// which we'll ignore here.
!strings.Contains(err.Error(),
"could not reload agent daemon, unable to trigger restart") {
t.Fatalf("enrrol coms returned and unexpected error: %v", err)
}
err = cmd.Execute(context.Background(), streams)
require.NoError(t, err)

config, err := readConfig(store.Content)
require.NoError(t, err)

assert.Equal(t, "my-access-api-key", config.AccessAPIKey)
assert.Equal(t, host, config.Client.Host)
require.NoError(t, err)
require.Equal(t, "my-access-api-key", config.AccessAPIKey)
require.Equal(t, host, config.Client.Host)
},
))

Expand Down Expand Up @@ -229,24 +216,16 @@ func TestEnroll(t *testing.T) {
require.NoError(t, err)

streams, _, _, _ := cli.NewTestingIOStreams()
ctx, cancel := context.WithTimeout(context.Background(), 1*time.Minute)
defer cancel()
err = cmd.Execute(ctx, streams)
if err != nil &&
// There is no agent running, therefore nothing to be restarted.
// However, this will cause the Enroll command to return an error
// which we'll ignore here.
!strings.Contains(err.Error(),
"could not reload agent daemon, unable to trigger restart") {
t.Fatalf("enrrol coms returned and unexpected error: %v", err)
}

assert.True(t, store.Called)
err = cmd.Execute(context.Background(), streams)
require.NoError(t, err)

require.True(t, store.Called)

config, err := readConfig(store.Content)

assert.NoError(t, err)
assert.Equal(t, "my-access-api-key", config.AccessAPIKey)
assert.Equal(t, host, config.Client.Host)
require.NoError(t, err)
require.Equal(t, "my-access-api-key", config.AccessAPIKey)
require.Equal(t, host, config.Client.Host)
},
))

Expand Down Expand Up @@ -296,24 +275,16 @@ func TestEnroll(t *testing.T) {
require.NoError(t, err)

streams, _, _, _ := cli.NewTestingIOStreams()
ctx, cancel := context.WithTimeout(context.Background(), 1*time.Minute)
defer cancel()
err = cmd.Execute(ctx, streams)

if err != nil &&
// There is no agent running, therefore nothing to be restarted.
// However, this will cause the Enroll command to return an error
// which we'll ignore here.
!strings.Contains(err.Error(),
"could not reload agent daemon, unable to trigger restart") {
t.Fatalf("enrrol coms returned and unexpected error: %v", err)
}

assert.True(t, store.Called)
err = cmd.Execute(context.Background(), streams)
require.NoError(t, err)

require.True(t, store.Called)

config, err := readConfig(store.Content)

require.NoError(t, err)
assert.Equal(t, "my-access-api-key", config.AccessAPIKey)
assert.Equal(t, host, config.Client.Host)
require.Equal(t, "my-access-api-key", config.AccessAPIKey)
require.Equal(t, host, config.Client.Host)
},
))

Expand Down
4 changes: 1 addition & 3 deletions internal/pkg/agent/cmd/install.go
Original file line number Diff line number Diff line change
Expand Up @@ -154,7 +154,7 @@ func installCmd(streams *cli.IOStreams, cmd *cobra.Command) error {
return fmt.Errorf("problem reading prompt response")
}
if url == "" {
fmt.Fprintln(streams.Out, "Enrollment cancelled because no URL was provided.")
fmt.Fprintf(streams.Out, "Enrollment cancelled because no URL was provided.\n")
return nil
}
}
Expand Down Expand Up @@ -224,8 +224,6 @@ func installCmd(streams *cli.IOStreams, cmd *cobra.Command) error {
}
}()
}

fmt.Fprintln(streams.Out, "Elastic Agent successfully installed, starting enrollment.")
}

if enroll {
Expand Down
32 changes: 12 additions & 20 deletions internal/pkg/agent/install/perms_unix.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@ package install

import (
"errors"
"fmt"
"io/fs"
"os"
"path/filepath"
Expand All @@ -19,26 +18,19 @@ func fixPermissions(topPath string) error {
return recursiveRootPermissions(topPath)
}

func recursiveRootPermissions(root string) error {
return filepath.Walk(root, func(path string, info fs.FileInfo, err error) error {
if errors.Is(err, fs.ErrNotExist) {
func recursiveRootPermissions(path string) error {
return filepath.Walk(path, func(name string, info fs.FileInfo, err error) error {
if err == nil {
// all files should be owned by root:root
err = os.Chown(name, 0, 0)
if err != nil {
return err
}
// remove any world permissions from the file
err = os.Chmod(name, info.Mode().Perm()&0770)
} else if errors.Is(err, fs.ErrNotExist) {
return nil
}
if err != nil {
return fmt.Errorf("walk on %q failed: %w", path, err)
}

// all files should be owned by root:root
err = os.Chown(path, 0, 0)
if err != nil {
return fmt.Errorf("could not fix ownership of %q: %w", path, err)
}
// remove any world permissions from the file
err = os.Chmod(path, info.Mode().Perm()&0770)
if err != nil {
return fmt.Errorf("could not fix permissions of %q: %w", path, err)
}

return nil
return err
})
}

0 comments on commit 99b14c8

Please sign in to comment.