From cf04909f8989b0fcdb2cc6daf545869fa4ee532a Mon Sep 17 00:00:00 2001 From: James Otting Date: Mon, 27 Jan 2025 12:19:31 -0600 Subject: [PATCH 1/6] Squashed changes for rebase --- Makefile | 2 +- subsystems/viamagent/viamagent.go => agent.go | 63 +- cmd/provisioning-client/main.go | 4 +- cmd/viam-agent/main.go | 116 ++-- examples/agent-config.jsonc | 51 ++ go.mod | 9 +- go.sum | 35 +- manager.go | 344 +++++----- subsystem.go | 616 ------------------ .../{provisioning => networking}/connstate.go | 2 +- subsystems/networking/definitions.go | 285 ++++++++ .../generators.go | 7 +- .../{provisioning => networking}/grpc.go | 2 +- .../networking.go} | 136 ++-- .../networkmanager.go | 63 +- .../networkstate.go | 2 +- .../{provisioning => networking}/portal.go | 2 +- .../{provisioning => networking}/scanning.go | 4 +- .../{provisioning => networking}/setup.go | 4 +- .../templates/base.html | 0 .../templates/index.html | 0 subsystems/provisioning/definitions.go | 484 -------------- subsystems/registry/registry.go | 51 -- subsystems/subsystems.go | 7 +- subsystems/syscfg/logging.go | 32 +- subsystems/syscfg/syscfg.go | 73 +-- subsystems/syscfg/upgrades.go | 25 +- subsystems/viamserver/viamserver.go | 167 ++--- utils/config.go | 426 ++++++++++++ utils/config_old.go | 105 +++ utils/config_test.go | 71 ++ logger.go => utils/logger.go | 2 +- logger_test.go => utils/logger_test.go | 2 +- utils.go => utils/utils.go | 27 +- version_control.go | 251 +++++++ .../viam-agent.service => viam-agent.service | 0 36 files changed, 1636 insertions(+), 1834 deletions(-) rename subsystems/viamagent/viamagent.go => agent.go (72%) create mode 100644 examples/agent-config.jsonc delete mode 100644 subsystem.go rename subsystems/{provisioning => networking}/connstate.go (99%) create mode 100644 subsystems/networking/definitions.go rename subsystems/{provisioning => networking}/generators.go (94%) rename subsystems/{provisioning => networking}/grpc.go (99%) rename 
subsystems/{provisioning/provisioning.go => networking/networking.go} (70%) rename subsystems/{provisioning => networking}/networkmanager.go (91%) rename subsystems/{provisioning => networking}/networkstate.go (99%) rename subsystems/{provisioning => networking}/portal.go (99%) rename subsystems/{provisioning => networking}/scanning.go (99%) rename subsystems/{provisioning => networking}/setup.go (98%) rename subsystems/{provisioning => networking}/templates/base.html (100%) rename subsystems/{provisioning => networking}/templates/index.html (100%) delete mode 100644 subsystems/provisioning/definitions.go delete mode 100644 subsystems/registry/registry.go create mode 100644 utils/config.go create mode 100644 utils/config_old.go create mode 100644 utils/config_test.go rename logger.go => utils/logger.go (99%) rename logger_test.go => utils/logger_test.go (99%) rename utils.go => utils/utils.go (93%) create mode 100644 version_control.go rename subsystems/viamagent/viam-agent.service => viam-agent.service (100%) diff --git a/Makefile b/Makefile index de766b8..35edd59 100644 --- a/Makefile +++ b/Makefile @@ -33,7 +33,7 @@ arm64: amd64: make GOARCH=amd64 -bin/viam-agent-$(PATH_VERSION)-$(LINUX_ARCH): go.* *.go */*.go */*/*.go subsystems/viamagent/*.service Makefile +bin/viam-agent-$(PATH_VERSION)-$(LINUX_ARCH): go.* *.go */*.go */*/*.go *.service Makefile go build -o $@ -trimpath -tags $(TAGS) -ldflags $(LDFLAGS) ./cmd/viam-agent/main.go test "$(PATH_VERSION)" != "custom" && cp $@ bin/viam-agent-stable-$(LINUX_ARCH) || true diff --git a/subsystems/viamagent/viamagent.go b/agent.go similarity index 72% rename from subsystems/viamagent/viamagent.go rename to agent.go index 3f05e70..af23610 100644 --- a/subsystems/viamagent/viamagent.go +++ b/agent.go @@ -1,11 +1,10 @@ -// Package viamagent is the subsystem for the viam-agent itself. It contains code to install/update the systemd service as well. -package viamagent +// Package agent is the viam-agent itself. 
It contains code to install/update the systemd service as well. +package agent import ( "context" _ "embed" "errors" - "fmt" "io/fs" "os" "os/exec" @@ -13,19 +12,11 @@ import ( "strings" errw "github.com/pkg/errors" - "github.com/viamrobotics/agent" - "github.com/viamrobotics/agent/subsystems" - "github.com/viamrobotics/agent/subsystems/registry" - pb "go.viam.com/api/app/agent/v1" + "github.com/viamrobotics/agent/utils" "go.viam.com/rdk/logging" ) -func init() { - registry.Register(subsysName, NewSubsystem) -} - const ( - subsysName = "viam-agent" serviceFileDir = "/usr/local/lib/systemd/system" fallbackFileDir = "/etc/systemd/system" serviceFileName = "viam-agent.service" @@ -40,49 +31,23 @@ var ( serviceFileContents []byte ) -type agentSubsystem struct{} - -func NewSubsystem(ctx context.Context, logger logging.Logger, updateConf *pb.DeviceSubsystemConfig) (subsystems.Subsystem, error) { - return agent.NewAgentSubsystem(ctx, subsysName, logger, &agentSubsystem{}) -} - -// Start does nothing (we're already running as we ARE the agent.) -func (a *agentSubsystem) Start(ctx context.Context) error { - return nil -} - -// Stop does nothing (special logic elsewhere handles self-restart.) -func (a *agentSubsystem) Stop(ctx context.Context) error { - return nil -} - -// HealthCheck does nothing (we're obviously runnning as we are the agent.) -func (a *agentSubsystem) HealthCheck(ctx context.Context) error { - return nil -} - -// Update here handles the post-update installation of systemd files and the like. -// The actual update check and download is done in the wrapper (agent.AgentSubsystem). -func (a *agentSubsystem) Update(ctx context.Context, cfg *pb.DeviceSubsystemConfig, newVersion bool) (bool, error) { - if !newVersion { - return false, nil - } - - expectedPath := filepath.Join(agent.ViamDirs["bin"], subsysName) +// InstallNewVersion runs the newly downloaded binary's Install() for installation of systemd files and the like. 
+func InstallNewVersion(ctx context.Context, logger logging.Logger) (bool, error) { + expectedPath := filepath.Join(utils.ViamDirs["bin"], SubsystemName) // Run the newly updated version to install systemd and other service files. //nolint:gosec cmd := exec.Command(expectedPath, "--install") output, err := cmd.CombinedOutput() + logger.Info("running viam-agent --install for new version") + logger.Info(output) if err != nil { return false, errw.Wrapf(err, "running post install step %s", output) } - //nolint:forbidigo - fmt.Print(string(output)) - return true, nil } +// Install is directly executed from main() when --install is passed. func Install(logger logging.Logger) error { // Check for systemd cmd := exec.Command("systemctl", "--version") @@ -92,18 +57,18 @@ func Install(logger logging.Logger) error { } // Create/check required folder structure exists. - if err := agent.InitPaths(); err != nil { + if err := utils.InitPaths(); err != nil { return err } // If this is a brand new install, we want to symlink ourselves into place temporarily. - expectedPath := filepath.Join(agent.ViamDirs["bin"], subsysName) + expectedPath := filepath.Join(utils.ViamDirs["bin"], SubsystemName) curPath, err := os.Executable() if err != nil { return errw.Wrap(err, "getting path to self") } - isSelf, err := agent.CheckIfSame(curPath, expectedPath) + isSelf, err := utils.CheckIfSame(curPath, expectedPath) if err != nil { return errw.Wrap(err, "checking if installed viam-agent is myself") } @@ -129,7 +94,7 @@ func Install(logger logging.Logger) error { logger.Infof("writing systemd service file to %s", serviceFilePath) - newFile, err := agent.WriteFileIfNew(serviceFilePath, serviceFileContents) + newFile, err := utils.WriteFileIfNew(serviceFilePath, serviceFileContents) if err != nil { return errw.Wrapf(err, "writing systemd service file %s", serviceFilePath) } @@ -171,7 +136,7 @@ func Install(logger logging.Logger) error { logger.Info("Install complete. 
Please (re)start the service with 'systemctl restart viam-agent' when ready.") - return errors.Join(agent.SyncFS("/etc"), agent.SyncFS(serviceFilePath), agent.SyncFS(agent.ViamDirs["viam"])) + return errors.Join(utils.SyncFS("/etc"), utils.SyncFS(serviceFilePath), utils.SyncFS(utils.ViamDirs["viam"])) } func inSystemdPath(path string, logger logging.Logger) bool { diff --git a/cmd/provisioning-client/main.go b/cmd/provisioning-client/main.go index 7394376..4958900 100644 --- a/cmd/provisioning-client/main.go +++ b/cmd/provisioning-client/main.go @@ -7,7 +7,7 @@ import ( "strings" "github.com/jessevdk/go-flags" - "github.com/viamrobotics/agent/subsystems/provisioning" + "github.com/viamrobotics/agent/subsystems/networking" pb "go.viam.com/api/provisioning/v1" "google.golang.org/grpc" "google.golang.org/grpc/credentials/insecure" @@ -134,7 +134,7 @@ func SetDeviceCreds(ctx context.Context, client pb.ProvisioningServiceClient, id func SetWifiCreds(ctx context.Context, client pb.ProvisioningServiceClient, ssid, psk string) { req := &pb.SetNetworkCredentialsRequest{ - Type: provisioning.NetworkTypeWifi, + Type: networking.NetworkTypeWifi, Ssid: ssid, Psk: psk, } diff --git a/cmd/viam-agent/main.go b/cmd/viam-agent/main.go index 25cd3e3..8b9a0fc 100644 --- a/cmd/viam-agent/main.go +++ b/cmd/viam-agent/main.go @@ -19,12 +19,11 @@ import ( "github.com/nightlyone/lockfile" "github.com/pkg/errors" "github.com/viamrobotics/agent" - "github.com/viamrobotics/agent/subsystems/provisioning" + "github.com/viamrobotics/agent/subsystems/networking" _ "github.com/viamrobotics/agent/subsystems/syscfg" - "github.com/viamrobotics/agent/subsystems/viamagent" - "github.com/viamrobotics/agent/subsystems/viamserver" + "github.com/viamrobotics/agent/utils" "go.viam.com/rdk/logging" - "go.viam.com/utils" + goutils "go.viam.com/utils" ) var ( @@ -45,14 +44,14 @@ func main() { //nolint:lll var opts struct { - Config string `default:"/etc/viam.json" description:"Path to config file" 
long:"config" short:"c"` - ProvisioningConfig string `default:"/etc/viam-provisioning.json" description:"Path to provisioning (customization) config file" long:"provisioning" short:"p"` - Debug bool `description:"Enable debug logging (agent only)" env:"VIAM_AGENT_DEBUG" long:"debug" short:"d"` - Fast bool `description:"Enable fast start mode" env:"VIAM_AGENT_FAST_START" long:"fast" short:"f"` - Help bool `description:"Show this help message" long:"help" short:"h"` - Version bool `description:"Show version" long:"version" short:"v"` - Install bool `description:"Install systemd service" long:"install"` - DevMode bool `description:"Allow non-root and non-service" env:"VIAM_AGENT_DEVMODE" long:"dev-mode"` + Config string `default:"/etc/viam.json" description:"Path to connection config file" long:"config" short:"c"` + DefaultsConfig string `default:"/etc/viam-defaults.json" description:"Path to device/manufacturer defaults file" long:"defaults"` + Debug bool `description:"Enable debug logging (agent only)" env:"VIAM_AGENT_DEBUG" long:"debug" short:"d"` + UpdateFirst bool `description:"Update versions before starting" env:"VIAM_AGENT_WAIT_FOR_UPDATE" long:"wait" short:"w"` + Help bool `description:"Show this help message" long:"help" short:"h"` + Version bool `description:"Show version" long:"version" short:"v"` + Install bool `description:"Install systemd service" long:"install"` + DevMode bool `description:"Allow non-root and non-service" env:"VIAM_AGENT_DEVMODE" long:"dev-mode"` } parser := flags.NewParser(&opts, flags.IgnoreUnknown) @@ -71,14 +70,19 @@ func main() { if opts.Version { //nolint:forbidigo - fmt.Printf("Version: %s\nGit Revision: %s\n", agent.GetVersion(), agent.GetRevision()) + fmt.Printf("Version: %s\nGit Revision: %s\n", utils.GetVersion(), utils.GetRevision()) return } if opts.Debug { + utils.CLIDebug = true globalLogger.SetLevel(logging.DEBUG) } + if opts.UpdateFirst { + utils.CLIWaitForUpdateCheck = true + } + // need to be root to go any further 
than this curUser, err := user.Current() exitIfError(err) @@ -89,24 +93,24 @@ func main() { } if opts.Install { - exitIfError(viamagent.Install(globalLogger)) + exitIfError(agent.Install(globalLogger)) return } if !opts.DevMode { // confirm that we're running from a proper install - if !strings.HasPrefix(os.Args[0], agent.ViamDirs["viam"]) { + if !strings.HasPrefix(os.Args[0], utils.ViamDirs["viam"]) { //nolint:forbidigo fmt.Printf("viam-agent is intended to be run as a system service and installed in %s.\n"+ "Please install with '%s --install' and then start the service with 'systemctl start viam-agent'\n"+ "Note you may need to preface the above commands with 'sudo' if you are not currently root.\n", - agent.ViamDirs["viam"], os.Args[0]) + utils.ViamDirs["viam"], os.Args[0]) return } } // set up folder structure - exitIfError(agent.InitPaths()) + exitIfError(utils.InitPaths()) // use a lockfile to prevent running two agents on the same machine pidFile, err := getLock() @@ -117,34 +121,39 @@ func main() { } }() - // pass the provisioning path arg to the subsystem - absProvConfigPath, err := filepath.Abs(opts.ProvisioningConfig) + utils.DefaultsFilePath, err = filepath.Abs(opts.DefaultsConfig) exitIfError(err) - provisioning.ProvisioningConfigFilePath = absProvConfigPath - globalLogger.Infof("provisioning config file path: %s", absProvConfigPath) + globalLogger.Infof("manufacturer defaults file path: %s", utils.DefaultsFilePath) - // tie the manager config to the viam-server config - absConfigPath, err := filepath.Abs(opts.Config) + utils.AppConfigFilePath, err = filepath.Abs(opts.Config) exitIfError(err) - viamserver.ConfigFilePath = absConfigPath - provisioning.AppConfigFilePath = absConfigPath - globalLogger.Infof("config file path: %s", absConfigPath) + globalLogger.Infof("connection config file path: %s", utils.AppConfigFilePath) - // main manager structure - manager, err := agent.NewManager(ctx, globalLogger) + cfg, err := utils.LoadConfigFromCache() 
exitIfError(err) - err = manager.LoadConfig(absConfigPath) + cfg = utils.ApplyCLIArgs(cfg) + + // main manager structure + manager := agent.NewManager(ctx, globalLogger, cfg) + + err = manager.LoadAppConfig() //nolint:nestif if err != nil { + if cfg.AdvancedSettings.DisableNetworkConfiguration { + globalLogger.Errorf("Cannot read %s and network configuration is disabled. Please correct and restart viam-agent.", utils.AppConfigFilePath) + manager.CloseAll() + return + } + // If the local /etc/viam.json config is corrupted, invalid, or missing (due to a new install), we can get stuck here. // Rename the file (if it exists) and wait to provision a new one. if !errors.Is(err, fs.ErrNotExist) { - globalLogger.Error(errors.Wrapf(err, "reading %s", absConfigPath)) - globalLogger.Warn("renaming %s to %s.old", absConfigPath, absConfigPath) - if err := os.Rename(absConfigPath, absConfigPath+".old"); err != nil { + globalLogger.Error(errors.Wrapf(err, "reading %s", utils.AppConfigFilePath)) + globalLogger.Warn("renaming %s to %s.old", utils.AppConfigFilePath, utils.AppConfigFilePath) + if err := os.Rename(utils.AppConfigFilePath, utils.AppConfigFilePath+".old"); err != nil { // if we can't rename the file, we're up a creek, and it's fatal - globalLogger.Error(errors.Wrapf(err, "removing invalid config file %s", absConfigPath)) + globalLogger.Error(errors.Wrapf(err, "removing invalid config file %s", utils.AppConfigFilePath)) globalLogger.Error("unable to continue with provisioning, exiting") manager.CloseAll() return @@ -153,30 +162,27 @@ func main() { // We manually start the provisioning service to allow the user to update it and wait. // The user may be updating it soon, so better to loop quietly than to exit and let systemd keep restarting infinitely. 
- globalLogger.Infof("main config file %s missing or corrupt, entering provisioning mode", absConfigPath) - - if err := manager.StartSubsystem(ctx, provisioning.SubsysName); err != nil { - if errors.Is(err, agent.ErrSubsystemDisabled) { - globalLogger.Warn("provisioning subsystem disabled, please manually update /etc/viam.json and connect to internet") - } else { - globalLogger.Error(errors.Wrapf(err, - "could not start provisioning subsystem, please manually update /etc/viam.json and connect to internet")) - manager.CloseAll() - return - } + globalLogger.Infof("main config file %s missing or corrupt, entering provisioning mode", utils.AppConfigFilePath) + + if err := manager.StartSubsystem(ctx, networking.SubsysName); err != nil { + globalLogger.Error(errors.Wrapf(err, "could not start provisioning subsystem, please manually update /etc/viam.json and connect to internet")) + manager.CloseAll() + return } for { globalLogger.Warn("waiting for user provisioning") - if !utils.SelectContextOrWait(ctx, time.Second*10) { + if !goutils.SelectContextOrWait(ctx, time.Second*10) { manager.CloseAll() return } - if err := manager.LoadConfig(absConfigPath); err == nil { + if err := manager.LoadAppConfig(); err == nil { break } } } + + // valid viam.json from this point forward netAppender, err := manager.CreateNetAppender() if err != nil { globalLogger.Errorf("error creating NetAppender: %s", err) @@ -185,19 +191,9 @@ func main() { } // wait until now when we (potentially) have a network logger to record this - globalLogger.Infof("Viam Agent Version: %s Git Revision: %s", agent.GetVersion(), agent.GetRevision()) - - // if FastStart is set, skip updates and start viam-server immediately, then proceed as normal - var fastSuccess bool - if opts.Fast || viamserver.FastStart.Load() { - if err := manager.StartSubsystem(ctx, viamserver.SubsysName); err != nil { - globalLogger.Error(err) - } else { - fastSuccess = true - } - } + globalLogger.Infof("Viam Agent Version: %s Git Revision: 
%s", utils.GetVersion(), utils.GetRevision()) - if !fastSuccess { + if cfg.AdvancedSettings.WaitForUpdateCheck { // wait to be online timeoutCtx, cancel := context.WithTimeout(ctx, time.Minute) defer cancel() @@ -214,7 +210,7 @@ func main() { globalLogger.Error(errors.Wrap(err, "running 'systemctl is-active network-online.target'")) break } - if !utils.SelectContextOrWait(timeoutCtx, time.Second) { + if !goutils.SelectContextOrWait(timeoutCtx, time.Second) { break } } @@ -291,7 +287,7 @@ func exitIfError(err error) { } func getLock() (lockfile.Lockfile, error) { - pidFile, err := lockfile.New(filepath.Join(agent.ViamDirs["tmp"], "viam-agent.pid")) + pidFile, err := lockfile.New(filepath.Join(utils.ViamDirs["tmp"], "viam-agent.pid")) if err != nil { return "", errors.Wrap(err, "init lockfile") } diff --git a/examples/agent-config.jsonc b/examples/agent-config.jsonc new file mode 100644 index 0000000..18935a9 --- /dev/null +++ b/examples/agent-config.jsonc @@ -0,0 +1,51 @@ +{ +"agent": { + "version_control": { + // There's a "magic" parser here, so agent/viam-server can be one of three formats, version number, URL, or plain-word (release channel) + // Ex: `agent: stable` or `agent: "file:///home/test/myLocalBuild"` or `agent: "0.1.3"` + "agent": "stable", + "viam-server": "0.52.1" + }, + "advanced_settings": { + "debug": false, + "wait_for_update_check": false, + "viam_server_start_timeout_minutes": 10, + "disable_viam_server": false, + "disable_network_configuration": false, + "disable_system_configuration": false + }, + "network_configuration": { + "manufacturer": "viam", + "model": "custom", + "fragment_id": "", + "hotspot_interface": "", // determined dynamically as first discovered wifi interface if unset + "hotspot_prefix": "viam-setup", + "hotspot_password": "viamsetup", + "disable_captive_portal_redirect": false, + "turn_on_hotspot_if_wifi_has_no_internet": false, + "wifi_power_save": null, // boolean: null leaves the setting to system default, true or false 
enforces it + "offline_before_starting_hotspot_minutes": 2, + "user_idle_minutes": 5, + "retry_connection_timeout_minutes": 10, + "device_reboot_after_offline_minutes": 0 // does nothing when set to zero + }, + "additional_networks": { + "myNetwork1": { + "type": "", + "interface": "", + "ssid": "", + "psk": "", + "priority": 0, + "ipv4_address": "", + "ipv4_gateway": "", + "ipv4_dns": [], + "ipv4_route_metric": 0 + } + }, + "system_configuration": { + "logging_journald_system_max_use_megabytes": 512, // can be -1 to disable + "logging_journald_runtime_max_use_megabytes": 512, // can be -1 to disable + "os_auto_upgrade_type": "security" // can be "" to disable + } +} +} diff --git a/go.mod b/go.mod index 35a030d..b3bee8b 100644 --- a/go.mod +++ b/go.mod @@ -10,6 +10,7 @@ require ( github.com/nightlyone/lockfile v1.0.0 github.com/pkg/errors v0.9.1 github.com/sergeymakinen/go-systemdconf/v2 v2.0.2 + github.com/tidwall/jsonc v0.3.2 github.com/ulikunitz/xz v0.5.12 go.uber.org/zap v1.27.0 go.viam.com/api v0.1.357 @@ -23,7 +24,7 @@ require ( require ( github.com/cenkalti/backoff v2.2.1+incompatible // indirect - github.com/cenkalti/backoff/v4 v4.2.1 // indirect + github.com/cenkalti/backoff/v4 v4.3.0 // indirect github.com/davecgh/go-spew v1.1.1 // indirect github.com/decred/dcrd/dcrec/secp256k1/v4 v4.2.0 // indirect github.com/desertbit/timer v0.0.0-20180107155436-c41aec40b27f // indirect @@ -39,9 +40,9 @@ require ( github.com/google/go-cmp v0.6.0 // indirect github.com/gorilla/securecookie v1.1.1 // indirect github.com/grpc-ecosystem/go-grpc-middleware v1.4.0 // indirect - github.com/grpc-ecosystem/grpc-gateway/v2 v2.18.0 // indirect + github.com/grpc-ecosystem/grpc-gateway/v2 v2.20.0 // indirect github.com/improbable-eng/grpc-web v0.15.0 // indirect - github.com/klauspost/compress v1.17.2 // indirect + github.com/klauspost/compress v1.17.7 // indirect github.com/lestrrat-go/backoff/v2 v2.0.8 // indirect github.com/lestrrat-go/blackmagic v1.0.2 // indirect 
github.com/lestrrat-go/httpcc v1.0.1 // indirect @@ -93,3 +94,5 @@ require ( gopkg.in/yaml.v3 v3.0.1 // indirect nhooyr.io/websocket v1.8.9 // indirect ) + +replace go.viam.com/api => ../api/ diff --git a/go.sum b/go.sum index 59c6811..f35156c 100644 --- a/go.sum +++ b/go.sum @@ -13,7 +13,7 @@ cloud.google.com/go/auth v0.9.3/go.mod h1:7z6VY+7h3KUdRov5F1i8NDP5ZzWKYmEPO842Bg cloud.google.com/go/auth/oauth2adapt v0.2.4 h1:0GWE/FUsXhf6C+jAkWgYm7X9tK8cuEIfy19DBn6B6bY= cloud.google.com/go/auth/oauth2adapt v0.2.4/go.mod h1:jC/jOpwFP6JBxhB3P5Rr0a9HLMC/Pe3eaL4NmdvqPtc= cloud.google.com/go/bigquery v1.0.1/go.mod h1:i/xbL2UlR5RvWAURpBYZTtm/cXjCha9lbfbpx4poX+o= -cloud.google.com/go/compute v1.21.0 h1:JNBsyXVoOoNJtTQcnEY5uYpZIbeCTYIeDe0Xh1bySMk= +cloud.google.com/go/compute v1.19.1 h1:am86mquDUgjGNWxiGn+5PGLbmgiWXlE/yNWpIpNvuXY= cloud.google.com/go/compute/metadata v0.5.0 h1:Zr0eK8JbFv6+Wi4ilXAR8FJ3wyNdpxHKJNPos6LTZOY= cloud.google.com/go/compute/metadata v0.5.0/go.mod h1:aHnloV2TPI38yx4s9+wAZhHykWvVCfu7hQbF+9CWoiY= cloud.google.com/go/datastore v1.0.0/go.mod h1:LXYbyblFSglQ5pkeyhO+Qmw7ukd3C+pD7TKLgZqpHYE= @@ -74,8 +74,8 @@ github.com/casbin/casbin/v2 v2.1.2/go.mod h1:YcPU1XXisHhLzuxH9coDNf2FbKpjGlbCg3n github.com/cenkalti/backoff v2.2.1+incompatible h1:tNowT99t7UNflLxfYYSlKYsBpXdEet03Pg2g16Swow4= github.com/cenkalti/backoff v2.2.1+incompatible/go.mod h1:90ReRw6GdpyfrHakVjL/QHaoyV4aDUVVkXQJJJ3NXXM= github.com/cenkalti/backoff/v4 v4.1.1/go.mod h1:scbssz8iZGpm3xbr14ovlUdkxfGXNInqkPWOWmG2CLw= -github.com/cenkalti/backoff/v4 v4.2.1 h1:y4OZtCnogmCPw98Zjyt5a6+QwPLGkiQsYW5oUqylYbM= -github.com/cenkalti/backoff/v4 v4.2.1/go.mod h1:Y3VNntkOUPxTVeUxJ/G5vcM//AlwfmyYozVcomhLiZE= +github.com/cenkalti/backoff/v4 v4.3.0 h1:MyRJ/UdXutAwSAT+s3wNd7MfTIcy71VQueUuFK343L8= +github.com/cenkalti/backoff/v4 v4.3.0/go.mod h1:Y3VNntkOUPxTVeUxJ/G5vcM//AlwfmyYozVcomhLiZE= github.com/census-instrumentation/opencensus-proto v0.2.1/go.mod h1:f6KPmirojxKA12rnyqOA5BBL4O983OfeGPqjHWSTneU= 
github.com/cespare/xxhash v1.1.0/go.mod h1:XrSqR1VqqWfGrhpAt58auRo0WTKS1nRRg3ghfAqPWnc= github.com/cespare/xxhash/v2 v2.1.1/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= @@ -196,8 +196,6 @@ github.com/gogo/protobuf v1.3.2/go.mod h1:P1XiOD3dCwIKUDQYPy72D8LYyHL2YPYrpS2s69 github.com/golang-jwt/jwt/v4 v4.5.1 h1:JdqV9zKUdtaa9gdPlywC3aeoEsR681PlKC+4F5gQgeo= github.com/golang-jwt/jwt/v4 v4.5.1/go.mod h1:m21LjoU+eqJr34lmDMbreY2eSTRJ1cv77w39/MY0Ch0= github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b/go.mod h1:SBH7ygxi8pfUlaOkMMuAQtPIUF8ecWP5IEl/CR7VP2Q= -github.com/golang/glog v1.2.2 h1:1+mZ9upx1Dh6FmUTFR1naJ77miKiXgALjWOZ3NVFPmY= -github.com/golang/glog v1.2.2/go.mod h1:6AhwSGph0fcJtXVM/PEHPqZlFeoLxhs7/t5UDAwmO+w= github.com/golang/groupcache v0.0.0-20160516000752-02826c3e7903/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc= github.com/golang/groupcache v0.0.0-20190129154638-5b532d6fd5ef/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc= github.com/golang/groupcache v0.0.0-20190702054246-869f871628b6/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc= @@ -296,8 +294,8 @@ github.com/grpc-ecosystem/go-grpc-middleware v1.4.0/go.mod h1:g5qyo/la0ALbONm6Vb github.com/grpc-ecosystem/go-grpc-prometheus v1.2.0/go.mod h1:8NvIoxWQoOIhqOTXgfV/d3M/q6VIi02HzZEHgUlZvzk= github.com/grpc-ecosystem/grpc-gateway v1.9.0/go.mod h1:vNeuVxBJEsws4ogUvrchl83t/GYV9WGTSLVdBhOQFDY= github.com/grpc-ecosystem/grpc-gateway v1.9.5/go.mod h1:vNeuVxBJEsws4ogUvrchl83t/GYV9WGTSLVdBhOQFDY= -github.com/grpc-ecosystem/grpc-gateway/v2 v2.18.0 h1:RtRsiaGvWxcwd8y3BiRZxsylPT8hLWZ5SPcfI+3IDNk= -github.com/grpc-ecosystem/grpc-gateway/v2 v2.18.0/go.mod h1:TzP6duP4Py2pHLVPPQp42aoYI92+PCrVotyR5e8Vqlk= +github.com/grpc-ecosystem/grpc-gateway/v2 v2.20.0 h1:bkypFPDjIYGfCYD5mRBvpqxfYX1YCS1PXdKYWi8FsN0= +github.com/grpc-ecosystem/grpc-gateway/v2 v2.20.0/go.mod h1:P+Lt/0by1T8bfcF3z737NnSbmxQAppXMRziHUxPOC8k= github.com/hashicorp/consul/api v1.1.0/go.mod 
h1:VmuI/Lkw1nC05EYQWNKwWGbkg+FbDBtguAZLlVdkD9Q= github.com/hashicorp/consul/api v1.3.0/go.mod h1:MmDNSzIMUjNpY/mQ398R4bk2FnqQLoPndWW5VkKPlCE= github.com/hashicorp/consul/sdk v0.1.1/go.mod h1:VKf9jXwCTEY1QZP2MOLRhb5i/I/ssyNV1vwHyQBF0x8= @@ -354,8 +352,8 @@ github.com/klauspost/compress v1.10.3/go.mod h1:aoV0uJVorq1K+umq18yTdKaF57EivdYs github.com/klauspost/compress v1.10.7/go.mod h1:aoV0uJVorq1K+umq18yTdKaF57EivdYsUV+/s2qKfXs= github.com/klauspost/compress v1.11.0/go.mod h1:aoV0uJVorq1K+umq18yTdKaF57EivdYsUV+/s2qKfXs= github.com/klauspost/compress v1.11.7/go.mod h1:aoV0uJVorq1K+umq18yTdKaF57EivdYsUV+/s2qKfXs= -github.com/klauspost/compress v1.17.2 h1:RlWWUY/Dr4fL8qk9YG7DTZ7PDgME2V4csBXA8L/ixi4= -github.com/klauspost/compress v1.17.2/go.mod h1:ntbaceVETuRiXiv4DpjP66DpAtAGkEQskQzEyD//IeE= +github.com/klauspost/compress v1.17.7 h1:ehO88t2UGzQK66LMdE8tibEd1ErmzZjNEqWkjLAKQQg= +github.com/klauspost/compress v1.17.7/go.mod h1:Di0epgTjJY877eYKx5yC51cX2A2Vl2ibi7bDH9ttBbw= github.com/konsorten/go-windows-terminal-sequences v1.0.1/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ= github.com/konsorten/go-windows-terminal-sequences v1.0.3/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ= github.com/kr/fs v0.1.0/go.mod h1:FFnZGqtBN9Gxj7eW1uZ42v5BccTP0vu6NEaFoC2HwRg= @@ -570,8 +568,9 @@ github.com/quasilyte/regex/syntax v0.0.0-20200407221936-30656e2c4a95/go.mod h1:r github.com/rcrowley/go-metrics v0.0.0-20181016184325-3113b8401b8a/go.mod h1:bCqnVzQkZxMG4s8nGwiZ5l3QUCyqpo9Y+/ZMZ9VjZe4= github.com/rogpeppe/fastuuid v0.0.0-20150106093220-6724a57986af/go.mod h1:XWv6SoW27p1b0cqNHllgS5HIMJraePCO15w5zCzIWYg= github.com/rogpeppe/go-internal v1.3.0/go.mod h1:M8bDsm7K2OlrFYOpmOWEs/qY81heoFRclV5y23lUDJ4= -github.com/rogpeppe/go-internal v1.6.2 h1:aIihoIOHCiLZHxyoNQ+ABL4NKhFTgKLBdMLyEAh98m0= github.com/rogpeppe/go-internal v1.6.2/go.mod h1:xXDCJY+GAPziupqXw64V24skbSoqbTEfhy4qGm1nDQc= +github.com/rogpeppe/go-internal v1.13.1 h1:KvO1DLK/DRN07sQ1LQKScxyZJuNnedQ5/wKSR38lUII= 
+github.com/rogpeppe/go-internal v1.13.1/go.mod h1:uMEvuHeurkdAXX61udpOXGD/AzZDWNMNyH2VO9fmH0o= github.com/rs/cors v1.7.0/go.mod h1:gFx+x8UowdsKA9AchylcLynDq+nNFfI8FkUZdN/jGCU= github.com/rs/cors v1.11.1 h1:eU3gRzXLRK57F5rKMGMZURNdIG4EoAmX8k94r9wXWHA= github.com/rs/cors v1.11.1/go.mod h1:XyqrcTp5zjWr1wsJ8PIRZssZ8b/WMcMf71DJnit4EMU= @@ -642,6 +641,8 @@ github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8 github.com/subosito/gotenv v1.2.0/go.mod h1:N0PQaV/YGNqwC0u51sEeR/aUtSLEXKX9iv69rRypqCw= github.com/tdakkota/asciicheck v0.0.0-20200416200610-e657995f937b/go.mod h1:yHp0ai0Z9gUljN3o0xMhYJnH/IcvkdTBOX2fmJ93JEM= github.com/tetafro/godot v1.4.4/go.mod h1:FVDd4JuKliW3UgjswZfJfHq4vAx0bD/Jd5brJjGeaz4= +github.com/tidwall/jsonc v0.3.2 h1:ZTKrmejRlAJYdn0kcaFqRAKlxxFIC21pYq8vLa4p2Wc= +github.com/tidwall/jsonc v0.3.2/go.mod h1:dw+3CIxqHi+t8eFSpzzMlcVYxKp08UP5CD8/uSFCyJE= github.com/timakin/bodyclose v0.0.0-20200424151742-cb6215831a94/go.mod h1:Qimiffbc6q9tBWlVV6x0P9sat/ao1xEkREYPPj9hphk= github.com/tmc/grpc-websocket-proxy v0.0.0-20170815181823-89b8d40f7ca8/go.mod h1:ncp9v5uamzpCO7NfCPTXjqaC+bZgJeR0sMTm6dMHP7U= github.com/tmc/grpc-websocket-proxy v0.0.0-20190109142713-0ad062ec5ee5/go.mod h1:ncp9v5uamzpCO7NfCPTXjqaC+bZgJeR0sMTm6dMHP7U= @@ -697,12 +698,12 @@ go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.5 go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.54.0/go.mod h1:B9yO6b04uB80CzjedvewuqDhxJxi11s7/GtiGa8bAjI= go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.54.0 h1:TT4fX+nBOA/+LUkobKGW1ydGcn+G3vRw9+g5HwCphpk= go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.54.0/go.mod h1:L7UH0GbB0p47T4Rri3uHjbpCFYrVrwc1I25QhNPiGK8= -go.opentelemetry.io/otel v1.29.0 h1:PdomN/Al4q/lN6iBJEN3AwPvUiHPMlt93c8bqTG5Llw= -go.opentelemetry.io/otel v1.29.0/go.mod h1:N/WtXPs1CNCUEx+Agz5uouwCba+i+bJGFicT8SR4NP8= -go.opentelemetry.io/otel/metric v1.29.0 
h1:vPf/HFWTNkPu1aYeIsc98l4ktOQaL6LeSoeV2g+8YLc= -go.opentelemetry.io/otel/metric v1.29.0/go.mod h1:auu/QWieFVWx+DmQOUMgj0F8LHWdgalxXqvp7BII/W8= -go.opentelemetry.io/otel/trace v1.29.0 h1:J/8ZNK4XgR7a21DZUAsbF8pZ5Jcw1VhACmnYt39JTi4= -go.opentelemetry.io/otel/trace v1.29.0/go.mod h1:eHl3w0sp3paPkYstJOmAimxhiFXPg+MMTlEh3nsQgWQ= +go.opentelemetry.io/otel v1.31.0 h1:NsJcKPIW0D0H3NgzPDHmo0WW6SptzPdqg/L1zsIm2hY= +go.opentelemetry.io/otel v1.31.0/go.mod h1:O0C14Yl9FgkjqcCZAsE053C13OaddMYr/hz6clDkEJE= +go.opentelemetry.io/otel/metric v1.31.0 h1:FSErL0ATQAmYHUIzSezZibnyVlft1ybhy4ozRPcF2fE= +go.opentelemetry.io/otel/metric v1.31.0/go.mod h1:C3dEloVbLuYoX41KpmAhOqNriGbA+qqH6PQ5E5mUfnY= +go.opentelemetry.io/otel/trace v1.31.0 h1:ffjsj1aRouKewfr85U2aGagJ46+MvodynlQ1HYdmJys= +go.opentelemetry.io/otel/trace v1.31.0/go.mod h1:TXZkRk7SM2ZQLtR6eoAWQFIHPvzQ06FJAsO1tJg480A= go.uber.org/atomic v1.3.2/go.mod h1:gD2HeocX3+yG+ygLZcrzQJaqmWj9AIm7n08wl/qW/PE= go.uber.org/atomic v1.4.0/go.mod h1:gD2HeocX3+yG+ygLZcrzQJaqmWj9AIm7n08wl/qW/PE= go.uber.org/atomic v1.5.0/go.mod h1:sABNBOSYdrvTF6hTgEIbc7YasKWGhgEQZyfxyTvoXHQ= @@ -725,8 +726,6 @@ go.uber.org/zap v1.18.1/go.mod h1:xg/QME4nWcxGxrpdeYfq7UvYrLh66cuVKdrbD1XF/NI= go.uber.org/zap v1.23.0/go.mod h1:D+nX8jyLsMHMYrln8A0rJjFt/T/9/bGgIhAqxv5URuY= go.uber.org/zap v1.27.0 h1:aJMhYGrd5QSmlpLMr2MftRKl7t8J8PTZPA732ud/XR8= go.uber.org/zap v1.27.0/go.mod h1:GB2qFLM7cTU87MWRP2mPIjqfIDnGu+VIO4V/SdhGo2E= -go.viam.com/api v0.1.357 h1:L9LBYbaH0imv/B+mVxqtSgClIl4flzjLV6LclfnD9Nc= -go.viam.com/api v0.1.357/go.mod h1:5lpVRxMsKFCaahqsnJfPGwJ9baoQ6PIKQu3lxvy6Wtw= go.viam.com/rdk v0.51.0 h1:1SFf4wVY5wNbXRDUC0GsBVmXikD74Tu5PHLJXngN8oA= go.viam.com/rdk v0.51.0/go.mod h1:MobIDjs3EFbwwmQE+b0oVdToUTguNMFIWdttzEGoDE4= go.viam.com/test v1.2.3 h1:tT2QqthC2BL2tiloUC2T1AIwuLILyMRx8mmxunN+cT4= diff --git a/manager.go b/manager.go index 26a0ab9..cfa07df 100644 --- a/manager.go +++ b/manager.go @@ -1,13 +1,12 @@ +// Package agent contains the public interfaces, functions, 
consts, and vars for the viam-server agent. package agent import ( "context" "encoding/json" "errors" - "io/fs" "net/url" "os" - "path/filepath" "regexp" "runtime" "runtime/debug" @@ -16,8 +15,12 @@ import ( "time" errw "github.com/pkg/errors" + "github.com/tidwall/jsonc" "github.com/viamrobotics/agent/subsystems" - "github.com/viamrobotics/agent/subsystems/registry" + "github.com/viamrobotics/agent/subsystems/networking" + "github.com/viamrobotics/agent/subsystems/syscfg" + "github.com/viamrobotics/agent/subsystems/viamserver" + "github.com/viamrobotics/agent/utils" pb "go.viam.com/api/app/agent/v1" "go.viam.com/rdk/logging" "go.viam.com/utils/rpc" @@ -45,33 +48,46 @@ type Manager struct { logger logging.Logger netAppender *logging.NetAppender - subsystemsMu sync.Mutex - loadedSubsystems map[string]subsystems.Subsystem + cfgMu sync.RWMutex + cfg utils.AgentConfig + + viamServerNeedsRestart bool + + viamServer subsystems.Subsystem + networking subsystems.Subsystem + sysConfig subsystems.Subsystem + + cache *VersionCache } // NewManager returns a new Manager. 
-func NewManager(ctx context.Context, logger logging.Logger) (*Manager, error) { +func NewManager(ctx context.Context, logger logging.Logger, cfg utils.AgentConfig) *Manager { manager := &Manager{ - logger: logger, - loadedSubsystems: make(map[string]subsystems.Subsystem), + logger: logger, + cfg: cfg, + + viamServer: viamserver.NewSubsystem(ctx, logger, cfg), + networking: networking.NewSubsystem(ctx, logger, cfg), + sysConfig: syscfg.NewSubsystem(ctx, logger, cfg), + cache: NewVersionCache(logger), } - return manager, manager.LoadSubsystems(ctx) + return manager } -func (m *Manager) LoadConfig(cfgPath string) error { +func (m *Manager) LoadAppConfig() error { m.connMu.Lock() defer m.connMu.Unlock() - m.logger.Debugf("loading config: %s", cfgPath) - //nolint:gosec - b, err := os.ReadFile(cfgPath) + m.logger.Debugf("loading config: %s", utils.AppConfigFilePath) + + b, err := os.ReadFile(utils.AppConfigFilePath) if err != nil { return errw.Wrap(err, "reading config file") } cfg := make(map[string]map[string]string) - err = json.Unmarshal(b, &cfg) + err = json.Unmarshal(jsonc.ToJSON(b), &cfg) if err != nil { return errw.Wrap(err, "parsing config file") } @@ -113,15 +129,17 @@ func (m *Manager) CreateNetAppender() (*logging.NetAppender, error) { // StartSubsystem may be called early in startup when no cloud connectivity is configured. 
func (m *Manager) StartSubsystem(ctx context.Context, name string) error { defer m.handlePanic() - m.subsystemsMu.Lock() - defer m.subsystemsMu.Unlock() - subsys, ok := m.loadedSubsystems[name] - if !ok { - return errw.Errorf("unable to find subsystem %s", name) + switch name { + case viamserver.SubsysName: + return m.viamServer.Start(ctx) + case networking.SubsysName: + return m.networking.Start(ctx) + case syscfg.SubsysName: + return m.sysConfig.Start(ctx) + default: + return errw.Errorf("unknown subsystem: %s", name) } - - return subsys.Start(ctx) } // SelfUpdate is called early in startup to update the viam-agent subsystem before any other work is started. @@ -129,51 +147,100 @@ func (m *Manager) SelfUpdate(ctx context.Context) (bool, error) { if ctx.Err() != nil { return false, ctx.Err() } - m.subsystemsMu.Lock() - subsys, ok := m.loadedSubsystems[SubsystemName] - m.subsystemsMu.Unlock() - if !ok { - m.logger.Warnf("cannot load %s subsystem", SubsystemName) + + _, err := m.GetConfig(ctx) + if err != nil { + return false, err } - cfgMap, _, err := m.GetConfig(ctx) + + needRestart, err := m.cache.UpdateBinary(ctx, SubsystemName) if err != nil { return false, err } - cfg, ok := cfgMap[SubsystemName] - if !ok { - return false, errw.Errorf("no %s section found in config", SubsystemName) + + if needRestart { + return InstallNewVersion(ctx, m.logger) } - return subsys.Update(ctx, cfg) + return false, err } // SubsystemUpdates checks for updates to configured subsystems and restarts them as needed. 
-func (m *Manager) SubsystemUpdates(ctx context.Context, cfg map[string]*pb.DeviceSubsystemConfig) { +func (m *Manager) SubsystemUpdates(ctx context.Context) { defer m.handlePanic() if ctx.Err() != nil { return } - m.subsystemsMu.Lock() - defer m.subsystemsMu.Unlock() - // check updates and (re)start - for name, sub := range m.loadedSubsystems { - if ctx.Err() != nil { - return + m.cfgMu.Lock() + defer m.cfgMu.Unlock() + + // Agent + needRestart, err := m.cache.UpdateBinary(ctx, SubsystemName) + if err != nil { + m.logger.Error(err) + } + if needRestart { + m.logger.Info("viam-agent update complete, please restart using 'systemctl restart viam-agent'") + } + + // Viam Server + if m.cfg.AdvancedSettings.DisableViamServer { + if err := m.viamServer.Stop(ctx); err != nil { + m.logger.Error(err) } - cancelCtx, cancel := context.WithTimeout(ctx, time.Minute*5) - defer cancel() - restart, err := sub.Update(cancelCtx, cfg[name]) + } else { + needRestart, err := m.cache.UpdateBinary(ctx, viamserver.SubsysName) if err != nil { m.logger.Error(err) - continue } - if restart { - if err := sub.Stop(ctx); err != nil { + m.viamServer.Update(ctx, m.cfg) + + if needRestart || m.viamServerNeedsRestart { + if m.viamServer.(viamserver.RestartCheck).SafeToRestart(ctx) { + if err := m.viamServer.Stop(ctx); err != nil { + m.logger.Error(err) + } else { + m.viamServerNeedsRestart = false + } + } else { + m.viamServerNeedsRestart = true + } + } + if err := m.viamServer.Start(ctx); err != nil { + m.logger.Error(err) + } + } + + // System Configuration + if m.cfg.AdvancedSettings.DisableSystemConfiguration { + if err := m.sysConfig.Stop(ctx); err != nil { + m.logger.Error(err) + } + } else { + needRestart = m.sysConfig.Update(ctx, m.cfg) + if needRestart { + if err := m.sysConfig.Stop(ctx); err != nil { + m.logger.Error(err) + } + } + if err := m.sysConfig.Start(ctx); err != nil { + m.logger.Error(err) + } + } + + // Network + if m.cfg.AdvancedSettings.DisableNetworkConfiguration { + if err 
:= m.networking.Stop(ctx); err != nil { + m.logger.Error(err) + } + } else { + needRestart = m.networking.Update(ctx, m.cfg) + if needRestart { + if err := m.networking.Stop(ctx); err != nil { m.logger.Error(err) - continue } } - if err := sub.Start(ctx); err != nil && !errors.Is(err, ErrSubsystemDisabled) { + if err := m.networking.Start(ctx); err != nil { m.logger.Error(err) } } @@ -183,10 +250,20 @@ func (m *Manager) SubsystemUpdates(ctx context.Context, cfg map[string]*pb.Devic func (m *Manager) CheckUpdates(ctx context.Context) time.Duration { defer m.handlePanic() m.logger.Debug("Checking cloud for update") - cfg, interval, err := m.GetConfig(ctx) + interval, err := m.GetConfig(ctx) + + if interval < minimalCheckInterval { + interval = minimalCheckInterval + } + + if m.cfg.AdvancedSettings.Debug { + m.logger.SetLevel(logging.DEBUG) + } else { + m.logger.SetLevel(logging.INFO) + } // randomly fuzz the interval by +/- 5% - interval = fuzzTime(interval, 0.05) + interval = utils.FuzzTime(interval, 0.05) if err != nil { m.logger.Error(err) @@ -194,7 +271,7 @@ func (m *Manager) CheckUpdates(ctx context.Context) time.Duration { } // update and (re)start subsystems - m.SubsystemUpdates(ctx, cfg) + m.SubsystemUpdates(ctx) return interval } @@ -206,10 +283,12 @@ func (m *Manager) SubsystemHealthChecks(ctx context.Context) { return } m.logger.Debug("Starting health checks for all subsystems") - m.subsystemsMu.Lock() - defer m.subsystemsMu.Unlock() - for subsystemName, sub := range m.loadedSubsystems { + for subsystemName, sub := range map[string]subsystems.Subsystem{ + "viam-server": m.viamServer, + "sysconfig": m.sysConfig, + "networking": m.networking, + } { if ctx.Err() != nil { return } @@ -226,7 +305,9 @@ func (m *Manager) SubsystemHealthChecks(ctx context.Context) { if ctx.Err() != nil { return } - if err := sub.Start(ctx); err != nil && !errors.Is(err, ErrSubsystemDisabled) { + + // SMURF check if disabled! 
+ if err := sub.Start(ctx); err != nil && !errors.Is(err, utils.ErrSubsystemDisabled) { m.logger.Error(errw.Wrapf(err, "restarting subsystem %s", subsystemName)) } } else { @@ -240,10 +321,8 @@ func (m *Manager) CloseAll() { ctx, cancelFunc := context.WithTimeout(context.Background(), stopAllTimeout) defer cancelFunc() - m.subsystemsMu.Lock() - defer m.subsystemsMu.Unlock() // close all subsystems - for _, sub := range m.loadedSubsystems { + for _, sub := range []subsystems.Subsystem{m.viamServer, m.sysConfig, m.networking} { if err := sub.Stop(ctx); err != nil { m.logger.Error(err) } @@ -274,10 +353,15 @@ func (m *Manager) StartBackgroundChecks(ctx context.Context) { if ctx.Err() != nil { return } + m.logger.Debug("starting background checks") m.activeBackgroundWorkers.Add(1) go func() { - checkInterval := m.CheckUpdates(ctx) + checkInterval := minimalCheckInterval + if m.cfg.AdvancedSettings.WaitForUpdateCheck { + checkInterval = m.CheckUpdates(ctx) + } + timer := time.NewTimer(checkInterval) defer timer.Stop() defer m.activeBackgroundWorkers.Done() @@ -297,82 +381,6 @@ func (m *Manager) StartBackgroundChecks(ctx context.Context) { }() } -// LoadSubsystems runs at startup, before getting online. -func (m *Manager) LoadSubsystems(ctx context.Context) error { - if ctx.Err() != nil { - return ctx.Err() - } - m.subsystemsMu.Lock() - defer m.subsystemsMu.Unlock() - - cachedConfig, err := m.getCachedConfig() - if err != nil { - m.logger.Error(errw.Wrap(err, "getting cached config")) - } - m.processConfig(cachedConfig) - - for _, name := range registry.List() { - cfg, ok := cachedConfig[name] - if !ok { - cfg = &pb.DeviceSubsystemConfig{} - } - err := m.loadSubsystem(ctx, name, cfg) - if err != nil { - m.logger.Warnw("couldn't load subsystem", "name", name, "error", err) - } - } - - return nil -} - -// loadSubsystem needs to be called inside a lock. 
-func (m *Manager) loadSubsystem(ctx context.Context, name string, subCfg *pb.DeviceSubsystemConfig) error { - creator := registry.GetCreator(name) - if creator != nil { - sub, err := creator(ctx, m.logger, subCfg) - if err != nil { - return err - } - m.loadedSubsystems[name] = sub - return nil - } - return errw.Errorf("unknown subsystem name %s", name) -} - -// getCachedConfig returns a cached config, for when the cloud is not reachable. -func (m *Manager) getCachedConfig() (map[string]*pb.DeviceSubsystemConfig, error) { - // return a bare-minimum for self-update on new installs or for fallback - cachedConfig := map[string]*pb.DeviceSubsystemConfig{SubsystemName: {}} - - cacheFilePath := filepath.Join(ViamDirs["cache"], agentCachePath) - - cacheBytes, err := os.ReadFile(cacheFilePath) //nolint:gosec - if err != nil { - if errors.Is(err, fs.ErrNotExist) { - return cachedConfig, nil - } - return nil, errw.Wrap(err, "reading cached config") - } - - err = json.Unmarshal(cacheBytes, &cachedConfig) - if err != nil { - return nil, errw.Wrapf(err, "parsing cached config") - } - return cachedConfig, nil -} - -// saveCachedConfig saves a local copy of the config normally fetched from the cloud. -func (m *Manager) saveCachedConfig(cfg map[string]*pb.DeviceSubsystemConfig) error { - cacheFilePath := filepath.Join(ViamDirs["cache"], agentCachePath) - - cacheData, err := json.Marshal(cfg) - if err != nil { - return err - } - //nolint:gosec - return errors.Join(os.WriteFile(cacheFilePath, cacheData, 0o644), SyncFS(cacheFilePath)) -} - // dial establishes a connection to the cloud for grpc communication. // If the dial succeeds, a NetAppender will be attached to m.logger. func (m *Manager) dial(ctx context.Context) error { @@ -423,66 +431,46 @@ func (m *Manager) dial(ctx context.Context) error { return nil } -// process non-subsystem effects of a config (i.e. agent-specific stuff that needs to happen when loading cache and when updating). 
-func (m *Manager) processConfig(cfg map[string]*pb.DeviceSubsystemConfig) { - if agent, ok := cfg["viam-agent"]; ok { - if debugRaw, ok := agent.GetAttributes().AsMap()["debug"]; ok { - if debug, ok := debugRaw.(bool); !ok { - m.logger.Error("viam-agent debug attribute is present but is not a bool") - } else { - // note: if this is present (true or false, rather than missing) it overrides the CLI debug switch. - // if the user removes the `debug` attribute, we don't revert to the CLI debug switch state. (we ideally should). - // note: this assumes m.logger is the global logger shared by the other subsystems. - if debug { - m.logger.SetLevel(logging.DEBUG) - } else { - m.logger.SetLevel(logging.INFO) - } - } - } - } -} - -// GetConfig retrieves the configuration from the cloud, or returns a cached version if unable to communicate. -func (m *Manager) GetConfig(ctx context.Context) (map[string]*pb.DeviceSubsystemConfig, time.Duration, error) { +// GetConfig retrieves the configuration from the cloud. 
+func (m *Manager) GetConfig(ctx context.Context) (time.Duration, error) { if m.cloudConfig == nil { - return nil, 0, errors.New("can't GetConfig until successful LoadConfig") + return minimalCheckInterval, errors.New("can't GetConfig until successful LoadConfig") } timeoutCtx, cancelFunc := context.WithTimeout(ctx, defaultNetworkTimeout) defer cancelFunc() if err := m.dial(timeoutCtx); err != nil { m.logger.Error(errw.Wrapf(err, "fetching %s config", SubsystemName)) - conf, err := m.getCachedConfig() - return conf, minimalCheckInterval, err + return minimalCheckInterval, err } req := &pb.DeviceAgentConfigRequest{ - Id: m.cloudConfig.ID, - HostInfo: m.getHostInfo(), - SubsystemVersions: m.getSubsystemVersions(), + Id: m.cloudConfig.ID, + HostInfo: m.getHostInfo(), + VersionInfo: m.getVersions(), } resp, err := m.client.DeviceAgentConfig(timeoutCtx, req) if err != nil { m.logger.Error(errw.Wrapf(err, "fetching %s config", SubsystemName)) - conf, err := m.getCachedConfig() - return conf, minimalCheckInterval, err + return minimalCheckInterval, err } - err = m.saveCachedConfig(resp.GetSubsystemConfigs()) + cfg, err := utils.StackConfigs(resp) if err != nil { - m.logger.Error(errw.Wrap(err, "saving agent config to cache")) + m.logger.Error(errw.Wrap(err, "processing config")) } - m.processConfig(resp.GetSubsystemConfigs()) + if err := utils.SaveConfigToCache(cfg); err != nil { + m.logger.Error(err) + } - interval := resp.GetCheckInterval().AsDuration() + cfg = utils.ApplyCLIArgs(cfg) - if interval < minimalCheckInterval { - interval = minimalCheckInterval - } + m.cfgMu.Lock() + defer m.cfgMu.Unlock() + m.cfg = cfg - return resp.GetSubsystemConfigs(), interval, nil + return resp.GetCheckInterval().AsDuration(), nil } func (m *Manager) getHostInfo() *pb.HostInfo { @@ -527,13 +515,15 @@ func (m *Manager) getHostInfo() *pb.HostInfo { return pbInfo } -func (m *Manager) getSubsystemVersions() map[string]string { - m.subsystemsMu.Lock() - defer m.subsystemsMu.Unlock() - vers 
:= make(map[string]string) - for name, sys := range m.loadedSubsystems { - vers[name] = sys.Version() +func (m *Manager) getVersions() *pb.VersionInfo { + // SMURF TODO + vers := &pb.VersionInfo{ + AgentRunning: Version, + AgentInstalled: "", + ViamServerRunning: "", + ViamServerInstalled: "", } + return vers } diff --git a/subsystem.go b/subsystem.go deleted file mode 100644 index 013a795..0000000 --- a/subsystem.go +++ /dev/null @@ -1,616 +0,0 @@ -package agent - -import ( - "bytes" - "context" - "encoding/base64" - "encoding/json" - "errors" - "fmt" - "io/fs" - "os" - "os/exec" - "path" - "path/filepath" - "regexp" - "sync" - "syscall" - "time" - - errw "github.com/pkg/errors" - pb "go.viam.com/api/app/agent/v1" - "go.viam.com/rdk/logging" -) - -const ( - ShortFailTime = time.Second * 30 - StartTimeout = time.Minute - StopTermTimeout = time.Second * 30 - StopKillTimeout = time.Second * 10 -) - -var ErrSubsystemDisabled = errors.New("subsystem disabled") - -// BasicSubsystem is the minimal interface. -type BasicSubsystem interface { - // Start runs the subsystem - Start(ctx context.Context) error - - // Stop signals the subsystem to shutdown - Stop(ctx context.Context) error - - // HealthCheck reports if a subsystem is running correctly (it is restarted if not) - HealthCheck(ctx context.Context) error -} - -// updatable is if a wrapped subsystem has it's own (additional) update code to run. -type updatable interface { - Update(ctx context.Context, cfg *pb.DeviceSubsystemConfig, newVersion bool) (bool, error) -} - -// AgentSubsystem is a wrapper for the real subsystems, mostly allowing sharing of download/update code. -type AgentSubsystem struct { - mu sync.Mutex - CacheData *CacheData - startTime *time.Time - disable bool - - name string - logger logging.Logger - inner BasicSubsystem -} - -// CacheData stores VersionInfo and the current/previous versions for (TODO) rollback. 
-type CacheData struct { - CurrentVersion string `json:"current_version"` - PreviousVersion string `json:"previous_version"` - Versions map[string]*VersionInfo `json:"versions"` -} - -// VersionInfo records details about each version of a subsystem. -type VersionInfo struct { - Version string - URL string - DlPath string - DlSHA []byte - UnpackedPath string - UnpackedSHA []byte - SymlinkPath string - Installed time.Time - StartCount uint - LongFailCount uint - ShortFailCount uint -} - -// Version returns the running version. -func (s *AgentSubsystem) Version() string { - s.mu.Lock() - defer s.mu.Unlock() - if s.CacheData != nil { - return s.CacheData.CurrentVersion - } - return "" -} - -// Start starts the subsystem. -func (s *AgentSubsystem) Start(ctx context.Context) error { - s.mu.Lock() - defer s.mu.Unlock() - - if s.disable { - return ErrSubsystemDisabled - } - - info, ok := s.CacheData.Versions[s.CacheData.CurrentVersion] - if !ok { - s.CacheData.CurrentVersion = "unknown" - info = &VersionInfo{Version: s.CacheData.CurrentVersion} - s.CacheData.Versions[s.CacheData.CurrentVersion] = info - s.logger.Warnf("cache info not found for %s, version: %s", s.name, s.CacheData.CurrentVersion) - } - info.StartCount++ - start := time.Now() - s.startTime = &start - err := s.saveCache() - if err != nil { - return err - } - return s.inner.Start(ctx) -} - -// Stop stops the subsystem. -func (s *AgentSubsystem) Stop(ctx context.Context) error { - s.mu.Lock() - defer s.mu.Unlock() - s.startTime = nil - return s.inner.Stop(ctx) -} - -// HealthCheck calls the inner subsystem's HealthCheck() to verify, and logs failures/successes. 
-func (s *AgentSubsystem) HealthCheck(ctx context.Context) error { - s.mu.Lock() - defer s.mu.Unlock() - if s.disable { - return nil - } - err := s.inner.HealthCheck(ctx) - if err != nil { - if s.startTime == nil { - return err - } - failTime := time.Since(*s.startTime) - - info, ok := s.CacheData.Versions[s.CacheData.CurrentVersion] - if !ok { - return errors.Join(err, errw.Errorf("cache info not found for %s, version: %s", s.name, s.CacheData.CurrentVersion)) - } - - if failTime <= ShortFailTime { - info.ShortFailCount++ - } else { - info.LongFailCount++ - } - s.startTime = nil - - // TODO if shortfails exceed a threshold, revert to previous version. - - return errors.Join(err, s.saveCache()) - } - - return nil -} - -// NewAgentSubsystem returns a new wrapped subsystem. -func NewAgentSubsystem( - ctx context.Context, - name string, - logger logging.Logger, - subsys BasicSubsystem, -) (*AgentSubsystem, error) { - sub := &AgentSubsystem{name: name, logger: logger, inner: subsys} - err := sub.LoadCache() - if err != nil { - return nil, err - } - return sub, nil -} - -// LoadCache loads the cached data for the subsystem from disk. -func (s *AgentSubsystem) LoadCache() error { - s.mu.Lock() - defer s.mu.Unlock() - - cache := &CacheData{ - Versions: make(map[string]*VersionInfo), - } - - cacheFilePath := filepath.Join(ViamDirs["cache"], fmt.Sprintf("%s.json", s.name)) - //nolint:gosec - cacheBytes, err := os.ReadFile(cacheFilePath) - if err != nil { - if !errors.Is(err, fs.ErrNotExist) { - s.logger.Error(err) - } - } else { - err = json.Unmarshal(cacheBytes, cache) - if err != nil { - s.logger.Error(errw.Wrap(err, "parsing subsystem cache, using new defaults")) - s.CacheData = &CacheData{ - Versions: make(map[string]*VersionInfo), - } - return nil - } - } - - s.CacheData = cache - return nil -} - -// saveCache should only be run when protected by mutex locks. Use SaveCache() for normal use. 
-func (s *AgentSubsystem) saveCache() error { - cacheFilePath := filepath.Join(ViamDirs["cache"], fmt.Sprintf("%s.json", s.name)) - - cacheData, err := json.Marshal(s.CacheData) - if err != nil { - return err - } - //nolint:gosec - return errors.Join(os.WriteFile(cacheFilePath, cacheData, 0o644), SyncFS(cacheFilePath)) -} - -// SaveCache saves the cached data to disk. -func (s *AgentSubsystem) SaveCache() error { - s.mu.Lock() - defer s.mu.Unlock() - - return s.saveCache() -} - -// Update is the main function of the AgentSubsystem wrapper, as it's shared between subsystems. Returns true if a restart is needed. -// -//nolint:gocognit -func (s *AgentSubsystem) Update(ctx context.Context, cfg *pb.DeviceSubsystemConfig) (bool, error) { - s.mu.Lock() - defer s.mu.Unlock() - - var needRestart bool - - if s.disable != cfg.GetDisable() { - s.disable = cfg.GetDisable() - needRestart = true - if s.disable { - s.logger.Infof("%s %s", s.name, "disabled") - return true, nil - } else { - s.logger.Infof("%s %s", s.name, "enabled") - } - } - - updateInfo := cfg.GetUpdateInfo() - - // check if we already have the version given by the cloud - verData, ok := s.CacheData.Versions[updateInfo.GetVersion()] - //nolint:nestif - if ok && s.CacheData.CurrentVersion == updateInfo.GetVersion() { - // if a known version, make sure the symlink is correct - same, err := CheckIfSame(verData.DlPath, verData.SymlinkPath) - if err != nil { - return needRestart, err - } - if !same { - if err := ForceSymlink(verData.UnpackedPath, verData.SymlinkPath); err != nil { - return needRestart, err - } - } - - // check for matching shasum, which won't be available for pin_url - checkSum := updateInfo.GetSha256() - - // with pin_url, no SHA is available from the cloud, so we check the local copy for corruption and matching url. 
- if len(updateInfo.GetSha256()) <= 1 && verData.URL == updateInfo.GetUrl() { - checkSum = verData.UnpackedSHA - } - - shasum, err := GetFileSum(verData.UnpackedPath) - if err == nil && bytes.Equal(shasum, checkSum) { - // No update, but let the inner logic run if needed. - return s.tryInner(ctx, cfg, needRestart) - } - } - - // this is a new version, so instantiate the basics - if !ok { - verData = &VersionInfo{Version: updateInfo.GetVersion()} - s.CacheData.Versions[updateInfo.GetVersion()] = verData - s.logger.Infof("new version (%s) found for %s", verData.Version, s.name) - } - // always record the URL, it may be updated for "customURL" versions - verData.URL = updateInfo.GetUrl() - - // download and record the sha of the download itself - var err error - verData.DlPath, err = DownloadFile(ctx, updateInfo.GetUrl()) - if err != nil { - return needRestart, errw.Wrapf(err, "downloading %s subsystem", s.name) - } - verData.DlSHA, err = GetFileSum(verData.DlPath) - if err != nil { - return needRestart, errw.Wrap(err, "getting file shasum") - } - - // extract and verify sha of contents if it's a compressed file - if updateInfo.GetFormat() == pb.PackageFormat_PACKAGE_FORMAT_XZ || - updateInfo.GetFormat() == pb.PackageFormat_PACKAGE_FORMAT_XZ_EXECUTABLE { - verData.UnpackedPath, err = DecompressFile(verData.DlPath) - if err != nil { - return needRestart, errw.Wrapf(err, "decompressing %s subsystem", s.name) - } - } else { - verData.UnpackedPath = verData.DlPath - } - - shasum, err := GetFileSum(verData.UnpackedPath) - if err != nil { - return needRestart, errw.Wrap(err, "getting file shasum") - } - verData.UnpackedSHA = shasum - if len(updateInfo.GetSha256()) > 1 && !bytes.Equal(shasum, updateInfo.GetSha256()) { - //nolint:goerr113 - return needRestart, fmt.Errorf( - "sha256 (%s) of downloaded file (%s) does not match config (%s)", - base64.StdEncoding.EncodeToString(shasum), - verData.UnpackedPath, - base64.StdEncoding.EncodeToString(updateInfo.GetSha256()), - ) - } - 
- // chmod with execute permissions if the file is executable - if updateInfo.GetFormat() == pb.PackageFormat_PACKAGE_FORMAT_EXECUTABLE || - updateInfo.GetFormat() == pb.PackageFormat_PACKAGE_FORMAT_XZ_EXECUTABLE { - //nolint:gosec - if err := os.Chmod(verData.UnpackedPath, 0o755); err != nil { - return needRestart, err - } - } else { - //nolint:gosec - if err := os.Chmod(verData.UnpackedPath, 0o644); err != nil { - return needRestart, err - } - } - - // symlink the extracted file to bin - verData.SymlinkPath = path.Join(ViamDirs["bin"], updateInfo.GetFilename()) - if err = ForceSymlink(verData.UnpackedPath, verData.SymlinkPath); err != nil { - return needRestart, errw.Wrap(err, "creating symlink") - } - - // update current and previous versions - if s.CacheData.CurrentVersion != s.CacheData.PreviousVersion { - s.CacheData.PreviousVersion = s.CacheData.CurrentVersion - } - s.CacheData.CurrentVersion = updateInfo.GetVersion() - verData.Installed = time.Now() - - // if we made it here we performed an update and need to restart - s.logger.Infof("%s updated from %s to %s", s.name, s.CacheData.PreviousVersion, s.CacheData.CurrentVersion) - needRestart = true - - // record the cache - err = s.saveCache() - if err != nil { - return needRestart, err - } - - // if the subsystem has its own additional update code, run it - return s.tryInner(ctx, cfg, needRestart) -} - -func (s *AgentSubsystem) tryInner(ctx context.Context, cfg *pb.DeviceSubsystemConfig, newVersion bool) (bool, error) { - inner, ok := s.inner.(updatable) - if ok { - return inner.Update(ctx, cfg, newVersion) - } - - return newVersion, nil -} - -// InternalSubsystem is shared start/stop/update code between "internal" (not viam-server) subsystems. 
-type InternalSubsystem struct { - // only set during New - name string - cmdArgs []string - logger logging.Logger - cfgPath string - uploadAll bool - - // protected by mutex - mu sync.Mutex - cmd *exec.Cmd - running bool - shouldRun bool - lastExit int - exitChan chan struct{} - - // for blocking start/stop/check ops while another is in progress - startStopMu sync.Mutex -} - -func NewInternalSubsystem(name string, extraArgs []string, logger logging.Logger, uploadAll bool) (*InternalSubsystem, error) { - if name == "" { - return nil, errors.New("name cannot be empty") - } - if logger == nil { - return nil, errors.New("logger cannot be nil") - } - - cfgPath := path.Join(ViamDirs["etc"], name+".json") - - is := &InternalSubsystem{ - name: name, - cmdArgs: append([]string{"--config", cfgPath}, extraArgs...), - cfgPath: cfgPath, - logger: logger, - uploadAll: uploadAll, - } - return is, nil -} - -func (is *InternalSubsystem) Start(ctx context.Context) error { - is.startStopMu.Lock() - defer is.startStopMu.Unlock() - - is.mu.Lock() - - if is.running { - is.mu.Unlock() - return nil - } - if is.shouldRun { - is.logger.Warnf("Restarting %s after unexpected exit", is.name) - } else { - is.logger.Infof("Starting %s", is.name) - is.shouldRun = true - } - - stdio := NewMatchingLogger(is.logger, false, is.uploadAll) - stderr := NewMatchingLogger(is.logger, true, is.uploadAll) - - //nolint:gosec - is.cmd = exec.Command(path.Join(ViamDirs["bin"], is.name), is.cmdArgs...) 
- is.cmd.Dir = ViamDirs["viam"] - is.cmd.SysProcAttr = &syscall.SysProcAttr{Setpgid: true} - is.cmd.Stdout = stdio - is.cmd.Stderr = stderr - - // watch for this line in the logs to indicate successful startup - c, err := stdio.AddMatcher("checkStartup", regexp.MustCompile(`startup complete`), false) - if err != nil { - is.mu.Unlock() - return err - } - defer stdio.DeleteMatcher("checkStartup") - - err = is.cmd.Start() - if err != nil { - is.mu.Unlock() - return errw.Wrapf(err, "starting %s", is.name) - } - is.running = true - is.exitChan = make(chan struct{}) - - // must be unlocked before spawning goroutine - is.mu.Unlock() - go func() { - err := is.cmd.Wait() - is.mu.Lock() - defer is.mu.Unlock() - is.running = false - is.logger.Infof("%s exited", is.name) - if err != nil { - is.logger.Errorw("error while getting process status", "error", err) - } - if is.cmd.ProcessState != nil { - is.lastExit = is.cmd.ProcessState.ExitCode() - if is.lastExit != 0 { - is.logger.Errorw("non-zero exit code", "exit code", is.lastExit) - } - } - close(is.exitChan) - }() - - select { - case <-c: - is.logger.Infof("%s started", is.name) - return nil - case <-ctx.Done(): - return ctx.Err() - case <-time.After(StartTimeout): - return errw.New("startup timed out") - case <-is.exitChan: - return errw.New("startup failed") - } -} - -func (is *InternalSubsystem) Stop(ctx context.Context) error { - is.startStopMu.Lock() - defer is.startStopMu.Unlock() - - is.mu.Lock() - running := is.running - is.shouldRun = false - is.mu.Unlock() - - if !running { - return nil - } - - // interrupt early in startup - if is.cmd == nil { - return nil - } - - is.logger.Infof("Stopping %s", is.name) - - err := is.cmd.Process.Signal(syscall.SIGTERM) - if err != nil { - is.logger.Error(err) - } - - if is.waitForExit(ctx, StopTermTimeout) { - is.logger.Infof("%s successfully stopped", is.name) - return nil - } - - is.logger.Warnf("%s refused to exit, killing", is.name) - err = syscall.Kill(-is.cmd.Process.Pid, 
syscall.SIGKILL) - if err != nil { - is.logger.Error(err) - } - - if is.waitForExit(ctx, StopKillTimeout) { - is.logger.Infof("%s successfully killed", is.name) - return nil - } - - return errw.Errorf("%s process couldn't be killed", is.name) -} - -func (is *InternalSubsystem) waitForExit(ctx context.Context, timeout time.Duration) bool { - is.mu.Lock() - exitChan := is.exitChan - running := is.running - is.mu.Unlock() - - if !running { - return true - } - - select { - case <-exitChan: - return true - case <-ctx.Done(): - return false - case <-time.After(timeout): - return false - } -} - -// HealthCheck sends a USR1 signal to the subsystem process, which should cause it to log "HEALTHY" to stdout. -func (is *InternalSubsystem) HealthCheck(ctx context.Context) (errRet error) { - is.startStopMu.Lock() - defer is.startStopMu.Unlock() - is.mu.Lock() - defer is.mu.Unlock() - if !is.running { - return errw.Errorf("%s not running", is.name) - } - - is.logger.Debugf("starting healthcheck for %s", is.name) - - checkChan, err := is.cmd.Stdout.(*MatchingLogger).AddMatcher("healthcheck", regexp.MustCompile(`HEALTHY`), true) - if err != nil { - return err - } - defer func() { - matcher, ok := is.cmd.Stdout.(*MatchingLogger) - if ok { - matcher.DeleteMatcher("healthcheck") - } - }() - - err = is.cmd.Process.Signal(syscall.SIGUSR1) - if err != nil { - is.logger.Error(err) - } - - select { - case <-time.After(time.Second * 30): - case <-ctx.Done(): - case <-checkChan: - is.logger.Debugf("healthcheck for %s is good", is.name) - return nil - } - return errw.Errorf("timeout waiting for healthcheck on %s", is.name) -} - -func (is *InternalSubsystem) Update(ctx context.Context, cfg *pb.DeviceSubsystemConfig, newVersion bool) (bool, error) { - jsonBytes, err := cfg.GetAttributes().MarshalJSON() - if err != nil { - return true, err - } - - fileBytes, err := os.ReadFile(is.cfgPath) - // If no changes, only restart if there was a new version. 
- if err == nil && bytes.Equal(fileBytes, jsonBytes) { - return newVersion, nil - } - - // If an error reading the config file, restart and return the error - if err != nil && !errors.Is(err, fs.ErrNotExist) { - return true, err - } - - // If attribute changes, restart after writing the new config file. - //nolint:gosec - return true, errors.Join(os.WriteFile(is.cfgPath, jsonBytes, 0o644), SyncFS(is.cfgPath)) -} diff --git a/subsystems/provisioning/connstate.go b/subsystems/networking/connstate.go similarity index 99% rename from subsystems/provisioning/connstate.go rename to subsystems/networking/connstate.go index cc011a4..3ddc2b2 100644 --- a/subsystems/provisioning/connstate.go +++ b/subsystems/networking/connstate.go @@ -1,4 +1,4 @@ -package provisioning +package networking import ( "sync" diff --git a/subsystems/networking/definitions.go b/subsystems/networking/definitions.go new file mode 100644 index 0000000..815e54e --- /dev/null +++ b/subsystems/networking/definitions.go @@ -0,0 +1,285 @@ +package networking + +import ( + "context" + "encoding/json" + "errors" + "os" + "sync" + "time" + + gnm "github.com/Otterverse/gonetworkmanager/v2" + pb "go.viam.com/api/provisioning/v1" +) + +// This file contains type, const, and var definitions. 
+ +const ( + SubsysName = "agent-provisioning" + + DNSMasqFilepath = "/etc/NetworkManager/dnsmasq-shared.d/80-viam.conf" + DNSMasqContentsRedirect = "address=/#/10.42.0.1\n" + DNSMasqContentsSetupOnly = "address=/.setup/10.42.0.1\n" + + PortalBindAddr = "10.42.0.1" + + ConnCheckFilepath = "/etc/NetworkManager/conf.d/80-viam.conf" + ConnCheckContents = "[connectivity]\nuri=http://packages.viam.com/check_network_status.txt\ninterval=300\n" + + wifiPowerSaveFilepath = "/etc/NetworkManager/conf.d/81-viam-wifi-powersave.conf" + wifiPowerSaveContentsDefault = "# This file intentionally left blank.\n" + wifiPowerSaveContentsDisable = "[connection]\n# Explicitly disable\nwifi.powersave = 2\n" + wifiPowerSaveContentsEnable = "[connection]\n# Explicitly enable\nwifi.powersave = 3\n" + NetworkTypeWifi = "wifi" + NetworkTypeWired = "wired" + NetworkTypeHotspot = "hotspot" + + HealthCheckTimeout = time.Minute +) + +var ( + ErrBadPassword = errors.New("bad or missing password") + ErrConnCheckDisabled = errors.New("NetworkManager connectivity checking disabled by user, network management will be unavailable") + ErrNoActiveConnectionFound = errors.New("no active connection found") + scanLoopDelay = time.Second * 15 + scanTimeout = time.Second * 30 + connectTimeout = time.Second * 50 // longer than the 45 second timeout in NetworkManager +) + +type lockingNetwork struct { + mu sync.Mutex + network +} + +type network struct { + netType string + ssid string + security string + signal uint8 + priority int32 + isHotspot bool + + firstSeen time.Time + lastSeen time.Time + + lastTried time.Time + connected bool + lastConnected time.Time + lastError error + interfaceName string + + conn gnm.Connection +} + +func (n *network) getInfo() NetworkInfo { + var errStr string + if n.lastError != nil { + errStr = n.lastError.Error() + } + + return NetworkInfo{ + Type: n.netType, + SSID: n.ssid, + Security: n.security, + Signal: int32(n.signal), + Connected: n.connected, + LastError: errStr, + } +} 
+
+// NetworkInfo is the exported, lock-free description of a single network.
+type NetworkInfo struct {
+	Type      string
+	SSID      string
+	Security  string
+	Signal    int32
+	Connected bool
+	LastError string
+}
+
+// NetworkInfoToProto converts a NetworkInfo into its protobuf message.
+func NetworkInfoToProto(net *NetworkInfo) *pb.NetworkInfo {
+	msg := &pb.NetworkInfo{
+		Type:     net.Type,
+		Ssid:     net.SSID,
+		Security: net.Security,
+		Signal:   net.Signal,
+	}
+	msg.Connected = net.Connected
+	msg.LastError = net.LastError
+	return msg
+}
+
+// NetworkInfoFromProto converts a protobuf message back into a NetworkInfo,
+// reading every field through the message's getters.
+func NetworkInfoFromProto(buf *pb.NetworkInfo) *NetworkInfo {
+	info := &NetworkInfo{
+		Type:     buf.GetType(),
+		SSID:     buf.GetSsid(),
+		Security: buf.GetSecurity(),
+		Signal:   buf.GetSignal(),
+	}
+	info.Connected = buf.GetConnected()
+	info.LastError = buf.GetLastError()
+	return info
+}
+
+// MachineConfig is the smallest document that is valid as /etc/viam.json.
+type MachineConfig struct {
+	Cloud *CloudConfig `json:"cloud"`
+}
+
+// CloudConfig carries the cloud credentials stored inside MachineConfig.
+type CloudConfig struct {
+	AppAddress string `json:"app_address"`
+	ID         string `json:"id"`
+	Secret     string `json:"secret"`
+}
+
+// WriteDeviceConfig writes the machine config to file with mode 0600.
+// A non-empty RawConfig is written verbatim; otherwise a MachineConfig is
+// built from the individual credential fields and written as JSON.
+func WriteDeviceConfig(file string, input userInput) error {
+	contents := []byte(input.RawConfig)
+	if len(contents) == 0 {
+		encoded, err := json.Marshal(&MachineConfig{
+			Cloud: &CloudConfig{
+				AppAddress: input.AppAddr,
+				ID:         input.PartID,
+				Secret:     input.Secret,
+			},
+		})
+		if err != nil {
+			return err
+		}
+		contents = encoded
+	}
+	return os.WriteFile(file, contents, 0o600)
+}
+
+// portalData is the mutable state shared by the portal's request handlers.
+type portalData struct {
+	mu      sync.Mutex
+	Updated time.Time
+
+	inputChan chan<- userInput
+
+	input   *userInput
+	workers sync.WaitGroup
+
+	// used to cancel background threads
+	cancel context.CancelFunc
+}
+
+// must be called with p.mu already locked!
+//
+// sendInput forwards the accumulated user input on p.inputChan. Complete input
+// is sent immediately; partial input is sent by a background worker after a
+// 10 second grace period, unless a later call supersedes it first.
+func (p *portalData) sendInput(connState *connectionState) {
+	input := *p.input
+
+	// Whatever happens next, a delayed send still pending for an earlier
+	// (partial) submission is superseded by this call.
+	if p.cancel != nil {
+		p.cancel()
+	}
+
+	// in case both network and device credentials are being updated
+	// only send user data if both are already set
+	if (input.SSID != "" && input.PartID != "") ||
+		(input.SSID != "" && connState.getConfigured()) ||
+		(input.PartID != "" && connState.getOnline()) {
+		p.input = &userInput{}
+		p.inputChan <- input
+		return
+	}
+
+	// if not, wait 10 seconds for full input
+	ctx, cancel := context.WithCancel(context.Background())
+	p.cancel = cancel
+
+	p.workers.Add(1)
+	go func() {
+		defer p.workers.Done()
+		// release the context's resources whichever way the worker exits
+		defer cancel()
+
+		timer := time.NewTimer(time.Second * 10)
+		defer timer.Stop()
+
+		// Wait WITHOUT holding p.mu. Callers must hold p.mu to call sendInput,
+		// so sleeping with the lock held would block the follow-up submission
+		// that is supposed to cancel this worker.
+		select {
+		case <-ctx.Done():
+			return
+		case <-timer.C:
+		}
+
+		p.mu.Lock()
+		defer p.mu.Unlock()
+		// A newer call may have cancelled this worker while it was blocked on
+		// the lock; that call now owns the input.
+		if ctx.Err() != nil {
+			return
+		}
+		p.input = &userInput{}
+		p.inputChan <- input
+	}()
+}
+
+// userInput is everything a user can submit through the portal.
+type userInput struct {
+	// network
+	SSID string
+	PSK  string
+
+	// device credentials
+	PartID  string
+	Secret  string
+	AppAddr string
+
+	// raw /etc/viam.json contents
+	RawConfig string
+}
+
+// health records the last time a loop reported itself alive.
+type health struct {
+	mu   sync.Mutex
+	last time.Time
+}
+
+// MarkGood records the current time as the last healthy moment.
+func (h *health) MarkGood() {
+	h.mu.Lock()
+	defer h.mu.Unlock()
+	h.last = time.Now()
+}
+
+// Sleep waits for timeout and then marks the tracker healthy, returning true.
+// It returns false, without marking, if ctx is done first.
+func (h *health) Sleep(ctx context.Context, timeout time.Duration) bool {
+	select {
+	case <-ctx.Done():
+		return false
+	case <-time.After(timeout):
+		h.mu.Lock()
+		defer h.mu.Unlock()
+		h.last = time.Now()
+		return true
+	}
+}
+
+// IsHealthy reports whether the tracker was marked within HealthCheckTimeout.
+func (h *health) IsHealthy() bool {
+	h.mu.Lock()
+	defer h.mu.Unlock()
+	return time.Since(h.last) < HealthCheckTimeout
+}
+
+// errorList is a mutex-guarded collection of errors.
+type errorList struct {
+	mu     sync.Mutex
+	errors []error
+}
+
+// Add appends the given errors to the list.
+func (e *errorList) Add(err ...error) {
+	e.mu.Lock()
+	defer e.mu.Unlock()
+	e.errors = append(e.errors, err...)
+}
+
+// Clear empties the list.
+func (e *errorList) Clear() {
+	e.mu.Lock()
+	defer e.mu.Unlock()
+	e.errors = []error{}
+}
+
+// Errors returns a snapshot of the collected errors. The result is a copy, so
+// callers may keep, append to, or modify it without touching the list's own
+// storage, which is only safe to access under e.mu.
+func (e *errorList) Errors() []error {
+	e.mu.Lock()
+	defer e.mu.Unlock()
+	if e.errors == nil {
+		return nil
+	}
+	snapshot := make([]error, len(e.errors))
+	copy(snapshot, e.errors)
+	return snapshot
+}
+
+// banner is a mutex-guarded string.
+type banner struct {
+	mu     sync.Mutex
+	banner string
+}
+
+// Set replaces the stored banner text.
+func (b *banner) Set(banner string) {
+	b.mu.Lock()
+	defer b.mu.Unlock()
+	b.banner = banner
+}
+
+// Get returns the stored banner text.
+func (b *banner) Get() string {
+	b.mu.Lock()
+	defer b.mu.Unlock()
+	return b.banner
+}
diff --git a/subsystems/provisioning/generators.go b/subsystems/networking/generators.go
similarity index 94%
rename from subsystems/provisioning/generators.go
rename to subsystems/networking/generators.go
index c4659f1..8127d2e 100644
--- a/subsystems/provisioning/generators.go
+++ b/subsystems/networking/generators.go
@@ -1,4 +1,4 @@
-package provisioning
+package networking
 
 import (
 	"encoding/binary"
@@ -10,6 +10,7 @@ import (
 	gnm "github.com/Otterverse/gonetworkmanager/v2"
 	"github.com/google/uuid"
 	errw "github.com/pkg/errors"
+	"github.com/viamrobotics/agent/utils"
 )
 
 // This file contains the wifi/hotspot setting generation functions.
@@ -48,7 +49,7 @@ func generateHotspotSettings(id, ssid, psk, ifName string) gnm.ConnectionSetting return settings } -func generateNetworkSettings(id string, cfg NetworkConfig) (gnm.ConnectionSettings, error) { +func generateNetworkSettings(id string, cfg utils.NetworkDefinition) (gnm.ConnectionSettings, error) { settings := gnm.ConnectionSettings{} if id == "" { return nil, errw.New("id cannot be empty") @@ -97,7 +98,7 @@ func generateNetworkSettings(id string, cfg NetworkConfig) (gnm.ConnectionSettin return settings, nil } -func generateIPv4Settings(cfg NetworkConfig) (map[string]any, error) { +func generateIPv4Settings(cfg utils.NetworkDefinition) (map[string]any, error) { // -1 is special for "automatic" if cfg.IPv4RouteMetric == 0 { cfg.IPv4RouteMetric = -1 diff --git a/subsystems/provisioning/grpc.go b/subsystems/networking/grpc.go similarity index 99% rename from subsystems/provisioning/grpc.go rename to subsystems/networking/grpc.go index 5aee5ef..3ebf270 100644 --- a/subsystems/provisioning/grpc.go +++ b/subsystems/networking/grpc.go @@ -1,4 +1,4 @@ -package provisioning +package networking import ( "context" diff --git a/subsystems/provisioning/provisioning.go b/subsystems/networking/networking.go similarity index 70% rename from subsystems/provisioning/provisioning.go rename to subsystems/networking/networking.go index d1aed66..fcafcf7 100644 --- a/subsystems/provisioning/provisioning.go +++ b/subsystems/networking/networking.go @@ -1,49 +1,39 @@ -// Package provisioning is the subsystem responsible for network/wifi management, and initial device setup via hotspot. -package provisioning +// Package networking is the subsystem responsible for network/wifi management, and initial device setup via hotspot. 
+package networking import ( "context" "errors" "net/http" "reflect" - "strings" "sync" "time" semver "github.com/Masterminds/semver/v3" gnm "github.com/Otterverse/gonetworkmanager/v2" errw "github.com/pkg/errors" - "github.com/viamrobotics/agent" "github.com/viamrobotics/agent/subsystems" - "github.com/viamrobotics/agent/subsystems/registry" - agentpb "go.viam.com/api/app/agent/v1" + "github.com/viamrobotics/agent/utils" pb "go.viam.com/api/provisioning/v1" "go.viam.com/rdk/logging" "google.golang.org/grpc" ) -func init() { - registry.Register(SubsysName, NewProvisioning) -} - type Provisioning struct { monitorWorkers sync.WaitGroup // blocks start/stop/etc operations - opMu sync.Mutex - running bool - disabled bool - noNM bool + opMu sync.Mutex + running bool + noNM bool // used to stop main/bg loops cancel context.CancelFunc // only set during NewProvisioning, no lock - nm gnm.NetworkManager - settings gnm.Settings - hostname string - logger logging.Logger - AppCfgPath string + nm gnm.NetworkManager + settings gnm.Settings + logger logging.Logger // internal locking connState *connectionState @@ -56,7 +46,10 @@ type Provisioning struct { // locking for config updates dataMu sync.Mutex - cfg *Config + + // SMURF process these in Update + cfg utils.NetworkConfiguration + nets utils.AdditionalNetworks // portal webServer *http.Server @@ -66,18 +59,11 @@ type Provisioning struct { pb.UnimplementedProvisioningServiceServer } -func NewProvisioning(ctx context.Context, logger logging.Logger, updateConf *agentpb.DeviceSubsystemConfig) (subsystems.Subsystem, error) { - cfg, err := LoadConfig(updateConf) - if err != nil { - logger.Error(errw.Wrap(err, "loading provisioning config")) - } - logger.Debugf("Provisioning Config: %+v", cfg) - - w := &Provisioning{ - disabled: updateConf.GetDisable(), - cfg: cfg, - AppCfgPath: AppConfigFilePath, - logger: logger, +func NewSubsystem(ctx context.Context, logger logging.Logger, cfg utils.AgentConfig) subsystems.Subsystem { + return 
&Provisioning{ + cfg: cfg.NetworkConfiguration, + nets: cfg.AdditionalNetworks, + logger: logger, connState: NewConnectionState(logger), netState: NewNetworkState(logger), @@ -89,7 +75,6 @@ func NewProvisioning(ctx context.Context, logger logging.Logger, updateConf *age mainLoopHealth: &health{}, bgLoopHealth: &health{}, } - return w, nil } func (w *Provisioning) getNM() (gnm.NetworkManager, error) { @@ -157,12 +142,6 @@ func (w *Provisioning) init(ctx context.Context) error { w.nm = nm w.settings = settings - w.hostname, err = settings.GetPropertyHostname() - if err != nil { - return errw.Wrap(err, "getting hostname from NetworkManager") - } - - w.updateHotspotSSID(w.cfg) w.netState.SetHotspotInterface(w.cfg.HotspotInterface) if err := w.writeDNSMasq(); err != nil { @@ -194,11 +173,11 @@ func (w *Provisioning) init(ctx context.Context) error { w.warnIfMultiplePrimaryNetworks() - if w.cfg.RoamingMode { - w.logger.Info("Roaming Mode enabled. Will try all connections for global internet connectivity.") + if w.cfg.TurnOnHotspotIfWifiHasNoInternet { + w.logger.Info("Wifi internet checking enabled. Will try all connections for global internet connectivity.") } else { primarySSID := w.netState.PrimarySSID(w.Config().HotspotInterface) - w.logger.Infof("Default (Single Network) Mode enabled. Will directly connect only to primary network: %s", primarySSID) + w.logger.Infof("Internet checks disabled. 
Will directly connect to primary network: %s", primarySSID) if primarySSID == "" { w.logger.Warnf("cannot find primary SSID for %s", w.Config().HotspotInterface) } @@ -228,10 +207,6 @@ func (w *Provisioning) Start(ctx context.Context) error { return nil } - if w.disabled || w.noNM { - return agent.ErrSubsystemDisabled - } - if w.nm == nil || w.settings == nil { if err := w.init(ctx); err != nil { return err @@ -283,47 +258,28 @@ func (w *Provisioning) Stop(ctx context.Context) error { } // Update validates and/or updates a subsystem, returns true if subsystem should be restarted. -func (w *Provisioning) Update(ctx context.Context, updateConf *agentpb.DeviceSubsystemConfig) (bool, error) { +func (w *Provisioning) Update(ctx context.Context, cfg utils.AgentConfig) (needRestart bool) { w.opMu.Lock() defer w.opMu.Unlock() - var needRestart bool - if w.noNM { - return needRestart, nil - } - - if w.disabled != updateConf.GetDisable() { - w.disabled = updateConf.GetDisable() - if w.disabled { - w.logger.Infof("agent-provisioning disabled") - } - needRestart = true - } - - if w.disabled { - return needRestart, nil + return needRestart } if w.nm == nil || w.settings == nil { if err := w.init(ctx); err != nil { - return true, err + w.logger.Error(err) + return needRestart } } - cfg, err := LoadConfig(updateConf) - if err != nil { - return needRestart, err - } - - w.updateHotspotSSID(cfg) - if cfg.HotspotInterface == "" { - cfg.HotspotInterface = w.Config().HotspotInterface + if cfg.NetworkConfiguration.HotspotInterface == "" { + cfg.NetworkConfiguration.HotspotInterface = w.Config().HotspotInterface } - w.netState.SetHotspotInterface(cfg.HotspotInterface) + w.netState.SetHotspotInterface(cfg.NetworkConfiguration.HotspotInterface) - if reflect.DeepEqual(cfg, w.cfg) { - return needRestart, nil + if reflect.DeepEqual(cfg.NetworkConfiguration, w.cfg) && reflect.DeepEqual(cfg.AdditionalNetworks, w.nets) { + return needRestart } needRestart = true @@ -331,16 +287,17 @@ func (w 
*Provisioning) Update(ctx context.Context, updateConf *agentpb.DeviceSub w.dataMu.Lock() defer w.dataMu.Unlock() - w.cfg = cfg + w.cfg = cfg.NetworkConfiguration + w.nets = cfg.AdditionalNetworks - return needRestart, nil + return needRestart } // HealthCheck reports if a subsystem is running correctly (it is restarted if not). func (w *Provisioning) HealthCheck(ctx context.Context) error { w.opMu.Lock() defer w.opMu.Unlock() - if w.disabled || w.noNM { + if w.noNM { return nil } @@ -351,23 +308,18 @@ func (w *Provisioning) HealthCheck(ctx context.Context) error { return errw.New("provisioning not responsive") } -// Version returns the current version of the subsystem. -func (w *Provisioning) Version() string { - return agent.GetRevision() -} - -func (w *Provisioning) Config() Config { +func (w *Provisioning) Config() utils.NetworkConfiguration { w.dataMu.Lock() defer w.dataMu.Unlock() - return *w.cfg + return w.cfg } func (w *Provisioning) processAdditionalnetworks(ctx context.Context) { - if !w.cfg.RoamingMode && len(w.cfg.Networks) > 0 { - w.logger.Warn("Additional networks configured, but Roaming Mode is not enabled. Additional wifi networks will likely be unused.") + if !w.cfg.TurnOnHotspotIfWifiHasNoInternet && len(w.nets) > 0 { + w.logger.Warn("Additional networks configured, but internet checking is not enabled. Additional networks may be unused.") } - for _, network := range w.cfg.Networks { + for _, network := range w.nets { _, err := w.addOrUpdateConnection(network) if err != nil { w.logger.Error(errw.Wrapf(err, "adding network %s", network.SSID)) @@ -381,14 +333,6 @@ func (w *Provisioning) processAdditionalnetworks(ctx context.Context) { } } -// must be run inside dataMu lock. -func (w *Provisioning) updateHotspotSSID(cfg *Config) { - cfg.hotspotSSID = cfg.HotspotPrefix + "-" + strings.ToLower(w.hostname) - if len(cfg.hotspotSSID) > 32 { - cfg.hotspotSSID = cfg.hotspotSSID[:32] - } -} - // must be run inside dataMu lock. 
func (w *Provisioning) writeWifiPowerSave(ctx context.Context) error { contents := wifiPowerSaveContentsDefault @@ -400,7 +344,7 @@ func (w *Provisioning) writeWifiPowerSave(ctx context.Context) error { } } - isNew, err := agent.WriteFileIfNew(wifiPowerSaveFilepath, []byte(contents)) + isNew, err := utils.WriteFileIfNew(wifiPowerSaveFilepath, []byte(contents)) if err != nil { return errw.Wrap(err, "writing wifi-powersave.conf") } diff --git a/subsystems/provisioning/networkmanager.go b/subsystems/networking/networkmanager.go similarity index 91% rename from subsystems/provisioning/networkmanager.go rename to subsystems/networking/networkmanager.go index 572276d..6bb114b 100644 --- a/subsystems/provisioning/networkmanager.go +++ b/subsystems/networking/networkmanager.go @@ -1,4 +1,4 @@ -package provisioning +package networking import ( "context" @@ -11,10 +11,11 @@ import ( gnm "github.com/Otterverse/gonetworkmanager/v2" errw "github.com/pkg/errors" + "github.com/viamrobotics/agent/utils" ) func (w *Provisioning) warnIfMultiplePrimaryNetworks() { - if w.cfg.RoamingMode { + if w.cfg.TurnOnHotspotIfWifiHasNoInternet { return } var primaryCandidates []string @@ -45,7 +46,7 @@ func (w *Provisioning) getVisibleNetworks() []NetworkInfo { var visible []NetworkInfo for _, nw := range w.netState.Networks() { // note this does NOT use VisibleNetworkTimeout (like getCandidates does) - recentlySeen := nw.lastSeen.After(w.connState.getProvisioningChange().Add(time.Duration(w.Config().OfflineTimeout * -2))) + recentlySeen := nw.lastSeen.After(w.connState.getProvisioningChange().Add(time.Duration(w.Config().OfflineBeforeStartingHotspotMinutes * -2))) if !nw.isHotspot && recentlySeen { visible = append(visible, nw.getInfo()) @@ -147,12 +148,12 @@ func (w *Provisioning) checkConnections() error { } // in roaming mode, we don't care WHAT network is connected - if w.cfg.RoamingMode && state == gnm.NmActiveConnectionStateActivated && ssid != w.Config().hotspotSSID { + if 
w.cfg.TurnOnHotspotIfWifiHasNoInternet && state == gnm.NmActiveConnectionStateActivated && ssid != w.Config().HotspotSSID { connected = true } // in normal (single) mode, we need to be connected to the primary (highest priority) network - if !w.cfg.RoamingMode && state == gnm.NmActiveConnectionStateActivated && ssid == w.netState.PrimarySSID(w.Config().HotspotInterface) { + if !w.cfg.TurnOnHotspotIfWifiHasNoInternet && state == gnm.NmActiveConnectionStateActivated && ssid == w.netState.PrimarySSID(w.Config().HotspotInterface) { connected = true } } @@ -170,21 +171,21 @@ func (w *Provisioning) StartProvisioning(ctx context.Context, inputChan chan<- u defer w.opMu.Unlock() w.logger.Info("Starting provisioning mode.") - _, err := w.addOrUpdateConnection(NetworkConfig{ + _, err := w.addOrUpdateConnection(utils.NetworkDefinition{ Type: NetworkTypeHotspot, Interface: w.Config().HotspotInterface, - SSID: w.Config().hotspotSSID, + SSID: w.Config().HotspotSSID, }) if err != nil { return err } - if err := w.activateConnection(ctx, w.Config().HotspotInterface, w.Config().hotspotSSID); err != nil { + if err := w.activateConnection(ctx, w.Config().HotspotInterface, w.Config().HotspotSSID); err != nil { return errw.Wrap(err, "starting provisioning mode hotspot") } // start portal with ssid list and known connections if err := w.startPortal(inputChan); err != nil { - err = errors.Join(err, w.deactivateConnection(w.Config().HotspotInterface, w.Config().hotspotSSID)) + err = errors.Join(err, w.deactivateConnection(w.Config().HotspotInterface, w.Config().HotspotSSID)) return errw.Wrap(err, "starting web/grpc portal") } @@ -202,7 +203,7 @@ func (w *Provisioning) stopProvisioning() error { w.logger.Info("Stopping provisioning mode.") w.connState.setProvisioning(false) err := w.stopPortal() - err2 := w.deactivateConnection(w.Config().HotspotInterface, w.Config().hotspotSSID) + err2 := w.deactivateConnection(w.Config().HotspotInterface, w.Config().HotspotSSID) if errors.Is(err2, 
ErrNoActiveConnectionFound) { return err } @@ -270,7 +271,7 @@ func (w *Provisioning) activateConnection(ctx context.Context, ifName, ssid stri if nw.netType != NetworkTypeHotspot { w.netState.SetActiveSSID(ifName, ssid) - if ifName == w.Config().HotspotInterface && (w.cfg.RoamingMode || w.netState.PrimarySSID(ifName) == ssid) { + if ifName == w.Config().HotspotInterface && (w.cfg.TurnOnHotspotIfWifiHasNoInternet || w.netState.PrimarySSID(ifName) == ssid) { w.connState.setConnected(true) } return w.checkOnline(true) @@ -355,14 +356,14 @@ func (w *Provisioning) waitForConnect(ctx context.Context, device gnm.Device) er } } -func (w *Provisioning) AddOrUpdateConnection(cfg NetworkConfig) (bool, error) { +func (w *Provisioning) AddOrUpdateConnection(cfg utils.NetworkDefinition) (bool, error) { w.opMu.Lock() defer w.opMu.Unlock() return w.addOrUpdateConnection(cfg) } // returns true if network was new (added) and not updated. -func (w *Provisioning) addOrUpdateConnection(cfg NetworkConfig) (bool, error) { +func (w *Provisioning) addOrUpdateConnection(cfg utils.NetworkDefinition) (bool, error) { var changesMade bool if cfg.Type != NetworkTypeWifi && cfg.Type != NetworkTypeHotspot && cfg.Type != NetworkTypeWired { @@ -382,11 +383,11 @@ func (w *Provisioning) addOrUpdateConnection(cfg NetworkConfig) (bool, error) { var settings gnm.ConnectionSettings var err error if cfg.Type == NetworkTypeHotspot { - if cfg.SSID != w.Config().hotspotSSID { + if cfg.SSID != w.Config().HotspotSSID { return changesMade, errw.Errorf("only the builtin provisioning hotspot may use the %s network type", NetworkTypeHotspot) } nw.isHotspot = true - settings = generateHotspotSettings(w.cfg.HotspotPrefix, w.Config().hotspotSSID, w.cfg.HotspotPassword, w.Config().HotspotInterface) + settings = generateHotspotSettings(w.cfg.HotspotPrefix, w.Config().HotspotSSID, w.cfg.HotspotPassword, w.Config().HotspotInterface) } else { id := w.cfg.Manufacturer + "-" + netKey settings, err = 
generateNetworkSettings(id, cfg) @@ -396,7 +397,7 @@ func (w *Provisioning) addOrUpdateConnection(cfg NetworkConfig) (bool, error) { } } - if cfg.Type == NetworkTypeWifi && !w.cfg.RoamingMode && cfg.Priority == 999 { + if cfg.Type == NetworkTypeWifi && !w.cfg.TurnOnHotspotIfWifiHasNoInternet && cfg.Priority == 999 { // lower the priority of any existing/prior primary network w.lowerMaxNetPriorities(cfg.SSID) w.netState.SetPrimarySSID(w.Config().HotspotInterface, cfg.SSID) @@ -442,7 +443,7 @@ func (w *Provisioning) addOrUpdateConnection(cfg NetworkConfig) (bool, error) { func (w *Provisioning) lowerMaxNetPriorities(skip string) { for _, nw := range w.netState.LockingNetworks() { netKey := w.netState.GenNetKey(nw.interfaceName, nw.ssid) - if netKey == skip || netKey == w.netState.GenNetKey(w.Config().HotspotInterface, w.Config().hotspotSSID) || nw.priority < 999 || + if netKey == skip || netKey == w.netState.GenNetKey(w.Config().HotspotInterface, w.Config().HotspotSSID) || nw.priority < 999 || nw.netType != NetworkTypeWifi || (nw.interfaceName != "" && nw.interfaceName != w.Config().HotspotInterface) { continue } @@ -478,7 +479,7 @@ func (w *Provisioning) lowerMaxNetPriorities(skip string) { } func (w *Provisioning) checkConfigured() { - _, err := os.ReadFile(w.AppCfgPath) + _, err := os.ReadFile(utils.AppConfigFilePath) w.connState.setConfigured(err == nil) } @@ -492,7 +493,7 @@ func (w *Provisioning) tryCandidates(ctx context.Context) bool { } // in single mode we just need a connection - if !w.cfg.RoamingMode { + if !w.cfg.TurnOnHotspotIfWifiHasNoInternet { return true } @@ -519,14 +520,14 @@ func (w *Provisioning) getCandidates(ifName string) []string { configured := nw.conn != nil // firstSeen/lastTried are reset if a network disappears for more than a minute, so retry if it comes back (or 10 mins) - recentlyTried := nw.lastTried.After(nw.firstSeen) && nw.lastTried.After(time.Now().Add(time.Duration(w.cfg.FallbackTimeout)*-1)) + recentlyTried := 
nw.lastTried.After(nw.firstSeen) && nw.lastTried.After(time.Now().Add(time.Duration(w.cfg.RetryConnectionTimeoutMinutes)*-1)) if !nw.isHotspot && visible && configured && !recentlyTried { candidates = append(candidates, nw) } } - if !w.cfg.RoamingMode { + if !w.cfg.TurnOnHotspotIfWifiHasNoInternet { for _, nw := range candidates { if nw.ssid == w.netState.PrimarySSID(w.Config().HotspotInterface) { return []string{nw.ssid} @@ -589,7 +590,7 @@ func (w *Provisioning) mainLoop(ctx context.Context) { case userInput := <-inputChan: if userInput.RawConfig != "" || userInput.PartID != "" { w.logger.Info("Device config received") - err := WriteDeviceConfig(w.AppCfgPath, userInput) + err := WriteDeviceConfig(utils.AppConfigFilePath, userInput) if err != nil { w.errors.Add(err) w.logger.Error(err) @@ -604,10 +605,10 @@ func (w *Provisioning) mainLoop(ctx context.Context) { if userInput.SSID != "" { w.logger.Infof("Wifi settings received for %s", userInput.SSID) priority := int32(999) - if w.cfg.RoamingMode { + if w.cfg.TurnOnHotspotIfWifiHasNoInternet { priority = 100 } - cfg := NetworkConfig{ + cfg := utils.NetworkDefinition{ Type: NetworkTypeWifi, SSID: userInput.SSID, PSK: userInput.PSK, @@ -680,7 +681,7 @@ func (w *Provisioning) mainLoop(ctx context.Context) { } isConfigured := w.connState.getConfigured() allGood := isConfigured && (isConnected || isOnline) - if w.cfg.RoamingMode { + if w.cfg.TurnOnHotspotIfWifiHasNoInternet { allGood = isOnline && isConfigured hasConnectivity = isOnline lastConnectivity = lastOnline @@ -700,13 +701,13 @@ func (w *Provisioning) mainLoop(ctx context.Context) { // complex logic, so wasting some variables for readability // portal interaction time is updated when a user loads a page or makes a grpc request - inactivePortal := w.connState.getLastInteraction().Before(now.Add(time.Duration(w.cfg.UserTimeout)*-1)) || userInputReceived + inactivePortal := w.connState.getLastInteraction().Before(now.Add(time.Duration(w.cfg.UserIdleMinutes)*-1)) || 
userInputReceived // exit/retry to test networks only if there's no recent user interaction AND configuration is present haveCandidates := len(w.getCandidates(w.Config().HotspotInterface)) > 0 && inactivePortal && isConfigured // exit/retry every FallbackTimeout (10 minute default), unless user is active - fallbackHit := pModeChange.Before(now.Add(time.Duration(w.cfg.FallbackTimeout)*-1)) && inactivePortal + fallbackHit := pModeChange.Before(now.Add(time.Duration(w.cfg.RetryConnectionTimeoutMinutes)*-1)) && inactivePortal shouldExit := allGood || haveCandidates || fallbackHit @@ -731,14 +732,14 @@ func (w *Provisioning) mainLoop(ctx context.Context) { if w.tryCandidates(ctx) { hasConnectivity = w.connState.getConnected() || w.connState.getOnline() // if we're roaming or this network was JUST added, it must have internet - if w.cfg.RoamingMode { + if w.cfg.TurnOnHotspotIfWifiHasNoInternet { hasConnectivity = w.connState.getOnline() } if hasConnectivity { continue } lastConnectivity = w.connState.getLastConnected() - if w.cfg.RoamingMode { + if w.cfg.TurnOnHotspotIfWifiHasNoInternet { lastConnectivity = w.connState.getLastOnline() } } @@ -759,8 +760,8 @@ func (w *Provisioning) mainLoop(ctx context.Context) { w.logger.Errorf("failed to reboot after %s time", time.Minute*5) } - hitOfflineTimeout := lastConnectivity.Before(now.Add(time.Duration(w.cfg.OfflineTimeout)*-1)) && - pModeChange.Before(now.Add(time.Duration(w.cfg.OfflineTimeout)*-1)) + hitOfflineTimeout := lastConnectivity.Before(now.Add(time.Duration(w.cfg.OfflineBeforeStartingHotspotMinutes)*-1)) && + pModeChange.Before(now.Add(time.Duration(w.cfg.OfflineBeforeStartingHotspotMinutes)*-1)) // not in provisioning mode, so start it if not configured (/etc/viam.json) // OR as long as we've been offline AND out of provisioning mode for at least OfflineTimeout (2 minute default) if !isConfigured || hitOfflineTimeout { diff --git a/subsystems/provisioning/networkstate.go b/subsystems/networking/networkstate.go 
similarity index 99% rename from subsystems/provisioning/networkstate.go rename to subsystems/networking/networkstate.go index 62fd3b9..80ae285 100644 --- a/subsystems/provisioning/networkstate.go +++ b/subsystems/networking/networkstate.go @@ -1,4 +1,4 @@ -package provisioning +package networking import ( "fmt" diff --git a/subsystems/provisioning/portal.go b/subsystems/networking/portal.go similarity index 99% rename from subsystems/provisioning/portal.go rename to subsystems/networking/portal.go index 086a944..10a9f9e 100644 --- a/subsystems/provisioning/portal.go +++ b/subsystems/networking/portal.go @@ -1,4 +1,4 @@ -package provisioning +package networking import ( "embed" diff --git a/subsystems/provisioning/scanning.go b/subsystems/networking/scanning.go similarity index 99% rename from subsystems/provisioning/scanning.go rename to subsystems/networking/scanning.go index 5eeaf13..50ebac3 100644 --- a/subsystems/provisioning/scanning.go +++ b/subsystems/networking/scanning.go @@ -1,4 +1,4 @@ -package provisioning +package networking // This file includes functions used for wifi scans. 
@@ -215,7 +215,7 @@ func (w *Provisioning) updateKnownConnections(ctx context.Context) error { nw.conn = conn nw.priority = getPriorityFromSettings(settings) - if nw.ssid == w.Config().hotspotSSID { + if nw.ssid == w.Config().HotspotSSID { nw.netType = NetworkTypeHotspot nw.isHotspot = true } else if nw.priority > highestPriority[ifName] { diff --git a/subsystems/provisioning/setup.go b/subsystems/networking/setup.go similarity index 98% rename from subsystems/provisioning/setup.go rename to subsystems/networking/setup.go index 4295730..be6f428 100644 --- a/subsystems/provisioning/setup.go +++ b/subsystems/networking/setup.go @@ -1,4 +1,4 @@ -package provisioning +package networking // This file includes functions used only once during startup in NewNMWrapper() @@ -22,7 +22,7 @@ var ( func (w *Provisioning) writeDNSMasq() error { DNSMasqContents := DNSMasqContentsRedirect - if w.cfg.DisableDNSRedirect { + if w.cfg.DisableCaptivePortalRedirect { DNSMasqContents = DNSMasqContentsSetupOnly } diff --git a/subsystems/provisioning/templates/base.html b/subsystems/networking/templates/base.html similarity index 100% rename from subsystems/provisioning/templates/base.html rename to subsystems/networking/templates/base.html diff --git a/subsystems/provisioning/templates/index.html b/subsystems/networking/templates/index.html similarity index 100% rename from subsystems/provisioning/templates/index.html rename to subsystems/networking/templates/index.html diff --git a/subsystems/provisioning/definitions.go b/subsystems/provisioning/definitions.go deleted file mode 100644 index d838743..0000000 --- a/subsystems/provisioning/definitions.go +++ /dev/null @@ -1,484 +0,0 @@ -package provisioning - -import ( - "context" - "encoding/json" - "errors" - "io/fs" - "os" - "sync" - "time" - - gnm "github.com/Otterverse/gonetworkmanager/v2" - errw "github.com/pkg/errors" - agentpb "go.viam.com/api/app/agent/v1" - pb "go.viam.com/api/provisioning/v1" -) - -// This file contains type, 
const, and var definitions. - -const ( - SubsysName = "agent-provisioning" - - DNSMasqFilepath = "/etc/NetworkManager/dnsmasq-shared.d/80-viam.conf" - DNSMasqContentsRedirect = "address=/#/10.42.0.1\n" - DNSMasqContentsSetupOnly = "address=/.setup/10.42.0.1\n" - - PortalBindAddr = "10.42.0.1" - - ConnCheckFilepath = "/etc/NetworkManager/conf.d/80-viam.conf" - ConnCheckContents = "[connectivity]\nuri=http://packages.viam.com/check_network_status.txt\ninterval=300\n" - - wifiPowerSaveFilepath = "/etc/NetworkManager/conf.d/81-viam-wifi-powersave.conf" - wifiPowerSaveContentsDefault = "# This file intentionally left blank.\n" - wifiPowerSaveContentsDisable = "[connection]\n# Explicitly disable\nwifi.powersave = 2\n" - wifiPowerSaveContentsEnable = "[connection]\n# Explicitly enable\nwifi.powersave = 3\n" - NetworkTypeWifi = "wifi" - NetworkTypeWired = "wired" - NetworkTypeHotspot = "hotspot" - - HealthCheckTimeout = time.Minute -) - -var ( - DefaultConf = Config{ - Manufacturer: "viam", - Model: "custom", - FragmentID: "", - HotspotPrefix: "viam-setup", - HotspotPassword: "viamsetup", - DisableDNSRedirect: false, - RoamingMode: false, - OfflineTimeout: Timeout(time.Minute * 2), - UserTimeout: Timeout(time.Minute * 5), - FallbackTimeout: Timeout(time.Minute * 10), - DeviceRebootAfterOfflineMinutes: Timeout(0), - Networks: []NetworkConfig{}, - } - - // Can be overwritten via cli arguments. 
- AppConfigFilePath = "/etc/viam.json" - ProvisioningConfigFilePath = "/etc/viam-provisioning.json" - - ErrBadPassword = errors.New("bad or missing password") - ErrConnCheckDisabled = errors.New("NetworkManager connectivity checking disabled by user, network management will be unavailable") - ErrNoActiveConnectionFound = errors.New("no active connection found") - scanLoopDelay = time.Second * 15 - scanTimeout = time.Second * 30 - connectTimeout = time.Second * 50 // longer than the 45 second timeout in NetworkManager -) - -type lockingNetwork struct { - mu sync.Mutex - network -} - -type network struct { - netType string - ssid string - security string - signal uint8 - priority int32 - isHotspot bool - - firstSeen time.Time - lastSeen time.Time - - lastTried time.Time - connected bool - lastConnected time.Time - lastError error - interfaceName string - - conn gnm.Connection -} - -func (n *network) getInfo() NetworkInfo { - var errStr string - if n.lastError != nil { - errStr = n.lastError.Error() - } - - return NetworkInfo{ - Type: n.netType, - SSID: n.ssid, - Security: n.security, - Signal: int32(n.signal), - Connected: n.connected, - LastError: errStr, - } -} - -type NetworkInfo struct { - Type string - SSID string - Security string - Signal int32 - Connected bool - LastError string -} - -func NetworkInfoToProto(net *NetworkInfo) *pb.NetworkInfo { - return &pb.NetworkInfo{ - Type: net.Type, - Ssid: net.SSID, - Security: net.Security, - Signal: net.Signal, - Connected: net.Connected, - LastError: net.LastError, - } -} - -func NetworkInfoFromProto(buf *pb.NetworkInfo) *NetworkInfo { - return &NetworkInfo{ - Type: buf.GetType(), - SSID: buf.GetSsid(), - Security: buf.GetSecurity(), - Signal: buf.GetSignal(), - Connected: buf.GetConnected(), - LastError: buf.GetLastError(), - } -} - -type NetworkConfig struct { - // "wifi", "wired", "wifi-static", "wired-static" - Type string `json:"type"` - - // name of interface, ex: "wlan0", "eth0", "enp14s0", etc. 
- Interface string `json:"interface"` - - // Wifi Settings - SSID string `json:"ssid"` - PSK string `json:"psk"` - - // Autoconnect Priority (primarily for wifi) - // higher values are preferred/tried first - // defaults to 0, but wifi networks added via hotspot are set to 999 when not in roaming mode - Priority int32 `json:"priority"` - - // CIDR format address, ex: 192.168.0.1/24 - // If unset, will default to "auto" (dhcp) - IPv4Address string `json:"ipv4_address"` - IPv4Gateway string `json:"ipv4_gateway"` - - // optional - IPv4DNS []string `json:"ipv4_dns"` - - // optional, 0 or -1 is default - // lower values are preferred (lower "cost") - // wired networks default to 100 - // wireless networks default to 600 - IPv4RouteMetric int64 `json:"ipv4_route_metric"` -} - -// MachineConfig represents the minimal needed for /etc/viam.json. -type MachineConfig struct { - Cloud *CloudConfig `json:"cloud"` -} - -type CloudConfig struct { - AppAddress string `json:"app_address"` - ID string `json:"id"` - Secret string `json:"secret"` -} - -func WriteDeviceConfig(file string, input userInput) error { - if input.RawConfig != "" { - return os.WriteFile(file, []byte(input.RawConfig), 0o600) - } - - cfg := &MachineConfig{ - Cloud: &CloudConfig{ - AppAddress: input.AppAddr, - ID: input.PartID, - Secret: input.Secret, - }, - } - - jsonBytes, err := json.Marshal(cfg) - if err != nil { - return err - } - return os.WriteFile(file, jsonBytes, 0o600) -} - -type portalData struct { - mu sync.Mutex - Updated time.Time - - inputChan chan<- userInput - - input *userInput - workers sync.WaitGroup - - // used to cancel background threads - cancel context.CancelFunc -} - -// must be called with p.mu already locked! 
-func (p *portalData) sendInput(connState *connectionState) { - input := *p.input - - // in case both network and device credentials are being updated - // only send user data if both are already set - if (input.SSID != "" && input.PartID != "") || - (input.SSID != "" && connState.getConfigured()) || - (input.PartID != "" && connState.getOnline()) { - p.input = &userInput{} - p.inputChan <- input - if p.cancel != nil { - p.cancel() - } - return - } - // if not, wait 10 seconds for full input - if p.cancel != nil { - p.cancel() - } - - ctx, cancel := context.WithCancel(context.Background()) - p.cancel = cancel - - p.workers.Add(1) - go func() { - defer p.workers.Done() - p.mu.Lock() - defer p.mu.Unlock() - select { - case <-ctx.Done(): - return - case <-time.After(time.Second * 10): - } - p.input = &userInput{} - p.inputChan <- input - }() -} - -type userInput struct { - // network - SSID string - PSK string - - // device credentials - PartID string - Secret string - AppAddr string - - // raw /etc/viam.json contents - RawConfig string -} - -func ConfigFromJSON(defaultConf Config, jsonBytes []byte) (*Config, error) { - minTimeout := Timeout(time.Second * 15) - conf := defaultConf - if err := json.Unmarshal(jsonBytes, &conf); err != nil { - return &defaultConf, err - } - - if conf.Manufacturer == "" || conf.Model == "" || conf.HotspotPrefix == "" || conf.HotspotPassword == "" { - return &defaultConf, errw.New("values in configs/attributes should not be empty, please omit empty fields entirely") - } - - var haveBadTimeout bool - if conf.OfflineTimeout < minTimeout { - conf.OfflineTimeout = defaultConf.OfflineTimeout - haveBadTimeout = true - } - - if conf.UserTimeout < minTimeout { - conf.UserTimeout = defaultConf.UserTimeout - haveBadTimeout = true - } - - if conf.FallbackTimeout < minTimeout { - conf.FallbackTimeout = defaultConf.FallbackTimeout - haveBadTimeout = true - } - - if haveBadTimeout { - return &conf, errw.Errorf("timeout values cannot be less than %s", 
time.Duration(minTimeout)) - } - - if conf.DeviceRebootAfterOfflineMinutes != 0 && - (conf.DeviceRebootAfterOfflineMinutes < conf.OfflineTimeout || conf.DeviceRebootAfterOfflineMinutes < conf.UserTimeout) { - badOffline := conf.DeviceRebootAfterOfflineMinutes - conf.DeviceRebootAfterOfflineMinutes = defaultConf.DeviceRebootAfterOfflineMinutes - return &conf, errw.Errorf("device_reboot_after_offline_minutes (%s) cannot be less than offline_timeout (%s) or user_timeout (%s)", - time.Duration(badOffline), time.Duration(conf.OfflineTimeout), time.Duration(conf.UserTimeout)) - } - - return &conf, nil -} - -func LoadConfig(updateConf *agentpb.DeviceSubsystemConfig) (*Config, error) { - newCfg := DefaultConf - cfg := &newCfg - - // config from disk (/etc/viam-provisioning.json) - jsonBytes, err := os.ReadFile(ProvisioningConfigFilePath) - if err != nil { - if !errors.Is(err, fs.ErrNotExist) { - return nil, err - } - } - if err == nil { - cfg, err = ConfigFromJSON(DefaultConf, jsonBytes) - if err != nil { - return cfg, errw.Wrap(err, "parsing viam-provisioning.json") - } - } - - // update with config from cloud (subsys attributes) - jsonBytes, err = updateConf.GetAttributes().MarshalJSON() - if err != nil { - return cfg, errw.Wrap(err, "marshaling JSON from attributes") - } - - cfg, err = ConfigFromJSON(*cfg, jsonBytes) - if err != nil { - return cfg, errw.Wrap(err, "parsing JSON from attributes") - } - - return cfg, nil -} - -// Config represents the json configurations parsed from either agent-provisioning.json OR passed from the "attributes" in the cloud config. -type Config struct { - // Things typically set in agent-provisioning.json - Manufacturer string `json:"manufacturer"` - Model string `json:"model"` - FragmentID string `json:"fragment_id"` - - // The interface to use for hotspot/provisioning/wifi management. 
Ex: "wlan0" - // Defaults to the first discovered 802.11 device - HotspotInterface string `json:"hotspot_interface"` - // The prefix to prepend to the hotspot name. - HotspotPrefix string `json:"hotspot_prefix"` - // Password required to connect to the hotspot. - HotspotPassword string `json:"hotspot_password"` - // If true, mobile (phone) users connecting to the hotspot won't be automatically redirected to the web portal. - DisableDNSRedirect bool `json:"disable_dns_redirect"` - - // How long without a connection before starting provisioning (hotspot) mode. - OfflineTimeout Timeout `json:"offline_timeout"` - - // How long since the last user interaction (via GRPC/app or web portal) before the state machine can resume. - UserTimeout Timeout `json:"user_timeout"` - - // If not "online", always drop out of hotspot mode and retry everything after this time limit. - FallbackTimeout Timeout `json:"fallback_timeout"` - - // When true, will try all known networks looking for internet (global) connectivity. - // Otherwise, will only try the primary wifi network and consider that sufficient if connected (regardless of global connectivity.) - RoamingMode bool `json:"roaming_mode"` - - // Additional networks to add/configure. Only useful in RoamingMode. - Networks []NetworkConfig `json:"networks"` - - // Computed from HotspotPrefix and Manufacturer - hotspotSSID string - - // If set, will explicitly enable or disable power save for all wifi connections managed by NetworkManager. - WifiPowerSave *bool `json:"wifi_power_save"` - - // If set, will reboot the device after it has been offline for this duration - // 0, default, will disable this feature. - DeviceRebootAfterOfflineMinutes Timeout `json:"device_reboot_after_offline_minutes"` -} - -// Timeout allows parsing golang-style durations (1h20m30s) OR minutes-as-float from/to json. 
-type Timeout time.Duration - -func (t Timeout) MarshalJSON() ([]byte, error) { - return json.Marshal(time.Duration(t).String()) -} - -func (t *Timeout) UnmarshalJSON(b []byte) error { - var v any - if err := json.Unmarshal(b, &v); err != nil { - return err - } - switch value := v.(type) { - case float64: - *t = Timeout(value * float64(time.Minute)) - return nil - case string: - tmp, err := time.ParseDuration(value) - if err != nil { - return err - } - *t = Timeout(tmp) - return nil - default: - return errw.Errorf("invalid duration: %+v", v) - } -} - -type health struct { - mu sync.Mutex - last time.Time -} - -func (h *health) MarkGood() { - h.mu.Lock() - defer h.mu.Unlock() - h.last = time.Now() -} - -func (h *health) Sleep(ctx context.Context, timeout time.Duration) bool { - select { - case <-ctx.Done(): - return false - case <-time.After(timeout): - h.mu.Lock() - defer h.mu.Unlock() - h.last = time.Now() - return true - } -} - -func (h *health) IsHealthy() bool { - h.mu.Lock() - defer h.mu.Unlock() - return time.Since(h.last) < HealthCheckTimeout -} - -type errorList struct { - mu sync.Mutex - errors []error -} - -func (e *errorList) Add(err ...error) { - e.mu.Lock() - defer e.mu.Unlock() - e.errors = append(e.errors, err...) -} - -func (e *errorList) Clear() { - e.mu.Lock() - defer e.mu.Unlock() - e.errors = []error{} -} - -func (e *errorList) Errors() []error { - e.mu.Lock() - defer e.mu.Unlock() - return e.errors -} - -type banner struct { - mu sync.Mutex - banner string -} - -func (b *banner) Set(banner string) { - b.mu.Lock() - defer b.mu.Unlock() - b.banner = banner -} - -func (b *banner) Get() string { - b.mu.Lock() - defer b.mu.Unlock() - return b.banner -} diff --git a/subsystems/registry/registry.go b/subsystems/registry/registry.go deleted file mode 100644 index f9c62fc..0000000 --- a/subsystems/registry/registry.go +++ /dev/null @@ -1,51 +0,0 @@ -// Package registry is used to register subsystems from other packages. 
-package registry - -import ( - "context" - "sync" - - "github.com/viamrobotics/agent/subsystems" - pb "go.viam.com/api/app/agent/v1" - "go.viam.com/rdk/logging" -) - -var ( - mu sync.Mutex - creators = map[string]CreatorFunc{} -) - -type CreatorFunc func(ctx context.Context, logger logging.Logger, updateConf *pb.DeviceSubsystemConfig) (subsystems.Subsystem, error) - -func Register(name string, creator CreatorFunc) { - mu.Lock() - defer mu.Unlock() - creators[name] = creator -} - -func Deregister(name string) { - mu.Lock() - defer mu.Unlock() - delete(creators, name) -} - -func GetCreator(name string) CreatorFunc { - mu.Lock() - defer mu.Unlock() - creator, ok := creators[name] - if ok { - return creator - } - return nil -} - -func List() []string { - mu.Lock() - defer mu.Unlock() - //nolint:prealloc - var names []string - for k := range creators { - names = append(names, k) - } - return names -} diff --git a/subsystems/subsystems.go b/subsystems/subsystems.go index fcc0035..373504f 100644 --- a/subsystems/subsystems.go +++ b/subsystems/subsystems.go @@ -4,7 +4,7 @@ package subsystems import ( "context" - pb "go.viam.com/api/app/agent/v1" + "github.com/viamrobotics/agent/utils" ) type Subsystem interface { @@ -15,11 +15,8 @@ type Subsystem interface { Stop(ctx context.Context) error // Update validates and/or updates a subsystem, returns true if subsystem should be restarted - Update(ctx context.Context, cfg *pb.DeviceSubsystemConfig) (bool, error) + Update(ctx context.Context, cfg utils.AgentConfig) bool // HealthCheck reports if a subsystem is running correctly (it is restarted if not) HealthCheck(ctx context.Context) error - - // Version returns the current version of the subsystem - Version() string } diff --git a/subsystems/syscfg/logging.go b/subsystems/syscfg/logging.go index 20e8ef2..3aa3c55 100644 --- a/subsystems/syscfg/logging.go +++ b/subsystems/syscfg/logging.go @@ -4,15 +4,15 @@ package syscfg import ( "errors" + "fmt" "io/fs" "os" "os/exec" - 
"regexp" errw "github.com/pkg/errors" sysd "github.com/sergeymakinen/go-systemdconf/v2" "github.com/sergeymakinen/go-systemdconf/v2/conf" - "github.com/viamrobotics/agent" + "github.com/viamrobotics/agent/utils" ) var ( @@ -20,17 +20,10 @@ var ( defaultLogLimit = "512M" ) -type LogConfig struct { - Disable bool `json:"disable"` - SystemMaxUse string `json:"system_max_use"` - RuntimeMaxUse string `json:"runtime_max_use"` -} - func (s *syscfg) EnforceLogging() error { s.mu.RLock() - cfg := s.cfg.Logging - s.mu.RUnlock() - if cfg.Disable { + defer s.mu.RUnlock() + if s.cfg.LoggingJournaldRuntimeMaxUseMegabytes < 0 || s.cfg.LoggingJournaldSystemMaxUseMegabytes < 0 { if err := os.Remove(journaldConfPath); err != nil { if errw.Is(err, fs.ErrNotExist) { return nil @@ -39,7 +32,7 @@ func (s *syscfg) EnforceLogging() error { } // if journald is NOT enabled, simply return - //nolint:nilerr + if err := checkJournaldEnabled(); err != nil { return nil } @@ -56,22 +49,17 @@ func (s *syscfg) EnforceLogging() error { return err } - persistSize := cfg.SystemMaxUse - tempSize := cfg.RuntimeMaxUse + persistSize := fmt.Sprintf("%dM", s.cfg.LoggingJournaldSystemMaxUseMegabytes) + tempSize := fmt.Sprintf("%dM", s.cfg.LoggingJournaldRuntimeMaxUseMegabytes) - if persistSize == "" { + if persistSize == "0M" { persistSize = defaultLogLimit } - if tempSize == "" { + if tempSize == "0M" { tempSize = defaultLogLimit } - sizeRegEx := regexp.MustCompile(`^[0-9]+[KMGTPE]$`) - if !(sizeRegEx.MatchString(persistSize) && sizeRegEx.MatchString(tempSize)) { - return errw.New("logfile size limits must be specificed in bytes, with one optional suffix character [KMGTPE]") - } - journalConf := &conf.JournaldFile{ Journal: conf.JournaldJournalSection{ SystemMaxUse: sysd.Value{persistSize}, @@ -84,7 +72,7 @@ func (s *syscfg) EnforceLogging() error { return errw.Wrapf(err, "marshaling new file for %s", journaldConfPath) } - isNew, err1 := agent.WriteFileIfNew(journaldConfPath, newFileBytes) + isNew, err1 := 
utils.WriteFileIfNew(journaldConfPath, newFileBytes) if err1 != nil { // We may have written a corrupt file, try to remove to salvage at least default behavior. if err := os.RemoveAll(journaldConfPath); err != nil { diff --git a/subsystems/syscfg/syscfg.go b/subsystems/syscfg/syscfg.go index 7e83598..a64d0fd 100644 --- a/subsystems/syscfg/syscfg.go +++ b/subsystems/syscfg/syscfg.go @@ -8,79 +8,46 @@ import ( "sync" errw "github.com/pkg/errors" - "github.com/viamrobotics/agent" "github.com/viamrobotics/agent/subsystems" - "github.com/viamrobotics/agent/subsystems/registry" - pb "go.viam.com/api/app/agent/v1" + "github.com/viamrobotics/agent/utils" "go.viam.com/rdk/logging" ) -func init() { - registry.Register(SubsysName, NewSubsystem) -} - const ( SubsysName = "agent-syscfg" ) -type Config struct { - Logging LogConfig `json:"logging"` - Upgrades UpgradesConfig `json:"upgrades"` -} - type syscfg struct { - mu sync.RWMutex - healthy bool - cfg Config - logger logging.Logger - running bool - disabled bool - cancel context.CancelFunc - workers sync.WaitGroup + mu sync.RWMutex + healthy bool + cfg utils.SystemConfiguration + logger logging.Logger + running bool + cancel context.CancelFunc + workers sync.WaitGroup } -func NewSubsystem(ctx context.Context, logger logging.Logger, updateConf *pb.DeviceSubsystemConfig) (subsystems.Subsystem, error) { - cfg, err := agent.ConvertAttributes[Config](updateConf.GetAttributes()) - if err != nil { - return nil, err +func NewSubsystem(ctx context.Context, logger logging.Logger, cfg utils.AgentConfig) subsystems.Subsystem { + return &syscfg{ + logger: logger, + cfg: cfg.SystemConfiguration, } - - return &syscfg{cfg: *cfg, logger: logger, disabled: updateConf.GetDisable()}, nil } -func (s *syscfg) Update(ctx context.Context, cfg *pb.DeviceSubsystemConfig) (bool, error) { +func (s *syscfg) Update(ctx context.Context, cfg utils.AgentConfig) (needRestart bool) { s.mu.Lock() defer s.mu.Unlock() - var needRestart bool - if cfg.GetDisable() 
!= s.disabled { - s.disabled = cfg.GetDisable() - if s.disabled { - s.logger.Infof("agent-syscfg disabled") - } + if !reflect.DeepEqual(cfg.SystemConfiguration, s.cfg) { needRestart = true } - if s.disabled { - return needRestart, nil - } - - newConf, err := agent.ConvertAttributes[Config](cfg.GetAttributes()) - if err != nil { - return needRestart, err - } - - if reflect.DeepEqual(newConf, s.cfg) { - return needRestart, nil - } - - needRestart = true - s.cfg = *newConf - return needRestart, nil + s.cfg = cfg.SystemConfiguration + return } func (s *syscfg) Version() string { - return agent.GetVersion() + return utils.GetVersion() } func (s *syscfg) Start(ctx context.Context) error { @@ -92,10 +59,6 @@ func (s *syscfg) Start(ctx context.Context) error { return errors.New("already running") } - if s.disabled { - return agent.ErrSubsystemDisabled - } - cancelCtx, cancelFunc := context.WithCancel(ctx) s.cancel = cancelFunc s.running = true @@ -148,7 +111,7 @@ func (s *syscfg) Stop(ctx context.Context) error { func (s *syscfg) HealthCheck(ctx context.Context) error { s.mu.RLock() defer s.mu.RUnlock() - if s.healthy || s.disabled { + if s.healthy { return nil } return errors.New("healthcheck failed") diff --git a/subsystems/syscfg/upgrades.go b/subsystems/syscfg/upgrades.go index b33d534..f4529ad 100644 --- a/subsystems/syscfg/upgrades.go +++ b/subsystems/syscfg/upgrades.go @@ -11,7 +11,7 @@ import ( "strings" errw "github.com/pkg/errors" - "github.com/viamrobotics/agent" + "github.com/viamrobotics/agent/utils" ) const ( @@ -22,21 +22,12 @@ const ( unattendedUpgradesPath = "/etc/apt/apt.conf.d/50unattended-upgrades" ) -type UpgradesConfig struct { - // Type can be - // Empty/missing ("") to make no changes - // "disable" (or "disabled") to disable auto-upgrades - // "security" to enable ONLY security upgrades - // "all" to enable upgrades from all configured sources - Type string `json:"type"` -} - func (s *syscfg) EnforceUpgrades(ctx context.Context) error { 
s.mu.RLock() - cfg := s.cfg.Upgrades + cfg := s.cfg.OSAutoUpgradeType s.mu.RUnlock() - if cfg.Type == "" { + if cfg == "" { return nil } @@ -45,8 +36,8 @@ func (s *syscfg) EnforceUpgrades(ctx context.Context) error { return err } - if cfg.Type == "disable" || cfg.Type == "disabled" { - isNew, err := agent.WriteFileIfNew(autoUpgradesPath, []byte(autoUpgradesContentsDisabled)) + if cfg == "disable" || cfg == "disabled" { + isNew, err := utils.WriteFileIfNew(autoUpgradesPath, []byte(autoUpgradesContentsDisabled)) if err != nil { return err } @@ -64,18 +55,18 @@ func (s *syscfg) EnforceUpgrades(ctx context.Context) error { } } - securityOnly := cfg.Type == "security" + securityOnly := cfg == "security" confContents, err := generateOrigins(securityOnly) if err != nil { return err } - isNew1, err := agent.WriteFileIfNew(autoUpgradesPath, []byte(autoUpgradesContentsEnabled)) + isNew1, err := utils.WriteFileIfNew(autoUpgradesPath, []byte(autoUpgradesContentsEnabled)) if err != nil { return err } - isNew2, err := agent.WriteFileIfNew(unattendedUpgradesPath, []byte(confContents)) + isNew2, err := utils.WriteFileIfNew(unattendedUpgradesPath, []byte(confContents)) if err != nil { return err } diff --git a/subsystems/viamserver/viamserver.go b/subsystems/viamserver/viamserver.go index 59af3a7..eccc362 100644 --- a/subsystems/viamserver/viamserver.go +++ b/subsystems/viamserver/viamserver.go @@ -12,46 +12,23 @@ import ( "regexp" "strings" "sync" - "sync/atomic" "syscall" "time" errw "github.com/pkg/errors" - "github.com/viamrobotics/agent" "github.com/viamrobotics/agent/subsystems" - "github.com/viamrobotics/agent/subsystems/registry" - pb "go.viam.com/api/app/agent/v1" + "github.com/viamrobotics/agent/utils" "go.viam.com/rdk/logging" - "go.viam.com/utils" - "google.golang.org/protobuf/types/known/structpb" + goutils "go.viam.com/utils" ) -func init() { - globalConfig.Store(&viamServerConfig{startTimeout: defaultStartTimeout}) - registry.Register(SubsysName, NewSubsystem) -} - 
-type viamServerConfig struct { - startTimeout time.Duration -} - const ( - defaultStartTimeout = time.Minute * 5 // stopTermTimeout must be higher than viam-server shutdown timeout of 90 secs. stopTermTimeout = time.Minute * 2 stopKillTimeout = time.Second * 10 - fastStartName = "fast_start" SubsysName = "viam-server" ) -var ( - ConfigFilePath = "/etc/viam.json" - - // Set if (cached or cloud) config has the "fast_start" attribute set on the viam-server subsystem. - FastStart atomic.Bool - globalConfig atomic.Pointer[viamServerConfig] -) - // RestartStatusResponse is the http/json response from viam_server's /health_check URL // This MUST remain in sync with RDK. type RestartStatusResponse struct { @@ -61,14 +38,15 @@ type RestartStatusResponse struct { } type viamServer struct { - mu sync.Mutex - cmd *exec.Cmd - running bool - shouldRun bool - lastExit int - exitChan chan struct{} - checkURL string - checkURLAlt string + mu sync.Mutex + cmd *exec.Cmd + running bool + shouldRun bool + lastExit int + exitChan chan struct{} + startTimeout time.Duration + checkURL string + checkURLAlt string // for blocking start/stop/check ops while another is in progress startStopMu sync.Mutex @@ -76,39 +54,6 @@ type viamServer struct { logger logging.Logger } -// helper to parse a duration, otherwise return a default. 
-func durationFromProtoStruct( - logger logging.Logger, protoStruct *structpb.Struct, key string, defaultValue time.Duration, -) time.Duration { - if protoStruct == nil { - return defaultValue - } - asMap := protoStruct.AsMap() - raw, ok := asMap[key] - if !ok { - return defaultValue - } - str, ok := raw.(string) - if !ok { - return defaultValue - } - durt, err := time.ParseDuration(str) - if err != nil { - logger.Warnf("unparseable duration string at %s: %s, error %s", key, str, err) - return defaultValue - } - logger.Debugf("parsed duration %s from key %s", durt.String(), key) - return durt -} - -func configFromProto(logger logging.Logger, updateConf *pb.DeviceSubsystemConfig) *viamServerConfig { - ret := &viamServerConfig{} - if updateConf != nil { - ret.startTimeout = durationFromProtoStruct(logger, updateConf.GetAttributes(), "start_timeout", defaultStartTimeout) - } - return ret -} - func (s *viamServer) Start(ctx context.Context) error { s.startStopMu.Lock() defer s.startStopMu.Unlock() @@ -126,11 +71,11 @@ func (s *viamServer) Start(ctx context.Context) error { s.shouldRun = true } - stdio := agent.NewMatchingLogger(s.logger, false, false) - stderr := agent.NewMatchingLogger(s.logger, true, false) + stdio := utils.NewMatchingLogger(s.logger, false, false) + stderr := utils.NewMatchingLogger(s.logger, true, false) //nolint:gosec - s.cmd = exec.Command(path.Join(agent.ViamDirs["bin"], SubsysName), "-config", ConfigFilePath) - s.cmd.Dir = agent.ViamDirs["viam"] + s.cmd = exec.Command(path.Join(utils.ViamDirs["bin"], SubsysName), "-config", utils.AppConfigFilePath) + s.cmd.Dir = utils.ViamDirs["viam"] s.cmd.SysProcAttr = &syscall.SysProcAttr{Setpgid: true} s.cmd.Stdout = stdio s.cmd.Stderr = stderr @@ -187,7 +132,7 @@ func (s *viamServer) Start(ctx context.Context) error { return nil case <-ctx.Done(): return ctx.Err() - case <-time.After(globalConfig.Load().startTimeout): + case <-time.After(s.startTimeout): return errw.New("startup timed out") case 
<-s.exitChan: return errw.New("startup failed") @@ -293,7 +238,7 @@ func (s *viamServer) HealthCheck(ctx context.Context) (errRet error) { } defer func() { - utils.UncheckedError(resp.Body.Close()) + goutils.UncheckedError(resp.Body.Close()) }() if resp.StatusCode < 200 || resp.StatusCode >= 300 { @@ -338,7 +283,7 @@ func (s *viamServer) isRestartAllowed(ctx context.Context) (bool, error) { } defer func() { - utils.UncheckedError(resp.Body.Close()) + goutils.UncheckedError(resp.Body.Close()) }() if resp.StatusCode < 200 || resp.StatusCode >= 300 { @@ -358,57 +303,51 @@ func (s *viamServer) isRestartAllowed(ctx context.Context) (bool, error) { return false, nil } -func (s *viamServer) Update(ctx context.Context, cfg *pb.DeviceSubsystemConfig, newVersion bool) (bool, error) { +func (s *viamServer) Update(ctx context.Context, cfg utils.AgentConfig) (needRestart bool) { s.mu.Lock() defer s.mu.Unlock() - setFastStart(cfg) - - // By default, return false on the needRestart flag, as we await the user to - // kill/restart viam-server directly. - var needRestart bool + s.startTimeout = time.Duration(cfg.AdvancedSettings.ViamServerStartTimeoutMinutes) + return false +} - if newVersion && s.running { - s.shouldRun = false +func (s *viamServer) SafeToRestart(ctx context.Context) bool { + s.mu.Lock() + defer s.mu.Unlock() - // viam-server can be safely restarted even while running if the process - // has reported it is safe to do so through its `restart_status` HTTP - // endpoint. 
- restartAllowed, err := s.isRestartAllowed(ctx) - if err != nil { - return needRestart, err - } - if restartAllowed { - s.logger.Infof("will restart %s to run new version, as it has reported allowance of a restart", - SubsysName) - needRestart = true - } else { - s.logger.Infof("will not restart %s version to run new version, as it has not reported"+ - "allowance of a restart", SubsysName) - } + if !s.running { + return true } - globalConfig.Store(configFromProto(s.logger, cfg)) + s.shouldRun = false - return needRestart, nil + // viam-server can be safely restarted even while running if the process + // has reported it is safe to do so through its `restart_status` HTTP + // endpoint. + restartAllowed, err := s.isRestartAllowed(ctx) + if err != nil { + s.logger.Error(err) + return restartAllowed + } + if restartAllowed { + s.logger.Infof("will restart %s to run new version, as it has reported allowance of a restart", SubsysName) + } else { + s.logger.Infof("will not restart %s version to run new version, as it has not reported allowance of a restart", SubsysName) + } + return restartAllowed } -func NewSubsystem(ctx context.Context, logger logging.Logger, updateConf *pb.DeviceSubsystemConfig) (subsystems.Subsystem, error) { - setFastStart(updateConf) - - globalConfig.Store(configFromProto(logger, updateConf)) - return agent.NewAgentSubsystem(ctx, SubsysName, logger, &viamServer{logger: logger}) +// SMURF IMPLEMENT. 
+func (s *viamServer) Version() string { + return "" } -func setFastStart(cfg *pb.DeviceSubsystemConfig) { - if cfg != nil { - cfgVal, ok := cfg.GetAttributes().AsMap()[fastStartName] - if ok { - cfgBool, ok := cfgVal.(bool) - if ok { - FastStart.Store(cfgBool) - return - } - } +func NewSubsystem(ctx context.Context, logger logging.Logger, cfg utils.AgentConfig) subsystems.Subsystem { + return &viamServer{ + logger: logger, + startTimeout: time.Duration(cfg.AdvancedSettings.ViamServerStartTimeoutMinutes), } - FastStart.Store(false) +} + +type RestartCheck interface { + SafeToRestart(ctx context.Context) bool } diff --git a/utils/config.go b/utils/config.go new file mode 100644 index 0000000..e8a3095 --- /dev/null +++ b/utils/config.go @@ -0,0 +1,426 @@ +package utils + +import ( + "encoding/json" + "errors" + "io/fs" + "os" + "path/filepath" + "strings" + "time" + + errw "github.com/pkg/errors" + "github.com/tidwall/jsonc" + pb "go.viam.com/api/app/agent/v1" + "google.golang.org/protobuf/types/known/structpb" +) + +var ( + DefaultConfiguration = AgentConfig{ + AdvancedSettings{ + Debug: false, + WaitForUpdateCheck: false, + DisableViamServer: false, + DisableNetworkConfiguration: false, + DisableSystemConfiguration: false, + ViamServerStartTimeoutMinutes: Timeout(time.Minute * 10), + }, + SystemConfiguration{ + LoggingJournaldSystemMaxUseMegabytes: 512, + LoggingJournaldRuntimeMaxUseMegabytes: 512, + OSAutoUpgradeType: "", + }, + NetworkConfiguration{ + Manufacturer: "viam", + Model: "custom", + FragmentID: "", + HotspotInterface: "", + HotspotPrefix: "viam-setup", + HotspotPassword: "viamsetup", + DisableCaptivePortalRedirect: false, + TurnOnHotspotIfWifiHasNoInternet: false, + WifiPowerSave: nil, + OfflineBeforeStartingHotspotMinutes: Timeout(time.Minute * 2), + UserIdleMinutes: Timeout(time.Minute * 5), + RetryConnectionTimeoutMinutes: Timeout(time.Minute * 10), + DeviceRebootAfterOfflineMinutes: Timeout(0), + HotspotSSID: "", + }, + AdditionalNetworks{}, + } + 
+ configCacheFilename = "config_cache.json" + + // Can be overwritten via cli arguments. + AppConfigFilePath = "/etc/viam.json" + DefaultsFilePath = "/etc/viam-defaults.json" + CLIDebug = false + CLIWaitForUpdateCheck = false +) + +type AgentConfig struct { + AdvancedSettings AdvancedSettings `json:"advanced_settings"` + SystemConfiguration SystemConfiguration `json:"system_configuration"` + NetworkConfiguration NetworkConfiguration `json:"network_configuration"` + AdditionalNetworks AdditionalNetworks `json:"additional_networks"` +} + +type AdvancedSettings struct { + Debug bool `json:"debug"` + WaitForUpdateCheck bool `json:"wait_for_update_check"` + DisableViamServer bool `json:"disable_viam_server"` + DisableNetworkConfiguration bool `json:"disable_network_configuration"` + DisableSystemConfiguration bool `json:"disable_system_configuration"` + ViamServerStartTimeoutMinutes Timeout `json:"viam_server_start_timeout_minutes"` +} + +type SystemConfiguration struct { + // can set either to -1 to disable, defaults to 512M (when int is 0) + LoggingJournaldSystemMaxUseMegabytes int `json:"logging_journald_system_max_use_megabytes"` + LoggingJournaldRuntimeMaxUseMegabytes int `json:"logging_journald_runtime_max_use_megabytes"` + + // UpgradeType can be + // Empty/missing ("") to make no changes + // "disable" (or "disabled") to disable auto-upgrades + // "security" to enable ONLY security upgrades + // "all" to enable upgrades from all configured sources + OSAutoUpgradeType string `json:"os_auto_upgrade_type"` +} + +type NetworkConfiguration struct { + // Things typically set in viam-defaults.json + Manufacturer string `json:"manufacturer"` + Model string `json:"model"` + FragmentID string `json:"fragment_id"` + + // The interface to use for hotspot/provisioning/wifi management. Ex: "wlan0" + // Defaults to the first discovered 802.11 device + HotspotInterface string `json:"hotspot_interface"` + // The prefix to prepend to the hotspot name. 
+ HotspotPrefix string `json:"hotspot_prefix"` + // Normally left blank, and computed from HotspotPrefix and Manufacturer + HotspotSSID string `json:"hotspot_ssid"` + // Password required to connect to the hotspot. + HotspotPassword string `json:"hotspot_password"` + // If true, mobile (phone) users connecting to the hotspot won't be automatically redirected to the web portal. + DisableCaptivePortalRedirect bool `json:"disable_captive_portal_redirect"` + + // When true, will try all known networks looking for internet (global) connectivity. + // Otherwise, will only try the primary wifi network and consider that sufficient if connected (regardless of global connectivity.) + TurnOnHotspotIfWifiHasNoInternet bool `json:"turn_on_hotspot_if_wifi_has_no_internet"` + + // If set, will explicitly enable or disable power save for all wifi connections managed by NetworkManager. + WifiPowerSave *bool `json:"wifi_power_save"` + + // How long without a connection before starting provisioning (hotspot) mode. + OfflineBeforeStartingHotspotMinutes Timeout `json:"offline_before_starting_hotspot_minutes"` + + // How long since the last user interaction (via GRPC/app or web portal) before the state machine can resume. + UserIdleMinutes Timeout `json:"user_idle_minutes"` + + // If not "online", always drop out of hotspot mode and retry everything after this time limit. + RetryConnectionTimeoutMinutes Timeout `json:"retry_connection_timeout_minutes"` + + // If set, will reboot the device after it has been offline for this duration + // 0, default, will disable this feature. + DeviceRebootAfterOfflineMinutes Timeout `json:"device_reboot_after_offline_minutes"` +} + +type AdditionalNetworks map[string]NetworkDefinition + +type NetworkDefinition struct { + // "wifi", "wired", "wifi-static", "wired-static" + Type string `json:"type"` + + // name of interface, ex: "wlan0", "eth0", "enp14s0", etc. 
+ Interface string `json:"interface"` + + // Wifi Settings + SSID string `json:"ssid"` + PSK string `json:"psk"` + + // Autoconnect Priority (primarily for wifi) + // higher values are preferred/tried first + // defaults to 0, but wifi networks added via hotspot are set to 999 when not in roaming mode + Priority int32 `json:"priority"` + + // CIDR format address, ex: 192.168.0.1/24 + // If unset, will default to "auto" (dhcp) + IPv4Address string `json:"ipv4_address"` + IPv4Gateway string `json:"ipv4_gateway"` + + // optional + IPv4DNS []string `json:"ipv4_dns"` + + // optional, 0 or -1 is default + // lower values are preferred (lower "cost") + // wired networks default to 100 + // wireless networks default to 600 + IPv4RouteMetric int64 `json:"ipv4_route_metric"` +} + +func DefaultConfig() AgentConfig { + cfg := AgentConfig{} + // round-trip to get a deep copy of the default config + defBytes, err := json.Marshal(DefaultConfiguration) + if err != nil { + panic(err) + } + err = json.Unmarshal(defBytes, &cfg) + if err != nil { + panic(err) + } + return cfg +} + +func SaveConfigToCache(cfg AgentConfig) error { + cachePath := filepath.Join(ViamDirs["cache"], configCacheFilename) + + js, err := json.Marshal(cfg) + if err != nil { + return errw.Wrap(err, "marshalling config for caching") + } + + _, err = WriteFileIfNew(cachePath, js) + return errw.Wrapf(err, "writing config cache to %s", cachePath) +} + +func LoadConfigFromCache() (AgentConfig, error) { + cachePath := filepath.Join(ViamDirs["cache"], configCacheFilename) + + cfg := AgentConfig{} + + //nolint:gosec + cacheBytes, err := os.ReadFile(cachePath) + if err != nil { + if !errors.Is(err, fs.ErrNotExist) { + cfg, newErr := StackConfigs(&pb.DeviceAgentConfigResponse{}) + return cfg, errors.Join(errw.Wrap(err, "reading config cache"), newErr) + } + } else { + err = json.Unmarshal(cacheBytes, &cfg) + if err != nil { + cfg, newErr := StackConfigs(&pb.DeviceAgentConfigResponse{}) + return cfg, 
errors.Join(errw.Wrap(err, "parsing config cache"), newErr) + } + } + + return cfg, nil +} + +func ApplyCLIArgs(cfg AgentConfig) AgentConfig { + if CLIDebug { + cfg.AdvancedSettings.Debug = true + } + if CLIWaitForUpdateCheck { + cfg.AdvancedSettings.WaitForUpdateCheck = true + } + return cfg +} + +func StackConfigs(proto *pb.DeviceAgentConfigResponse) (AgentConfig, error) { + cfg := DefaultConfig() + var errOut error + + // parse/apply deprecated /etc/viam-provisioning.json + oldCfg, err := LoadOldProvisioningConfig() + if err != nil { + if !errors.Is(err, fs.ErrNotExist) { + errOut = errors.Join(errOut, errw.Wrap(err, "reading deprecated /etc/viam-provisioning.json")) + } + } else { + cfg.NetworkConfiguration = *oldCfg + } + + // manufacturer config from local disk (/etc/viam-defaults.json) + jsonBytes, err := os.ReadFile(DefaultsFilePath) + if err != nil { + if !errors.Is(err, fs.ErrNotExist) { + errOut = errors.Join(errOut, err) + } + } else { + if err := json.Unmarshal(jsonc.ToJSON(jsonBytes), &cfg); err != nil { + errOut = errors.Join(errOut, err) + } + } + + // cloud-provided config + cloudCfg, err := ProtoToConfig(proto) + if err != nil { + errOut = errors.Join(errOut, err) + } else { + jsonBytes, err = json.Marshal(cloudCfg) + if err != nil { + errOut = errors.Join(errOut, err) + } else { + if err := json.Unmarshal(jsonBytes, &cfg); err != nil { + errOut = errors.Join(errOut, err) + } + } + } + + // validate/enforce/limit values + validatedCfg, err := validateConfig(cfg) + errOut = errors.Join(errOut, err) + + return validatedCfg, errOut +} + +// validateConfig enforces min/max values, returning a "corrected" config and error(s) for each issue encountered. +// Should only be called where input will NEVER be reused due to direct modification of struct fields. 
+func validateConfig(cfg AgentConfig) (AgentConfig, error) { + var errOut error + + // AdvancedSettings + if time.Duration(cfg.AdvancedSettings.ViamServerStartTimeoutMinutes) < time.Minute { + errOut = errors.Join(errOut, errw.Errorf("agent.advanced_settings.viam_server_start_timeout_minutes must be >= 1m (was: %s)", + time.Duration(cfg.AdvancedSettings.ViamServerStartTimeoutMinutes))) + cfg.AdvancedSettings.ViamServerStartTimeoutMinutes = Timeout(time.Minute) + } + + // SystemConfiguration + // zero isn't allowed, revert to default, but don't warn + if cfg.SystemConfiguration.LoggingJournaldSystemMaxUseMegabytes == 0 { + cfg.SystemConfiguration.LoggingJournaldSystemMaxUseMegabytes = DefaultConfiguration.SystemConfiguration.LoggingJournaldSystemMaxUseMegabytes + } + if cfg.SystemConfiguration.LoggingJournaldRuntimeMaxUseMegabytes == 0 { + cfg.SystemConfiguration.LoggingJournaldRuntimeMaxUseMegabytes = DefaultConfiguration.SystemConfiguration.LoggingJournaldRuntimeMaxUseMegabytes + } + if cfg.SystemConfiguration.OSAutoUpgradeType != "" && + cfg.SystemConfiguration.OSAutoUpgradeType != "security" && + cfg.SystemConfiguration.OSAutoUpgradeType != "all" { + errOut = errors.Join(errOut, errw.Errorf("agent.system_configuration.os_auto_upgrade_type can only be 'security' or 'all' (was: %s)", + cfg.SystemConfiguration.OSAutoUpgradeType)) + cfg.SystemConfiguration.OSAutoUpgradeType = DefaultConfiguration.SystemConfiguration.OSAutoUpgradeType + } + + // NetworkConfiguration + if cfg.NetworkConfiguration.Manufacturer == "" { + cfg.NetworkConfiguration.Manufacturer = DefaultConfiguration.NetworkConfiguration.Manufacturer + errOut = errors.Join(errOut, errw.New("network_configuration.manufacturer should not be empty, please omit empty fields entirely")) + } + if cfg.NetworkConfiguration.Model == "" { + cfg.NetworkConfiguration.Model = DefaultConfiguration.NetworkConfiguration.Model + errOut = errors.Join(errOut, errw.New("network_configuration.model should not be empty, please 
omit empty fields entirely")) + } + if cfg.NetworkConfiguration.HotspotPrefix == "" { + cfg.NetworkConfiguration.HotspotPrefix = DefaultConfiguration.NetworkConfiguration.HotspotPrefix + errOut = errors.Join(errOut, errw.New("network_configuration.hotspot_prefix should not be empty, please omit empty fields entirely")) + } + if cfg.NetworkConfiguration.HotspotPassword == "" { + cfg.NetworkConfiguration.HotspotPassword = DefaultConfiguration.NetworkConfiguration.HotspotPassword + errOut = errors.Join(errOut, errw.New("network_configuration.hotspot_password should not be empty, please omit empty fields entirely")) + } + + if cfg.NetworkConfiguration.HotspotSSID == "" { + hostname, err := os.Hostname() + if err != nil { + errOut = errors.Join(errOut, errw.Wrap(err, "getting hostname")) + hostname = "unknown" + } + cfg.NetworkConfiguration.HotspotSSID = cfg.NetworkConfiguration.HotspotPrefix + "-" + strings.ToLower(hostname) + } + if len(cfg.NetworkConfiguration.HotspotSSID) > 32 { + cfg.NetworkConfiguration.HotspotSSID = cfg.NetworkConfiguration.HotspotSSID[:32] + errOut = errors.Join(errOut, errw.New("network_configuration.hotspot_ssid is being truncated to 32 characters")) + } + + var haveBadTimeout bool + minTimeout := Timeout(time.Minute) + if cfg.NetworkConfiguration.OfflineBeforeStartingHotspotMinutes < minTimeout { + cfg.NetworkConfiguration.OfflineBeforeStartingHotspotMinutes = DefaultConfiguration.NetworkConfiguration.OfflineBeforeStartingHotspotMinutes + haveBadTimeout = true + } + + if cfg.NetworkConfiguration.UserIdleMinutes < minTimeout { + cfg.NetworkConfiguration.UserIdleMinutes = DefaultConfiguration.NetworkConfiguration.UserIdleMinutes + haveBadTimeout = true + } + + if cfg.NetworkConfiguration.RetryConnectionTimeoutMinutes < minTimeout { + cfg.NetworkConfiguration.RetryConnectionTimeoutMinutes = DefaultConfiguration.NetworkConfiguration.RetryConnectionTimeoutMinutes + haveBadTimeout = true + } + + if haveBadTimeout { + errOut = errors.Join(errOut, 
errw.New("timeout values cannot be less than 1 minute")) + } + + if cfg.NetworkConfiguration.DeviceRebootAfterOfflineMinutes != 0 && + (cfg.NetworkConfiguration.DeviceRebootAfterOfflineMinutes < cfg.NetworkConfiguration.OfflineBeforeStartingHotspotMinutes || + cfg.NetworkConfiguration.DeviceRebootAfterOfflineMinutes < cfg.NetworkConfiguration.UserIdleMinutes) { + badOffline := cfg.NetworkConfiguration.DeviceRebootAfterOfflineMinutes + cfg.NetworkConfiguration.DeviceRebootAfterOfflineMinutes = DefaultConfiguration.NetworkConfiguration.DeviceRebootAfterOfflineMinutes + errOut = errors.Join(errOut, + errw.Errorf("device_reboot_after_offline_minutes (%s) cannot be less than offline_before_starting_hotspot_minutes (%s) or user_idle_minutes (%s)", + time.Duration(badOffline), time.Duration(cfg.NetworkConfiguration.OfflineBeforeStartingHotspotMinutes), time.Duration(cfg.NetworkConfiguration.UserIdleMinutes)), + ) + } + + // SMURF validate additional_networks + return cfg, errOut +} + +func ProtoToConfig(proto *pb.DeviceAgentConfigResponse) (AgentConfig, error) { + var ( + conf AgentConfig + errOut, err error + ) + + conf.AdvancedSettings, err = ConvertStruct[AdvancedSettings](proto.GetAdvancedSettings()) + errOut = errors.Join(errOut, err) + + conf.SystemConfiguration, err = ConvertStruct[SystemConfiguration](proto.GetSystemConfiguration()) + errOut = errors.Join(errOut, err) + + conf.NetworkConfiguration, err = ConvertStruct[NetworkConfiguration](proto.GetNetworkConfiguration()) + errOut = errors.Join(errOut, err) + + conf.AdditionalNetworks, err = ConvertStruct[AdditionalNetworks](proto.GetAdditionalNetworks()) + errOut = errors.Join(errOut, err) + + return conf, errOut +} + +func ConvertStruct[T any](proto *structpb.Struct) (T, error) { + newConfig := new(T) + + jsonBytes, err := proto.MarshalJSON() + if err != nil { + return *newConfig, err + } + + if err = json.Unmarshal(jsonBytes, newConfig); err != nil { + return *newConfig, err + } + + return *newConfig, nil +} + 
+// Timeout allows parsing golang-style durations (1h20m30s) OR minutes-as-float from/to json. +type Timeout time.Duration + +func (t Timeout) MarshalJSON() ([]byte, error) { + return json.Marshal(time.Duration(t).String()) +} + +func (t *Timeout) UnmarshalJSON(b []byte) error { + var v any + if err := json.Unmarshal(b, &v); err != nil { + return err + } + switch value := v.(type) { + case float64: + *t = Timeout(value * float64(time.Minute)) + return nil + case string: + tmp, err := time.ParseDuration(value) + if err != nil { + return err + } + *t = Timeout(tmp) + return nil + default: + return errw.Errorf("invalid duration: %+v", v) + } +} diff --git a/utils/config_old.go b/utils/config_old.go new file mode 100644 index 0000000..e7c6c34 --- /dev/null +++ b/utils/config_old.go @@ -0,0 +1,105 @@ +package utils + +import ( + "encoding/json" + "os" + "time" + + "github.com/tidwall/jsonc" +) + +var ( + DefaultOldProvisioningConfig = OldProvisioningConfig{ + Manufacturer: "viam", + Model: "custom", + FragmentID: "", + HotspotPrefix: "viam-setup", + HotspotPassword: "viamsetup", + DisableDNSRedirect: false, + RoamingMode: false, + OfflineTimeout: Timeout(time.Minute * 2), + UserTimeout: Timeout(time.Minute * 5), + FallbackTimeout: Timeout(time.Minute * 10), + WifiPowerSave: nil, + DeviceRebootAfterOfflineMinutes: Timeout(0), + } + OldProvisioningConfigFilePath = "/etc/viam-provisioning.json" +) + +// OldProvisioningConfig represents the json configurations parsed from either agent-provisioning.json. +type OldProvisioningConfig struct { + // Things typically set in agent-provisioning.json + Manufacturer string `json:"manufacturer"` + Model string `json:"model"` + FragmentID string `json:"fragment_id"` + + // The interface to use for hotspot/provisioning/wifi management. Ex: "wlan0" + // Defaults to the first discovered 802.11 device + HotspotInterface string `json:"hotspot_interface"` + // The prefix to prepend to the hotspot name. 
+ HotspotPrefix string `json:"hotspot_prefix"` + // Password required to connect to the hotspot. + HotspotPassword string `json:"hotspot_password"` + // If true, mobile (phone) users connecting to the hotspot won't be automatically redirected to the web portal. + DisableDNSRedirect bool `json:"disable_dns_redirect"` + + // How long without a connection before starting provisioning (hotspot) mode. + OfflineTimeout Timeout `json:"offline_timeout"` + + // How long since the last user interaction (via GRPC/app or web portal) before the state machine can resume. + UserTimeout Timeout `json:"user_timeout"` + + // If not "online", always drop out of hotspot mode and retry everything after this time limit. + FallbackTimeout Timeout `json:"fallback_timeout"` + + // When true, will try all known networks looking for internet (global) connectivity. + // Otherwise, will only try the primary wifi network and consider that sufficient if connected (regardless of global connectivity.) + RoamingMode bool `json:"roaming_mode"` + + // If set, will explicitly enable or disable power save for all wifi connections managed by NetworkManager. + WifiPowerSave *bool `json:"wifi_power_save"` + + // If set, will reboot the device after it has been offline for this duration + // 0, default, will disable this feature. 
+ DeviceRebootAfterOfflineMinutes Timeout `json:"device_reboot_after_offline_minutes"` +} + +func LoadOldProvisioningConfig() (*NetworkConfiguration, error) { + oldCfg := OldProvisioningConfig{} + + // round-trip to get a deep copy of the default config + defBytes, err := json.Marshal(DefaultOldProvisioningConfig) + if err != nil { + panic(err) + } + err = json.Unmarshal(defBytes, &oldCfg) + if err != nil { + panic(err) + } + + // config from disk (/etc/viam-provisioning.json) + jsonBytes, err := os.ReadFile(OldProvisioningConfigFilePath) + if err != nil { + return nil, err + } + + if err := json.Unmarshal(jsonc.ToJSON(jsonBytes), &oldCfg); err != nil { + return nil, err + } + + return &NetworkConfiguration{ + Manufacturer: oldCfg.Manufacturer, + Model: oldCfg.Model, + FragmentID: oldCfg.FragmentID, + HotspotInterface: oldCfg.HotspotInterface, + HotspotPrefix: oldCfg.HotspotPrefix, + HotspotPassword: oldCfg.HotspotPassword, + DisableCaptivePortalRedirect: oldCfg.DisableDNSRedirect, + TurnOnHotspotIfWifiHasNoInternet: oldCfg.RoamingMode, + WifiPowerSave: oldCfg.WifiPowerSave, + OfflineBeforeStartingHotspotMinutes: oldCfg.OfflineTimeout, + UserIdleMinutes: oldCfg.UserTimeout, + RetryConnectionTimeoutMinutes: oldCfg.FallbackTimeout, + DeviceRebootAfterOfflineMinutes: oldCfg.DeviceRebootAfterOfflineMinutes, + }, nil +} diff --git a/utils/config_test.go b/utils/config_test.go new file mode 100644 index 0000000..a7678e0 --- /dev/null +++ b/utils/config_test.go @@ -0,0 +1,71 @@ +package utils + +import ( + "encoding/json" + "testing" + + "go.viam.com/test" +) + +// SMURF TODO lots of tests + +func TestConvertJson(t *testing.T) { + jsonBytes := ` +{ + "version_control": { + "agent": "stable", + "viam-server": "0.52.1" + }, + "advanced_settings": { + "debug": false, + "wait_for_update_check": false, + "viam_server_start_timeout_minutes": 10, + "disable_viam_server": false, + "disable_network_configuration": false, + "disable_system_configuration": false + }, + 
"network_configuration": { + "manufacturer": "viam", + "model": "custom", + "fragment_id": "", + "hotspot_prefix": "viam-setup", + "hotspot_password": "viamsetup", + "disable_captive_portal_redirect": false, + "offline_before_starting_hotspot_minutes": 2, + "user_idle_minutes": 5, + "retry_connection_timeout_minutes": 10, + "turn_on_hotspot_if_wifi_has_no_internet": false, + "wifi_power_save": null + }, + "additional_networks": { + "network1": { + "type": "", + "interface": "", + "ssid": "foo", + "psk": "bar", + "priority": 0, + "ipv4_address": "", + "ipv4_gateway": "", + "ipv4_dns": [], + "ipv4_route_metric": 0 + }, + "network2": { + "ssid": "moo", + "psk": "cow" + } + }, + "system_configuration": { + "logging_journald_system_max_use_megabytes": 512, + "logging_journald_runtime_max_use_megabytes": 512, + "os_auto_upgrade_type": "security" + } +} +` + + newConfig := &AgentConfig{} + + err := json.Unmarshal([]byte(jsonBytes), newConfig) + + test.That(t, err, test.ShouldBeNil) + test.That(t, newConfig, test.ShouldResemble, &AgentConfig{}) +} diff --git a/logger.go b/utils/logger.go similarity index 99% rename from logger.go rename to utils/logger.go index 52997a4..41f3ef1 100644 --- a/logger.go +++ b/utils/logger.go @@ -1,4 +1,4 @@ -package agent +package utils import ( "bytes" diff --git a/logger_test.go b/utils/logger_test.go similarity index 99% rename from logger_test.go rename to utils/logger_test.go index 276a489..3ea58b1 100644 --- a/logger_test.go +++ b/utils/logger_test.go @@ -1,4 +1,4 @@ -package agent +package utils import ( "testing" diff --git a/utils.go b/utils/utils.go similarity index 93% rename from utils.go rename to utils/utils.go index 4872259..83a164b 100644 --- a/utils.go +++ b/utils/utils.go @@ -1,12 +1,11 @@ -// Package agent contains the public interfaces, functions, consts, and vars for the viam-server agent. 
-package agent +// Package utils contains helper functions shared between the main agent and subsystems +package utils import ( "bufio" "bytes" "context" "crypto/sha256" - "encoding/json" "errors" "io" "io/fs" @@ -23,7 +22,6 @@ import ( errw "github.com/pkg/errors" "github.com/ulikunitz/xz" "golang.org/x/sys/unix" - "google.golang.org/protobuf/types/known/structpb" ) var ( @@ -32,6 +30,8 @@ var ( GitRevision = "" ViamDirs = map[string]string{"viam": "/opt/viam"} + + ErrSubsystemDisabled = errors.New("subsystem disabled") ) // GetVersion returns the version embedded at build time. @@ -227,7 +227,7 @@ func GetFileSum(filepath string) (outSum []byte, errRet error) { return h.Sum(nil), errRet } -func fuzzTime(duration time.Duration, pct float64) time.Duration { +func FuzzTime(duration time.Duration, pct float64) time.Duration { // pct is fuzz factor percentage 0.0 - 1.0 // example +/- 5% is 0.05 //nolint:gosec @@ -295,6 +295,7 @@ func SyncFS(syncPath string) (errRet error) { return errors.Join(errRet, file.Close()) } +// WriteFileIfNew returns true if contents changed and a write happened. 
func WriteFileIfNew(outPath string, data []byte) (bool, error) { //nolint:gosec curFileBytes, err := os.ReadFile(outPath) @@ -316,19 +317,5 @@ func WriteFileIfNew(outPath string, data []byte) (bool, error) { return true, errw.Wrapf(err, "writing %s", outPath) } - return true, nil -} - -func ConvertAttributes[T any](attributes *structpb.Struct) (*T, error) { - jsonBytes, err := attributes.MarshalJSON() - if err != nil { - return new(T), err - } - - newConfig := new(T) - if err = json.Unmarshal(jsonBytes, newConfig); err != nil { - return new(T), err - } - - return newConfig, nil + return true, SyncFS(outPath) } diff --git a/version_control.go b/version_control.go new file mode 100644 index 0000000..532f75c --- /dev/null +++ b/version_control.go @@ -0,0 +1,251 @@ +package agent + +import ( + "bytes" + "context" + "encoding/base64" + "encoding/json" + "errors" + "fmt" + "io/fs" + "os" + "path" + "path/filepath" + "sync" + "time" + + errw "github.com/pkg/errors" + "github.com/viamrobotics/agent/subsystems/viamserver" + "github.com/viamrobotics/agent/utils" + pb "go.viam.com/api/app/agent/v1" + "go.viam.com/rdk/logging" +) + +const ( + versionCacheFilename = "version_cache.json" +) + +func NewVersionCache(logger logging.Logger) *VersionCache { + cache := &VersionCache{ + ViamAgent: &Versions{Versions: map[string]*VersionInfo{}}, + ViamServer: &Versions{Versions: map[string]*VersionInfo{}}, + logger: logger, + } + cache.load() + return cache +} + +type VersionCache struct { + mu sync.Mutex + ViamAgent *Versions `json:"viam_agent"` + ViamServer *Versions `json:"viam_server"` + logger logging.Logger +} + +// Versions stores VersionInfo and the current/previous versions for (TODO) rollback. 
+type Versions struct { + TargetVersion string `json:"target_version"` + CurrentVersion string `json:"current_version"` + PreviousVersion string `json:"previous_version"` + Versions map[string]*VersionInfo `json:"versions"` +} + +// VersionInfo records details about each version of a subsystem. +type VersionInfo struct { + Version string + URL string + DlPath string + DlSHA []byte + UnpackedPath string + UnpackedSHA []byte + SymlinkPath string + Installed time.Time +} + +func (c *VersionCache) AgentVersion() string { + c.mu.Lock() + defer c.mu.Unlock() + return c.ViamAgent.CurrentVersion +} + +func (c *VersionCache) ViamServerVersion() string { + c.mu.Lock() + defer c.mu.Unlock() + return c.ViamServer.CurrentVersion +} + +// LoadCache loads the cached data for the subsystem from disk. +func (c *VersionCache) load() { + c.mu.Lock() + defer c.mu.Unlock() + + cacheFilePath := filepath.Join(utils.ViamDirs["cache"], versionCacheFilename) + //nolint:gosec + cacheBytes, err := os.ReadFile(cacheFilePath) + if err != nil { + if !errors.Is(err, fs.ErrNotExist) { + c.logger.Error(err) + return + } + } else { + err = json.Unmarshal(cacheBytes, c) + if err != nil { + c.logger.Error(errw.Wrap(err, "parsing version cache")) + return + } + } +} + +// save should only be run when protected by mutex locks. Use SaveCache() for normal use. +func (c *VersionCache) save() error { + cacheFilePath := filepath.Join(utils.ViamDirs["cache"], versionCacheFilename) + + cacheData, err := json.Marshal(c) + if err != nil { + return err + } + + _, err = utils.WriteFileIfNew(cacheFilePath, cacheData) + return err +} + +// Save saves the cached data to disk. +func (c *VersionCache) Save() error { + c.mu.Lock() + defer c.mu.Unlock() + return c.save() +} + +// Update processes data for the two binaries, agent itself, and viam-server. 
+func (c *VersionCache) Update(cfg *pb.UpdateInfo, binary string) error { + c.mu.Lock() + defer c.mu.Unlock() + + var data *Versions + if binary == SubsystemName { + data = c.ViamAgent + } else if binary == viamserver.SubsysName { + data = c.ViamServer + } + newVersion := cfg.GetVersion() + if newVersion == "customURL" { + newVersion = "customURL" + cfg.GetUrl() + } + + if newVersion == data.TargetVersion { + return nil + } + + data.TargetVersion = newVersion + info, ok := data.Versions[newVersion] + if !ok { + info = &VersionInfo{} + data.Versions[newVersion] = info + } + + info.Version = newVersion + info.URL = cfg.GetUrl() + info.SymlinkPath = path.Join(utils.ViamDirs["bin"], cfg.GetFilename()) + info.UnpackedSHA = cfg.GetSha256() + + return c.save() +} + +// UpdateBinary actually downloads and/or validates the targeted version. Returns true if a restart is needed. +// +//nolint:gocognit +func (c *VersionCache) UpdateBinary(ctx context.Context, binary string) (bool, error) { + c.mu.Lock() + defer c.mu.Unlock() + + var data *Versions + switch binary { + case SubsystemName: + data = c.ViamAgent + case viamserver.SubsysName: + data = c.ViamServer + default: + return false, errw.Errorf("unknown binary name for update request: %s", binary) + } + + var needRestart bool + + verData, ok := data.Versions[data.TargetVersion] + if !ok { + return needRestart, errw.Errorf("version data not found for %s %s", binary, data.TargetVersion) + } + + if data.TargetVersion == data.CurrentVersion { + // if a known version, make sure the symlink is correct + same, err := utils.CheckIfSame(verData.DlPath, verData.SymlinkPath) + if err != nil { + return needRestart, err + } + if !same { + if err := utils.ForceSymlink(verData.UnpackedPath, verData.SymlinkPath); err != nil { + return needRestart, err + } + } + + shasum, err := utils.GetFileSum(verData.UnpackedPath) + if err == nil && bytes.Equal(shasum, verData.UnpackedSHA) { + return false, nil + } + if err != nil { + c.logger.Error(err) 
+ } + } + + // this is a new version + c.logger.Infof("new version (%s) found for %s", verData.Version, binary) + + // download and record the sha of the download itself + var err error + verData.DlPath, err = utils.DownloadFile(ctx, verData.URL) + if err != nil { + return needRestart, errw.Wrapf(err, "downloading %s", binary) + } + actualSha, err := utils.GetFileSum(verData.DlPath) + if err != nil { + return needRestart, errw.Wrap(err, "getting file shasum") + } + + // TODO handle compressed formats, for now, the raw download is the same file + verData.UnpackedPath = verData.DlPath + verData.DlSHA = actualSha + + if len(verData.UnpackedSHA) > 1 && !bytes.Equal(verData.UnpackedSHA, actualSha) { + //nolint:goerr113 + return needRestart, fmt.Errorf( + "sha256 (%s) of downloaded file (%s) does not match provided (%s)", + base64.StdEncoding.EncodeToString(actualSha), + verData.UnpackedPath, + base64.StdEncoding.EncodeToString(verData.UnpackedSHA), + ) + } + + // chmod with execute permissions if the file is executable + //nolint:gosec + if err := os.Chmod(verData.UnpackedPath, 0o755); err != nil { + return needRestart, err + } + + // symlink the extracted file to bin + if err = utils.ForceSymlink(verData.UnpackedPath, verData.SymlinkPath); err != nil { + return needRestart, errw.Wrap(err, "creating symlink") + } + + // update current and previous versions + if data.CurrentVersion != data.PreviousVersion { + data.PreviousVersion = data.CurrentVersion + } + data.CurrentVersion = data.TargetVersion + verData.Installed = time.Now() + + // if we made it here we performed an update and need to restart + c.logger.Infof("%s updated from %s to %s", binary, data.PreviousVersion, data.CurrentVersion) + needRestart = true + + // record the cache + return needRestart, c.save() +} diff --git a/subsystems/viamagent/viam-agent.service b/viam-agent.service similarity index 100% rename from subsystems/viamagent/viam-agent.service rename to viam-agent.service From 
55cbd9b5f9313052de1e6ba01c82522fa1e34f9b Mon Sep 17 00:00:00 2001 From: James Otting Date: Mon, 27 Jan 2025 12:48:29 -0600 Subject: [PATCH 2/6] Lint --- cmd/viam-agent/main.go | 6 +++-- subsystems/networking/networkmanager.go | 9 ++++--- subsystems/syscfg/logging.go | 1 - utils/config.go | 32 ++++++++++++++++++------- 4 files changed, 33 insertions(+), 15 deletions(-) diff --git a/cmd/viam-agent/main.go b/cmd/viam-agent/main.go index 8b9a0fc..9fdddb7 100644 --- a/cmd/viam-agent/main.go +++ b/cmd/viam-agent/main.go @@ -141,7 +141,8 @@ func main() { //nolint:nestif if err != nil { if cfg.AdvancedSettings.DisableNetworkConfiguration { - globalLogger.Errorf("Cannot read %s and network configuration is diabled. Please correct and restart viam-agent.", utils.AppConfigFilePath) + globalLogger.Errorf("Cannot read %s and network configuration is diabled. Please correct and restart viam-agent.", + utils.AppConfigFilePath) manager.CloseAll() return } @@ -165,7 +166,8 @@ func main() { globalLogger.Infof("main config file %s missing or corrupt, entering provisioning mode", utils.AppConfigFilePath) if err := manager.StartSubsystem(ctx, networking.SubsysName); err != nil { - globalLogger.Error(errors.Wrapf(err, "could not start provisioning subsystem, please manually update /etc/viam.json and connect to internet")) + globalLogger.Error(errors.Wrapf(err, "could not start provisioning subsystem, "+ + "please manually update /etc/viam.json and connect to internet")) manager.CloseAll() return } diff --git a/subsystems/networking/networkmanager.go b/subsystems/networking/networkmanager.go index 6bb114b..0bfd369 100644 --- a/subsystems/networking/networkmanager.go +++ b/subsystems/networking/networkmanager.go @@ -46,7 +46,8 @@ func (w *Provisioning) getVisibleNetworks() []NetworkInfo { var visible []NetworkInfo for _, nw := range w.netState.Networks() { // note this does NOT use VisibleNetworkTimeout (like getCandidates does) - recentlySeen := 
nw.lastSeen.After(w.connState.getProvisioningChange().Add(time.Duration(w.Config().OfflineBeforeStartingHotspotMinutes * -2))) + recentlySeen := nw.lastSeen.After(w.connState.getProvisioningChange().Add( + time.Duration(w.Config().OfflineBeforeStartingHotspotMinutes * -2))) if !nw.isHotspot && recentlySeen { visible = append(visible, nw.getInfo()) @@ -153,7 +154,8 @@ func (w *Provisioning) checkConnections() error { } // in normal (single) mode, we need to be connected to the primary (highest priority) network - if !w.cfg.TurnOnHotspotIfWifiHasNoInternet && state == gnm.NmActiveConnectionStateActivated && ssid == w.netState.PrimarySSID(w.Config().HotspotInterface) { + if !w.cfg.TurnOnHotspotIfWifiHasNoInternet && state == gnm.NmActiveConnectionStateActivated && + ssid == w.netState.PrimarySSID(w.Config().HotspotInterface) { connected = true } } @@ -520,7 +522,8 @@ func (w *Provisioning) getCandidates(ifName string) []string { configured := nw.conn != nil // firstSeen/lastTried are reset if a network disappears for more than a minute, so retry if it comes back (or 10 mins) - recentlyTried := nw.lastTried.After(nw.firstSeen) && nw.lastTried.After(time.Now().Add(time.Duration(w.cfg.RetryConnectionTimeoutMinutes)*-1)) + recentlyTried := nw.lastTried.After(nw.firstSeen) && + nw.lastTried.After(time.Now().Add(time.Duration(w.cfg.RetryConnectionTimeoutMinutes)*-1)) if !nw.isHotspot && visible && configured && !recentlyTried { candidates = append(candidates, nw) diff --git a/subsystems/syscfg/logging.go b/subsystems/syscfg/logging.go index 3aa3c55..d690033 100644 --- a/subsystems/syscfg/logging.go +++ b/subsystems/syscfg/logging.go @@ -32,7 +32,6 @@ func (s *syscfg) EnforceLogging() error { } // if journald is NOT enabled, simply return - if err := checkJournaldEnabled(); err != nil { return nil } diff --git a/utils/config.go b/utils/config.go index e8a3095..4bd6d2f 100644 --- a/utils/config.go +++ b/utils/config.go @@ -281,15 +281,20 @@ func validateConfig(cfg 
AgentConfig) (AgentConfig, error) { // SystemConfiguration // zero isn't allowed, revert to default, but don't warn if cfg.SystemConfiguration.LoggingJournaldSystemMaxUseMegabytes == 0 { - cfg.SystemConfiguration.LoggingJournaldSystemMaxUseMegabytes = DefaultConfiguration.SystemConfiguration.LoggingJournaldSystemMaxUseMegabytes + //nolint:gofumpt + cfg.SystemConfiguration.LoggingJournaldSystemMaxUseMegabytes = + DefaultConfiguration.SystemConfiguration.LoggingJournaldSystemMaxUseMegabytes } if cfg.SystemConfiguration.LoggingJournaldRuntimeMaxUseMegabytes == 0 { - cfg.SystemConfiguration.LoggingJournaldRuntimeMaxUseMegabytes = DefaultConfiguration.SystemConfiguration.LoggingJournaldRuntimeMaxUseMegabytes + //nolint:gofumpt + cfg.SystemConfiguration.LoggingJournaldRuntimeMaxUseMegabytes = + DefaultConfiguration.SystemConfiguration.LoggingJournaldRuntimeMaxUseMegabytes } if cfg.SystemConfiguration.OSAutoUpgradeType != "" && cfg.SystemConfiguration.OSAutoUpgradeType != "security" && cfg.SystemConfiguration.OSAutoUpgradeType != "all" { - errOut = errors.Join(errOut, errw.Errorf("agent.system_configuration.os_auto_upgrade_type can only be 'security' or 'all' (was: %s)", + errOut = errors.Join(errOut, errw.Errorf( + "agent.system_configuration.os_auto_upgrade_type can only be 'security' or 'all' (was: %s)", cfg.SystemConfiguration.OSAutoUpgradeType)) cfg.SystemConfiguration.OSAutoUpgradeType = DefaultConfiguration.SystemConfiguration.OSAutoUpgradeType } @@ -305,11 +310,13 @@ func validateConfig(cfg AgentConfig) (AgentConfig, error) { } if cfg.NetworkConfiguration.HotspotPrefix == "" { cfg.NetworkConfiguration.HotspotPrefix = DefaultConfiguration.NetworkConfiguration.HotspotPrefix - errOut = errors.Join(errOut, errw.New("network_configuration.hotspot_prefix should not be empty, please omit empty fields entirely")) + errOut = errors.Join(errOut, + errw.New("network_configuration.hotspot_prefix should not be empty, please omit empty fields entirely")) } if 
cfg.NetworkConfiguration.HotspotPassword == "" { cfg.NetworkConfiguration.HotspotPassword = DefaultConfiguration.NetworkConfiguration.HotspotPassword - errOut = errors.Join(errOut, errw.New("network_configuration.hotspot_password should not be empty, please omit empty fields entirely")) + errOut = errors.Join(errOut, + errw.New("network_configuration.hotspot_password should not be empty, please omit empty fields entirely")) } if cfg.NetworkConfiguration.HotspotSSID == "" { @@ -328,7 +335,9 @@ func validateConfig(cfg AgentConfig) (AgentConfig, error) { var haveBadTimeout bool minTimeout := Timeout(time.Minute) if cfg.NetworkConfiguration.OfflineBeforeStartingHotspotMinutes < minTimeout { - cfg.NetworkConfiguration.OfflineBeforeStartingHotspotMinutes = DefaultConfiguration.NetworkConfiguration.OfflineBeforeStartingHotspotMinutes + //nolint:gofumpt + cfg.NetworkConfiguration.OfflineBeforeStartingHotspotMinutes = + DefaultConfiguration.NetworkConfiguration.OfflineBeforeStartingHotspotMinutes haveBadTimeout = true } @@ -338,7 +347,9 @@ func validateConfig(cfg AgentConfig) (AgentConfig, error) { } if cfg.NetworkConfiguration.RetryConnectionTimeoutMinutes < minTimeout { - cfg.NetworkConfiguration.RetryConnectionTimeoutMinutes = DefaultConfiguration.NetworkConfiguration.RetryConnectionTimeoutMinutes + //nolint:gofumpt + cfg.NetworkConfiguration.RetryConnectionTimeoutMinutes = + DefaultConfiguration.NetworkConfiguration.RetryConnectionTimeoutMinutes haveBadTimeout = true } @@ -352,8 +363,11 @@ func validateConfig(cfg AgentConfig) (AgentConfig, error) { badOffline := cfg.NetworkConfiguration.DeviceRebootAfterOfflineMinutes cfg.NetworkConfiguration.DeviceRebootAfterOfflineMinutes = DefaultConfiguration.NetworkConfiguration.DeviceRebootAfterOfflineMinutes errOut = errors.Join(errOut, - errw.Errorf("device_reboot_after_offline_minutes (%s) cannot be less than offline_before_starting_hotspot_minutes (%s) or user_idle_minutes (%s)", - time.Duration(badOffline), 
time.Duration(cfg.NetworkConfiguration.OfflineBeforeStartingHotspotMinutes), time.Duration(cfg.NetworkConfiguration.UserIdleMinutes)), + errw.Errorf("device_reboot_after_offline_minutes (%s) cannot be less than offline_before_starting_hotspot_minutes (%s) "+ + "or user_idle_minutes (%s)", + time.Duration(badOffline), + time.Duration(cfg.NetworkConfiguration.OfflineBeforeStartingHotspotMinutes), + time.Duration(cfg.NetworkConfiguration.UserIdleMinutes)), ) } From a10747d4fb67e5f4b054070a61464f22d83113f5 Mon Sep 17 00:00:00 2001 From: James Otting Date: Mon, 27 Jan 2025 15:56:33 -0600 Subject: [PATCH 3/6] Smurf-complete, needs LOTS of QA --- manager.go | 41 ++++++++++++--- subsystems/networking/networking.go | 10 ++-- subsystems/networking/setup.go | 28 ++-------- subsystems/viamserver/viamserver.go | 5 -- utils/config.go | 80 ++++++++++++++++++++++++++++- utils/config_test.go | 30 +++++++---- version_control.go | 6 +++ 7 files changed, 150 insertions(+), 50 deletions(-) diff --git a/manager.go b/manager.go index cfa07df..f54d128 100644 --- a/manager.go +++ b/manager.go @@ -51,6 +51,7 @@ type Manager struct { cfgMu sync.RWMutex cfg utils.AgentConfig + // also guarded by cfgMu viamServerNeedsRestart bool viamServer subsystems.Subsystem @@ -256,11 +257,13 @@ func (m *Manager) CheckUpdates(ctx context.Context) time.Duration { interval = minimalCheckInterval } + m.cfgMu.RLock() if m.cfg.AdvancedSettings.Debug { m.logger.SetLevel(logging.DEBUG) } else { m.logger.SetLevel(logging.INFO) } + m.cfgMu.RUnlock() // randomly fuzz the interval by +/- 5% interval = utils.FuzzTime(interval, 0.05) @@ -284,6 +287,9 @@ func (m *Manager) SubsystemHealthChecks(ctx context.Context) { } m.logger.Debug("Starting health checks for all subsystems") + m.cfgMu.RLock() + defer m.cfgMu.RUnlock() + for subsystemName, sub := range map[string]subsystems.Subsystem{ "viam-server": m.viamServer, "sysconfig": m.sysConfig, @@ -292,6 +298,22 @@ func (m *Manager) SubsystemHealthChecks(ctx context.Context) 
{ if ctx.Err() != nil { return } + + switch subsystemName { + case "viam-server": + if m.cfg.AdvancedSettings.DisableViamServer { + return + } + case "sysconfig": + if m.cfg.AdvancedSettings.DisableSystemConfiguration { + return + } + case "networking": + if m.cfg.AdvancedSettings.DisableNetworkConfiguration { + return + } + } + ctxTimeout, cancelFunc := context.WithTimeout(ctx, time.Second*15) defer cancelFunc() if err := sub.HealthCheck(ctxTimeout); err != nil { @@ -306,7 +328,6 @@ func (m *Manager) SubsystemHealthChecks(ctx context.Context) { return } - // SMURF check if disabled! if err := sub.Start(ctx); err != nil && !errors.Is(err, utils.ErrSubsystemDisabled) { m.logger.Error(errw.Wrapf(err, "restarting subsystem %s", subsystemName)) } @@ -358,7 +379,10 @@ func (m *Manager) StartBackgroundChecks(ctx context.Context) { m.activeBackgroundWorkers.Add(1) go func() { checkInterval := minimalCheckInterval - if m.cfg.AdvancedSettings.WaitForUpdateCheck { + m.cfgMu.RLock() + wait := m.cfg.AdvancedSettings.WaitForUpdateCheck + m.cfgMu.RUnlock() + if wait { checkInterval = m.CheckUpdates(ctx) } @@ -516,12 +540,17 @@ func (m *Manager) getHostInfo() *pb.HostInfo { } func (m *Manager) getVersions() *pb.VersionInfo { - // SMURF TODO + m.cfgMu.RLock() + defer m.cfgMu.RUnlock() vers := &pb.VersionInfo{ AgentRunning: Version, - AgentInstalled: "", - ViamServerRunning: "", - ViamServerInstalled: "", + AgentInstalled: m.cache.AgentVersion(), + ViamServerRunning: m.cache.ViamServerVersion(), + ViamServerInstalled: m.cache.ViamServerVersion(), + } + + if m.viamServerNeedsRestart { + vers.ViamServerRunning = m.cache.ViamServerPreviousVersion() } return vers diff --git a/subsystems/networking/networking.go b/subsystems/networking/networking.go index fcafcf7..a5fd1d0 100644 --- a/subsystems/networking/networking.go +++ b/subsystems/networking/networking.go @@ -46,10 +46,8 @@ type Provisioning struct { // locking for config updates dataMu sync.Mutex - - // SMURF process these in 
Update - cfg utils.NetworkConfiguration - nets utils.AdditionalNetworks + cfg utils.NetworkConfiguration + nets utils.AdditionalNetworks // portal webServer *http.Server @@ -290,6 +288,10 @@ func (w *Provisioning) Update(ctx context.Context, cfg utils.AgentConfig) (needR w.cfg = cfg.NetworkConfiguration w.nets = cfg.AdditionalNetworks + if err := w.writeDNSMasq(); err != nil { + w.logger.Error(errw.Wrap(err, "writing dnsmasq configuration")) + } + return needRestart } diff --git a/subsystems/networking/setup.go b/subsystems/networking/setup.go index be6f428..61aba75 100644 --- a/subsystems/networking/setup.go +++ b/subsystems/networking/setup.go @@ -3,15 +3,13 @@ package networking // This file includes functions used only once during startup in NewNMWrapper() import ( - "bytes" "context" "errors" - "io/fs" - "os" "time" gnm "github.com/Otterverse/gonetworkmanager/v2" errw "github.com/pkg/errors" + "github.com/viamrobotics/agent/utils" ) var ( @@ -26,16 +24,8 @@ func (w *Provisioning) writeDNSMasq() error { DNSMasqContents = DNSMasqContentsSetupOnly } - fileBytes, err := os.ReadFile(DNSMasqFilepath) - if err == nil && bytes.Equal(fileBytes, []byte(DNSMasqContents)) { - return nil - } - - if err != nil && !errors.Is(err, fs.ErrNotExist) { - return err - } - //nolint:gosec - return os.WriteFile(DNSMasqFilepath, []byte(DNSMasqContents), 0o644) + _, err := utils.WriteFileIfNew(DNSMasqFilepath, []byte(DNSMasqContents)) + return err } func (w *Provisioning) testConnCheck() error { @@ -80,16 +70,8 @@ func (w *Provisioning) testConnCheck() error { } func (w *Provisioning) writeConnCheck() error { - fileBytes, err := os.ReadFile(ConnCheckFilepath) - if err == nil && bytes.Equal(fileBytes, []byte(ConnCheckContents)) { - return nil - } - - if err != nil && !errors.Is(err, fs.ErrNotExist) { - return err - } - //nolint:gosec - return os.WriteFile(ConnCheckFilepath, []byte(ConnCheckContents), 0o644) + _, err := utils.WriteFileIfNew(ConnCheckFilepath, []byte(ConnCheckContents)) + 
return err } // must be run inside dataMu lock. diff --git a/subsystems/viamserver/viamserver.go b/subsystems/viamserver/viamserver.go index eccc362..ee9084e 100644 --- a/subsystems/viamserver/viamserver.go +++ b/subsystems/viamserver/viamserver.go @@ -336,11 +336,6 @@ func (s *viamServer) SafeToRestart(ctx context.Context) bool { return restartAllowed } -// SMURF IMPLEMENT. -func (s *viamServer) Version() string { - return "" -} - func NewSubsystem(ctx context.Context, logger logging.Logger, cfg utils.AgentConfig) subsystems.Subsystem { return &viamServer{ logger: logger, diff --git a/utils/config.go b/utils/config.go index 4bd6d2f..1eb061e 100644 --- a/utils/config.go +++ b/utils/config.go @@ -4,8 +4,10 @@ import ( "encoding/json" "errors" "io/fs" + netlib "net" "os" "path/filepath" + "regexp" "strings" "time" @@ -129,7 +131,7 @@ type NetworkConfiguration struct { type AdditionalNetworks map[string]NetworkDefinition type NetworkDefinition struct { - // "wifi", "wired", "wifi-static", "wired-static" + // "wifi", "wired" Type string `json:"type"` // name of interface, ex: "wlan0", "eth0", "enp14s0", etc. 
@@ -371,7 +373,81 @@ func validateConfig(cfg AgentConfig) (AgentConfig, error) { ) } - // SMURF validate additional_networks + // Additional Networks + for name, net := range cfg.AdditionalNetworks { + if net.Type != "wifi" && net.Type != "wired" { + errOut = errors.Join(errOut, errw.Errorf("network %s has invalid type (%s), must be one of "+ + "wifi or wired", name, net.Type)) + delete(cfg.AdditionalNetworks, name) + continue + } + + if len(net.Interface) > 15 || regexp.MustCompile(`\s`).MatchString(net.Interface) { + errOut = errors.Join(errOut, errw.Errorf("network %s has invalid interface name (%s), "+ + "must be 15 characters or less, without spaces", name, net.Interface)) + delete(cfg.AdditionalNetworks, name) + continue + } + + if len(net.SSID) > 32 { + errOut = errors.Join(errOut, errw.Errorf("network %s has invalid SSID (%s), "+ + "must be 32 characters or less", name, net.SSID)) + delete(cfg.AdditionalNetworks, name) + continue + } + + if len(net.PSK) > 64 || (net.PSK != "" && len(net.PSK) < 8) { + errOut = errors.Join(errOut, errw.Errorf("network %s has invalid PSK (%s), "+ + "must be between 8 and 63 characters, or exactly 64 hex characters", name, net.PSK)) + delete(cfg.AdditionalNetworks, name) + continue + } + + if net.Priority > 999 || net.Priority < -999 { + errOut = errors.Join(errOut, errw.Errorf("network %s has invalid priority (%d), "+ + "must be between -999 and 999", name, net.Priority)) + delete(cfg.AdditionalNetworks, name) + continue + } + + if net.IPv4Address != "" { + _, _, err := netlib.ParseCIDR(net.IPv4Address) + if err != nil { + errOut = errors.Join(errOut, errw.Errorf("network %s has invalid ipv4_address, "+ + "%s", name, err)) + delete(cfg.AdditionalNetworks, name) + continue + } + } + + if net.IPv4Gateway != "" { + ip := netlib.ParseIP(net.IPv4Gateway) + if ip == nil { + errOut = errors.Join(errOut, errw.Errorf("network %s has invalid ipv4_gateway (%s), "+ + "must be ipv4 address", name, net.IPv4Gateway)) + 
delete(cfg.AdditionalNetworks, name) + continue + } + } + + for _, dns := range net.IPv4DNS { + ip := netlib.ParseIP(dns) + if ip == nil { + errOut = errors.Join(errOut, errw.Errorf("network %s has invalid ipv4_dns entry (%s), "+ + "must be ipv4 address", name, dns)) + delete(cfg.AdditionalNetworks, name) + continue + } + } + + if net.IPv4RouteMetric < 0 { + errOut = errors.Join(errOut, errw.Errorf("network %s has invalid ipv4_route_metric (%d), "+ + "must be >= 0", name, net.IPv4RouteMetric)) + delete(cfg.AdditionalNetworks, name) + continue + } + } + return cfg, errOut } diff --git a/utils/config_test.go b/utils/config_test.go index a7678e0..7d76144 100644 --- a/utils/config_test.go +++ b/utils/config_test.go @@ -7,8 +7,7 @@ import ( "go.viam.com/test" ) -// SMURF TODO lots of tests - +// basic test for the config structure names. func TestConvertJson(t *testing.T) { jsonBytes := ` { @@ -39,14 +38,14 @@ func TestConvertJson(t *testing.T) { }, "additional_networks": { "network1": { - "type": "", - "interface": "", + "type": "wifi", + "interface": "wlan0", "ssid": "foo", "psk": "bar", "priority": 0, - "ipv4_address": "", - "ipv4_gateway": "", - "ipv4_dns": [], + "ipv4_address": "192.168.0.1/24", + "ipv4_gateway": "192.168.0.255", + "ipv4_dns": ["192.168.0.255"], "ipv4_route_metric": 0 }, "network2": { @@ -57,15 +56,26 @@ func TestConvertJson(t *testing.T) { "system_configuration": { "logging_journald_system_max_use_megabytes": 512, "logging_journald_runtime_max_use_megabytes": 512, - "os_auto_upgrade_type": "security" + "os_auto_upgrade_type": "" } } ` newConfig := &AgentConfig{} - err := json.Unmarshal([]byte(jsonBytes), newConfig) + testConfig := DefaultConfig() + testConfig.AdditionalNetworks["network1"] = NetworkDefinition{ + Type: "wifi", + Interface: "wlan0", + SSID: "foo", + PSK: "bar", + IPv4Address: "192.168.0.1/24", + IPv4Gateway: "192.168.0.255", + IPv4DNS: []string{"192.168.0.255"}, + } + testConfig.AdditionalNetworks["network2"] = 
NetworkDefinition{SSID: "moo", PSK: "cow"} + test.That(t, err, test.ShouldBeNil) - test.That(t, newConfig, test.ShouldResemble, &AgentConfig{}) + test.That(t, *newConfig, test.ShouldResemble, testConfig) } diff --git a/version_control.go b/version_control.go index 532f75c..c632e34 100644 --- a/version_control.go +++ b/version_control.go @@ -74,6 +74,12 @@ func (c *VersionCache) ViamServerVersion() string { return c.ViamServer.CurrentVersion } +func (c *VersionCache) ViamServerPreviousVersion() string { + c.mu.Lock() + defer c.mu.Unlock() + return c.ViamServer.PreviousVersion +} + // LoadCache loads the cached data for the subsystem from disk. func (c *VersionCache) load() { c.mu.Lock() From edab286ba55a267707c441e704114b496724d73c Mon Sep 17 00:00:00 2001 From: James Otting Date: Wed, 29 Jan 2025 11:46:53 -0600 Subject: [PATCH 4/6] Handle running version better --- manager.go | 11 ++++++----- version_control.go | 11 ++++++++++- 2 files changed, 16 insertions(+), 6 deletions(-) diff --git a/manager.go b/manager.go index f54d128..c795c53 100644 --- a/manager.go +++ b/manager.go @@ -133,6 +133,7 @@ func (m *Manager) StartSubsystem(ctx context.Context, name string) error { switch name { case viamserver.SubsysName: + m.cache.MarkViamServerRunningVersion() return m.viamServer.Start(ctx) case networking.SubsysName: return m.networking.Start(ctx) @@ -207,6 +208,7 @@ func (m *Manager) SubsystemUpdates(ctx context.Context) { m.viamServerNeedsRestart = true } } + m.cache.MarkViamServerRunningVersion() if err := m.viamServer.Start(ctx); err != nil { m.logger.Error(err) } @@ -328,6 +330,9 @@ func (m *Manager) SubsystemHealthChecks(ctx context.Context) { return } + if subsystemName == "viam-server" { + m.cache.MarkViamServerRunningVersion() + } if err := sub.Start(ctx); err != nil && !errors.Is(err, utils.ErrSubsystemDisabled) { m.logger.Error(errw.Wrapf(err, "restarting subsystem %s", subsystemName)) } @@ -545,14 +550,10 @@ func (m *Manager) getVersions() *pb.VersionInfo { vers 
:= &pb.VersionInfo{ AgentRunning: Version, AgentInstalled: m.cache.AgentVersion(), - ViamServerRunning: m.cache.ViamServerVersion(), + ViamServerRunning: m.cache.ViamServerRunningVersion(), ViamServerInstalled: m.cache.ViamServerVersion(), } - if m.viamServerNeedsRestart { - vers.ViamServerRunning = m.cache.ViamServerPreviousVersion() - } - return vers } diff --git a/version_control.go b/version_control.go index c632e34..745873d 100644 --- a/version_control.go +++ b/version_control.go @@ -48,6 +48,9 @@ type Versions struct { CurrentVersion string `json:"current_version"` PreviousVersion string `json:"previous_version"` Versions map[string]*VersionInfo `json:"versions"` + + // temporary, so not exported for json/caching + runningVersion string } // VersionInfo records details about each version of a subsystem. @@ -74,12 +77,18 @@ func (c *VersionCache) ViamServerVersion() string { return c.ViamServer.CurrentVersion } -func (c *VersionCache) ViamServerPreviousVersion() string { +func (c *VersionCache) ViamServerRunningVersion() string { c.mu.Lock() defer c.mu.Unlock() return c.ViamServer.PreviousVersion } +func (c *VersionCache) MarkViamServerRunningVersion() { + c.mu.Lock() + defer c.mu.Unlock() + c.ViamServer.runningVersion = c.ViamServer.CurrentVersion +} + // LoadCache loads the cached data for the subsystem from disk. 
func (c *VersionCache) load() { c.mu.Lock() From 29e9cefe22ced7af4a3766860cb9c4cea921fba0 Mon Sep 17 00:00:00 2001 From: James Otting Date: Mon, 3 Feb 2025 00:46:36 -0600 Subject: [PATCH 5/6] Initial QA, basic startup working --- agent.go | 2 +- go.mod | 8 +++--- go.sum | 16 +++++------ manager.go | 15 ++++++++++ utils/config.go | 72 +++++++++++++++++++++++----------------------- version_control.go | 27 ++++++++++++----- 6 files changed, 84 insertions(+), 56 deletions(-) diff --git a/agent.go b/agent.go index af23610..0b8609a 100644 --- a/agent.go +++ b/agent.go @@ -40,7 +40,7 @@ func InstallNewVersion(ctx context.Context, logger logging.Logger) (bool, error) cmd := exec.Command(expectedPath, "--install") output, err := cmd.CombinedOutput() logger.Info("running viam-agent --install for new version") - logger.Info(output) + logger.Info(string(output)) if err != nil { return false, errw.Wrapf(err, "running post install step %s", output) } diff --git a/go.mod b/go.mod index b3bee8b..7a9b30f 100644 --- a/go.mod +++ b/go.mod @@ -17,7 +17,7 @@ require ( go.viam.com/rdk v0.51.0 go.viam.com/test v1.2.3 go.viam.com/utils v0.1.112 - golang.org/x/sys v0.26.0 + golang.org/x/sys v0.28.0 google.golang.org/grpc v1.67.1 google.golang.org/protobuf v1.35.1 ) @@ -81,12 +81,12 @@ require ( go.opencensus.io v0.24.0 // indirect go.uber.org/goleak v1.3.0 // indirect go.uber.org/multierr v1.11.0 // indirect - golang.org/x/crypto v0.28.0 // indirect + golang.org/x/crypto v0.31.0 // indirect golang.org/x/mod v0.21.0 // indirect golang.org/x/net v0.30.0 // indirect golang.org/x/oauth2 v0.22.0 // indirect - golang.org/x/sync v0.8.0 // indirect - golang.org/x/text v0.19.0 // indirect + golang.org/x/sync v0.10.0 // indirect + golang.org/x/text v0.21.0 // indirect golang.org/x/tools v0.24.0 // indirect google.golang.org/genproto/googleapis/api v0.0.0-20240827150818-7e3bb234dfed // indirect google.golang.org/genproto/googleapis/rpc v0.0.0-20240903143218-8af14fe29dc1 // indirect diff --git 
a/go.sum b/go.sum index f35156c..da8e5b1 100644 --- a/go.sum +++ b/go.sum @@ -747,8 +747,8 @@ golang.org/x/crypto v0.12.0/go.mod h1:NF0Gs7EO5K4qLn+Ylc+fih8BSTeIjAP05siRnAh98y golang.org/x/crypto v0.18.0/go.mod h1:R0j02AL6hcrfOiy9T4ZYp/rcWeMxM3L6QYxlOuEG1mg= golang.org/x/crypto v0.19.0/go.mod h1:Iy9bg/ha4yyC70EfRS8jz+B6ybOBKMaSxLj6P6oBDfU= golang.org/x/crypto v0.21.0/go.mod h1:0BP7YvVV9gBbVKyeTG0Gyn+gZm94bibOW5BjDEYAOMs= -golang.org/x/crypto v0.28.0 h1:GBDwsMXVQi34v5CCYUm2jkJvu4cbtru2U4TN2PSyQnw= -golang.org/x/crypto v0.28.0/go.mod h1:rmgy+3RHxRZMyY0jjAJShp2zgEdOqj2AO7U0pYmeQ7U= +golang.org/x/crypto v0.31.0 h1:ihbySMvVjLAeSH1IbfcRTkD/iNscyz8rGzjF/E5hV6U= +golang.org/x/crypto v0.31.0/go.mod h1:kDsLvtWBEx7MV9tJOj9bnXsPbxwJQ6csT/x4KIN4Ssk= golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= golang.org/x/exp v0.0.0-20190306152737-a1d7652674e8/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= golang.org/x/exp v0.0.0-20190510132918-efd6b22b2522/go.mod h1:ZjyILWgesfNpC6sMxTJOJm9Kp84zZh5NQWvqDGG3Qr8= @@ -837,8 +837,8 @@ golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJ golang.org/x/sync v0.0.0-20210220032951-036812b2e83c/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.1.0/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= -golang.org/x/sync v0.8.0 h1:3NFvSEYkUoMifnESzZl15y791HH1qU2xm6eCJU5ZPXQ= -golang.org/x/sync v0.8.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= +golang.org/x/sync v0.10.0 h1:3NQrjDixjgGwUOCaF8w2+VYHv0Ve/vGYSbdkTa98gmQ= +golang.org/x/sync v0.10.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= golang.org/x/sys v0.0.0-20180823144017-11551d06cbcc/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20180830151530-49385e6e1522/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= 
golang.org/x/sys v0.0.0-20180905080454-ebe1bf3edb33/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= @@ -894,8 +894,8 @@ golang.org/x/sys v0.11.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.16.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= golang.org/x/sys v0.17.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= golang.org/x/sys v0.18.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= -golang.org/x/sys v0.26.0 h1:KHjCJyddX0LoSTb3J+vWpupP9p0oznkqVk/IfjymZbo= -golang.org/x/sys v0.26.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= +golang.org/x/sys v0.28.0 h1:Fksou7UEQUWlKvIdsqzJmUmCX3cZuD2+P3XyyzwMhlA= +golang.org/x/sys v0.28.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= golang.org/x/term v0.5.0/go.mod h1:jMB1sMXY+tzblOD4FWmEbocvup2/aLOaQEp7JmGp78k= @@ -917,8 +917,8 @@ golang.org/x/text v0.7.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8= golang.org/x/text v0.9.0/go.mod h1:e1OnstbJyHTd6l/uOt8jFFHp6TRDWZR/bV3emEE/zU8= golang.org/x/text v0.12.0/go.mod h1:TvPlkZtksWOMsz7fbANvkp4WM8x/WCo/om8BMLbz+aE= golang.org/x/text v0.14.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU= -golang.org/x/text v0.19.0 h1:kTxAhCbGbxhK0IwgSKiMO5awPoDQ0RpfiVYBfK860YM= -golang.org/x/text v0.19.0/go.mod h1:BuEKDfySbSR4drPmRPG/7iBdf8hvFMuRexcpahXilzY= +golang.org/x/text v0.21.0 h1:zyQAAkrwaneQ066sspRyJaG9VNi/YJ1NfzcGB3hZ/qo= +golang.org/x/text v0.21.0/go.mod h1:4IBbMaMmOPCJ8SecivzSH54+73PCFmPWxNTLm+vZkEQ= golang.org/x/time v0.0.0-20180412165947-fbb02b2291d2/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= golang.org/x/time v0.0.0-20181108054448-85acf8d2951c/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= golang.org/x/time v0.0.0-20190308202827-9d24e82272b4/go.mod 
h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= diff --git a/manager.go b/manager.go index c795c53..15b5aa2 100644 --- a/manager.go +++ b/manager.go @@ -484,11 +484,26 @@ func (m *Manager) GetConfig(ctx context.Context) (time.Duration, error) { return minimalCheckInterval, err } + m.logger.Infof("SMURF CONFIG RESP: %+v", resp) + + // Store update data in cache, actual binaries are updated later + err = m.cache.Update(resp.GetAgentUpdateInfo(), SubsystemName) + if err != nil { + m.logger.Error(errw.Wrapf(err, "processing update data for %s", SubsystemName)) + } + + err = m.cache.Update(resp.GetViamServerUpdateInfo(), viamserver.SubsysName) + if err != nil { + m.logger.Error(errw.Wrapf(err, "processing update data for %s", viamserver.SubsysName)) + } + cfg, err := utils.StackConfigs(resp) if err != nil { m.logger.Error(errw.Wrap(err, "processing config")) } + m.logger.Infof("SMURF STACKED CONFIG: %+v", cfg) + if err := utils.SaveConfigToCache(cfg); err != nil { m.logger.Error(err) } diff --git a/utils/config.go b/utils/config.go index 1eb061e..724aa6c 100644 --- a/utils/config.go +++ b/utils/config.go @@ -61,104 +61,104 @@ var ( ) type AgentConfig struct { - AdvancedSettings AdvancedSettings `json:"advanced_settings"` - SystemConfiguration SystemConfiguration `json:"system_configuration"` - NetworkConfiguration NetworkConfiguration `json:"network_configuration"` - AdditionalNetworks AdditionalNetworks `json:"additional_networks"` + AdvancedSettings AdvancedSettings `json:"advanced_settings,omitempty"` + SystemConfiguration SystemConfiguration `json:"system_configuration,omitempty"` + NetworkConfiguration NetworkConfiguration `json:"network_configuration,omitempty"` + AdditionalNetworks AdditionalNetworks `json:"additional_networks,omitempty"` } type AdvancedSettings struct { - Debug bool `json:"debug"` - WaitForUpdateCheck bool `json:"wait_for_update_check"` - DisableViamServer bool `json:"disable_viam_server"` - DisableNetworkConfiguration bool 
`json:"disable_network_configuration"` - DisableSystemConfiguration bool `json:"disable_system_configuration"` - ViamServerStartTimeoutMinutes Timeout `json:"viam_server_start_timeout_minutes"` + Debug bool `json:"debug,omitempty"` + WaitForUpdateCheck bool `json:"wait_for_update_check,omitempty"` + DisableViamServer bool `json:"disable_viam_server,omitempty"` + DisableNetworkConfiguration bool `json:"disable_network_configuration,omitempty"` + DisableSystemConfiguration bool `json:"disable_system_configuration,omitempty"` + ViamServerStartTimeoutMinutes Timeout `json:"viam_server_start_timeout_minutes,omitempty"` } type SystemConfiguration struct { // can set either to -1 to disable, defaults to 512M (when int is 0) - LoggingJournaldSystemMaxUseMegabytes int `json:"logging_journald_system_max_use_megabytes"` - LoggingJournaldRuntimeMaxUseMegabytes int `json:"logging_journald_runtime_max_use_megabytes"` + LoggingJournaldSystemMaxUseMegabytes int `json:"logging_journald_system_max_use_megabytes,omitempty"` + LoggingJournaldRuntimeMaxUseMegabytes int `json:"logging_journald_runtime_max_use_megabytes,omitempty"` // UpgradeType can be // Empty/missing ("") to make no changes // "disable" (or "disabled") to disable auto-upgrades // "security" to enable ONLY security upgrades // "all" to enable upgrades from all configured sources - OSAutoUpgradeType string `json:"os_auto_upgrade_type"` + OSAutoUpgradeType string `json:"os_auto_upgrade_type,omitempty"` } type NetworkConfiguration struct { // Things typically set in viam-defaults.json - Manufacturer string `json:"manufacturer"` - Model string `json:"model"` - FragmentID string `json:"fragment_id"` + Manufacturer string `json:"manufacturer,omitempty"` + Model string `json:"model,omitempty"` + FragmentID string `json:"fragment_id,omitempty"` // The interface to use for hotspot/provisioning/wifi management. 
Ex: "wlan0" // Defaults to the first discovered 802.11 device - HotspotInterface string `json:"hotspot_interface"` + HotspotInterface string `json:"hotspot_interface,omitempty"` // The prefix to prepend to the hotspot name. - HotspotPrefix string `json:"hotspot_prefix"` + HotspotPrefix string `json:"hotspot_prefix,omitempty"` // Normally left blank, and computed from HotspotPrefix and Manufacturer - HotspotSSID string `json:"hotspot_ssid"` + HotspotSSID string `json:"hotspot_ssid,omitempty"` // Password required to connect to the hotspot. - HotspotPassword string `json:"hotspot_password"` + HotspotPassword string `json:"hotspot_password,omitempty"` // If true, mobile (phone) users connecting to the hotspot won't be automatically redirected to the web portal. - DisableCaptivePortalRedirect bool `json:"disable_captive_portal_redirect"` + DisableCaptivePortalRedirect bool `json:"disable_captive_portal_redirect,omitempty"` // When true, will try all known networks looking for internet (global) connectivity. // Otherwise, will only try the primary wifi network and consider that sufficient if connected (regardless of global connectivity.) - TurnOnHotspotIfWifiHasNoInternet bool `json:"turn_on_hotspot_if_wifi_has_no_internet"` + TurnOnHotspotIfWifiHasNoInternet bool `json:"turn_on_hotspot_if_wifi_has_no_internet,omitempty"` // If set, will explicitly enable or disable power save for all wifi connections managed by NetworkManager. - WifiPowerSave *bool `json:"wifi_power_save"` + WifiPowerSave *bool `json:"wifi_power_save,omitempty"` // How long without a connection before starting provisioning (hotspot) mode. - OfflineBeforeStartingHotspotMinutes Timeout `json:"offline_before_starting_hotspot_minutes"` + OfflineBeforeStartingHotspotMinutes Timeout `json:"offline_before_starting_hotspot_minutes,omitempty"` // How long since the last user interaction (via GRPC/app or web portal) before the state machine can resume. 
- UserIdleMinutes Timeout `json:"user_idle_minutes"` + UserIdleMinutes Timeout `json:"user_idle_minutes,omitempty"` // If not "online", always drop out of hotspot mode and retry everything after this time limit. - RetryConnectionTimeoutMinutes Timeout `json:"retry_connection_timeout_minutes"` + RetryConnectionTimeoutMinutes Timeout `json:"retry_connection_timeout_minutes,omitempty"` // If set, will reboot the device after it has been offline for this duration // 0, default, will disable this feature. - DeviceRebootAfterOfflineMinutes Timeout `json:"device_reboot_after_offline_minutes"` + DeviceRebootAfterOfflineMinutes Timeout `json:"device_reboot_after_offline_minutes,omitempty"` } type AdditionalNetworks map[string]NetworkDefinition type NetworkDefinition struct { // "wifi", "wired" - Type string `json:"type"` + Type string `json:"type,omitempty"` // name of interface, ex: "wlan0", "eth0", "enp14s0", etc. - Interface string `json:"interface"` + Interface string `json:"interface,omitempty"` // Wifi Settings - SSID string `json:"ssid"` - PSK string `json:"psk"` + SSID string `json:"ssid,omitempty"` + PSK string `json:"psk,omitempty"` // Autoconnect Priority (primarily for wifi) // higher values are preferred/tried first // defaults to 0, but wifi networks added via hotspot are set to 999 when not in roaming mode - Priority int32 `json:"priority"` + Priority int32 `json:"priority,omitempty"` // CIDR format address, ex: 192.168.0.1/24 // If unset, will default to "auto" (dhcp) - IPv4Address string `json:"ipv4_address"` - IPv4Gateway string `json:"ipv4_gateway"` + IPv4Address string `json:"ipv4_address,omitempty"` + IPv4Gateway string `json:"ipv4_gateway,omitempty"` // optional - IPv4DNS []string `json:"ipv4_dns"` + IPv4DNS []string `json:"ipv4_dns,omitempty"` // optional, 0 or -1 is default // lower values are preferred (lower "cost") // wired networks default to 100 // wireless networks default to 600 - IPv4RouteMetric int64 `json:"ipv4_route_metric"` + 
IPv4RouteMetric int64 `json:"ipv4_route_metric,omitempty"` } func DefaultConfig() AgentConfig { diff --git a/version_control.go b/version_control.go index 745873d..37ca5fd 100644 --- a/version_control.go +++ b/version_control.go @@ -11,6 +11,7 @@ import ( "os" "path" "path/filepath" + "strings" "sync" "time" @@ -124,14 +125,15 @@ func (c *VersionCache) save() error { return err } +//SMURF uneeded? // Save saves the cached data to disk. -func (c *VersionCache) Save() error { - c.mu.Lock() - defer c.mu.Unlock() - return c.save() -} +// func (c *VersionCache) Save() error { +// c.mu.Lock() +// defer c.mu.Unlock() +// return c.save() +// } -// Update processes data for the two binaries, agent itself, and viam-server. +// Update processes data for the two binaries: agent itself, and viam-server. func (c *VersionCache) Update(cfg *pb.UpdateInfo, binary string) error { c.mu.Lock() defer c.mu.Unlock() @@ -144,7 +146,7 @@ func (c *VersionCache) Update(cfg *pb.UpdateInfo, binary string) error { } newVersion := cfg.GetVersion() if newVersion == "customURL" { - newVersion = "customURL" + cfg.GetUrl() + newVersion = "customURL+" + cfg.GetUrl() } if newVersion == data.TargetVersion { @@ -190,6 +192,8 @@ func (c *VersionCache) UpdateBinary(ctx context.Context, binary string) (bool, e return needRestart, errw.Errorf("version data not found for %s %s", binary, data.TargetVersion) } + isCustomURL := strings.HasPrefix(verData.Version, "customURL+") + if data.TargetVersion == data.CurrentVersion { // if a known version, make sure the symlink is correct same, err := utils.CheckIfSame(verData.DlPath, verData.SymlinkPath) @@ -209,6 +213,11 @@ func (c *VersionCache) UpdateBinary(ctx context.Context, binary string) (bool, e if err != nil { c.logger.Error(err) } + + // if we're here, we have a mismatched checksum, as likely the URL changed, so wipe it and recompute later + if isCustomURL { + verData.UnpackedSHA = []byte{} + } } // this is a new version @@ -229,6 +238,10 @@ func (c 
*VersionCache) UpdateBinary(ctx context.Context, binary string) (bool, e verData.UnpackedPath = verData.DlPath verData.DlSHA = actualSha + if len(verData.UnpackedSHA) <= 1 && isCustomURL { + verData.UnpackedSHA = actualSha + } + if len(verData.UnpackedSHA) > 1 && !bytes.Equal(verData.UnpackedSHA, actualSha) { //nolint:goerr113 return needRestart, fmt.Errorf( From cbf7d321161e2d7a2cc31dd3c5c23c5662bc89bf Mon Sep 17 00:00:00 2001 From: James Otting Date: Tue, 4 Feb 2025 13:17:43 -0600 Subject: [PATCH 6/6] rc1 --- Makefile | 2 +- agent.go | 10 ++-------- examples/agent-config.jsonc | 2 +- go.mod | 3 ++- go.sum | 6 ++++-- manager.go | 2 +- subsystems/syscfg/logging.go | 17 +++++++++++------ utils/utils.go | 8 ++++++-- version_control.go | 27 +++++++++++++++++++-------- 9 files changed, 47 insertions(+), 30 deletions(-) diff --git a/Makefile b/Makefile index 35edd59..0bcc57e 100644 --- a/Makefile +++ b/Makefile @@ -16,7 +16,7 @@ else PATH_VERSION = v$(TAG_VERSION) endif -LDFLAGS = "-s -w -X 'github.com/viamrobotics/agent.Version=${TAG_VERSION}' -X 'github.com/viamrobotics/agent.GitRevision=${GIT_REVISION}'" +LDFLAGS = "-s -w -X 'github.com/viamrobotics/agent/utils.Version=${TAG_VERSION}' -X 'github.com/viamrobotics/agent/utils.GitRevision=${GIT_REVISION}'" TAGS = osusergo,netgo diff --git a/agent.go b/agent.go index 0b8609a..cdd77d4 100644 --- a/agent.go +++ b/agent.go @@ -22,14 +22,8 @@ const ( serviceFileName = "viam-agent.service" ) -var ( - // versions embedded at build time. - Version = "" - GitRevision = "" - - //go:embed viam-agent.service - serviceFileContents []byte -) +//go:embed viam-agent.service +var serviceFileContents []byte // InstallNewVersion runs the newly downloaded binary's Install() for installation of systemd files and the like. 
func InstallNewVersion(ctx context.Context, logger logging.Logger) (bool, error) { diff --git a/examples/agent-config.jsonc b/examples/agent-config.jsonc index 18935a9..8bfbf8a 100644 --- a/examples/agent-config.jsonc +++ b/examples/agent-config.jsonc @@ -45,7 +45,7 @@ "system_configuration": { "logging_journald_system_max_use_megabytes": 512, // can be -1 to disable "logging_journald_runtime_max_use_megabytes": 512, // can be -1 to disable - "os_auto_upgrade_type": "security" // can be "" to disable + "os_auto_upgrade_type": "security" // can be "" to do nothing, or "disable" to remove customization, "all", or "security" } } } diff --git a/go.mod b/go.mod index 7a9b30f..4438056 100644 --- a/go.mod +++ b/go.mod @@ -5,6 +5,7 @@ go 1.23.1 require ( github.com/Masterminds/semver/v3 v3.3.0 github.com/Otterverse/gonetworkmanager/v2 v2.2.1 + github.com/gabriel-vasile/mimetype v1.4.8 github.com/google/uuid v1.6.0 github.com/jessevdk/go-flags v1.6.1 github.com/nightlyone/lockfile v1.0.0 @@ -83,7 +84,7 @@ require ( go.uber.org/multierr v1.11.0 // indirect golang.org/x/crypto v0.31.0 // indirect golang.org/x/mod v0.21.0 // indirect - golang.org/x/net v0.30.0 // indirect + golang.org/x/net v0.33.0 // indirect golang.org/x/oauth2 v0.22.0 // indirect golang.org/x/sync v0.10.0 // indirect golang.org/x/text v0.21.0 // indirect diff --git a/go.sum b/go.sum index da8e5b1..91665cd 100644 --- a/go.sum +++ b/go.sum @@ -143,6 +143,8 @@ github.com/fsnotify/fsnotify v1.4.9/go.mod h1:znqG4EE+3YCdAaPaxE2ZRY/06pZUdp0tY4 github.com/fsnotify/fsnotify v1.6.0 h1:n+5WquG0fcWoWp6xPWfHdbskMCQaFnG6PfBrh1Ky4HY= github.com/fsnotify/fsnotify v1.6.0/go.mod h1:sl3t1tCWJFWoRz9R8WJCbQihKKwmorjAbSClcnxKAGw= github.com/fzipp/gocyclo v0.3.1/go.mod h1:DJHO6AUmbdqj2ET4Z9iArSuwWgYDRryYt2wASxc7x3E= +github.com/gabriel-vasile/mimetype v1.4.8 h1:FfZ3gj38NjllZIeJAmMhr+qKL8Wu+nOoI3GqacKw1NM= +github.com/gabriel-vasile/mimetype v1.4.8/go.mod h1:ByKUIKGjh1ODkGM1asKUbQZOLGrPjydw3hYPU2YU9t8= github.com/ghodss/yaml 
v1.0.0/go.mod h1:4dBDuWmgqj2HViK6kFavaiC9ZROes6MMH2rRYeMEF04= github.com/gin-contrib/sse v0.1.0/go.mod h1:RHrZQHXnP2xjPF+u1gW/2HnVO7nvIa9PG3Gm+fLHvGI= github.com/gin-gonic/gin v1.6.3/go.mod h1:75u5sXoLsGZoRN5Sgbi1eraJ4GU3++wFwWzhwvtwp4M= @@ -819,8 +821,8 @@ golang.org/x/net v0.10.0/go.mod h1:0qNGK6F8kojg2nk9dLZ2mShWaEBan6FAoqfSigmmuDg= golang.org/x/net v0.14.0/go.mod h1:PpSgVXXLK0OxS0F31C1/tv6XNguvCrnXIDrFMspZIUI= golang.org/x/net v0.20.0/go.mod h1:z8BVo6PvndSri0LbOE3hAn0apkU+1YvI6E70E9jsnvY= golang.org/x/net v0.21.0/go.mod h1:bIjVDfnllIU7BJ2DNgfnXvpSvtn8VRwhlsaeUTyUS44= -golang.org/x/net v0.30.0 h1:AcW1SDZMkb8IpzCdQUaIq2sP4sZ4zw+55h6ynffypl4= -golang.org/x/net v0.30.0/go.mod h1:2wGyMJ5iFasEhkwi13ChkO/t1ECNC4X4eBKkVFyYFlU= +golang.org/x/net v0.33.0 h1:74SYHlV8BIgHIFC/LrYkOGIwL19eTYXQ5wc6TBuO36I= +golang.org/x/net v0.33.0/go.mod h1:HXLR5J+9DxmrqMwG9qjGCxZ+zKXxBru04zlTvWlWuN4= golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U= golang.org/x/oauth2 v0.0.0-20190226205417-e64efc72b421/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw= golang.org/x/oauth2 v0.0.0-20190604053449-0f29369cfe45/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw= diff --git a/manager.go b/manager.go index 15b5aa2..6c12b99 100644 --- a/manager.go +++ b/manager.go @@ -563,7 +563,7 @@ func (m *Manager) getVersions() *pb.VersionInfo { m.cfgMu.RLock() defer m.cfgMu.RUnlock() vers := &pb.VersionInfo{ - AgentRunning: Version, + AgentRunning: utils.GetVersion(), AgentInstalled: m.cache.AgentVersion(), ViamServerRunning: m.cache.ViamServerRunningVersion(), ViamServerInstalled: m.cache.ViamServerVersion(), diff --git a/subsystems/syscfg/logging.go b/subsystems/syscfg/logging.go index d690033..f8e370a 100644 --- a/subsystems/syscfg/logging.go +++ b/subsystems/syscfg/logging.go @@ -23,7 +23,7 @@ var ( func (s *syscfg) EnforceLogging() error { s.mu.RLock() defer s.mu.RUnlock() - if s.cfg.LoggingJournaldRuntimeMaxUseMegabytes < 0 || 
s.cfg.LoggingJournaldSystemMaxUseMegabytes < 0 { + if s.cfg.LoggingJournaldRuntimeMaxUseMegabytes < 0 && s.cfg.LoggingJournaldSystemMaxUseMegabytes < 0 { if err := os.Remove(journaldConfPath); err != nil { if errw.Is(err, fs.ErrNotExist) { return nil @@ -48,6 +48,10 @@ func (s *syscfg) EnforceLogging() error { return err } + journalConf := &conf.JournaldFile{ + Journal: conf.JournaldJournalSection{}, + } + persistSize := fmt.Sprintf("%dM", s.cfg.LoggingJournaldSystemMaxUseMegabytes) tempSize := fmt.Sprintf("%dM", s.cfg.LoggingJournaldRuntimeMaxUseMegabytes) @@ -59,11 +63,12 @@ func (s *syscfg) EnforceLogging() error { tempSize = defaultLogLimit } - journalConf := &conf.JournaldFile{ - Journal: conf.JournaldJournalSection{ - SystemMaxUse: sysd.Value{persistSize}, - RuntimeMaxUse: sysd.Value{tempSize}, - }, + if s.cfg.LoggingJournaldSystemMaxUseMegabytes >= 0 { + journalConf.Journal.SystemMaxUse = sysd.Value{persistSize} + } + + if s.cfg.LoggingJournaldRuntimeMaxUseMegabytes >= 0 { + journalConf.Journal.RuntimeMaxUse = sysd.Value{tempSize} } newFileBytes, err := sysd.Marshal(journalConf) diff --git a/utils/utils.go b/utils/utils.go index 83a164b..73a45f8 100644 --- a/utils/utils.go +++ b/utils/utils.go @@ -138,17 +138,21 @@ func DownloadFile(ctx context.Context, rawURL string) (outPath string, errRet er req, err := http.NewRequestWithContext(ctx, http.MethodGet, parsedURL.String(), nil) if err != nil { - return "", errw.Wrap(err, "checking viam-server status") + return "", errw.Wrap(err, "downloading file") } resp, err := http.DefaultClient.Do(req) if err != nil { - return "", errw.Wrap(err, "checking viam-server status") + return "", errw.Wrap(err, "downloading file") } defer func() { errRet = errors.Join(errRet, resp.Body.Close()) }() + if resp.StatusCode < 200 || resp.StatusCode >= 400 { + return "", errw.Errorf("got response '%s' while downloading %s", resp.Status, parsedURL) + } + //nolint:gosec if err := os.MkdirAll(ViamDirs["tmp"], 0o755); err != nil { return 
"", err diff --git a/version_control.go b/version_control.go index 37ca5fd..bcabb22 100644 --- a/version_control.go +++ b/version_control.go @@ -15,6 +15,7 @@ import ( "sync" "time" + "github.com/gabriel-vasile/mimetype" errw "github.com/pkg/errors" "github.com/viamrobotics/agent/subsystems/viamserver" "github.com/viamrobotics/agent/utils" @@ -52,6 +53,7 @@ type Versions struct { // temporary, so not exported for json/caching runningVersion string + brokenTarget bool } // VersionInfo records details about each version of a subsystem. @@ -125,14 +127,6 @@ func (c *VersionCache) save() error { return err } -//SMURF uneeded? -// Save saves the cached data to disk. -// func (c *VersionCache) Save() error { -// c.mu.Lock() -// defer c.mu.Unlock() -// return c.save() -// } - // Update processes data for the two binaries: agent itself, and viam-server. func (c *VersionCache) Update(cfg *pb.UpdateInfo, binary string) error { c.mu.Lock() @@ -154,6 +148,7 @@ func (c *VersionCache) Update(cfg *pb.UpdateInfo, binary string) error { } data.TargetVersion = newVersion + data.brokenTarget = false info, ok := data.Versions[newVersion] if !ok { info = &VersionInfo{} @@ -187,6 +182,10 @@ func (c *VersionCache) UpdateBinary(ctx context.Context, binary string) (bool, e var needRestart bool + if data.brokenTarget { + return needRestart, nil + } + verData, ok := data.Versions[data.TargetVersion] if !ok { return needRestart, errw.Errorf("version data not found for %s %s", binary, data.TargetVersion) @@ -227,6 +226,9 @@ func (c *VersionCache) UpdateBinary(ctx context.Context, binary string) (bool, e var err error verData.DlPath, err = utils.DownloadFile(ctx, verData.URL) if err != nil { + if isCustomURL { + data.brokenTarget = true + } return needRestart, errw.Wrapf(err, "downloading %s", binary) } actualSha, err := utils.GetFileSum(verData.DlPath) @@ -239,6 +241,15 @@ func (c *VersionCache) UpdateBinary(ctx context.Context, binary string) (bool, e verData.DlSHA = actualSha if 
len(verData.UnpackedSHA) <= 1 && isCustomURL { + // new custom download, so need to check the file is an executable binary and use locally generated sha + mtype, err := mimetype.DetectFile(verData.UnpackedPath) + if err != nil { + return needRestart, errw.Wrapf(err, "determining file type of download") + } + if !mtype.Is("application/x-elf") && !mtype.Is("application/x-executable") { + data.brokenTarget = true + return needRestart, errw.Errorf("downloaded file is %s, not application/x-elf or application/x-executable, skipping", mtype) + } verData.UnpackedSHA = actualSha }