From 4aa5b29be7201f60dd4f2a45d28fac5b51275e96 Mon Sep 17 00:00:00 2001 From: Petri Autero Date: Tue, 8 Jan 2019 21:45:37 +0200 Subject: [PATCH 1/9] Update README first in RDD -style. --- README.md | 40 +++++++++++++++++++++++++++++++++++----- 1 file changed, 35 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index 4647ac8..0ee4c5d 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,7 @@ # health-checker -A simple HTTP server that will return `200 OK` if the given TCP ports are all successfully accepting connections. +A simple HTTP server that will return `200 OK` if the configured checks are all successful. If any of the checks fail, +it will return `HTTP 504 Gateway Not Found`. ## Motivation @@ -14,15 +15,23 @@ a single TCP port, or an HTTP(S) endpoint. As a result, our use case just isn't We wrote health-checker so that we could run a daemon on the server that reports the true health of the server by attempting to open a TCP connection to more than one port when it receives an inbound HTTP request on the given listener. +Using the `--script` option, the `health-checker` can be extended to check many other targets. One concrete example is monitoring +`ZooKeeper` node status during rolling deployment. Just polling the `ZooKeeper`'s TCP client port doesn't necessarily guarantee +that the node has (re-)joined the cluster. Using the `health-checker` with a custom script target, we can +[monitor ZooKeeper](https://zookeeper.apache.org/doc/r3.4.8/zookeeperAdmin.html#sc_monitoring) using the +[4 letter words](https://zookeeper.apache.org/doc/r3.4.8/zookeeperAdmin.html#sc_zkCommands), ensuring we report health back to the +[Load Balancer](https://aws.amazon.com/documentation/elastic-load-balancing/) correctly. + ## How It Works When health-checker is started, it will listen for inbound HTTP requests for any URL on the IP address and port specified by `--listener`. 
When it receives a request, it will attempt to open TCP connections to each of the ports specified by -an instance of `--port`. If all TCP connections succeed, it will return `HTTP 200 OK`. If any TCP connection fails, it -will return `HTTP 504 Gateway Not Found`. +an instance of `--port` and/or execute the script target specified by `--script`. If all configured checks - all TCP +connections and zero exit status for the script - succeed, it will return `HTTP 200 OK`. If any of the checks fail, +it will return `HTTP 504 Gateway Not Found`. Configure your AWS Health Check to only pass the Health Check on `HTTP 200 OK`. Now when an HTTP Health Check request -comes in, all desired TCP ports will be checked. +comes in, all desired TCP ports will be checked and the script target executed. For stability, we recommend running health-checker under a process supervisor such as [supervisord](http://supervisord.org/) or [systemd](https://www.freedesktop.org/wiki/Software/systemd/) to automatically restart health-checker in the unlikely @@ -46,9 +55,10 @@ health-checker [options] | `--listener` | The IP address and port on which inbound HTTP connections will be accepted. | `0.0.0.0:5000` | `--log-level` | Set the log level to LEVEL. Must be one of: `panic`, `fatal`, `error,` `warning`, `info`, or `debug` | `info` | `--help` | Show the help screen | | +| `--script` | Path to script to run - will PASS if it completes within 5s with a zero exit status | | | `--version` | Show the program's version | | -#### Example +#### Example 1 Run a listener on port 6000 that accepts all inbound HTTP connections for any URL. When the request is received, attempt to open TCP connections to port 5432 and 3306. If both succeed, return `HTTP 200 OK`. If any fails, return `HTTP @@ -58,3 +68,23 @@ attempt to open TCP connections to port 5432 and 3306. 
If both succeed, return ` health-checker --listener "0.0.0.0:6000" --port 5432 --port 3306 ``` +#### Example 2 + +Run a listener on port 6000 that accepts all inbound HTTP connections for any URL. When the request is received, +attempt to run the script. If exit code is zero, return `HTTP 200 OK`. If any other exit code, return `HTTP +504 Gateway Not Found`. + +``` +health-checker --listener "0.0.0.0:6000" --script /path/to/script.sh +``` + +#### Example 3 + +Run a listener on port 6000 that accepts all inbound HTTP connections for any URL. When the request is received, +attempt to open TCP connection to port 8000 and run the script. If both succeed, return `HTTP 200 OK`. If either fails, return `HTTP +504 Gateway Not Found`. + +``` +health-checker --listener "0.0.0.0:6000" --port 8000 --script /usr/local/bin/zk-health-check.sh +``` + From dcfe3974e19df7afa12daf0ba2e9b11fa3fad823 Mon Sep 17 00:00:00 2001 From: Petri Autero Date: Wed, 9 Jan 2019 17:57:11 +0200 Subject: [PATCH 2/9] Recommended updates to documentation. --- README.md | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index 0ee4c5d..9bd128e 100644 --- a/README.md +++ b/README.md @@ -55,7 +55,8 @@ health-checker [options] | `--listener` | The IP address and port on which inbound HTTP connections will be accepted. | `0.0.0.0:5000` | `--log-level` | Set the log level to LEVEL. Must be one of: `panic`, `fatal`, `error,` `warning`, `info`, or `debug` | `info` | `--help` | Show the help screen | | -| `--script` | Path to script to run - will PASS if it completes within 5s with a zero exit status | | +| `--script` | Path to script to run - will pass if it completes within configured timeout with a zero exit status. Specify one or more times. | | +| `--script-timeout` | Timeout, in seconds, to wait for the scripts to exit. Applies to all configured script targets. 
| `5` | | `--version` | Show the program's version | | #### Example 1 @@ -71,20 +72,20 @@ health-checker --listener "0.0.0.0:6000" --port 5432 --port 3306 #### Example 2 Run a listener on port 6000 that accepts all inbound HTTP connections for any URL. When the request is received, -attempt to run the script. If exit code is zero, return `HTTP 200 OK`. If any other exit code, return `HTTP +attempt to open TCP connection to port 5432 and run the script with a 10 second timeout. If TCP connection succeeds and script exit code is zero, return `HTTP 200 OK`. If TCP connection fails or non-zero exit code for the script, return `HTTP 504 Gateway Not Found`. ``` -health-checker --listener "0.0.0.0:6000" --script /path/to/script.sh +health-checker --listener "0.0.0.0:6000" --port 5432 --script /path/to/script.sh --script-timeout 10 ``` #### Example 3 Run a listener on port 6000 that accepts all inbound HTTP connections for any URL. When the request is received, -attempt to open TCP connection to port 8000 and run the script. If both succeed, return `HTTP 200 OK`. If either fails, return `HTTP +attempt to run the configured scripts. If both return exit code zero, return `HTTP 200 OK`. If either returns non-zero exit code, return `HTTP 504 Gateway Not Found`. 
``` -health-checker --listener "0.0.0.0:6000" --port 8000 --script /usr/local/bin/zk-health-check.sh +health-checker --listener "0.0.0.0:6000" --script "/usr/local/bin/exhibitor-health-check.sh --exhibitor-port 8080" --script "/usr/local/bin/zk-health-check.sh --zk-port 2191" ``` From 2af902b5f0cacab2b9fe9b38ad047c4ca0c6d736 Mon Sep 17 00:00:00 2001 From: Petri Autero Date: Thu, 10 Jan 2019 03:14:27 +0200 Subject: [PATCH 3/9] Implement options parsing and add tests --- Gopkg.lock | 56 ++++++++++++++++- commands/cli.go | 4 +- commands/flags.go | 50 ++++++++++++---- commands/flags_test.go | 132 +++++++++++++++++++++++++++++++++++++++++ options/options.go | 8 ++- 5 files changed, 232 insertions(+), 18 deletions(-) create mode 100644 commands/flags_test.go diff --git a/Gopkg.lock b/Gopkg.lock index 9c93e2e..5a668a5 100644 --- a/Gopkg.lock +++ b/Gopkg.lock @@ -1,45 +1,95 @@ # This file is autogenerated, do not edit; changes may be undone by the next 'dep ensure'. +[[projects]] + digest = "1:0deddd908b6b4b768cfc272c16ee61e7088a60f7fe2f06c547bd3d8e1f8b8e77" + name = "github.com/davecgh/go-spew" + packages = ["spew"] + pruneopts = "" + revision = "8991bc29aa16c548c550c7ff78260e27b9ab7c73" + version = "v1.1.1" + [[projects]] branch = "master" + digest = "1:e424c4e8ca4d251ecec17ae6520040812a204ad934bd5e11a6d8cc088a004e68" name = "github.com/go-errors/errors" packages = ["."] + pruneopts = "" revision = "3afebba5a48dbc89b574d890b6b34d9ee10b4785" [[projects]] + digest = "1:945bd89ef3393fc20dc43aeec26104306f76923b171efb0f3456ffc7f5164314" name = "github.com/gruntwork-io/gruntwork-cli" - packages = ["entrypoint","errors","logging"] + packages = [ + "entrypoint", + "errors", + "logging", + ] + pruneopts = "" revision = "94044eeeb0a48b5e8dd52190fa0d0daba53e157f" version = "v0.1.2" [[projects]] + digest = "1:256484dbbcd271f9ecebc6795b2df8cad4c458dd0f5fd82a8c2fa0c29f233411" + name = "github.com/pmezard/go-difflib" + packages = ["difflib"] + pruneopts = "" + revision = 
"792786c7400a136282c1664665ae0a8db921c6c2" + version = "v1.0.0" + +[[projects]] + digest = "1:42a42c4bc67bed17f40fddf0f24d4403e25e7b96488456cf4248e6d16659d370" name = "github.com/sirupsen/logrus" packages = ["."] + pruneopts = "" revision = "d682213848ed68c0a260ca37d6dd5ace8423f5ba" version = "v1.0.4" [[projects]] + digest = "1:381bcbeb112a51493d9d998bbba207a529c73dbb49b3fd789e48c63fac1f192c" + name = "github.com/stretchr/testify" + packages = ["assert"] + pruneopts = "" + revision = "ffdc059bfe9ce6a4e144ba849dbedead332c6053" + version = "v1.3.0" + +[[projects]] + digest = "1:e85837cb04b78f61688c6eba93ea9d14f60d611e2aaf8319999b1a60d2dafbfa" name = "github.com/urfave/cli" packages = ["."] + pruneopts = "" revision = "cfb38830724cc34fedffe9a2a29fb54fa9169cd1" version = "v1.20.0" [[projects]] branch = "master" + digest = "1:c3415eeb330bf30a2d8181e516ec79804c198f3d171ab9c9364f29dbe76c05d9" name = "golang.org/x/crypto" packages = ["ssh/terminal"] + pruneopts = "" revision = "94eea52f7b742c7cbe0b03b22f0c4c8631ece122" [[projects]] branch = "master" + digest = "1:6565b083c9a57815d2d05438244bb01a0a62efdc656dea8cfe2700b1e43aa6e9" name = "golang.org/x/sys" - packages = ["unix","windows"] + packages = [ + "unix", + "windows", + ] + pruneopts = "" revision = "53aa286056ef226755cd898109dbcdaba8ac0b81" [solve-meta] analyzer-name = "dep" analyzer-version = 1 - inputs-digest = "693562fc110c90f6529b2fc832c2a8548498219ae04ef05dafbc858eec4e1643" + input-imports = [ + "github.com/gruntwork-io/gruntwork-cli/entrypoint", + "github.com/gruntwork-io/gruntwork-cli/errors", + "github.com/gruntwork-io/gruntwork-cli/logging", + "github.com/sirupsen/logrus", + "github.com/stretchr/testify/assert", + "github.com/urfave/cli", + ] solver-name = "gps-cdcl" solver-version = 1 diff --git a/commands/cli.go b/commands/cli.go index 1a384d8..1a08edb 100644 --- a/commands/cli.go +++ b/commands/cli.go @@ -34,7 +34,7 @@ func CreateCli(version string) *cli.App { app.HelpName = app.Name app.Author = 
"Gruntwork, Inc. | https://github.com/gruntwork-io/health-checker" app.Version = version - app.Usage = "A simple HTTP server that returns a 200 OK when all given TCP ports accept inbound connections." + app.Usage = "A simple HTTP server that will return 200 OK if the configured checks are all successful." app.Commands = nil app.Flags = defaultFlags app.Action = runHealthChecker @@ -52,7 +52,7 @@ func runHealthChecker(cliContext *cli.Context) error { opts.Logger.Infof("Note: To enable debug mode, set %s to \"true\"", ENV_VAR_NAME_DEBUG_MODE) return err } - if err != nil { + if err != nil { return errors.WithStackTrace(err) } diff --git a/commands/flags.go b/commands/flags.go index 1eaaae6..674bbac 100644 --- a/commands/flags.go +++ b/commands/flags.go @@ -2,21 +2,33 @@ package commands import ( "fmt" - "github.com/gruntwork-io/health-checker/options" "github.com/gruntwork-io/gruntwork-cli/logging" - "github.com/urfave/cli" + "github.com/gruntwork-io/health-checker/options" "github.com/sirupsen/logrus" + "github.com/urfave/cli" "os" "strings" ) const DEFAULT_LISTENER_IP_ADDRESS = "0.0.0.0" const DEFAULT_LISTENER_PORT = 5500 +const DEFAULT_SCRIPT_TIMEOUT_SEC = 5 const ENV_VAR_NAME_DEBUG_MODE = "HEALTH_CHECKER_DEBUG" var portFlag = cli.IntSliceFlag{ - Name: "port", - Usage: fmt.Sprintf("[Required] The port number on which a TCP connection will be attempted. Specify one or more times. Example: 8000"), + Name: "port", + Usage: fmt.Sprintf("[One of port/script Required] The port number on which a TCP connection will be attempted. Specify one or more times. Example: 8000"), +} + +var scriptFlag = cli.StringSliceFlag{ + Name: "script", + Usage: fmt.Sprintf("[One of port/script Required] The path to script that will be run. Specify one or more times. Example: \"/usr/local/bin/health-check.sh --http-port 8000\""), +} + +var scriptTimeoutFlag = cli.IntFlag{ + Name: "script-timeout", + Usage: fmt.Sprintf("[Optional] Timeout, in seconds, to wait for the scripts to complete. 
Example: 10"), + Value: DEFAULT_SCRIPT_TIMEOUT_SEC, } var listenerFlag = cli.StringFlag{ @@ -33,6 +45,8 @@ var logLevelFlag = cli.StringFlag{ var defaultFlags = []cli.Flag{ portFlag, + scriptFlag, + scriptTimeoutFlag, listenerFlag, logLevelFlag, } @@ -58,19 +72,26 @@ func parseOptions(cliContext *cli.Context) (*options.Options, error) { logger.SetLevel(level) ports := cliContext.IntSlice("port") - if len(ports) == 0 { - return nil, MissingParam(portFlag.Name) + + scripts := cliContext.StringSlice("script") + + if len(ports) == 0 && len(scripts) == 0 { + return nil, OneOfParamsRequired{portFlag.Name, scriptFlag.Name} } + scriptTimeout := cliContext.Int("script-timeout") + listener := cliContext.String("listener") if listener == "" { return nil, MissingParam(listenerFlag.Name) } return &options.Options{ - Ports: ports, - Listener: listener, - Logger: logger, + Ports: ports, + Scripts: scripts, + ScriptTimeout: scriptTimeout, + Listener: listener, + Logger: logger, }, nil } @@ -95,4 +116,13 @@ type MissingParam string func (paramName MissingParam) Error() string { return fmt.Sprintf("Missing required parameter --%s", string(paramName)) -} \ No newline at end of file +} + +type OneOfParamsRequired struct { + param1 string + param2 string +} + +func (paramNames OneOfParamsRequired) Error() string { + return fmt.Sprintf("Missing required parameter, one of --%s / --%s required", string(paramNames.param1), string(paramNames.param2)) +} diff --git a/commands/flags_test.go b/commands/flags_test.go new file mode 100644 index 0000000..16886c5 --- /dev/null +++ b/commands/flags_test.go @@ -0,0 +1,132 @@ +package commands + +import ( + "flag" + "fmt" + "github.com/gruntwork-io/health-checker/options" + "github.com/stretchr/testify/assert" + "github.com/urfave/cli" + "strings" + "testing" +) + +func TestParseChecksFromConfig(t *testing.T) { + t.Parallel() + + testCases := []struct { + name string + args []string + expectedOptions *options.Options + expectedErr string + }{ + { + 
"no options", + []string{}, + nil, + "Missing required parameter, one of", + }, + { + "invalid log-level", + []string{"--log-level", "notreally"}, + nil, + "The log-level value", + }, + { + "invalid listener", + []string{"--listener"}, + optionsForTest(t, DEFAULT_SCRIPT_TIMEOUT_SEC, []string{}, defaultListener(), []int{8080}), + "Missing required parameter --listener", + }, + { + "single port", + []string{"--port", "8080"}, + optionsForTest(t, DEFAULT_SCRIPT_TIMEOUT_SEC, []string{}, defaultListener(), []int{8080}), + "", + }, + { + "multiple ports", + []string{"--port", "8080", "--port", "8081"}, + optionsForTest(t, DEFAULT_SCRIPT_TIMEOUT_SEC, []string{}, defaultListener(), []int{8080, 8081}), + "", + }, + { + "both port and script", + []string{"--port", "8080", "--script", "\"/usr/local/bin/check.sh 1234\""}, + optionsForTest(t, DEFAULT_SCRIPT_TIMEOUT_SEC, []string{"\"/usr/local/bin/check.sh 1234\""}, defaultListener(), []int{8080}), + "", + }, + { + "single script", + []string{"--script", "/usr/local/bin/check.sh"}, + optionsForTest(t, DEFAULT_SCRIPT_TIMEOUT_SEC, []string{"/usr/local/bin/check.sh"}, defaultListener(), []int{}), + "", + }, + { + "single script with custom timeout", + []string{"--script", "/usr/local/bin/check.sh", "--script-timeout", "11"}, + optionsForTest(t, 11, []string{"/usr/local/bin/check.sh"}, defaultListener(), []int{}), + "", + }, + { + "multiple scripts", + []string{"--script", "/usr/local/bin/check1.sh", "--script", "/usr/local/bin/check2.sh"}, + optionsForTest(t, DEFAULT_SCRIPT_TIMEOUT_SEC, []string{"/usr/local/bin/check1.sh", "/usr/local/bin/check2.sh"}, defaultListener(), []int{}), + "", + }, + } + + for _, testCase := range testCases { + t.Run(testCase.name, func(t *testing.T) { + t.Parallel() + context := getContextForTesting(testCase.args) + + actualOptions, actualErr := parseOptions(context) + + if testCase.expectedErr != "" { + if actualErr == nil { + assert.FailNow(t, "Expected error %v but got nothing.", testCase.expectedErr) 
+ } + assert.True(t, strings.Contains(actualErr.Error(), testCase.expectedErr), "Expected error %v but got error %v", testCase.expectedErr, actualErr) + } else { + assert.Nil(t, actualErr, "Unexpected error: %v", actualErr) + assertOptionsEqual(t, *testCase.expectedOptions, *actualOptions, "For args %v", testCase.args) + } + }) + } + +} + +func getContextForTesting(args []string) *cli.Context { + flagSet := flag.NewFlagSet("test", flag.ContinueOnError) + c := CreateCli("0.0.0") + ctx := cli.NewContext(c, flagSet, nil) + for _, f := range c.Flags { + f.Apply(flagSet) + } + flagSet.Parse(args) + return ctx +} + +func optionsForTest(t *testing.T, scriptTimeout int, scripts []string, listener string, ports []int) *options.Options { + opts := &options.Options{} + opts.ScriptTimeout = scriptTimeout + opts.Scripts = scripts + opts.Listener = listener + opts.Ports = ports + return opts +} + +func assertOptionsEqual(t *testing.T, expected options.Options, actual options.Options, msgAndArgs ...interface{}) { + assert.Equal(t, expected.ScriptTimeout, actual.ScriptTimeout, msgAndArgs...) + assert.Equal(t, expected.Scripts, actual.Scripts, msgAndArgs...) + assert.Equal(t, expected.Listener, actual.Listener, msgAndArgs...) + assert.Equal(t, expected.Ports, actual.Ports, msgAndArgs...) 
+} + +func defaultListener() string { + return listenerString(DEFAULT_LISTENER_IP_ADDRESS, DEFAULT_LISTENER_PORT) +} + +func listenerString(address string, port int) string { + return fmt.Sprintf("%s:%d", address, port) +} diff --git a/options/options.go b/options/options.go index 4e34f21..ee687a6 100644 --- a/options/options.go +++ b/options/options.go @@ -4,7 +4,9 @@ import "github.com/sirupsen/logrus" // The options accepted by this CLI tool type Options struct { - Ports []int - Listener string - Logger *logrus.Logger + Ports []int + Scripts []string + ScriptTimeout int + Listener string + Logger *logrus.Logger } From d90076691db82e14ed3487c8651df3d4a9b79dfb Mon Sep 17 00:00:00 2001 From: Petri Autero Date: Thu, 10 Jan 2019 09:06:53 +0200 Subject: [PATCH 4/9] More tests --- commands/flags_test.go | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/commands/flags_test.go b/commands/flags_test.go index 16886c5..10058de 100644 --- a/commands/flags_test.go +++ b/commands/flags_test.go @@ -37,6 +37,12 @@ func TestParseChecksFromConfig(t *testing.T) { optionsForTest(t, DEFAULT_SCRIPT_TIMEOUT_SEC, []string{}, defaultListener(), []int{8080}), "Missing required parameter --listener", }, + { + "invalid listener", + []string{"--listener", "1234", "--port", "4321"}, + optionsForTest(t, DEFAULT_SCRIPT_TIMEOUT_SEC, []string{}, listenerString(DEFAULT_LISTENER_IP_ADDRESS, 1234), []int{4321}), + "", + }, { "single port", []string{"--port", "8080"}, From 9de20d3669d8ec3220f32469b59abd773002fe54 Mon Sep 17 00:00:00 2001 From: Petri Autero Date: Thu, 10 Jan 2019 15:17:38 +0200 Subject: [PATCH 5/9] Add script execution target and corresponding tests --- README.md | 2 + commands/cli.go | 8 ++- commands/flags.go | 3 +- commands/flags_test.go | 52 +++++++------- options/options.go | 26 ++++++- server/server.go | 53 ++++++++++---- server/server_test.go | 156 +++++++++++++++++++++++++++++++++++++++++ test/main_test.go | 9 --- test/test_util.go | 31 ++++++++ 9 files changed, 285 
insertions(+), 55 deletions(-) create mode 100644 server/server_test.go delete mode 100644 test/main_test.go create mode 100644 test/test_util.go diff --git a/README.md b/README.md index 9bd128e..a47fe28 100644 --- a/README.md +++ b/README.md @@ -59,6 +59,8 @@ health-checker [options] | `--script-timeout` | Timeout, in seconds, to wait for the scripts to exit. Applies to all configured script targets. | `5` | | `--version` | Show the program's version | | +If you execute a shell script, ensure you have a `shebang` line in your script, otherwise the script will fail with an `exec format error`. + #### Example 1 Run a listener on port 6000 that accepts all inbound HTTP connections for any URL. When the request is received, diff --git a/commands/cli.go b/commands/cli.go index 1a08edb..dff374a 100644 --- a/commands/cli.go +++ b/commands/cli.go @@ -55,8 +55,12 @@ func runHealthChecker(cliContext *cli.Context) error { if err != nil { return errors.WithStackTrace(err) } - - opts.Logger.Infof("The Health Check will attempt to connect to the following ports via TCP: %v", opts.Ports) + if len(opts.Ports) > 0 { + opts.Logger.Infof("The Health Check will attempt to connect to the following ports via TCP: %v", opts.Ports) + } + if len(opts.Scripts) > 0 { + opts.Logger.Infof("The Health Check will attempt to run the following scripts: %v", opts.Scripts) + } opts.Logger.Infof("Listening on Port %s...", opts.Listener) err = server.StartHttpServer(opts) if err != nil { diff --git a/commands/flags.go b/commands/flags.go index 674bbac..4f6d28e 100644 --- a/commands/flags.go +++ b/commands/flags.go @@ -73,7 +73,8 @@ func parseOptions(cliContext *cli.Context) (*options.Options, error) { ports := cliContext.IntSlice("port") - scripts := cliContext.StringSlice("script") + scriptArr := cliContext.StringSlice("script") + scripts := options.ParseScripts(scriptArr) if len(ports) == 0 && len(scripts) == 0 { return nil, OneOfParamsRequired{portFlag.Name, scriptFlag.Name} diff --git 
a/commands/flags_test.go b/commands/flags_test.go index 10058de..bf09b79 100644 --- a/commands/flags_test.go +++ b/commands/flags_test.go @@ -2,8 +2,8 @@ package commands import ( "flag" - "fmt" "github.com/gruntwork-io/health-checker/options" + "github.com/gruntwork-io/health-checker/test" "github.com/stretchr/testify/assert" "github.com/urfave/cli" "strings" @@ -34,49 +34,49 @@ func TestParseChecksFromConfig(t *testing.T) { { "invalid listener", []string{"--listener"}, - optionsForTest(t, DEFAULT_SCRIPT_TIMEOUT_SEC, []string{}, defaultListener(), []int{8080}), + createOptionsForTest(t, DEFAULT_SCRIPT_TIMEOUT_SEC, []string{}, defaultListener(), []int{8080}), "Missing required parameter --listener", }, { "invalid listener", []string{"--listener", "1234", "--port", "4321"}, - optionsForTest(t, DEFAULT_SCRIPT_TIMEOUT_SEC, []string{}, listenerString(DEFAULT_LISTENER_IP_ADDRESS, 1234), []int{4321}), + createOptionsForTest(t, DEFAULT_SCRIPT_TIMEOUT_SEC, []string{}, test.ListenerString(DEFAULT_LISTENER_IP_ADDRESS, 1234), []int{4321}), "", }, { "single port", []string{"--port", "8080"}, - optionsForTest(t, DEFAULT_SCRIPT_TIMEOUT_SEC, []string{}, defaultListener(), []int{8080}), + createOptionsForTest(t, DEFAULT_SCRIPT_TIMEOUT_SEC, []string{}, defaultListener(), []int{8080}), "", }, { "multiple ports", []string{"--port", "8080", "--port", "8081"}, - optionsForTest(t, DEFAULT_SCRIPT_TIMEOUT_SEC, []string{}, defaultListener(), []int{8080, 8081}), + createOptionsForTest(t, DEFAULT_SCRIPT_TIMEOUT_SEC, []string{}, defaultListener(), []int{8080, 8081}), "", }, { "both port and script", []string{"--port", "8080", "--script", "\"/usr/local/bin/check.sh 1234\""}, - optionsForTest(t, DEFAULT_SCRIPT_TIMEOUT_SEC, []string{"\"/usr/local/bin/check.sh 1234\""}, defaultListener(), []int{8080}), + createOptionsForTest(t, DEFAULT_SCRIPT_TIMEOUT_SEC, []string{"\"/usr/local/bin/check.sh 1234\""}, defaultListener(), []int{8080}), "", }, { "single script", []string{"--script", 
"/usr/local/bin/check.sh"}, - optionsForTest(t, DEFAULT_SCRIPT_TIMEOUT_SEC, []string{"/usr/local/bin/check.sh"}, defaultListener(), []int{}), + createOptionsForTest(t, DEFAULT_SCRIPT_TIMEOUT_SEC, []string{"/usr/local/bin/check.sh"}, defaultListener(), []int{}), "", }, { "single script with custom timeout", []string{"--script", "/usr/local/bin/check.sh", "--script-timeout", "11"}, - optionsForTest(t, 11, []string{"/usr/local/bin/check.sh"}, defaultListener(), []int{}), + createOptionsForTest(t, 11, []string{"/usr/local/bin/check.sh"}, defaultListener(), []int{}), "", }, { "multiple scripts", []string{"--script", "/usr/local/bin/check1.sh", "--script", "/usr/local/bin/check2.sh"}, - optionsForTest(t, DEFAULT_SCRIPT_TIMEOUT_SEC, []string{"/usr/local/bin/check1.sh", "/usr/local/bin/check2.sh"}, defaultListener(), []int{}), + createOptionsForTest(t, DEFAULT_SCRIPT_TIMEOUT_SEC, []string{"/usr/local/bin/check1.sh", "/usr/local/bin/check2.sh"}, defaultListener(), []int{}), "", }, } @@ -84,7 +84,7 @@ func TestParseChecksFromConfig(t *testing.T) { for _, testCase := range testCases { t.Run(testCase.name, func(t *testing.T) { t.Parallel() - context := getContextForTesting(testCase.args) + context := createContextForTesting(testCase.args) actualOptions, actualErr := parseOptions(context) @@ -102,7 +102,18 @@ func TestParseChecksFromConfig(t *testing.T) { } -func getContextForTesting(args []string) *cli.Context { +func defaultListener() string { + return test.ListenerString(DEFAULT_LISTENER_IP_ADDRESS, DEFAULT_LISTENER_PORT) +} + +func assertOptionsEqual(t *testing.T, expected options.Options, actual options.Options, msgAndArgs ...interface{}) { + assert.Equal(t, expected.ScriptTimeout, actual.ScriptTimeout, msgAndArgs...) + assert.Equal(t, expected.Scripts, actual.Scripts, msgAndArgs...) + assert.Equal(t, expected.Listener, actual.Listener, msgAndArgs...) + assert.Equal(t, expected.Ports, actual.Ports, msgAndArgs...) 
+} + +func createContextForTesting(args []string) *cli.Context { flagSet := flag.NewFlagSet("test", flag.ContinueOnError) c := CreateCli("0.0.0") ctx := cli.NewContext(c, flagSet, nil) @@ -113,26 +124,11 @@ func getContextForTesting(args []string) *cli.Context { return ctx } -func optionsForTest(t *testing.T, scriptTimeout int, scripts []string, listener string, ports []int) *options.Options { +func createOptionsForTest(t *testing.T, scriptTimeout int, scripts []string, listener string, ports []int) *options.Options { opts := &options.Options{} opts.ScriptTimeout = scriptTimeout - opts.Scripts = scripts + opts.Scripts = options.ParseScripts(scripts) opts.Listener = listener opts.Ports = ports return opts } - -func assertOptionsEqual(t *testing.T, expected options.Options, actual options.Options, msgAndArgs ...interface{}) { - assert.Equal(t, expected.ScriptTimeout, actual.ScriptTimeout, msgAndArgs...) - assert.Equal(t, expected.Scripts, actual.Scripts, msgAndArgs...) - assert.Equal(t, expected.Listener, actual.Listener, msgAndArgs...) - assert.Equal(t, expected.Ports, actual.Ports, msgAndArgs...) 
-} - -func defaultListener() string { - return listenerString(DEFAULT_LISTENER_IP_ADDRESS, DEFAULT_LISTENER_PORT) -} - -func listenerString(address string, port int) string { - return fmt.Sprintf("%s:%d", address, port) -} diff --git a/options/options.go b/options/options.go index ee687a6..ea9c624 100644 --- a/options/options.go +++ b/options/options.go @@ -1,12 +1,34 @@ package options -import "github.com/sirupsen/logrus" +import ( + "github.com/sirupsen/logrus" + "strings" +) // The options accepted by this CLI tool type Options struct { Ports []int - Scripts []string + Scripts []Script ScriptTimeout int Listener string Logger *logrus.Logger } + +type Script struct { + Name string + Args []string +} + +func ParseScripts(scriptStrings []string) []Script { + rv := []Script{} + for _, s := range scriptStrings { + commandArr := strings.Split(s, " ") + scriptName := commandArr[0] + scriptParams := []string{} + if len(commandArr) > 1 { + scriptParams = commandArr[1:] + } + rv = append(rv, Script{scriptName, scriptParams}) + } + return rv +} diff --git a/server/server.go b/server/server.go index 7cb5774..fad532f 100644 --- a/server/server.go +++ b/server/server.go @@ -1,23 +1,25 @@ package server import ( - "net/http" - "net" + "context" "fmt" + "github.com/gruntwork-io/gruntwork-cli/errors" + "github.com/gruntwork-io/health-checker/options" + "net" + "net/http" + "os/exec" "sync" "time" - "github.com/gruntwork-io/health-checker/options" - "github.com/gruntwork-io/gruntwork-cli/errors" ) type httpResponse struct { StatusCode int - Body string + Body string } func StartHttpServer(opts *options.Options) error { http.HandleFunc("/", func(w http.ResponseWriter, r *http.Request) { - resp := checkTcpPorts(opts) + resp := runChecks(opts) err := writeHttpResponse(w, resp) if err != nil { opts.Logger.Error("Failed to send HTTP response. 
Exiting.") @@ -33,11 +35,11 @@ func StartHttpServer(opts *options.Options) error { } // Check that we can open a TPC connection to all the ports in opts.Ports -func checkTcpPorts(opts *options.Options) *httpResponse { +func runChecks(opts *options.Options) *httpResponse { logger := opts.Logger logger.Infof("Received inbound request. Beginning health checks...") - allPortsValid := true + allChecksOk := true var waitGroup = sync.WaitGroup{} @@ -47,7 +49,7 @@ func checkTcpPorts(opts *options.Options) *httpResponse { err := attemptTcpConnection(port, opts) if err != nil { logger.Warnf("TCP connection to port %d FAILED: %s", port, err) - allPortsValid = false + allChecksOk = false } else { logger.Infof("TCP connection to port %d successful", port) } @@ -56,14 +58,40 @@ func checkTcpPorts(opts *options.Options) *httpResponse { }(port) } + for _, script := range opts.Scripts { + waitGroup.Add(1) + go func(script options.Script) { + logger.Infof("Executing '%v' with a timeout of %v seconds...", script, opts.ScriptTimeout) + + timeout := time.Second * time.Duration(opts.ScriptTimeout) + + ctx, cancel := context.WithTimeout(context.Background(), timeout) + defer cancel() + + cmd := exec.CommandContext(ctx, script.Name, script.Args...) + + output, err := cmd.Output() + + if err != nil { + logger.Warnf("Script %v FAILED: %s", script.Name, err) + logger.Warnf("Command output: %s", output) + allChecksOk = false + } else { + logger.Infof("Script %v successful", script) + } + + waitGroup.Done() + }(script) + } + waitGroup.Wait() - if allPortsValid { + if allChecksOk { logger.Infof("All health checks passed. Returning HTTP 200 response.\n") - return &httpResponse{ StatusCode: http.StatusOK, Body: "OK" } + return &httpResponse{StatusCode: http.StatusOK, Body: "OK"} } else { logger.Infof("At least one health check failed. 
Returning HTTP 504 response.\n") - return &httpResponse{ StatusCode: http.StatusGatewayTimeout, Body: "At least one health check failed" } + return &httpResponse{StatusCode: http.StatusGatewayTimeout, Body: "At least one health check failed"} } } @@ -93,4 +121,3 @@ func writeHttpResponse(w http.ResponseWriter, resp *httpResponse) error { return nil } - diff --git a/server/server_test.go b/server/server_test.go new file mode 100644 index 0000000..a17cdb5 --- /dev/null +++ b/server/server_test.go @@ -0,0 +1,156 @@ +package server + +import ( + "github.com/gruntwork-io/gruntwork-cli/logging" + "github.com/gruntwork-io/health-checker/options" + "github.com/gruntwork-io/health-checker/test" + "github.com/sirupsen/logrus" + "github.com/stretchr/testify/assert" + "net" + "os" + "testing" +) + +func TestParseChecksFromConfig(t *testing.T) { + // Will *not* run parallel because we're opening random tcp ports + // and want to avoid port clashes + testCases := []struct { + name string + numports int + failport bool + scripts []string + scriptTimeout int + expectedStatus int + }{ + { + "port check", + 1, + false, + []string{}, + 5, + 200, + }, + { + "multiport check", + 3, + false, + []string{}, + 5, + 200, + }, + { + "multiport check one fails", + 3, + true, + []string{}, + 5, + 504, + }, + { + "script ok", + 0, + false, + []string{"echo 'hello'"}, + 5, + 200, + }, + { + "multi script ok", + 0, + false, + []string{"echo 'hello1'", "echo 'hello2'"}, + 5, + 200, + }, + } + + for _, testCase := range testCases { + t.Run(testCase.name, func(t *testing.T) { + ports, err := test.GetFreePorts(1 + testCase.numports) + + if err != nil { + assert.FailNow(t, "Failed to get free ports: %v", err.Error()) + } + + listenerString := test.ListenerString(test.DEFAULT_LISTENER_ADDRESS, ports[0]) + + checkPorts := []int{} + listenPorts := []int{} + + // If we're monitoring tcp ports, prepare them + if testCase.numports > 0 { + checkPorts = ports[1:] + listenPorts = make([]int, len(checkPorts)) + 
copy(listenPorts, checkPorts) + + // If we want to fail one check, remove the first port from the listen ports + // So the health-check cannot connect + if testCase.failport { + listenPorts = listenPorts[1:] + } + } + + listeners := []net.Listener{} + + for _, port := range listenPorts { + t.Logf("Creating listener for port %d", port) + l, err := net.Listen("tcp", test.ListenerString(test.DEFAULT_LISTENER_ADDRESS, port)) + if err != nil { + t.Logf("Error creating listener for port %d: %s", port, err.Error()) + assert.FailNow(t, "Failed to start listening: %s", err.Error()) + } + + listeners = append(listeners, l) + + // Separate goroutine for the tcp listeners + go handleRequests(t, l) + } + + defer closeListeners(t, listeners) + + opts := createOptionsForTest(t, testCase.scriptTimeout, testCase.scripts, listenerString, checkPorts) + + // Run the checks and verify the status code + response := runChecks(opts) + assert.True(t, testCase.expectedStatus == response.StatusCode, "Got expected status code") + }) + } + +} + +func closeListeners(t *testing.T, listeners []net.Listener) { + for _, l := range listeners { + err := l.Close() + if err != nil { + t.Fatal("Failed to close listener: ", err) + } + } +} + +func handleRequests(t *testing.T, l net.Listener) { + for { + // Listen for an incoming connection. + l.Accept() + // We don't log these when testing because we're forcibly closing the socket + // from the outside. 
If you're debugging and wish to enable the logging, + // uncomment the lines below + //_, err := l.Accept() + //if err != nil { + // t.Logf("Error accepting: %s", err.Error()) + //} + } +} + +func createOptionsForTest(t *testing.T, scriptTimeout int, scripts []string, listener string, ports []int) *options.Options { + logger := logging.GetLogger("health-checker") + logger.Out = os.Stdout + logger.Level = logrus.InfoLevel + + opts := &options.Options{} + opts.Logger = logger + opts.ScriptTimeout = scriptTimeout + opts.Scripts = options.ParseScripts(scripts) + opts.Listener = listener + opts.Ports = ports + return opts +} diff --git a/test/main_test.go b/test/main_test.go deleted file mode 100644 index b7cadde..0000000 --- a/test/main_test.go +++ /dev/null @@ -1,9 +0,0 @@ -package test - -import ( - "testing" -) - -func TestPlaceholder(t *testing.T) { - t.Log("Add tests here!") -} diff --git a/test/test_util.go b/test/test_util.go new file mode 100644 index 0000000..1833005 --- /dev/null +++ b/test/test_util.go @@ -0,0 +1,31 @@ +package test + +import ( + "fmt" + "net" +) + +const DEFAULT_LISTENER_ADDRESS = "0.0.0.0" + +// GetFreePort asks the kernel for free open ports that are ready to use. 
+func GetFreePorts(count int) ([]int, error) { + var ports []int + for i := 0; i < count; i++ { + addr, err := net.ResolveTCPAddr("tcp", "localhost:0") + if err != nil { + return nil, err + } + + l, err := net.ListenTCP("tcp", addr) + if err != nil { + return nil, err + } + defer l.Close() + ports = append(ports, l.Addr().(*net.TCPAddr).Port) + } + return ports, nil +} + +func ListenerString(address string, port int) string { + return fmt.Sprintf("%s:%d", address, port) +} From 87fa1cbfb9211f449b9287e40a0f37830a6210eb Mon Sep 17 00:00:00 2001 From: Petri Autero Date: Thu, 10 Jan 2019 15:23:36 +0200 Subject: [PATCH 6/9] Add more permutations to tests --- server/server_test.go | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/server/server_test.go b/server/server_test.go index a17cdb5..a18e878 100644 --- a/server/server_test.go +++ b/server/server_test.go @@ -54,6 +54,14 @@ func TestParseChecksFromConfig(t *testing.T) { 5, 200, }, + { + "script fail", + 0, + false, + []string{"lskdf"}, + 5, + 504, + }, { "multi script ok", 0, @@ -62,6 +70,22 @@ func TestParseChecksFromConfig(t *testing.T) { 5, 200, }, + { + "multi script one fail", + 0, + false, + []string{"echo 'hello1'", "lskdf"}, + 5, + 504, + }, + { + "script and port", + 1, + false, + []string{"echo 'hello1'"}, + 5, + 200, + }, } for _, testCase := range testCases { From 770a126ff56d81b2855a4ceab7a8a3065e2c84bf Mon Sep 17 00:00:00 2001 From: Petri Autero Date: Thu, 10 Jan 2019 16:26:06 +0200 Subject: [PATCH 7/9] Run tests from root instead --- .circleci/config.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index 11e31be..a5b6132 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -30,7 +30,7 @@ jobs: - checkout - attach_workspace: at: /go/src/github.com/gruntwork-io/health-checker - - run: run-go-tests --circle-ci-2 --path test + - run: run-go-tests --circle-ci-2 build: <<: *defaults From 
2286fe7b140c1cec067ba40f6780575743a842de Mon Sep 17 00:00:00 2001 From: Petri Autero Date: Thu, 10 Jan 2019 16:33:48 +0200 Subject: [PATCH 8/9] Fix a typo in one of the test names --- commands/flags_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/commands/flags_test.go b/commands/flags_test.go index bf09b79..6a8f3cb 100644 --- a/commands/flags_test.go +++ b/commands/flags_test.go @@ -38,7 +38,7 @@ func TestParseChecksFromConfig(t *testing.T) { "Missing required parameter --listener", }, { - "invalid listener", + "valid listener", []string{"--listener", "1234", "--port", "4321"}, createOptionsForTest(t, DEFAULT_SCRIPT_TIMEOUT_SEC, []string{}, test.ListenerString(DEFAULT_LISTENER_IP_ADDRESS, 1234), []int{4321}), "", From 02fe94cd515fa718eb68316ec3545510ea17a210 Mon Sep 17 00:00:00 2001 From: Petri Autero Date: Fri, 11 Jan 2019 09:31:11 +0200 Subject: [PATCH 9/9] Small improvements --- README.md | 2 +- server/server.go | 6 ++++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index a47fe28..98a6ef1 100644 --- a/README.md +++ b/README.md @@ -15,7 +15,7 @@ a single TCP port, or an HTTP(S) endpoint. As a result, our use case just isn't We wrote health-checker so that we could run a daemon on the server that reports the true health of the server by attempting to open a TCP connection to more than one port when it receives an inbound HTTP request on the given listener. -Using the `--script` -option, the `health-checker` can be extended to check many other targets. One concrete exeample is monitoring +Using the `--script` -option, the `health-checker` can be extended to check many other targets. One concrete example is monitoring `ZooKeeper` node status during rolling deployment. Just polling the `ZooKeeper`'s TCP client port doesn't necessarily guarantee that the node has (re-)joined the cluster. 
Using the `health-check` with a custom script target, we can [monitor ZooKeeper](https://zookeeper.apache.org/doc/r3.4.8/zookeeperAdmin.html#sc_monitoring) using the diff --git a/server/server.go b/server/server.go index fad532f..9f2083d 100644 --- a/server/server.go +++ b/server/server.go @@ -61,11 +61,15 @@ func runChecks(opts *options.Options) *httpResponse { for _, script := range opts.Scripts { waitGroup.Add(1) go func(script options.Script) { + + defer waitGroup.Done() + logger.Infof("Executing '%v' with a timeout of %v seconds...", script, opts.ScriptTimeout) timeout := time.Second * time.Duration(opts.ScriptTimeout) ctx, cancel := context.WithTimeout(context.Background(), timeout) + defer cancel() cmd := exec.CommandContext(ctx, script.Name, script.Args...) @@ -79,8 +83,6 @@ func runChecks(opts *options.Options) *httpResponse { } else { logger.Infof("Script %v successful", script) } - - waitGroup.Done() }(script) }