Skip to content

Commit

Permalink
No more sending on closed channels during checkscontroller shutdown (#…
Browse files Browse the repository at this point in the history
…245)

* feat: added metrics to targetmanager

First metric announces registration state.

* test: added metrics to the tests

* fix: race condition when shutting down checks

When teh sparrow check controller would shut down, it would iterate over
the actual slice of the checks, shutdown each check and then procceed to
delete said check from the slice.

Since the shutting down procedure is instant, there was a race condition
that would delete a wrong check from the slice and then the same
shutting down loop would try and shutdown the same check again.

Just returning a copy of the slice resolves this problem, as the
iteration is now done on the copy only. A more sophisticated deletion
routine for the checks slice could be another way to handle this, but it
would probably increase the complexity of the checks and
checkscontroller structs.

* chore: shutdown message on end

* test: added validating test case

Signed-off-by: Bruno Bressi <[email protected]>

* chore: marked function as helper

* test: added test for the controller's update function

This test proves that the shallow clone works as intended and returns a clone of the slice, where the original references can still be used and updated.

* chore: bumped golangci lint to latest version

This should fix the bodyclose linting remarks

Signed-off-by: Bruno Bressi <[email protected]>

---------

Signed-off-by: Bruno Bressi <[email protected]>
  • Loading branch information
puffitos authored Jan 27, 2025
1 parent 9019962 commit b5337ce
Show file tree
Hide file tree
Showing 8 changed files with 162 additions and 9 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/pre-commit.yml
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ jobs:

- name: Install go toolchain for pre-commit
run: |
curl -sSfL https://raw.githubusercontent.com/golangci/golangci-lint/master/install.sh | sh -s -- -b $(go env GOPATH)/bin v1.60.3
curl -sSfL https://raw.githubusercontent.com/golangci/golangci-lint/master/install.sh | sh -s -- -b $(go env GOPATH)/bin v1.63.4
go install mvdan.cc/gofumpt@latest
go install github.com/matryer/moq@latest
go install github.com/norwoodj/helm-docs/cmd/helm-docs@latest
Expand Down
7 changes: 4 additions & 3 deletions cmd/run.go
Original file line number Diff line number Diff line change
Expand Up @@ -86,17 +86,18 @@ func run() func(cmd *cobra.Command, args []string) error {

s := sparrow.New(cfg)
cErr := make(chan error, 1)
log.Info("Running sparrow")
log.InfoContext(ctx, "Running sparrow")
go func() {
cErr <- s.Run(ctx)
}()

select {
case <-sigChan:
log.Info("Signal received, shutting down")
log.InfoContext(ctx, "Signal received, shutting down")
cancel()
<-cErr
case err := <-cErr:
case err = <-cErr:
log.InfoContext(ctx, "Sparrow was shut down")
return err
}

Expand Down
2 changes: 1 addition & 1 deletion pkg/api/api_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@ func TestAPI_Run(t *testing.T) {
rec := httptest.NewRecorder()
a.router.ServeHTTP(rec, req)

if status := rec.Result().StatusCode; status != tt.want.status { //nolint:bodyclose // closed in defer below
if status := rec.Result().StatusCode; status != tt.want.status {
t.Errorf("Handler for route %s returned wrong status code: got %v want %v", tt.want.path, status, tt.want.status)
}

Expand Down
3 changes: 2 additions & 1 deletion pkg/checks/runtime/checks.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
package runtime

import (
"slices"
"sync"

"github.com/telekom/sparrow/pkg/checks"
Expand Down Expand Up @@ -53,5 +54,5 @@ func (c *Checks) Delete(check checks.Check) {
func (c *Checks) Iter() []checks.Check {
c.mu.RLock()
defer c.mu.RUnlock()
return c.checks
return slices.Clone(c.checks)
}
147 changes: 147 additions & 0 deletions pkg/sparrow/controller_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,58 @@ func TestRun_ContextCancellation(t *testing.T) {
}
}

// TestChecksController_Shutdown tests the shutdown of the ChecksController
// when none, one or multiple checks are registered. The test checks that after shutdown no
// checks are registered anymore (the checks slice is empty) and that the done channel is closed.
func TestChecksController_Shutdown(t *testing.T) {
tests := []struct {
name string
checks []checks.Check
}{
{
name: "no checks registered",
checks: nil,
},
{
name: "one check registered",
checks: []checks.Check{newMockCheck(t, "mockCheck")},
},
{
name: "multiple checks registered",
checks: []checks.Check{
newMockCheck(t, "mockCheck1"),
newMockCheck(t, "mockCheck2"),
newMockCheck(t, "mockCheck3"),
newMockCheck(t, "mockCheck4"),
},
},
}

for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
cc := NewChecksController(db.NewInMemory(), metrics.New(metrics.Config{}))

if tt.checks != nil {
for _, check := range tt.checks {
cc.RegisterCheck(context.Background(), check)
}
}

cc.Shutdown(context.Background())

select {
case <-cc.done:
if len(cc.checks.Iter()) != 0 {
t.Errorf("Expected no checks to be registered")
}
return
case <-time.After(time.Second):
t.Fatal("Expected done channel to be closed")
}
})
}
}

func TestChecksController_Reconcile(t *testing.T) {
ctx, cancel := logger.NewContextWithLogger(context.Background())
defer cancel()
Expand Down Expand Up @@ -235,6 +287,74 @@ func TestChecksController_Reconcile(t *testing.T) {
}
}

// TestChecksController_Reconcile_Update tests the update of the checks
// when the runtime configuration changes.
func TestChecksController_Reconcile_Update(t *testing.T) {
ctx, cancel := logger.NewContextWithLogger(context.Background())
defer cancel()

tests := []struct {
name string
checks []checks.Check
newRuntimeConfig runtime.Config
}{
{
name: "update health check",
checks: []checks.Check{
health.NewCheck(),
},
newRuntimeConfig: runtime.Config{
Health: &health.Config{
Targets: []string{"https://new.com"},
Interval: 200 * time.Millisecond,
Timeout: 1000 * time.Millisecond,
},
},
},
{
name: "update health & latency check",
checks: []checks.Check{
health.NewCheck(),
latency.NewCheck(),
},
newRuntimeConfig: runtime.Config{
Health: &health.Config{
Targets: []string{"https://new.com"},
Interval: 200 * time.Millisecond,
Timeout: 1000 * time.Millisecond,
},
Latency: &latency.Config{
Targets: []string{"https://new.com"},
Interval: 200 * time.Millisecond,
Timeout: 1000 * time.Millisecond,
},
},
},
}

for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
cc := NewChecksController(db.NewInMemory(), metrics.New(metrics.Config{}))
for _, c := range tt.checks {
cc.checks.Add(c)
}

cc.Reconcile(ctx, tt.newRuntimeConfig)

for _, c := range cc.checks.Iter() {
switch c.GetConfig().For() {
case health.CheckName:
hc := c.(*health.Health)
assert.Equal(t, tt.newRuntimeConfig.Health.Targets, hc.GetConfig().(*health.Config).Targets)
case latency.CheckName:
lc := c.(*latency.Latency)
assert.Equal(t, tt.newRuntimeConfig.Latency.Targets, lc.GetConfig().(*latency.Config).Targets)
}
}
})
}
}

func TestChecksController_RegisterCheck(t *testing.T) {
tests := []struct {
name string
Expand Down Expand Up @@ -379,3 +499,30 @@ func TestGenerateCheckSpecs(t *testing.T) {
})
}
}

// newMockCheck creates a new mock check with the given name.
func newMockCheck(t *testing.T, name string) *checks.CheckMock {
t.Helper()
return &checks.CheckMock{
GetMetricCollectorsFunc: func() []prometheus.Collector {
return []prometheus.Collector{
prometheus.NewCounter(prometheus.CounterOpts{
Name: fmt.Sprintf("%s_mock_metric", name),
}),
}
},
NameFunc: func() string {
return name
},
RemoveLabelledMetricsFunc: nil,
RunFunc: func(ctx context.Context, cResult chan checks.ResultDTO) error {
t.Logf("Run called for check %s", name)
return nil
},
SchemaFunc: nil,
ShutdownFunc: func() {
t.Logf("Shutdown called for check %s", name)
},
UpdateConfigFunc: nil,
}
}
2 changes: 1 addition & 1 deletion pkg/sparrow/handlers_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -129,7 +129,7 @@ func TestSparrow_handleCheckMetrics(t *testing.T) {
}

s.handleCheckMetrics(w, r)
resp := w.Result() //nolint:bodyclose
resp := w.Result()
body, _ := io.ReadAll(resp.Body)

if tt.wantCode == http.StatusOK {
Expand Down
5 changes: 3 additions & 2 deletions pkg/sparrow/run.go
Original file line number Diff line number Diff line change
Expand Up @@ -131,6 +131,7 @@ func (s *Sparrow) Run(ctx context.Context) error {
s.shutdown(ctx)
}
case <-s.cDone:
log.InfoContext(ctx, "Sparrow was shut down")
return fmt.Errorf("sparrow was shut down")
}
}
Expand Down Expand Up @@ -181,7 +182,7 @@ func (s *Sparrow) shutdown(ctx context.Context) {
defer cancel()

s.shutOnce.Do(func() {
log.Info("Shutting down sparrow gracefully")
log.InfoContext(ctx, "Shutting down sparrow")
var sErrs ErrShutdown
if s.tarMan != nil {
sErrs.errTarMan = s.tarMan.Shutdown(ctx)
Expand All @@ -192,7 +193,7 @@ func (s *Sparrow) shutdown(ctx context.Context) {
s.controller.Shutdown(ctx)

if sErrs.HasError() {
log.Error("Failed to shutdown gracefully", "contextError", errC, "errors", sErrs)
log.ErrorContext(ctx, "Failed to shutdown gracefully", "contextError", errC, "errors", sErrs)
}

// Signal that shutdown is complete
Expand Down
3 changes: 3 additions & 0 deletions pkg/sparrow/run_errors.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,12 +18,15 @@

package sparrow

// ErrShutdown holds any errors that may
// have occurred during shutdown of the Sparrow
type ErrShutdown struct {
errAPI error
errTarMan error
errMetrics error
}

// HasError returns true if any of the errors are set
func (e ErrShutdown) HasError() bool {
return e.errAPI != nil || e.errTarMan != nil || e.errMetrics != nil
}

0 comments on commit b5337ce

Please sign in to comment.