fxtest: Enforceable Timeouts on Lifecycle (#1203)

In Fx, if starting/stopping the application takes longer than the context's timeout, the fx.App will bail early, regardless of the status of the currently running hooks. This prevents stalling an application when hooks (accidentally) block forever. In order to test hook behavior, Fx provides fxtest.Lifecycle to interact with. This Lifecycle is a simple wrapper around the actual fx Lifecycle type, meaning it does not check for timeout and bail early like fx.App does. This is an issue because: * It allows for long-running hooks (which should be considered bugs) to pass tests. * It allows for tests to completely stall for hooks that block forever. See #1180 for more details. This PR adds an option that can be passed to `fxtest.NewLifecycle` to cause it to immediately fail when context expires, similar to fx.App. ```go lc := fxtest.NewLifecycle(fxtest.EnforceTimeout(true)) lc.Append(fx.StartHook(func() { for {} })) ctx, _ := context.WithTimeout(context.Background(), time.Second) err := lc.Start(ctx) // Will return deadline exceeded after a second ``` This PR doesn't provide a way to test timeouts using `RequireStart` and `RequireStop`. However, since those don't take contexts anyways, my assumption is that usage of those APIs represents an intentional decision to not care about timeouts by the test writer. However, if people feel differently, we can instead do something like expose two new APIs `RequireStartWithContext(ctx)` and `RequireStopWithContext(ctx)` or something (names obviously up for discussion).
uber-go · May 29, 2024 · 9e6f6c2 · 9e6f6c2
1 parent abda254
commit 9e6f6c2
Show file tree

Hide file tree

Showing 2 changed files with 160 additions and 4 deletions.
diff --git a/fxtest/lifecycle.go b/fxtest/lifecycle.go
@@ -66,34 +66,91 @@ func (t *panicT) FailNow() {
 	panic("test lifecycle failed")
 }
 
+// LifecycleOption modifies the behavior of the [Lifecycle]
+// when passed to [NewLifecycle].
+type LifecycleOption interface {
+	apply(*Lifecycle)
+}
+
+// EnforceTimeout will cause the [Lifecycle]'s Start and Stop methods
+// to return an error as soon as context expires,
+// regardless of whether specific hooks respect the timeout.
+func EnforceTimeout(enforce bool) LifecycleOption {
+	return &enforceTimeout{
+		enforce: enforce,
+	}
+}
+
+type enforceTimeout struct {
+	enforce bool
+}
+
+func (e *enforceTimeout) apply(lc *Lifecycle) {
+	lc.enforceTimeout = e.enforce
+}
+
+var _ LifecycleOption = (*enforceTimeout)(nil)
+
 // Lifecycle is a testing spy for fx.Lifecycle. It exposes Start and Stop
 // methods (and some test-specific helpers) so that unit tests can exercise
 // hooks.
 type Lifecycle struct {
 	t  TB
 	lc *lifecycle.Lifecycle
+
+	enforceTimeout bool
 }
 
 var _ fx.Lifecycle = (*Lifecycle)(nil)
 
 // NewLifecycle creates a new test lifecycle.
-func NewLifecycle(t TB) *Lifecycle {
+func NewLifecycle(t TB, opts ...LifecycleOption) *Lifecycle {
 	var w io.Writer
 	if t != nil {
 		w = testutil.WriteSyncer{T: t}
 	} else {
 		w = os.Stderr
 		t = &panicT{W: os.Stderr}
 	}
-	return &Lifecycle{
+	lc := &Lifecycle{
 		lc: lifecycle.New(fxlog.DefaultLogger(w), fxclock.System),
 		t:  t,
 	}
+	for _, opt := range opts {
+		opt.apply(lc)
+	}
+	return lc
+}
+
+func (l *Lifecycle) withTimeout(ctx context.Context, fn func(context.Context) error) error {
+	if !l.enforceTimeout {
+		return fn(ctx)
+	}
+
+	// Cancel on timeout in case function only respects
+	// cancellation and not deadline exceeded.
+	ctx, cancel := context.WithCancel(ctx)
+	defer cancel()
+
+	c := make(chan error, 1) // buffered to avoid goroutine leak
+	go func() {
+		c <- fn(ctx)
+	}()
+
+	var err error
+	select {
+	case err = <-c:
+	case <-ctx.Done():
+		err = ctx.Err()
+	}
+	return err
 }
 
 // Start executes all registered OnStart hooks in order, halting at the first
 // hook that doesn't succeed.
-func (l *Lifecycle) Start(ctx context.Context) error { return l.lc.Start(ctx) }
+func (l *Lifecycle) Start(ctx context.Context) error {
+	return l.withTimeout(ctx, l.lc.Start)
+}
 
 // RequireStart calls Start with context.Background(), failing the test if an
 // error is encountered.
@@ -114,7 +171,9 @@ func (l *Lifecycle) RequireStart() *Lifecycle {
 // If any hook returns an error, execution continues for a best-effort
 // cleanup. Any errors encountered are collected into a single error and
 // returned.
-func (l *Lifecycle) Stop(ctx context.Context) error { return l.lc.Stop(ctx) }
+func (l *Lifecycle) Stop(ctx context.Context) error {
+	return l.withTimeout(ctx, l.lc.Stop)
+}
 
 // RequireStop calls Stop with context.Background(), failing the test if an error
 // is encountered.

diff --git a/fxtest/lifecycle_test.go b/fxtest/lifecycle_test.go
@@ -26,6 +26,7 @@ import (
 	"errors"
 	"fmt"
 	"testing"
+	"time"
 
 	"github.com/stretchr/testify/assert"
 	"github.com/stretchr/testify/require"
@@ -117,6 +118,102 @@ func TestLifecycle(t *testing.T) {
 	})
 }
 
+func TestEnforceTimeout(t *testing.T) {
+	// These tests directly call Start and Stop
+	// rather than RequireStart and RequireStop
+	// because EnforceTimeout does not apply to those.
+
+	t.Run("StartHookTimeout", func(t *testing.T) {
+		t.Parallel()
+
+		wait := make(chan struct{})
+		defer close(wait) // force timeout by blocking OnStart until end of test
+
+		spy := newTB()
+		lc := NewLifecycle(spy, EnforceTimeout(true))
+		lc.Append(fx.Hook{
+			OnStart: func(context.Context) error {
+				<-wait
+				return nil
+			},
+		})
+
+		ctx, cancel := context.WithTimeout(context.Background(), 10*time.Millisecond)
+		defer cancel()
+		assert.ErrorIs(t, lc.Start(ctx), context.DeadlineExceeded)
+		assert.Zero(t, spy.failures)
+	})
+
+	t.Run("StopHookTimeout", func(t *testing.T) {
+		t.Parallel()
+
+		wait := make(chan struct{})
+		defer close(wait) // force timeout by blocking OnStop until end of test
+
+		spy := newTB()
+		lc := NewLifecycle(spy, EnforceTimeout(true))
+		lc.Append(fx.Hook{
+			OnStop: func(context.Context) error {
+				<-wait
+				return nil
+			},
+		})
+
+		require.NoError(t, lc.Start(context.Background()))
+		ctx, cancel := context.WithTimeout(context.Background(), 10*time.Millisecond)
+		defer cancel()
+		assert.ErrorIs(t, lc.Stop(ctx), context.DeadlineExceeded)
+		assert.Zero(t, spy.failures)
+	})
+
+	t.Run("NoTimeout", func(t *testing.T) {
+		t.Parallel()
+
+		var (
+			started bool
+			stopped bool
+		)
+
+		spy := newTB()
+		lc := NewLifecycle(spy, EnforceTimeout(true))
+		lc.Append(fx.Hook{
+			OnStart: func(context.Context) error {
+				started = true
+				return nil
+			},
+			OnStop: func(context.Context) error {
+				stopped = true
+				return nil
+			},
+		})
+
+		ctx, cancel := context.WithTimeout(context.Background(), time.Hour)
+		defer cancel()
+		require.NoError(t, lc.Start(ctx))
+		require.NoError(t, lc.Stop(ctx))
+		assert.True(t, started)
+		assert.True(t, stopped)
+		assert.Zero(t, spy.failures)
+	})
+
+	t.Run("OtherError", func(t *testing.T) {
+		t.Parallel()
+
+		spy := newTB()
+		lc := NewLifecycle(spy, EnforceTimeout(true))
+		lc.Append(fx.Hook{
+			OnStart: func(context.Context) error {
+				return errors.New("NOT a context-related error")
+			},
+		})
+
+		ctx, cancel := context.WithTimeout(context.Background(), time.Hour)
+		defer cancel()
+		assert.ErrorContains(t, lc.Start(ctx), "NOT a context-related error")
+		assert.Zero(t, spy.failures)
+	})
+}
+
 func TestLifecycle_OptionalT(t *testing.T) {
 	t.Parallel()