Skip to content

Commit

Permalink
Improve operations logging and fix tests (#620)
Browse files Browse the repository at this point in the history
* tests(metrics_reporter): fix parallel tests using diff schedulers
* test(runtime): fix sleep before getting pdb
* chore(docker): project name in docker-compose file
* chore(e2e): update mod dependencies
* refactor(logs): debug lease op logs
* refactor(logs): add sched version and autoscale info
* chore: prune old name for healthcontroller interval config
  • Loading branch information
hspedro authored May 27, 2024
1 parent bb118c8 commit 6a349f1
Show file tree
Hide file tree
Showing 11 changed files with 37 additions and 28 deletions.
14 changes: 7 additions & 7 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -120,25 +120,25 @@ migrate: ## Execute migration.
.PHONY: deps/up
deps/up: ## Create containers dependencies.
@echo "Creating dependencies "
@docker-compose --project-name maestro up -d
@docker-compose up -d --build
@echo "Dependencies created successfully."

.PHONY: deps/start
deps/start: ## Start containers dependencies.
@echo "Starting dependencies "
@docker-compose --project-name maestro start
@docker-compose start
@echo "Dependencies started successfully."

.PHONY: deps/stop
deps/stop: ## Stop containers dependencies.
@echo "Stopping dependencies "
@docker-compose --project-name maestro stop
@docker-compose stop
@echo "Dependencies stopped successfully."

.PHONY: deps/down
deps/down: ## Delete containers dependencies.
@echo "Deleting dependencies "
@docker-compose --project-name maestro down
@docker-compose down
@echo "Dependencies deleted successfully."


Expand All @@ -148,13 +148,13 @@ deps/down: ## Delete containers dependencies.
.PHONY: maestro/start
maestro/start: build-linux-x86_64 ## Start Maestro with all of its dependencies.
@echo "Starting maestro..."
@cd ./e2e/framework/maestro; docker-compose --project-name maestro up --build -d
@cd ./e2e/framework/maestro; docker-compose up --build -d
@MAESTRO_MIGRATION_PATH="file://internal/service/migrations" go run main.go migrate;
@cd ./e2e/framework/maestro; docker-compose --project-name maestro up --build -d worker runtime-watcher #Worker and watcher do not work before migration, so we start them after it.
@cd ./e2e/framework/maestro; docker-compose up --build -d worker runtime-watcher #Worker and watcher do not work before migration, so we start them after it.
@echo "Maestro is up and running!"

.PHONY: maestro/down
maestro/down: ## Delete Maestro and all of its dependencies.
@echo "Deleting maestro..."
@cd ./e2e/framework/maestro; docker-compose --project-name maestro down
@cd ./e2e/framework/maestro; docker-compose down
@echo "Maestro was deleted with success!"
2 changes: 1 addition & 1 deletion docker-compose.yaml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# to run define K3S_TOKEN, K3S_VERSION is optional, eg:
# K3S_TOKEN=${RANDOM}${RANDOM}${RANDOM} docker-compose up

version: '3'
name: maestro
services:
k3s_server:
image: "rancher/k3s:v1.19.11-k3s1"
Expand Down
2 changes: 1 addition & 1 deletion docs/tutorials/Autoscaling.md
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ If autoscaling is configured **and** enabled, it will use the configured autosca
decide_operation -- desired < actual --> remove_rooms_operation --> finish;
```

Currently, the sync interval is configured by environment variable `MAESTRO_WORKERS_OPERATIONEXECUTION_HEALTHCONTROLLERINTERVAL`.
Currently, the sync interval is configured by environment variable `MAESTRO_WORKERS_HEALTHCONTROLLERINTERVAL`.

> By default, the scheduler does not have autoscaling configured.
Expand Down
4 changes: 1 addition & 3 deletions e2e/framework/maestro/docker-compose.yml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
version: '3.8'
name: maestro
services:

management-api:
build:
context: ../../..
Expand Down Expand Up @@ -76,7 +76,6 @@ services:
- MAESTRO_ADAPTERS_RUNTIME_KUBERNETES_KUBECONFIG=/kubeconfig/kubeconfig.yaml
- MAESTRO_ADAPTERS_RUNTIME_KUBERNETES_MASTERURL=https://k3s_server:6443
- MAESTRO_ADAPTERS_SCHEDULERCACHE_REDIS_URL=redis://redis:6379/0
- MAESTRO_WORKERS_OPERATIONEXECUTION_HEALTHCONTROLLERINTERVAL=5s
- MAESTRO_INTERNALAPI_PORT=8051
- MAESTRO_WORKERS_HEALTHCONTROLLERINTERVAL=5s
- MAESTRO_WORKERS_STORAGECLENUPINTERVAL=5s
Expand Down Expand Up @@ -109,7 +108,6 @@ services:
volumes:
# This is just so that we get the kubeconfig file out
- ../../../kubeconfig:/output

ports:
- "6443:6443" # Kubernetes API Server

Expand Down
14 changes: 7 additions & 7 deletions e2e/go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ require (
github.com/testcontainers/testcontainers-go v0.11.1
github.com/topfreegames/maestro v0.0.0-00010101000000-000000000000
go.uber.org/zap v1.18.1
google.golang.org/protobuf v1.30.0
google.golang.org/protobuf v1.31.0
k8s.io/api v0.23.0
k8s.io/apimachinery v0.23.0
k8s.io/apiserver v0.22.1
Expand Down Expand Up @@ -70,13 +70,13 @@ require (
go.opencensus.io v0.24.0 // indirect
go.uber.org/atomic v1.7.0 // indirect
go.uber.org/multierr v1.7.0 // indirect
golang.org/x/crypto v0.9.0 // indirect
golang.org/x/net v0.10.0 // indirect
golang.org/x/crypto v0.18.0 // indirect
golang.org/x/net v0.20.0 // indirect
golang.org/x/oauth2 v0.6.0 // indirect
golang.org/x/sync v0.1.0 // indirect
golang.org/x/sys v0.8.0 // indirect
golang.org/x/term v0.8.0 // indirect
golang.org/x/text v0.9.0 // indirect
golang.org/x/sync v0.6.0 // indirect
golang.org/x/sys v0.16.0 // indirect
golang.org/x/term v0.16.0 // indirect
golang.org/x/text v0.14.0 // indirect
golang.org/x/time v0.0.0-20210723032227-1f47c861a9ac // indirect
google.golang.org/appengine v1.6.7 // indirect
google.golang.org/genproto v0.0.0-20230306155012-7f2fa6fef1f4 // indirect
Expand Down
7 changes: 7 additions & 0 deletions e2e/go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -828,6 +828,7 @@ golang.org/x/crypto v0.0.0-20210711020723-a769d52b0f97/go.mod h1:GvvjBRRGRdwPK5y
golang.org/x/crypto v0.0.0-20210817164053-32db794688a5/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc=
golang.org/x/crypto v0.9.0 h1:LF6fAI+IutBocDJ2OT0Q1g8plpYljMZ4+lty+dsqw3g=
golang.org/x/crypto v0.9.0/go.mod h1:yrmDGqONDYtNj3tH8X9dzUun2m2lzPa9ngI6/RUPGR0=
golang.org/x/crypto v0.18.0/go.mod h1:R0j02AL6hcrfOiy9T4ZYp/rcWeMxM3L6QYxlOuEG1mg=
golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA=
golang.org/x/exp v0.0.0-20190306152737-a1d7652674e8/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA=
golang.org/x/exp v0.0.0-20190510132918-efd6b22b2522/go.mod h1:ZjyILWgesfNpC6sMxTJOJm9Kp84zZh5NQWvqDGG3Qr8=
Expand Down Expand Up @@ -918,6 +919,7 @@ golang.org/x/net v0.0.0-20210805182204-aaa1db679c0d/go.mod h1:9nx3DQGgdP8bBQD5qx
golang.org/x/net v0.0.0-20210825183410-e898025ed96a/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y=
golang.org/x/net v0.10.0 h1:X2//UzNDwYmtCLn7To6G58Wr6f5ahEAQgKNzv9Y951M=
golang.org/x/net v0.10.0/go.mod h1:0qNGK6F8kojg2nk9dLZ2mShWaEBan6FAoqfSigmmuDg=
golang.org/x/net v0.20.0/go.mod h1:z8BVo6PvndSri0LbOE3hAn0apkU+1YvI6E70E9jsnvY=
golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U=
golang.org/x/oauth2 v0.0.0-20190226205417-e64efc72b421/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw=
golang.org/x/oauth2 v0.0.0-20190604053449-0f29369cfe45/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw=
Expand Down Expand Up @@ -945,6 +947,7 @@ golang.org/x/sync v0.0.0-20201207232520-09787c993a3a/go.mod h1:RxMgew5VJxzue5/jJ
golang.org/x/sync v0.0.0-20210220032951-036812b2e83c/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.1.0 h1:wsuoTGHzEhffawBOhz5CYhcrV4IdKZbEyZjBMuTp12o=
golang.org/x/sync v0.1.0/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.6.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk=
golang.org/x/sys v0.0.0-20180823144017-11551d06cbcc/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20180830151530-49385e6e1522/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20180905080454-ebe1bf3edb33/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
Expand Down Expand Up @@ -1034,12 +1037,14 @@ golang.org/x/sys v0.0.0-20210831042530-f4d43177bf5e/go.mod h1:oPkhp1MJrh7nUepCBc
golang.org/x/sys v0.0.0-20211107104306-e0b2ad06fe42/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.8.0 h1:EBmGv8NaZBZTWvrbjNoL6HVt+IVy3QDQpJs7VRIw3tU=
golang.org/x/sys v0.8.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.16.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
golang.org/x/term v0.0.0-20201117132131-f5c789dd3221/go.mod h1:Nr5EML6q2oocZ2LXRh80K7BxOlk5/8JxuGnuhpl+muw=
golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
golang.org/x/term v0.0.0-20210220032956-6a3ed077a48d/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
golang.org/x/term v0.0.0-20210615171337-6886f2dfbf5b/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8=
golang.org/x/term v0.8.0 h1:n5xxQn2i3PC0yLAbjTpNT85q/Kgzcr2gIoX9OrJUols=
golang.org/x/term v0.8.0/go.mod h1:xPskH00ivmX89bAKVGSKKtLOWNx2+17Eiy94tnKShWo=
golang.org/x/term v0.16.0/go.mod h1:yn7UURbUtPyrVJPGPq404EukNFxcm/foM+bV/bfcDsY=
golang.org/x/text v0.0.0-20170915032832-14c0d48ead0c/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
golang.org/x/text v0.3.1-0.20180807135948-17ff2d5776d2/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
Expand All @@ -1051,6 +1056,7 @@ golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ=
golang.org/x/text v0.9.0 h1:2sjJmO8cDvYveuX97RDLsxlyUxLl+GHoLxBiRdHllBE=
golang.org/x/text v0.9.0/go.mod h1:e1OnstbJyHTd6l/uOt8jFFHp6TRDWZR/bV3emEE/zU8=
golang.org/x/text v0.14.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU=
golang.org/x/time v0.0.0-20180412165947-fbb02b2291d2/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ=
golang.org/x/time v0.0.0-20181108054448-85acf8d2951c/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ=
golang.org/x/time v0.0.0-20190308202827-9d24e82272b4/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ=
Expand Down Expand Up @@ -1244,6 +1250,7 @@ google.golang.org/protobuf v1.26.0/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQ
google.golang.org/protobuf v1.27.1/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQnmE0givc=
google.golang.org/protobuf v1.30.0 h1:kPPoIgf3TsEvrm0PFe15JQ+570QVxYzEvvHqChK+cng=
google.golang.org/protobuf v1.30.0/go.mod h1:HV8QOd/L58Z+nl8r43ehVNZIU/HEI6OcFqwMG9pJV4I=
google.golang.org/protobuf v1.31.0/go.mod h1:HV8QOd/L58Z+nl8r43ehVNZIU/HEI6OcFqwMG9pJV4I=
gopkg.in/airbrake/gobrake.v2 v2.0.9/go.mod h1:/h5ZAUhDkGaJfjzjKLSjv6zCL6O0LLBxU4K+aSYdM/U=
gopkg.in/alecthomas/kingpin.v2 v2.2.6/go.mod h1:FMv+mEhP44yOT+4EoQTLFTRgOQ1FBLkstjWtayDeSgw=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
Expand Down
2 changes: 1 addition & 1 deletion internal/adapters/runtime/kubernetes/scheduler_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -310,13 +310,13 @@ func TestMitigateDisruption(t *testing.T) {
}
}()

time.Sleep(time.Millisecond * 100)
pdb, err := client.PolicyV1().PodDisruptionBudgets(scheduler.Name).Get(ctx, scheduler.Name, metav1.GetOptions{})
require.NoError(t, err)
require.NotNil(t, pdb)
require.Equal(t, pdb.Name, scheduler.Name)
require.Equal(t, pdb.Spec.MinAvailable.IntVal, int32(0))

time.Sleep(time.Millisecond * 100)
newValue := 100
err = kubernetesRuntime.MitigateDisruption(ctx, scheduler, newValue, 0.0)
require.NoError(t, err)
Expand Down
1 change: 1 addition & 0 deletions internal/core/logs/logs.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ package logs

const (
LogFieldSchedulerName = "scheduler_name"
LogFieldSchedulerVersion = "scheduler_version"
LogFieldGame = "game"
LogFieldInstanceID = "instance_id"
LogFieldRoomID = "room_id"
Expand Down
3 changes: 3 additions & 0 deletions internal/core/operations/healthcontroller/executor.go
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,8 @@ func (ex *Executor) Execute(ctx context.Context, op *operation.Operation, defini
return err
}

logger = logger.With(zap.String(logs.LogFieldSchedulerVersion, scheduler.Spec.Version))

nonexistentGameRoomsIDs, existentGameRoomsInstancesMap := ex.mapExistentAndNonExistentGameRooms(gameRoomIDs, instances)
if len(nonexistentGameRoomsIDs) > 0 {
logger.Error("found registered rooms that no longer exists")
Expand Down Expand Up @@ -172,6 +174,7 @@ func (ex *Executor) ensureDesiredAmountOfInstances(ctx context.Context, op *oper
var msgToAppend string
var tookAction bool

logger = logger.With(zap.Int("actual", actualAmount), zap.Int("desired", desiredAmount))
switch {
case actualAmount > desiredAmount: // Need to scale down
can, msg := ex.canPerformDownscale(ctx, scheduler, logger)
Expand Down
14 changes: 7 additions & 7 deletions internal/core/services/operations/operation_manager.go
Original file line number Diff line number Diff line change
Expand Up @@ -272,32 +272,32 @@ func (om *OperationManager) WatchOperationCancellationRequests(ctx context.Conte

func (om *OperationManager) GrantLease(ctx context.Context, operation *operation.Operation) error {
managerLogger := om.Logger.With(zap.String(logs.LogFieldOperationID, operation.ID), zap.String(logs.LogFieldSchedulerName, operation.SchedulerName))
managerLogger.Info("granting lease - start")
managerLogger.Debug("granting lease - start")
err := om.LeaseStorage.GrantLease(ctx, operation.SchedulerName, operation.ID, om.Config.OperationLeaseTtl)
if err != nil {
return fmt.Errorf("failed to grant lease to operation: %w", err)
}

managerLogger.Info("granting lease - succeed")
managerLogger.Debug("granting lease - succeed")
return nil
}

func (om *OperationManager) RevokeLease(ctx context.Context, operation *operation.Operation) error {
managerLogger := om.Logger.With(zap.String(logs.LogFieldOperationID, operation.ID), zap.String(logs.LogFieldSchedulerName, operation.SchedulerName))
managerLogger.Info("revoking lease - start")
managerLogger.Debug("revoking lease - start")
err := om.LeaseStorage.RevokeLease(ctx, operation.SchedulerName, operation.ID)
if err != nil {
return fmt.Errorf("failed to revoke lease to operation: %w", err)
}

managerLogger.Info("revoking lease - succeed")
managerLogger.Debug("revoking lease - succeed")
return nil
}

func (om *OperationManager) StartLeaseRenewGoRoutine(operationCtx context.Context, op *operation.Operation) {
go func() {
managerLogger := om.Logger.With(zap.String(logs.LogFieldOperationID, op.ID), zap.String(logs.LogFieldSchedulerName, op.SchedulerName))
managerLogger.Info("starting operation lease renew go routine")
managerLogger.Debug("starting operation lease renew go routine")

ticker := time.NewTicker(om.Config.OperationLeaseTtl)
defer ticker.Stop()
Expand All @@ -308,7 +308,7 @@ func (om *OperationManager) StartLeaseRenewGoRoutine(operationCtx context.Contex
case <-ticker.C:
if op.Status == operation.StatusFinished || op.Status == operation.StatusError {
status, _ := op.Status.String()
managerLogger.Sugar().Infof("finish operation lease renew go routine since operation got status %v", status)
managerLogger.Sugar().Debugf("finish operation lease renew go routine since operation got status %v", status)
break renewLeaseLoop
}

Expand All @@ -318,7 +318,7 @@ func (om *OperationManager) StartLeaseRenewGoRoutine(operationCtx context.Contex
}
case <-operationCtx.Done():
if goerrors.Is(operationCtx.Err(), context.Canceled) {
managerLogger.Info("finish operation lease renew go routine since operation is canceled")
managerLogger.Debug("finish operation lease renew go routine since operation is canceled")
} else {
managerLogger.With(zap.Error(operationCtx.Err())).Error("loop to renew operation lease received an error context event")
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,7 @@ func TestMetricsReporterWorker_StartDoNotProduceMetrics(t *testing.T) {
instanceStorage := mock.NewMockGameRoomInstanceStorage(mockCtl)
ctx, cancelFunc := context.WithCancel(context.Background())

scheduler := &entities.Scheduler{Name: "random-scheduler"}
scheduler := &entities.Scheduler{Name: "random-scheduler-2"}

workerOpts := &worker.WorkerOptions{
RoomStorage: roomStorage,
Expand Down

0 comments on commit 6a349f1

Please sign in to comment.