refactor(disruption): use fixed maxUnavailable (#639)
Some schedulers might decide on a high readyTarget, which means a low number
of occupied rooms relative to the total. This causes the PDB to hold a low
value compared to the total number of rooms. Since the PDB does not filter by
room status, we are still subject to a high number of rooms being evicted,
heavily impacting the system. To mitigate this, Maestro will now set a fixed
value for maxUnavailable, defaulting to 5% when not set on the scheduler. It
can be configured via:

  MAESTRO_SERVICES_SCHEDULERMANAGER_DEFAULTPDBMAXUNAVAILABLE="10%"

The variable accepts integers greater than 0 and percentage strings.
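
For illustration, the validation described above could look roughly like the Go sketch below. This is hypothetical code, not Maestro's actual `ConvertStrToSpec`; it leans on Kubernetes' `intstr.IntOrString`, which is what the PDB spec ultimately consumes:

```go
package pdbexample

import (
	"fmt"
	"strconv"
	"strings"

	"k8s.io/apimachinery/pkg/util/intstr"
)

// parsePdbMaxUnavailable validates a value that must be either an integer
// greater than 0 ("100") or a percentage string ("5%"), mirroring the rule
// stated in this commit message. Illustrative only.
func parsePdbMaxUnavailable(value string) (*intstr.IntOrString, error) {
	if strings.HasSuffix(value, "%") {
		pct, err := strconv.Atoi(strings.TrimSuffix(value, "%"))
		if err != nil || pct <= 0 || pct > 100 {
			return nil, fmt.Errorf("invalid percentage %q", value)
		}
		v := intstr.FromString(value) // percentages stay as strings in the PDB spec
		return &v, nil
	}
	n, err := strconv.Atoi(value)
	if err != nil || n <= 0 {
		return nil, fmt.Errorf("expected an integer greater than 0 or a percentage, got %q", value)
	}
	v := intstr.FromInt(n) // absolute number of rooms
	return &v, nil
}
```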
hspedro authored Oct 7, 2024
1 parent 3af43b2 commit 4fe0422
Showing 41 changed files with 656 additions and 649 deletions.
1 change: 1 addition & 0 deletions cmd/managementapi/wire.go
@@ -53,6 +53,7 @@ func initializeManagementMux(ctx context.Context, conf config.Config) (*runtime.
providers.ProvideDefinitionConstructors,

// services
service.NewSchedulerManagerConfig,
service.NewSchedulerManager,
service.NewOperationManager,

6 changes: 5 additions & 1 deletion cmd/managementapi/wire_gen.go

Some generated files are not rendered by default.

13 changes: 2 additions & 11 deletions cmd/runtimewatcher/wire.go
@@ -31,7 +31,6 @@ import (
"github.com/topfreegames/maestro/internal/core/services/events"
"github.com/topfreegames/maestro/internal/core/services/workers"
"github.com/topfreegames/maestro/internal/core/worker"
workerconfigs "github.com/topfreegames/maestro/internal/core/worker/config"
"github.com/topfreegames/maestro/internal/core/worker/runtimewatcher"
"github.com/topfreegames/maestro/internal/service"
)
@@ -43,24 +42,16 @@ func provideRuntimeWatcherBuilder() *worker.WorkerBuilder {
}
}

func provideRuntimeWatcherConfig(c config.Config) *workerconfigs.RuntimeWatcherConfig {
return &workerconfigs.RuntimeWatcherConfig{
DisruptionWorkerIntervalSeconds: c.GetDuration("runtimeWatcher.disruptionWorker.intervalSeconds"),
DisruptionSafetyPercentage: c.GetFloat64("runtimeWatcher.disruptionWorker.safetyPercentage"),
}
}

var WorkerOptionsSet = wire.NewSet(
service.NewRuntimeKubernetes,
service.NewRoomStorageRedis,
RoomManagerSet,
provideRuntimeWatcherConfig,
wire.Struct(new(worker.WorkerOptions), "Runtime", "RoomStorage", "RoomManager", "RuntimeWatcherConfig"))
wire.Struct(new(worker.WorkerOptions), "RoomManager", "Runtime"))

var RoomManagerSet = wire.NewSet(
service.NewSchedulerStoragePg,
service.NewClockTime,
service.NewPortAllocatorRandom,
service.NewRoomStorageRedis,
service.NewGameRoomInstanceStorageRedis,
service.NewSchedulerCacheRedis,
service.NewRoomManagerConfig,
28 changes: 8 additions & 20 deletions cmd/runtimewatcher/wire_gen.go

Some generated files are not rendered by default.

1 change: 1 addition & 0 deletions cmd/worker/wire.go
@@ -70,6 +70,7 @@ func initializeWorker(c config.Config, builder *worker.WorkerBuilder) (*workerss
worker.ProvideWorkerOptions,
workersservice.NewWorkersManager,
service.NewSchedulerManager,
service.NewSchedulerManagerConfig,
)

return &workersservice.WorkersManager{}, nil
6 changes: 5 additions & 1 deletion cmd/worker/wire_gen.go

Some generated files are not rendered by default.

2 changes: 2 additions & 0 deletions config/config.yaml
@@ -74,6 +74,8 @@ operations:
limit: 1000

services:
schedulerManager:
defaultPdbMaxUnavailable: "5%"
roomManager:
roomPingTimeoutMillis: 240000
roomInitializationTimeoutMillis: 120000
46 changes: 44 additions & 2 deletions docs/reference/Scheduler.md
@@ -203,7 +203,8 @@ autoscaling:
"metadata": {}
}
}
]
],
"pdbMaxUnavailable": "5%",
"autoscaling": {
"enabled": true,
"min": 10,
@@ -234,6 +235,7 @@ forwarders: Forwarders
autoscaling: Autoscaling
spec: Spec
annotation: Map
pdbMaxUnavailable: String
```
- **Name**: Scheduler name. This name is unique and will be the same name used for the kubernetes namespace. It's
@@ -254,6 +256,7 @@ annotation: Map
used by them, limits and images. More info [here](#spec).
- **annotations**: Allows annotations for the scheduler's game room. Know more about annotations on
Kubernetes [here](https://kubernetes.io/docs/concepts/overview/working-with-objects/annotations)
- **pdbMaxUnavailable**: Defines the disruption budget for game rooms. Optional; defaults to 5%. The value can be a string representing a percentage between 0 and 100, e.g. "15%", or a raw number of rooms, e.g. "100".
### PortRange
The **PortRange** is used to select a random port for a GRU between **start** and **end**.
@@ -403,4 +406,43 @@ It is represented as:
- **name**: Name of the port. Facilitates on recognition;
- **protocol**: Port protocol. Can be UDP, TCP or SCTP.;
- **port**: The port exposed.
- **hostPortRange**: The [port range](#portrange) for the port to be allocated in the host. Mutually exclusive with the port range configured in the root structure.

#### PDB Max Unavailable

A string value that defines the disruption budget of Game Rooms for a specific scheduler.
Maestro will create a [PDB Resource](https://kubernetes.io/docs/tasks/run-application/configure-pdb/)
to prevent evictions from drastically impacting the availability of the Game Rooms.

By default this value is set to 5%, so at worst the runtime can evict 5% of the pods. There is no
way to control which pods will be evicted (no preference for ready, pending, etc.).
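
For illustration, a scheduler named `my-scheduler` that keeps the default budget would end up with a PDB resource roughly like the following. This is a sketch based on the selector and namespace this commit's `createPDBFromScheduler` uses; the exact metadata may differ:

```yaml
apiVersion: policy/v1
kind: PodDisruptionBudget
metadata:
  name: my-scheduler
  namespace: my-scheduler
spec:
  maxUnavailable: "5%"
  selector:
    matchLabels:
      maestro-scheduler: my-scheduler
```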

The configuration can be specified with the following order of precedence (a resolution sketch follows the list):

1. Value specified in Scheduler's definition

```json
{
"pdbMaxUnavailable": "10%"
}
```

2. Value specified in the ENV VAR:

```shell
MAESTRO_SERVICES_SCHEDULERMANAGER_DEFAULTPDBMAXUNAVAILABLE="10%"
```

3. Value specified in the [config.yaml](../../config/config.yaml):

```yaml
services:
schedulerManager:
defaultPdbMaxUnavailable: "5%"
```

4. Value specified in [code](../../internal/core/entities/pdb/pdb.go) that defaults to 5%:

```go
const DefaultPdbMaxUnavailablePercentage = "5%"
```
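
Putting the levels together, the effective value is resolved with a simple fallback chain, as in this hypothetical Go sketch (names are illustrative, not Maestro's actual code):

```go
package pdbexample

// DefaultPdbMaxUnavailablePercentage mirrors the code-level default (level 4).
const DefaultPdbMaxUnavailablePercentage = "5%"

// resolvePdbMaxUnavailable applies the precedence above: the scheduler's own
// definition wins, then the configured default (the env var and config.yaml
// values, levels 2 and 3, are merged by the config loader), then the
// hard-coded constant.
func resolvePdbMaxUnavailable(schedulerValue, configuredDefault string) string {
	if schedulerValue != "" {
		return schedulerValue // level 1: Scheduler definition
	}
	if configuredDefault != "" {
		return configuredDefault // levels 2 and 3: env var / config.yaml
	}
	return DefaultPdbMaxUnavailablePercentage // level 4: code default
}
```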
64 changes: 8 additions & 56 deletions internal/adapters/runtime/kubernetes/scheduler.go
@@ -27,19 +27,18 @@ import (
"strconv"

"github.com/topfreegames/maestro/internal/core/entities"
pdbEntity "github.com/topfreegames/maestro/internal/core/entities/pdb"
"github.com/topfreegames/maestro/internal/core/ports/errors"
"go.uber.org/zap"
v1 "k8s.io/api/core/v1"
v1Policy "k8s.io/api/policy/v1"
kerrors "k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/util/intstr"
)

const (
DefaultDisruptionSafetyPercentage float64 = 0.05
MajorKubeVersionPDB int = 1
MinorKubeVersionPDB int = 21
MajorKubeVersionPDB int = 1
MinorKubeVersionPDB int = 21
)

func (k *kubernetes) isPDBSupported() bool {
@@ -96,10 +95,7 @@ func (k *kubernetes) createPDBFromScheduler(ctx context.Context, scheduler *enti
},
},
Spec: v1Policy.PodDisruptionBudgetSpec{
MinAvailable: &intstr.IntOrString{
Type: intstr.Int,
IntVal: int32(0),
},
MaxUnavailable: pdbEntity.ConvertStrToSpec(scheduler.PdbMaxUnavailable),
Selector: &metav1.LabelSelector{
MatchLabels: map[string]string{
"maestro-scheduler": scheduler.Name,
@@ -173,29 +169,8 @@ func (k *kubernetes) DeleteScheduler(ctx context.Context, scheduler *entities.Sc
return nil
}

func (k *kubernetes) MitigateDisruption(
ctx context.Context,
scheduler *entities.Scheduler,
roomAmount int,
safetyPercentage float64,
) error {
if scheduler == nil {
return errors.NewErrInvalidArgument("empty pointer received for scheduler, can not mitigate disruptions")
}

incSafetyPercentage := 1.0
if safetyPercentage < DefaultDisruptionSafetyPercentage {
k.logger.Warn(
"invalid safety percentage, using default percentage",
zap.Float64("safetyPercentage", safetyPercentage),
zap.Float64("DefaultDisruptionSafetyPercentage", DefaultDisruptionSafetyPercentage),
)
safetyPercentage = DefaultDisruptionSafetyPercentage
}
incSafetyPercentage += safetyPercentage

// For kubernetes mitigating disruptions means updating the current PDB
// minAvailable to the number of occupied rooms if above a threshold
func (k *kubernetes) UpdateScheduler(ctx context.Context, scheduler *entities.Scheduler) error {
// Check if PDB exists, if not, create it
pdb, err := k.clientSet.PolicyV1().PodDisruptionBudgets(scheduler.Name).Get(ctx, scheduler.Name, metav1.GetOptions{})
if err != nil && !kerrors.IsNotFound(err) {
// Non-recoverable errors
@@ -209,31 +184,8 @@ }
}
}

var currentPdbMinAvailable int32
// PDB might exist and is based on MaxUnavailable
if pdb.Spec.MinAvailable != nil {
currentPdbMinAvailable = pdb.Spec.MinAvailable.IntVal
}

if currentPdbMinAvailable == int32(float64(roomAmount)*incSafetyPercentage) {
return nil
}

// In theory, the PDB object can be changed in the runtime in the meantime after
// fetching initial state/ask for creation (beginning of the function) and before
// updating the value. This should never happen in production because there is only
// one agent setting this PDB in the namespace and it's the worker. However, on tests
// we were seeing intermittent failures running parallel cases, hence this
// code: it is safer to re-fetch the PDB object before updating it
pdb, err = k.clientSet.PolicyV1().PodDisruptionBudgets(scheduler.Name).Get(ctx, scheduler.Name, metav1.GetOptions{})
if err != nil || pdb == nil {
return errors.NewErrUnexpected("non recoverable error when getting PDB for scheduler '%s': %s", scheduler.Name, err)
}
pdb.Spec = v1Policy.PodDisruptionBudgetSpec{
MinAvailable: &intstr.IntOrString{
Type: intstr.Int,
IntVal: int32(float64(roomAmount) * incSafetyPercentage),
},
MaxUnavailable: pdbEntity.ConvertStrToSpec(scheduler.PdbMaxUnavailable),
Selector: &metav1.LabelSelector{
MatchLabels: map[string]string{
"maestro-scheduler": scheduler.Name,
Expand All @@ -243,7 +195,7 @@ func (k *kubernetes) MitigateDisruption(

_, err = k.clientSet.PolicyV1().PodDisruptionBudgets(scheduler.Name).Update(ctx, pdb, metav1.UpdateOptions{})
if err != nil {
return errors.NewErrUnexpected("error updating PDB to mitigate disruptions for scheduler '%s': %s", scheduler.Name, err)
return errors.NewErrUnexpected("error updating PDB for scheduler '%s': %s", scheduler.Name, err)
}

return nil