Skip to content

Commit

Permalink
allow jump among steps
Browse files Browse the repository at this point in the history
Signed-off-by: yunbo <[email protected]>
  • Loading branch information
Funinu committed May 28, 2024
1 parent 36d5d43 commit f45aa60
Show file tree
Hide file tree
Showing 10 changed files with 196 additions and 67 deletions.
4 changes: 2 additions & 2 deletions lua_configuration/convert_test_case_to_lua_object.go
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,7 @@ func objectToTable(path string) error {
rollout := testCase.Rollout
trafficRouting := testCase.TrafficRouting
if rollout != nil {
steps := rollout.Spec.Strategy.Canary.Steps
steps := rollout.Spec.Strategy.GetSteps()
for i, step := range steps {
var weight *int32
if step.TrafficRoutingStrategy.Traffic != nil {
Expand All @@ -92,7 +92,7 @@ func objectToTable(path string) error {
weight = utilpointer.Int32(-1)
}
var canaryService string
stableService := rollout.Spec.Strategy.Canary.TrafficRoutings[0].Service
stableService := rollout.Spec.Strategy.GetTrafficRouting()[0].Service
canaryService = fmt.Sprintf("%s-canary", stableService)
data := &custom.LuaData{
Data: custom.Data{
Expand Down
8 changes: 4 additions & 4 deletions pkg/controller/batchrelease/context/context.go
Original file line number Diff line number Diff line change
Expand Up @@ -73,20 +73,20 @@ func (bc *BatchContext) Log() string {
// IsBatchReady return nil if the batch is ready
func (bc *BatchContext) IsBatchReady() error {
if bc.UpdatedReplicas < bc.DesiredUpdatedReplicas {
return fmt.Errorf("current batch not ready: updated replicas not satified")
return fmt.Errorf("current batch not ready: updated replicas not satisfied, UpdatedReplicas %d < DesiredUpdatedReplicas %d", bc.UpdatedReplicas, bc.DesiredUpdatedReplicas)
}

unavailableToleration := allowedUnavailable(bc.FailureThreshold, bc.UpdatedReplicas)
if unavailableToleration+bc.UpdatedReadyReplicas < bc.DesiredUpdatedReplicas {
return fmt.Errorf("current batch not ready: updated ready replicas not satified")
return fmt.Errorf("current batch not ready: updated ready replicas not satisfied, allowedUnavailable + UpdatedReadyReplicas %d < DesiredUpdatedReplicas %d", unavailableToleration+bc.UpdatedReadyReplicas, bc.DesiredUpdatedReplicas)
}

if bc.DesiredUpdatedReplicas > 0 && bc.UpdatedReadyReplicas == 0 {
return fmt.Errorf("current batch not ready: no updated ready replicas")
return fmt.Errorf("current batch not ready: no updated ready replicas, DesiredUpdatedReplicas %d > 0 and UpdatedReadyReplicas %d = 0", bc.DesiredUpdatedReplicas, bc.UpdatedReadyReplicas)
}

if !batchLabelSatisfied(bc.Pods, bc.RolloutID, bc.PlannedUpdatedReplicas) {
return fmt.Errorf("current batch not ready: pods with batch label not satified")
return fmt.Errorf("current batch not ready: pods with batch label not satisfied, RolloutID %s, PlannedUpdatedReplicas %d", bc.RolloutID, bc.PlannedUpdatedReplicas)
}
return nil
}
Expand Down
59 changes: 52 additions & 7 deletions pkg/controller/rollout/rollout_canary.go
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,13 @@ func (m *canaryReleaseManager) runCanary(c *RolloutContext) error {
}
}
switch canaryStatus.CurrentStepState {
// before CanaryStepStateUpgrade, handle some special cases, to prevent traffic loss
case v1beta1.CanaryStepStateInit:

Check warning on line 91 in pkg/controller/rollout/rollout_canary.go

View check run for this annotation

Codecov / codecov/patch

pkg/controller/rollout/rollout_canary.go#L91

Added line #L91 was not covered by tests
// placeholder for the later traffic modification Pull Request
canaryStatus.NextStepIndex = util.NextBatchIndex(c.Rollout, canaryStatus.CurrentStepIndex)
canaryStatus.CurrentStepState = v1beta1.CanaryStepStateUpgrade
fallthrough

Check warning on line 95 in pkg/controller/rollout/rollout_canary.go

View check run for this annotation

Codecov / codecov/patch

pkg/controller/rollout/rollout_canary.go#L93-L95

Added lines #L93 - L95 were not covered by tests

case v1beta1.CanaryStepStateUpgrade:
klog.Infof("rollout(%s/%s) run canary strategy, and state(%s)", c.Rollout.Namespace, c.Rollout.Name, v1beta1.CanaryStepStateUpgrade)
done, err := m.doCanaryUpgrade(c)
Expand Down Expand Up @@ -144,7 +151,8 @@ func (m *canaryReleaseManager) runCanary(c *RolloutContext) error {
if len(c.Rollout.Spec.Strategy.Canary.Steps) > int(canaryStatus.CurrentStepIndex) {
canaryStatus.LastUpdateTime = &metav1.Time{Time: time.Now()}
canaryStatus.CurrentStepIndex++
canaryStatus.CurrentStepState = v1beta1.CanaryStepStateUpgrade
canaryStatus.NextStepIndex = util.NextBatchIndex(c.Rollout, canaryStatus.CurrentStepIndex)
canaryStatus.CurrentStepState = v1beta1.CanaryStepStateInit

Check warning on line 155 in pkg/controller/rollout/rollout_canary.go

View check run for this annotation

Codecov / codecov/patch

pkg/controller/rollout/rollout_canary.go#L154-L155

Added lines #L154 - L155 were not covered by tests
klog.Infof("rollout(%s/%s) canary step from(%d) -> to(%d)", c.Rollout.Namespace, c.Rollout.Name, canaryStatus.CurrentStepIndex-1, canaryStatus.CurrentStepIndex)
} else {
klog.Infof("rollout(%s/%s) canary run all steps, and completed", c.Rollout.Namespace, c.Rollout.Name)
Expand Down Expand Up @@ -201,15 +209,12 @@ func (m *canaryReleaseManager) doCanaryMetricsAnalysis(c *RolloutContext) (bool,
}

func (m *canaryReleaseManager) doCanaryPaused(c *RolloutContext) (bool, error) {
if m.doCanaryJump(c) {
return false, nil

Check warning on line 213 in pkg/controller/rollout/rollout_canary.go

View check run for this annotation

Codecov / codecov/patch

pkg/controller/rollout/rollout_canary.go#L213

Added line #L213 was not covered by tests
}
canaryStatus := c.NewStatus.CanaryStatus
currentStep := c.Rollout.Spec.Strategy.Canary.Steps[canaryStatus.CurrentStepIndex-1]
steps := len(c.Rollout.Spec.Strategy.Canary.Steps)
// If it is the last step, and 100% of pods, then return true
if int32(steps) == canaryStatus.CurrentStepIndex {
if currentStep.Replicas != nil && currentStep.Replicas.StrVal == "100%" {
return true, nil
}
}
cond := util.GetRolloutCondition(*c.NewStatus, v1beta1.RolloutConditionProgressing)
// need manual confirmation
if currentStep.Pause.Duration == nil {
Expand All @@ -232,6 +237,46 @@ func (m *canaryReleaseManager) doCanaryPaused(c *RolloutContext) (bool, error) {
return false, nil
}

func (m *canaryReleaseManager) doCanaryJump(c *RolloutContext) (jumped bool) {
canaryStatus := c.NewStatus.CanaryStatus
currentStep := c.Rollout.Spec.Strategy.Canary.Steps[canaryStatus.CurrentStepIndex-1]
nextIndex := canaryStatus.NextStepIndex
if nextIndex != util.NextBatchIndex(c.Rollout, canaryStatus.CurrentStepIndex) && nextIndex != 0 || nextIndex < 0 {
/*
* The NextStepIndex should typically be assigned a non-negative integer.
* However, assigning it a negative value acts as a sentinel to trigger an APPROVE.
* For instance, if currentStepIndex is 2 and nextStepIndex is 3,
* directly jumping to step 3 by patching nextStepIndex should not be possible since the value remains unchanged.
* By permitting the assignment of a negative value, such as -1, we can jump to step 3, which essentially
* means to approve current batch.
* Might be useful someday in the future.
*/
if nextIndex < 0 {
nextIndex = util.NextBatchIndex(c.Rollout, canaryStatus.CurrentStepIndex)

Check warning on line 255 in pkg/controller/rollout/rollout_canary.go

View check run for this annotation

Codecov / codecov/patch

pkg/controller/rollout/rollout_canary.go#L254-L255

Added lines #L254 - L255 were not covered by tests
}
currentIndexBackup := canaryStatus.CurrentStepIndex
canaryStatus.CurrentStepIndex = nextIndex
canaryStatus.NextStepIndex = util.NextBatchIndex(c.Rollout, nextIndex)
nextStep := c.Rollout.Spec.Strategy.Canary.Steps[nextIndex-1]

Check warning on line 260 in pkg/controller/rollout/rollout_canary.go

View check run for this annotation

Codecov / codecov/patch

pkg/controller/rollout/rollout_canary.go#L257-L260

Added lines #L257 - L260 were not covered by tests
// if the Replicas between currentStep and nextStep is same, we can jump to
// the TrafficRouting step; otherwise, we should start from the Init step
if reflect.DeepEqual(nextStep.Replicas, currentStep.Replicas) {
canaryStatus.LastUpdateTime = &metav1.Time{Time: time.Now()}
canaryStatus.CurrentStepState = v1beta1.CanaryStepStateTrafficRouting
klog.Infof("rollout(%s/%s) step(%d) state from(%s) -> to(%s)", c.Rollout.Namespace, c.Rollout.Name,
canaryStatus.CurrentStepIndex, v1beta1.CanaryStepStatePaused, canaryStatus.CurrentStepState)
} else {
canaryStatus.LastUpdateTime = &metav1.Time{Time: time.Now()}
canaryStatus.CurrentStepState = v1beta1.CanaryStepStateInit
klog.Infof("rollout(%s/%s) step(%d) state from(%s) -> to(%s)", c.Rollout.Namespace, c.Rollout.Name,
canaryStatus.CurrentStepIndex, v1beta1.CanaryStepStatePaused, v1beta1.CanaryStepStateInit)

Check warning on line 272 in pkg/controller/rollout/rollout_canary.go

View check run for this annotation

Codecov / codecov/patch

pkg/controller/rollout/rollout_canary.go#L263-L272

Added lines #L263 - L272 were not covered by tests
}
klog.Infof("rollout(%s/%s) canary step from(%d) -> to(%d)", c.Rollout.Namespace, c.Rollout.Name, currentIndexBackup, canaryStatus.CurrentStepIndex)
return true

Check warning on line 275 in pkg/controller/rollout/rollout_canary.go

View check run for this annotation

Codecov / codecov/patch

pkg/controller/rollout/rollout_canary.go#L274-L275

Added lines #L274 - L275 were not covered by tests
}
return false
}

// cleanup after rollout is completed or finished
func (m *canaryReleaseManager) doCanaryFinalising(c *RolloutContext) (bool, error) {
// when CanaryStatus is nil, which means canary action hasn't started yet, don't need doing cleanup
Expand Down
Loading

0 comments on commit f45aa60

Please sign in to comment.