Skip to content

Commit

Permalink
feat: retries and delays before checks
Browse files Browse the repository at this point in the history
  • Loading branch information
adityathebe committed Apr 5, 2024
1 parent e077b6c commit f86306f
Show file tree
Hide file tree
Showing 10 changed files with 255 additions and 9 deletions.
71 changes: 71 additions & 0 deletions api/v1/checks.go
Original file line number Diff line number Diff line change
Expand Up @@ -788,6 +788,72 @@ type KubernetesResourceChecks struct {
CanarySpec `yaml:",inline" json:",inline"`
}

// KubernetesResourceCheckRetries holds the initial delay and retry settings
// for the checks of a kubernetes resource check.
//
// Delay, Timeout and Interval are duration strings. They are parsed lazily by
// GetInitialDelay, GetTimeout and GetInterval, which store the parsed value in
// the matching unexported parsed* field so the string is parsed at most once.
type KubernetesResourceCheckRetries struct {
// Delay is the initial delay, given as a duration string (e.g. "3s"),
// to wait before the checks are run. Empty means no delay.
Delay string `json:"delay,omitempty"`
// Timeout is a duration string bounding the total time spent retrying
// the checks. Empty means no such bound is applied.
Timeout string `json:"timeout,omitempty"`
// Interval is a duration string for the constant wait between retries.
// When empty, the checker falls back to its default of not retrying.
Interval string `json:"interval,omitempty"`
// MaxRetries caps the number of retries. Values <= 0 leave the retry
// count uncapped.
MaxRetries int `json:"maxRetries,omitempty"`

// parsedDelay caches the parsed Delay; nil until GetInitialDelay succeeds.
parsedDelay *time.Duration `json:"-"`
// parsedTimeout caches the parsed Timeout; nil until GetTimeout succeeds.
parsedTimeout *time.Duration `json:"-"`
// parsedInterval caches the parsed Interval; nil until GetInterval succeeds.
parsedInterval *time.Duration `json:"-"`
}

// GetInitialDelay returns Delay as a time.Duration.
//
// An empty Delay yields a zero duration and a nil error. A successfully parsed
// value is stored in parsedDelay and returned directly by every later call,
// even if Delay is modified afterwards. A parse failure is returned to the
// caller and nothing is cached.
func (t *KubernetesResourceCheckRetries) GetInitialDelay() (time.Duration, error) {
	// Fast path: the string was already parsed by an earlier call.
	if cached := t.parsedDelay; cached != nil {
		return *cached, nil
	}

	raw := t.Delay
	if raw == "" {
		return 0, nil
	}

	parsed, err := duration.ParseDuration(raw)
	if err != nil {
		return 0, err
	}

	delay := time.Duration(parsed)
	t.parsedDelay = &delay
	return delay, nil
}

// GetTimeout returns Timeout as a time.Duration.
//
// An empty Timeout yields a zero duration and a nil error. A successfully
// parsed value is stored in parsedTimeout and returned directly by every later
// call, even if Timeout is modified afterwards. A parse failure is returned to
// the caller and nothing is cached.
func (t *KubernetesResourceCheckRetries) GetTimeout() (time.Duration, error) {
	// Fast path: the string was already parsed by an earlier call.
	if cached := t.parsedTimeout; cached != nil {
		return *cached, nil
	}

	raw := t.Timeout
	if raw == "" {
		return 0, nil
	}

	parsed, err := duration.ParseDuration(raw)
	if err != nil {
		return 0, err
	}

	timeout := time.Duration(parsed)
	t.parsedTimeout = &timeout
	return timeout, nil
}

// GetInterval returns Interval as a time.Duration.
//
// An empty Interval yields a zero duration and a nil error. A successfully
// parsed value is stored in parsedInterval and returned directly by every
// later call, even if Interval is modified afterwards. A parse failure is
// returned to the caller and nothing is cached.
func (t *KubernetesResourceCheckRetries) GetInterval() (time.Duration, error) {
	// Fast path: the string was already parsed by an earlier call.
	if cached := t.parsedInterval; cached != nil {
		return *cached, nil
	}

	raw := t.Interval
	if raw == "" {
		return 0, nil
	}

	parsed, err := duration.ParseDuration(raw)
	if err != nil {
		return 0, err
	}

	interval := time.Duration(parsed)
	t.parsedInterval = &interval
	return interval, nil
}

type KubernetesResourceCheckWaitFor struct {
// Expr is a cel expression that determines whether all the resources
// are in their desired state before running checks on them.
Expand All @@ -805,6 +871,8 @@ type KubernetesResourceCheckWaitFor struct {
// Default: 30s
Interval string `json:"interval,omitempty"`

MaxRetries int `json:"maxRetries,omitempty"`

parsedTimeout *time.Duration `json:"-"`
parsedInterval *time.Duration `json:"-"`
}
Expand Down Expand Up @@ -865,6 +933,9 @@ type KubernetesResourceCheck struct {
// +kubebuilder:validation:XPreserveUnknownFields
Checks []KubernetesResourceChecks `json:"checks,omitempty"`

// Set initial delays and retry intervals for checks.
CheckRetries KubernetesResourceCheckRetries `json:"checkRetries,omitempty"`

// Kubeconfig is the kubeconfig or the path to the kubeconfig file.
Kubeconfig *types.EnvVar `yaml:"kubeconfig,omitempty" json:"kubeconfig,omitempty"`

Expand Down
31 changes: 31 additions & 0 deletions api/v1/zz_generated.deepcopy.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

65 changes: 56 additions & 9 deletions checks/kubernetes_resource.go
Original file line number Diff line number Diff line change
Expand Up @@ -115,16 +115,47 @@ func (c *KubernetesResourceChecker) Check(ctx *context.Context, check v1.Kuberne
return results.Failf("error templating checks: %v", err)
}

checkCtx := context.New(ctx.Context, virtualCanary)
res, err := Exec(checkCtx)
if err != nil {
return results.Failf("%v", err)
} else {
for _, r := range res {
if r.Error != "" {
results.Failf("check (name:%s) failed with error: %v", r.GetName(), r.Error)
if wt, _ := check.CheckRetries.GetInitialDelay(); wt > 0 {
time.Sleep(wt)
}

var backoff retry.Backoff
backoff = retry.BackoffFunc(func() (time.Duration, bool) {
return 0, true // don't retry by default
})

if retryInterval, _ := check.CheckRetries.GetInterval(); retryInterval > 0 {
backoff = retry.NewConstant(retryInterval)
}

if check.CheckRetries.MaxRetries > 0 {
backoff = retry.WithMaxRetries(uint64(check.CheckRetries.MaxRetries), backoff)
}

if maxRetryTimeout, _ := check.CheckRetries.GetTimeout(); maxRetryTimeout > 0 {
backoff = retry.WithMaxDuration(maxRetryTimeout, backoff)
}

retryErr := retry.Do(ctx, backoff, func(_ctx gocontext.Context) error {
ctx.Infof("running check: %s", virtualCanary.Name)

ctx = _ctx.(*context.Context)
checkCtx := context.New(ctx.Context, virtualCanary)
res, err := Exec(checkCtx)
if err != nil {
return err
} else {
for _, r := range res {
if r.Error != "" {
return retry.RetryableError(fmt.Errorf("check (name:%s) failed with error: %v", r.GetName(), r.Error))
}
}
}

return nil
})
if retryErr != nil {
return results.Failf(retryErr.Error())
}
}

Expand All @@ -142,15 +173,19 @@ func (c *KubernetesResourceChecker) evalWaitFor(ctx *context.Context, check v1.K
waitInterval = wt
}

kClient := pkg.NewKubeClient(ctx.Kommons().GetRESTConfig)

var attempts int
backoff := retry.WithMaxDuration(waitTimeout, retry.NewConstant(waitInterval))
if check.WaitFor.MaxRetries > 0 {
backoff = retry.WithMaxRetries(uint64(check.WaitFor.MaxRetries), backoff)
}
retryErr := retry.Do(ctx, backoff, func(_ctx gocontext.Context) error {
ctx = _ctx.(*context.Context)
attempts++
ctx.Tracef("waiting for %d resources to be ready. (attempts: %d)", check.TotalResources(), attempts)

var templateVar = map[string]any{}
kClient := pkg.NewKubeClient(ctx.Kommons().GetRESTConfig)
if response, err := kClient.FetchResources(ctx, append(check.StaticResources, check.Resources...)...); err != nil {
return fmt.Errorf("wait for evaluation. fetching resources: %w", err)
} else if len(response) != check.TotalResources() {
Expand Down Expand Up @@ -219,6 +254,18 @@ func (c *KubernetesResourceChecker) validate(ctx *context.Context, check v1.Kube
return fmt.Errorf("failed to parse wait for timeout(%s): %w", check.WaitFor.Timeout, err)
}

if _, err := check.CheckRetries.GetTimeout(); err != nil {
return fmt.Errorf("failed to parse check retry timeout(%s): %w", check.CheckRetries.Timeout, err)
}

if _, err := check.CheckRetries.GetInterval(); err != nil {
return fmt.Errorf("failed to parse check retry interval(%s): %w", check.CheckRetries.Interval, err)
}

if _, err := check.CheckRetries.GetInitialDelay(); err != nil {
return fmt.Errorf("failed to parse check retry initial delay(%s): %w", check.CheckRetries.Delay, err)
}

maxResourcesAllowed := ctx.Properties().Int("checks.kubernetesResource.maxResources", defaultMaxResourcesAllowed)
if check.TotalResources() > maxResourcesAllowed {
return fmt.Errorf("too many resources (%d). only %d allowed", check.TotalResources(), maxResourcesAllowed)
Expand Down
13 changes: 13 additions & 0 deletions config/deploy/crd.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -4983,6 +4983,19 @@ spec:
kubernetesResource:
items:
properties:
checkRetries:
description: Set initial delays and retry intervals for checks.
properties:
delay:
description: Delay is the initial delay
type: string
interval:
type: string
maxRetries:
type: integer
timeout:
type: string
type: object
checks:
description: Checks to run against the kubernetes resources.
items:
Expand Down
13 changes: 13 additions & 0 deletions config/deploy/manifests.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -4982,6 +4982,19 @@ spec:
kubernetesResource:
items:
properties:
checkRetries:
description: Set initial delays and retry intervals for checks.
properties:
delay:
description: Delay is the initial delay
type: string
interval:
type: string
maxRetries:
type: integer
timeout:
type: string
type: object
checks:
description: Checks to run against the kubernetes resources.
items:
Expand Down
21 changes: 21 additions & 0 deletions config/schemas/canary.schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -2320,6 +2320,9 @@
},
"type": "array"
},
"checkRetries": {
"$ref": "#/$defs/KubernetesResourceCheckRetries"
},
"kubeconfig": {
"$ref": "#/$defs/EnvVar"
},
Expand All @@ -2334,6 +2337,24 @@
"resources"
]
},
"KubernetesResourceCheckRetries": {
"properties": {
"delay": {
"type": "string"
},
"timeout": {
"type": "string"
},
"interval": {
"type": "string"
},
"maxRetries": {
"type": "integer"
}
},
"additionalProperties": false,
"type": "object"
},
"KubernetesResourceCheckWaitFor": {
"properties": {
"expr": {
Expand Down
21 changes: 21 additions & 0 deletions config/schemas/component.schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -2574,6 +2574,9 @@
},
"type": "array"
},
"checkRetries": {
"$ref": "#/$defs/KubernetesResourceCheckRetries"
},
"kubeconfig": {
"$ref": "#/$defs/EnvVar"
},
Expand All @@ -2588,6 +2591,24 @@
"resources"
]
},
"KubernetesResourceCheckRetries": {
"properties": {
"delay": {
"type": "string"
},
"timeout": {
"type": "string"
},
"interval": {
"type": "string"
},
"maxRetries": {
"type": "integer"
}
},
"additionalProperties": false,
"type": "object"
},
"KubernetesResourceCheckWaitFor": {
"properties": {
"expr": {
Expand Down
21 changes: 21 additions & 0 deletions config/schemas/topology.schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -2544,6 +2544,9 @@
},
"type": "array"
},
"checkRetries": {
"$ref": "#/$defs/KubernetesResourceCheckRetries"
},
"kubeconfig": {
"$ref": "#/$defs/EnvVar"
},
Expand All @@ -2558,6 +2561,24 @@
"resources"
]
},
"KubernetesResourceCheckRetries": {
"properties": {
"delay": {
"type": "string"
},
"timeout": {
"type": "string"
},
"interval": {
"type": "string"
},
"maxRetries": {
"type": "integer"
}
},
"additionalProperties": false,
"type": "object"
},
"KubernetesResourceCheckWaitFor": {
"properties": {
"expr": {
Expand Down
4 changes: 4 additions & 0 deletions fixtures/k8s/kubernetes_resource_ingress_pass.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -69,3 +69,7 @@ spec:
headers:
- name: Host
value: "{{(index ((index .staticResources 1).Object.spec.rules) 0).host}}"
checkRetries:
delay: 3s
interval: 2s
maxRetries: 3
4 changes: 4 additions & 0 deletions fixtures/k8s/kubernetes_resource_service_pass.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -41,3 +41,7 @@ spec:
- http:
- name: Call httpbin service
url: "http://httpbin-svc.default.svc"
checkRetries:
delay: 2s
maxRetries: 5
interval: 3s

0 comments on commit f86306f

Please sign in to comment.