Skip to content

Commit

Permalink
fix: Split resource deletion (kyma-project#1336)
Browse files Browse the repository at this point in the history
* split cleanup process to postpone crd deletion after all resources get deleted

* split cleanup process to postpone operator related resource deletion

* revert doPreDelete

* rename doPreDelete to removeModuleCR

* fix flaky test

* increase eventually timeout

* long timeout when waiting for manifest deletion

* increase timeout

* set state to Warning if deletion is not finished

* update module template

* update moduletemplate

* remove declarative test

* apply review suggestion

* add unit test for IsOperatorRelatedResources

* add unit test for SplitResources
enhance e2e to test module deletion when managed CR get blocked

* import template operator api in tests

* enhance GetManifest

* fix test

* fix flaky tests

* fix e2e test

* debug e2e

* add missing Namespace to operator related resources
  • Loading branch information
ruanxin authored Mar 11, 2024
1 parent 43608f2 commit a0c4943
Show file tree
Hide file tree
Showing 44 changed files with 740 additions and 1,103 deletions.
2 changes: 2 additions & 0 deletions .golangci.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,8 @@ linters-settings:
no-unaliased: true
no-extra-aliases: true
alias:
- pkg: github.com/kyma-project/template-operator/api/v1alpha1
alias: templatev1alpha1
- pkg: github.com/cert-manager/cert-manager/pkg/apis/certmanager/v1
alias: certmanagerv1
- pkg: github.com/cert-manager/cert-manager/pkg/apis/meta/v1
Expand Down
1 change: 1 addition & 0 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ require (

require (
github.com/go-co-op/gocron v1.37.0
github.com/kyma-project/template-operator/api v0.0.0-20240307063418-941f23f566fe
github.com/prometheus/client_model v0.6.0
k8s.io/api v0.29.2
k8s.io/apiextensions-apiserver v0.29.2
Expand Down
2 changes: 2 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -1136,6 +1136,8 @@ github.com/kylelemons/godebug v1.1.0 h1:RPNrshWIDI6G2gRW9EHilWtl7Z6Sb1BR0xunSBf0
github.com/kylelemons/godebug v1.1.0/go.mod h1:9/0rRGxNHcop5bhtWyNeEfOS8JIWk580+fNqagV/RAw=
github.com/kyma-project/runtime-watcher/listener v0.0.0-20231011102033-b8383d92883e h1:yJoi2kUTS1CKOBQOG8BRNxWAdEAk63DurKSJif6miUU=
github.com/kyma-project/runtime-watcher/listener v0.0.0-20231011102033-b8383d92883e/go.mod h1:h+qRkxtzjaBmmwNW8om8othH+xCm4iK4a53WoMoMe50=
github.com/kyma-project/template-operator/api v0.0.0-20240307063418-941f23f566fe h1:21CBtXpbWtI4jp1SJL8DZZIBj6y2K9y3zQgDjK5kawQ=
github.com/kyma-project/template-operator/api v0.0.0-20240307063418-941f23f566fe/go.mod h1:zz645UFLGfaPSqHLJ5FOGe949mSKhiqQhlp0fKzLI3s=
github.com/leodido/go-urn v1.2.4 h1:XlAE/cm/ms7TE/VMVoduSpNBoyc2dOxHs5MZSwAN63Q=
github.com/leodido/go-urn v1.2.4/go.mod h1:7ZrI8mTSeBSHl/UaRyKQW1qZeMgak41ANeCNaVckg+4=
github.com/lestrrat-go/blackmagic v1.0.2 h1:Cg2gVSc9h7sz9NOByczrbUvLopQmXrfFx//N+AkAr5k=
Expand Down
65 changes: 0 additions & 65 deletions internal/declarative/v2/cleanup.go

This file was deleted.

52 changes: 21 additions & 31 deletions internal/declarative/v2/reconciler.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,9 @@ import (
"sigs.k8s.io/controller-runtime/pkg/manager"

"github.com/kyma-project/lifecycle-manager/api/shared"
"github.com/kyma-project/lifecycle-manager/api/v1beta2"
"github.com/kyma-project/lifecycle-manager/internal/pkg/metrics"
"github.com/kyma-project/lifecycle-manager/internal/pkg/resources"
"github.com/kyma-project/lifecycle-manager/pkg/common"
"github.com/kyma-project/lifecycle-manager/pkg/queue"
"github.com/kyma-project/lifecycle-manager/pkg/util"
Expand Down Expand Up @@ -160,15 +162,14 @@ func (r *Reconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Resu
return r.ssaStatus(ctx, obj, metrics.ManifestRenderResources)
}

diff := ResourceList(current).Difference(target)
if err := r.pruneDiff(ctx, clnt, obj, diff, spec); errors.Is(err, ErrDeletionNotFinished) {
if err := r.pruneDiff(ctx, clnt, obj, current, target, spec); errors.Is(err, resources.ErrDeletionNotFinished) {
r.Metrics.RecordRequeueReason(metrics.ManifestPruneDiffNotFinished, queue.IntendedRequeue)
return ctrl.Result{Requeue: true}, nil
} else if err != nil {
return r.ssaStatus(ctx, obj, metrics.ManifestPruneDiff)
}

if err := r.doPreDelete(ctx, clnt, obj); err != nil {
if err := r.removeModuleCR(ctx, clnt, obj); err != nil {
if errors.Is(err, ErrRequeueRequired) {
r.Metrics.RecordRequeueReason(metrics.ManifestPreDeleteEnqueueRequired, queue.IntendedRequeue)
return ctrl.Result{Requeue: true}, nil
Expand Down Expand Up @@ -313,7 +314,9 @@ func (r *Reconciler) renderResources(
return target, current, nil
}

func (r *Reconciler) syncResources(ctx context.Context, clnt Client, obj Object, target []*resource.Info) error {
func (r *Reconciler) syncResources(ctx context.Context, clnt Client, obj Object,
target []*resource.Info,
) error {
status := obj.GetStatus()

if err := ConcurrentSSA(clnt, r.FieldOwner).Run(ctx, target); err != nil {
Expand Down Expand Up @@ -408,24 +411,7 @@ func generateOperationMessage(installationCondition apimetav1.Condition, stateIn
return installationCondition.Message
}

func (r *Reconciler) deleteDiffResources(
ctx context.Context, clnt Client, obj Object, diff []*resource.Info,
) error {
status := obj.GetStatus()

if err := NewConcurrentCleanup(clnt).Run(ctx, diff); errors.Is(err, ErrDeletionNotFinished) {
r.Event(obj, "Normal", "Deletion", err.Error())
return err
} else if err != nil {
r.Event(obj, "Warning", "Deletion", err.Error())
obj.SetStatus(status.WithState(shared.StateError).WithErr(err))
return err
}

return nil
}

func (r *Reconciler) doPreDelete(ctx context.Context, clnt Client, obj Object) error {
func (r *Reconciler) removeModuleCR(ctx context.Context, clnt Client, obj Object) error {
if !obj.GetDeletionTimestamp().IsZero() {
for _, preDelete := range r.PreDeletes {
if err := preDelete(ctx, clnt, r.Client, obj); err != nil {
Expand Down Expand Up @@ -490,15 +476,16 @@ func (r *Reconciler) pruneDiff(
ctx context.Context,
clnt Client,
obj Object,
diff []*resource.Info,
current, target []*resource.Info,
spec *Spec,
) error {
var err error
diff, err = pruneResource(diff, "Namespace", namespaceNotBeRemoved)
diff, err := pruneResource(ResourceList(current).Difference(target), "Namespace", namespaceNotBeRemoved)
if err != nil {
return err
}

if len(diff) == 0 {
return nil
}
if manifestNotInDeletingAndOciRefNotChangedButDiffDetected(diff, obj, spec) {
// This case should not happen normally, but if happens, it means the resources read from cache is incomplete,
// and we should prevent diff resources to be deleted.
Expand All @@ -509,14 +496,17 @@ func (r *Reconciler) pruneDiff(
return ErrResourceSyncDiffInSameOCILayer
}

if err := r.deleteDiffResources(ctx, clnt, obj, diff); err != nil {
return err
// Remove this type casting while in progress this issue: https://github.com/kyma-project/lifecycle-manager/issues/1006
manifest, ok := obj.(*v1beta2.Manifest)
if !ok {
return v1beta2.ErrTypeAssertManifest
}

return nil
return resources.NewConcurrentCleanup(clnt, manifest).DeleteDiffResources(ctx, diff)
}

func manifestNotInDeletingAndOciRefNotChangedButDiffDetected(diff []*resource.Info, obj Object, spec *Spec) bool {
func manifestNotInDeletingAndOciRefNotChangedButDiffDetected(diff []*resource.Info, obj Object,
spec *Spec,
) bool {
return len(diff) > 0 && ociRefNotChanged(obj, spec.OCIRef) && obj.GetDeletionTimestamp().IsZero()
}

Expand Down
19 changes: 4 additions & 15 deletions internal/manifest/custom_resource.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@ package manifest

import (
"context"
"errors"
"fmt"

apierrors "k8s.io/apimachinery/pkg/api/errors"
Expand All @@ -15,13 +14,6 @@ import (
"github.com/kyma-project/lifecycle-manager/pkg/util"
)

var (
ErrWaitingForAsyncCustomResourceDeletion = errors.New(
"deletion of custom resource was triggered and is now waiting to be completed")
ErrWaitingForAsyncCustomResourceDefinitionDeletion = errors.New(
"deletion of custom resource definition was triggered and is now waiting to be completed")
)

// PostRunCreateCR is a hook for creating the manifest default custom resource if not available in the cluster
// It is used to provide the controller with default data in the Runtime.
func PostRunCreateCR(
Expand Down Expand Up @@ -60,13 +52,10 @@ func PostRunCreateCR(
return nil
}

// PreDeleteDeleteCR is a hook for deleting the manifest default custom resource if available in the cluster
// It is used to clean up the controller default data.
// It uses DeletePropagationBackground as it will return an error if the resource exists, even if deletion is triggered
// This leads to the reconciled resource immediately being requeued due to ErrWaitingForAsyncCustomResourceDeletion.
// In this case, the next time it will run into this delete function,
// it will either say that the resource is already being deleted (2xx) and retry or its no longer found.
// Then the finalizer is dropped, and we consider the CR removal successful.
// PreDeleteDeleteCR is a hook for deleting the module CR if available in the cluster.
// It uses DeletePropagationBackground to delete module CR.
// Only if module CR is not found (indicated by NotFound error), it continues to remove Manifest finalizer,
// and we consider the CR removal successful.
func PreDeleteDeleteCR(
ctx context.Context, skr declarativev2.Client, kcp client.Client, obj declarativev2.Object,
) error {
Expand Down
126 changes: 126 additions & 0 deletions internal/pkg/resources/cleanup.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,126 @@
package resources

import (
"context"
"errors"

apimetav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/cli-runtime/pkg/resource"
"sigs.k8s.io/controller-runtime/pkg/client"

"github.com/kyma-project/lifecycle-manager/api/shared"
"github.com/kyma-project/lifecycle-manager/api/v1beta2"
"github.com/kyma-project/lifecycle-manager/pkg/common"
"github.com/kyma-project/lifecycle-manager/pkg/util"
)

var ErrDeletionNotFinished = errors.New("deletion is not yet finished")

type ConcurrentCleanup struct {
clnt client.Client
manifest *v1beta2.Manifest
}

func NewConcurrentCleanup(clnt client.Client, manifest *v1beta2.Manifest) *ConcurrentCleanup {
return &ConcurrentCleanup{
clnt: clnt,
manifest: manifest,
}
}

func (c *ConcurrentCleanup) DeleteDiffResources(ctx context.Context, resources []*resource.Info,
) error {
status := c.manifest.GetStatus()
operatorRelatedResources, operatorManagedResources, err := SplitResources(resources)
if err != nil {
return err
}
if err := c.cleanupResources(ctx, operatorManagedResources, status); err != nil {
return err
}
return c.cleanupResources(ctx, operatorRelatedResources, status)
}

func (c *ConcurrentCleanup) cleanupResources(
ctx context.Context,
resources []*resource.Info,
status shared.Status,
) error {
if err := c.Run(ctx, resources); errors.Is(err, ErrDeletionNotFinished) {
c.manifest.SetStatus(status.WithState(shared.StateWarning).WithErr(err))
return err
} else if err != nil {
c.manifest.SetStatus(status.WithState(shared.StateError).WithErr(err))
return err
}
return nil
}

func SplitResources(resources []*resource.Info) ([]*resource.Info, []*resource.Info, error) {
operatorRelatedResources := make([]*resource.Info, 0)
operatorManagedResources := make([]*resource.Info, 0)

for _, item := range resources {
obj, ok := item.Object.(client.Object)
if !ok {
return nil, nil, common.ErrTypeAssert
}
if IsOperatorRelatedResources(obj.GetObjectKind().GroupVersionKind().Kind) {
operatorRelatedResources = append(operatorRelatedResources, item)
continue
}
operatorManagedResources = append(operatorManagedResources, item)
}

return operatorRelatedResources, operatorManagedResources, nil
}

func IsOperatorRelatedResources(kind string) bool {
return kind == "CustomResourceDefinition" ||
kind == "Namespace" ||
kind == "ServiceAccount" ||
kind == "Role" ||
kind == "ClusterRole" ||
kind == "RoleBinding" ||
kind == "ClusterRoleBinding" ||
kind == "Service" ||
kind == "Deployment"
}

func (c *ConcurrentCleanup) Run(ctx context.Context, infos []*resource.Info) error {
results := make(chan error, len(infos))
for i := range infos {
i := i
go c.cleanupResource(ctx, infos[i], results)
}

var errs []error
present := len(infos)
for i := 0; i < len(infos); i++ {
err := <-results
if util.IsNotFound(err) {
present--
continue
}
if err != nil {
errs = append(errs, err)
}
}

if len(errs) > 0 {
return errors.Join(errs...)
}

if present > 0 {
return ErrDeletionNotFinished
}
return nil
}

func (c *ConcurrentCleanup) cleanupResource(ctx context.Context, info *resource.Info, results chan error) {
obj, ok := info.Object.(client.Object)
if !ok {
return
}
results <- c.clnt.Delete(ctx, obj, client.PropagationPolicy(apimetav1.DeletePropagationBackground))
}
Loading

0 comments on commit a0c4943

Please sign in to comment.