diff --git a/internal/controller/nonadminbackup_controller.go b/internal/controller/nonadminbackup_controller.go index ef63ea54..71123df5 100644 --- a/internal/controller/nonadminbackup_controller.go +++ b/internal/controller/nonadminbackup_controller.go @@ -31,7 +31,6 @@ import ( "k8s.io/apimachinery/pkg/api/meta" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime" - "k8s.io/apimachinery/pkg/types" ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil" @@ -161,7 +160,6 @@ func (r *NonAdminBackupReconciler) Reconcile(ctx context.Context, req ctrl.Reque // - bool: true if reconciliation should be requeued, false otherwise // - error: any error encountered during the process func (r *NonAdminBackupReconciler) setStatusAndConditionForDeletionAndCallDelete(ctx context.Context, logger logr.Logger, nab *nacv1alpha1.NonAdminBackup) (bool, error) { - requeueRequired := false updatedPhase := updateNonAdminPhase(&nab.Status.Phase, nacv1alpha1.NonAdminPhaseDeleting) updatedCondition := meta.SetStatusCondition(&nab.Status.Conditions, metav1.Condition{ @@ -177,19 +175,10 @@ func (r *NonAdminBackupReconciler) setStatusAndConditionForDeletionAndCallDelete return false, err } logger.V(1).Info("NonAdminBackup status marked for deletion") - requeueRequired = true // Requeue to ensure latest NAB object in the next reconciliation steps } else { logger.V(1).Info("NonAdminBackup status unchanged during deletion") } - if nab.ObjectMeta.DeletionTimestamp.IsZero() { - logger.V(1).Info("Marking NonAdminBackup for deletion", nameString, nab.Name) - if err := r.Delete(ctx, nab); err != nil { - logger.Error(err, "Failed to call Delete on the NonAdminBackup object") - return false, err - } - requeueRequired = true // Requeue to allow deletion to proceed - } - return requeueRequired, nil + return false, nil } // setStatusForDirectKubernetesAPIDeletion updates the status and conditions when a NonAdminBackup @@ -249,9 +238,7 @@ func (r *NonAdminBackupReconciler) setStatusForDirectKubernetesAPIDeletion(ctx c func (r *NonAdminBackupReconciler) createVeleroDeleteBackupRequest(ctx context.Context, logger logr.Logger, nab *nacv1alpha1.NonAdminBackup) (bool, error) { // This function is called just after setStatusAndConditionForDeletionAndCallDelete - standard delete path, which already // requeued the reconciliation to get the latest NAB object. There is no need to fetch the latest NAB object here. - if !controllerutil.ContainsFinalizer(nab, constant.NabFinalizerName) || - nab.Status.VeleroBackup == nil || - nab.Status.VeleroBackup.NACUUID == constant.EmptyString { + if nab.Status.VeleroBackup == nil || nab.Status.VeleroBackup.NACUUID == constant.EmptyString { return false, nil } @@ -259,9 +246,7 @@ func (r *NonAdminBackupReconciler) createVeleroDeleteBackupRequest(ctx context.C // For the ForceDelete we do not create DeleteBackupRequest veleroBackupNACUUID := nab.Status.VeleroBackup.NACUUID veleroBackup, err := function.GetVeleroBackupByLabel(ctx, r.Client, r.OADPNamespace, veleroBackupNACUUID) - if err != nil { - // Log error if multiple VeleroBackup objects are found logger.Error(err, findSingleVBError, uuidString, veleroBackupNACUUID) return false, err } @@ -273,7 +258,6 @@ func (r *NonAdminBackupReconciler) createVeleroDeleteBackupRequest(ctx context.C deleteBackupRequest, err := function.GetVeleroDeleteBackupRequestByLabel(ctx, r.Client, r.OADPNamespace, veleroBackupNACUUID) if err != nil { - // Log error if multiple DeleteBackupRequest objects are found logger.Error(err, findSingleVDBRError, uuidString, veleroBackupNACUUID) return false, err } @@ -319,7 +303,7 @@ func (r *NonAdminBackupReconciler) createVeleroDeleteBackupRequest(ctx context.C logger.V(1).Info("NonAdminBackup DeleteBackupRequest Status unchanged") } - return false, nil // Continue so initNabDeletion can initialize deletion of a NonAdminBackup object + return true, nil } // deleteVeleroBackupAndDeleteBackupRequestObjects deletes both the VeleroBackup and any associated @@ -340,9 +324,9 @@ func (r *NonAdminBackupReconciler) deleteVeleroBackupAndDeleteBackupRequestObjec return false, nil } + deleted := false veleroBackupNACUUID := nab.Status.VeleroBackup.NACUUID veleroBackup, err := function.GetVeleroBackupByLabel(ctx, r.Client, r.OADPNamespace, veleroBackupNACUUID) - if err != nil { // Case where more than one VeleroBackup is found with the same label UUID // TODO (migi): Determine if all objects with this UUID should be deleted @@ -356,24 +340,26 @@ func (r *NonAdminBackupReconciler) deleteVeleroBackupAndDeleteBackupRequestObjec return false, err } logger.V(1).Info("VeleroBackup deletion initiated", nameString, veleroBackup.Name) + deleted = true } else { logger.V(1).Info("VeleroBackup already deleted") } deleteBackupRequest, err := function.GetVeleroDeleteBackupRequestByLabel(ctx, r.Client, r.OADPNamespace, veleroBackupNACUUID) if err != nil { - // Log error if multiple DeleteBackupRequest objects are found logger.Error(err, findSingleVDBRError, uuidString, veleroBackupNACUUID) return false, err } + if deleteBackupRequest != nil { if err = r.Delete(ctx, deleteBackupRequest); err != nil { logger.Error(err, "Failed to delete VeleroDeleteBackupRequest", nameString, deleteBackupRequest.Name) return false, err } logger.V(1).Info("VeleroDeleteBackupRequest deletion initiated", nameString, deleteBackupRequest.Name) + deleted = true } - return false, nil // Continue so initNabDeletion can initialize deletion of an NonAdminBackup object + return deleted, nil } // removeNabFinalizerUponVeleroBackupDeletion ensures the associated VeleroBackup object is deleted @@ -395,40 +381,19 @@ func (r *NonAdminBackupReconciler) deleteVeleroBackupAndDeleteBackupRequestObjec // - A boolean indicating whether to requeue the reconcile loop (true if waiting for VeleroBackup deletion). // - An error if any update operation or deletion check fails. func (r *NonAdminBackupReconciler) removeNabFinalizerUponVeleroBackupDeletion(ctx context.Context, logger logr.Logger, nab *nacv1alpha1.NonAdminBackup) (bool, error) { - // We do not need to gather latest NAB object here: - // 1. the NAB object was already requeued by setStatusAndConditionForDeletionAndCallDelete - // 2. removal of NAB finalizer is the last step in the delete paths that will requeue the reconciliation if the - // corresponding VeleroBackup object is found based on the NACUUID stored in the NAB status for which we already - // have the latest NAB object (point 1 above). - if !nab.ObjectMeta.DeletionTimestamp.IsZero() { - if !nab.Spec.ForceDeleteBackup && nab.Status.VeleroBackup != nil && nab.Status.VeleroBackup.NACUUID != constant.EmptyString { - veleroBackupNACUUID := nab.Status.VeleroBackup.NACUUID - - veleroBackup, err := function.GetVeleroBackupByLabel(ctx, r.Client, r.OADPNamespace, veleroBackupNACUUID) - if err != nil { - // Case in which more then one VeleroBackup is found with the same label UUID - // TODO (migi): Should we delete all of the objects with such UUID ? - logger.Error(err, findSingleVBError, uuidString, veleroBackupNACUUID) - return false, err - } - - if veleroBackup != nil { - logger.V(1).Info("Waiting for VeleroBackup to be deleted", nameString, veleroBackupNACUUID) - return true, nil // Requeue - } - } - // VeleroBackup is deleted, proceed with deleting the NonAdminBackup - logger.V(1).Info("VeleroBackup deleted, removing NonAdminBackup finalizer") - - controllerutil.RemoveFinalizer(nab, constant.NabFinalizerName) - - if err := r.Update(ctx, nab); err != nil { - logger.Error(err, "Failed to remove finalizer from NonAdminBackup") + controllerutil.RemoveFinalizer(nab, constant.NabFinalizerName) + if err := r.Update(ctx, nab); err != nil { + logger.Error(err, "Failed to remove finalizer from NonAdminBackup") + return false, err + } + if nab.ObjectMeta.DeletionTimestamp.IsZero() { + logger.V(1).Info("Marking NonAdminBackup for deletion", nameString, nab.Name) + if err := r.Delete(ctx, nab); err != nil { + logger.Error(err, "Failed to call Delete on the NonAdminBackup object") return false, err } - - logger.V(1).Info("NonAdminBackup finalizer removed and object deleted") } + logger.V(1).Info("NonAdminBackup finalizer removed and object deleted") return false, nil } @@ -532,14 +497,6 @@ func (r *NonAdminBackupReconciler) validateSpec(ctx context.Context, logger logr // // This function generates a UUID and stores it in the VeleroBackup status field of NonAdminBackup. func (r *NonAdminBackupReconciler) setBackupUUIDInStatus(ctx context.Context, logger logr.Logger, nab *nacv1alpha1.NonAdminBackup) (bool, error) { - // Get the latest version of the NAB object just before checking if the NACUUID is set - // to ensure we do not miss any updates to the NAB object - nabOriginal := nab.DeepCopy() - if err := r.Get(ctx, types.NamespacedName{Name: nabOriginal.Name, Namespace: nabOriginal.Namespace}, nab); err != nil { - logger.Error(err, "Failed to re-fetch NonAdminBackup") - return false, err - } - if nab.Status.VeleroBackup == nil || nab.Status.VeleroBackup.NACUUID == constant.EmptyString { veleroBackupNACUUID := function.GenerateNacObjectUUID(nab.Namespace, nab.Name) nab.Status.VeleroBackup = &nacv1alpha1.VeleroBackup{ diff --git a/internal/controller/nonadminbackup_controller_test.go b/internal/controller/nonadminbackup_controller_test.go index e9b86ed4..9ec24c3d 100644 --- a/internal/controller/nonadminbackup_controller_test.go +++ b/internal/controller/nonadminbackup_controller_test.go @@ -30,9 +30,9 @@ import ( "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/types" - "k8s.io/apimachinery/pkg/util/wait" "k8s.io/utils/ptr" ctrl "sigs.k8s.io/controller-runtime" + "sigs.k8s.io/controller-runtime/pkg/cache" "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil" "sigs.k8s.io/controller-runtime/pkg/reconcile" @@ -59,6 +59,7 @@ type nonAdminBackupFullReconcileScenario struct { enforcedBackupSpec *velerov1.BackupSpec spec nacv1alpha1.NonAdminBackupSpec status nacv1alpha1.NonAdminBackupStatus + errorLogs int } func buildTestNonAdminBackup(nonAdminNamespace string, nonAdminName string, spec nacv1alpha1.NonAdminBackupSpec) *nacv1alpha1.NonAdminBackup { @@ -187,6 +188,10 @@ var _ = ginkgo.Describe("Test single reconciles of NonAdminBackup Reconcile func }, nonAdminBackup, ) == nil { + for _, finalizer := range nonAdminBackup.GetFinalizers() { + controllerutil.RemoveFinalizer(nonAdminBackup, finalizer) + } + gomega.Expect(k8sClient.Update(ctx, nonAdminBackup)).To(gomega.Succeed()) gomega.Expect(k8sClient.Delete(ctx, nonAdminBackup)).To(gomega.Succeed()) } gomega.Expect(deleteTestNamespaces(ctx, nonAdminObjectNamespace, oadpNamespace)).To(gomega.Succeed()) @@ -782,8 +787,12 @@ var _ = ginkgo.Describe("Test full reconcile loop of NonAdminBackup Controller", gomega.Expect(deleteTestNamespaces(ctx, nonAdminObjectNamespace, oadpNamespace)).To(gomega.Succeed()) cancel() - // wait cancel - time.Sleep(1 * time.Second) + // wait manager shutdown + gomega.Eventually(func() (bool, error) { + logOutput := ginkgo.CurrentSpecReport().CapturedGinkgoWriterOutput + shutdownlog := "INFO Wait completed, proceeding to shutdown the manager" + return strings.Contains(logOutput, shutdownlog) && strings.Count(logOutput, shutdownlog) == 1, nil + }, 5*time.Second, 1*time.Second).Should(gomega.BeTrue()) }) ginkgo.DescribeTable("Reconcile triggered by NonAdminBackup Create event", @@ -794,6 +803,12 @@ var _ = ginkgo.Describe("Test full reconcile loop of NonAdminBackup Controller", k8sManager, err := ctrl.NewManager(cfg, ctrl.Options{ Scheme: k8sClient.Scheme(), + Cache: cache.Options{ + DefaultNamespaces: map[string]cache.Config{ + nonAdminObjectNamespace: {}, + oadpNamespace: {}, + }, + }, }) gomega.Expect(err).ToNot(gomega.HaveOccurred()) @@ -815,53 +830,44 @@ var _ = ginkgo.Describe("Test full reconcile loop of NonAdminBackup Controller", gomega.Expect(err).ToNot(gomega.HaveOccurred(), "failed to run manager") }() // wait manager start - managerStartTimeout := 10 * time.Second - pollInterval := 100 * time.Millisecond - ctxTimeout, cancel := context.WithTimeout(ctx, managerStartTimeout) - defer cancel() - - err = wait.PollUntilContextTimeout(ctxTimeout, pollInterval, managerStartTimeout, true, func(ctx context.Context) (done bool, err error) { - select { - case <-ctx.Done(): - return false, ctx.Err() - default: - // Check if the manager has started by verifying if the client is initialized - return k8sManager.GetClient() != nil, nil - } - }) - // Check if the context timeout or another error occurred - gomega.Expect(err).ToNot(gomega.HaveOccurred(), "Manager failed to start within the timeout period") + gomega.Eventually(func() (bool, error) { + logOutput := ginkgo.CurrentSpecReport().CapturedGinkgoWriterOutput + startUplog := `INFO Starting workers {"controller": "nonadminbackup", "controllerGroup": "oadp.openshift.io", "controllerKind": "NonAdminBackup", "worker count": 1}` + return strings.Contains(logOutput, startUplog) && + strings.Count(logOutput, startUplog) == 1 && + !strings.Contains(logOutput, "DEBUG Accepted NAB Create event"), nil + }, 5*time.Second, 1*time.Second).Should(gomega.BeTrue()) ginkgo.By("Waiting Reconcile of create event") nonAdminBackup := buildTestNonAdminBackup(nonAdminObjectNamespace, nonAdminObjectName, scenario.spec) - gomega.Expect(k8sClient.Create(ctxTimeout, nonAdminBackup)).To(gomega.Succeed()) + gomega.Expect(k8sClient.Create(ctx, nonAdminBackup)).To(gomega.Succeed()) // wait NAB reconcile - time.Sleep(2 * time.Second) - - ginkgo.By("Fetching NonAdminBackup after Reconcile") - gomega.Expect(k8sClient.Get( - ctxTimeout, - types.NamespacedName{ - Name: nonAdminObjectName, - Namespace: nonAdminObjectNamespace, - }, - nonAdminBackup, - )).To(gomega.Succeed()) - - ginkgo.By("Validating NonAdminBackup Status") + gomega.Eventually(func() (bool, error) { + err := k8sClient.Get( + ctx, + types.NamespacedName{ + Name: nonAdminObjectName, + Namespace: nonAdminObjectNamespace, + }, + nonAdminBackup, + ) + if err != nil { + return false, err + } + err = checkTestNonAdminBackupStatus(nonAdminBackup, scenario.status, oadpNamespace) + return err == nil, err + }, 5*time.Second, 1*time.Second).Should(gomega.BeTrue()) - gomega.Expect(checkTestNonAdminBackupStatus(nonAdminBackup, scenario.status, oadpNamespace)).To(gomega.Succeed()) + ginkgo.By("Checking if NonAdminBackup Spec was not changed") + gomega.Expect(reflect.DeepEqual( + nonAdminBackup.Spec, + scenario.spec, + )).To(gomega.BeTrue()) + veleroBackup := &velerov1.Backup{} if scenario.status.VeleroBackup != nil && len(nonAdminBackup.Status.VeleroBackup.NACUUID) > 0 { - ginkgo.By("Checking if NonAdminBackup Spec was not changed") - gomega.Expect(reflect.DeepEqual( - nonAdminBackup.Spec, - scenario.spec, - )).To(gomega.BeTrue()) - - veleroBackup := &velerov1.Backup{} gomega.Expect(k8sClient.Get( - ctxTimeout, + ctx, types.NamespacedName{ Name: nonAdminBackup.Status.VeleroBackup.NACUUID, Namespace: oadpNamespace, @@ -877,22 +883,17 @@ var _ = ginkgo.Describe("Test full reconcile loop of NonAdminBackup Controller", } ginkgo.By("Simulating VeleroBackup update to finished state") - veleroBackup.Status = velerov1.BackupStatus{ Phase: velerov1.BackupPhaseCompleted, CompletionTimestamp: &metav1.Time{Time: time.Now()}, } - // can not call .Status().Update() for veleroBackup object https://github.com/vmware-tanzu/velero/issues/8285 - gomega.Expect(k8sClient.Update(ctxTimeout, veleroBackup)).To(gomega.Succeed()) - - ginkgo.By("VeleroBackup updated") + gomega.Expect(k8sClient.Update(ctx, veleroBackup)).To(gomega.Succeed()) // wait NAB reconcile - gomega.Eventually(func() (bool, error) { err := k8sClient.Get( - ctxTimeout, + ctx, types.NamespacedName{ Name: nonAdminObjectName, Namespace: nonAdminObjectNamespace, @@ -910,8 +911,44 @@ var _ = ginkgo.Describe("Test full reconcile loop of NonAdminBackup Controller", } ginkgo.By("Waiting Reconcile of delete event") - gomega.Expect(k8sClient.Delete(ctxTimeout, nonAdminBackup)).To(gomega.Succeed()) - time.Sleep(1 * time.Second) + nonAdminBackup.Spec.ForceDeleteBackup = true + gomega.Expect(k8sClient.Update(ctx, nonAdminBackup)).To(gomega.Succeed()) + if scenario.status.VeleroBackup != nil && len(nonAdminBackup.Status.VeleroBackup.NACUUID) > 0 { + gomega.Eventually(func() (bool, error) { + err := k8sClient.Get( + ctx, + types.NamespacedName{ + Name: nonAdminBackup.Status.VeleroBackup.Name, + Namespace: oadpNamespace, + }, + veleroBackup, + ) + if errors.IsNotFound(err) { + return true, nil + } + return false, err + }, 10*time.Second, 1*time.Second).Should(gomega.BeTrue()) + } + gomega.Eventually(func() (bool, error) { + err := k8sClient.Get( + ctx, + types.NamespacedName{ + Name: nonAdminObjectName, + Namespace: nonAdminObjectNamespace, + }, + nonAdminBackup, + ) + if errors.IsNotFound(err) { + return true, nil + } + return false, err + }, 10*time.Second, 1*time.Second).Should(gomega.BeTrue()) + gomega.Eventually(func() (bool, error) { + logOutput := ginkgo.CurrentSpecReport().CapturedGinkgoWriterOutput + deletelog := "DEBUG Accepted NAB Delete event" + return strings.Contains(logOutput, deletelog) && strings.Count(logOutput, deletelog) == 1, nil + }, 5*time.Second, 1*time.Second).Should(gomega.BeTrue()) + gomega.Expect(strings.Count(ginkgo.CurrentSpecReport().CapturedGinkgoWriterOutput, "ERROR")).Should(gomega.Equal(scenario.errorLogs)) }, ginkgo.Entry("Should update NonAdminBackup until VeleroBackup completes and then delete it", nonAdminBackupFullReconcileScenario{ spec: nacv1alpha1.NonAdminBackupSpec{ @@ -966,6 +1003,7 @@ var _ = ginkgo.Describe("Test full reconcile loop of NonAdminBackup Controller", enforcedBackupSpec: &velerov1.BackupSpec{ SnapshotVolumes: ptr.To(false), }, + errorLogs: 1, }), ) }) diff --git a/internal/controller/nonadminrestore_controller.go b/internal/controller/nonadminrestore_controller.go index 81690eb6..6ecbb954 100644 --- a/internal/controller/nonadminrestore_controller.go +++ b/internal/controller/nonadminrestore_controller.go @@ -18,7 +18,7 @@ package controller import ( "context" - "errors" + "fmt" "reflect" "github.com/go-logr/logr" @@ -89,7 +89,6 @@ func (r *NonAdminRestoreReconciler) Reconcile(ctx context.Context, req ctrl.Requ reconcileSteps = []nonAdminRestoreReconcileStepFunction{ r.setStatusAndConditionForDeletionAndCallDelete, r.deleteVeleroRestore, - r.removeNarFinalizerUponVeleroRestoreDeletion, } default: logger.V(1).Info("Executing creation/update path") @@ -128,7 +127,6 @@ func (r *NonAdminRestoreReconciler) Reconcile(ctx context.Context, req ctrl.Requ // - bool: true if reconciliation should be requeued, false otherwise // - error: any error encountered during the process func (r *NonAdminRestoreReconciler) setStatusAndConditionForDeletionAndCallDelete(ctx context.Context, logger logr.Logger, nar *nacv1alpha1.NonAdminRestore) (bool, error) { - requeueRequired := false updatedPhase := updateNonAdminPhase(&nar.Status.Phase, nacv1alpha1.NonAdminPhaseDeleting) updatedCondition := meta.SetStatusCondition(&nar.Status.Conditions, metav1.Condition{ @@ -144,19 +142,10 @@ func (r *NonAdminRestoreReconciler) setStatusAndConditionForDeletionAndCallDelet return false, err } logger.V(1).Info("NonAdminRestore status marked for deletion") - requeueRequired = true // Requeue to ensure latest NAR object in the next reconciliation steps } else { logger.V(1).Info("NonAdminRestore status unchanged during deletion") } - if nar.ObjectMeta.DeletionTimestamp.IsZero() { - logger.V(1).Info("Marking NonAdminRestore for deletion", nameString, nar.Name) - if err := r.Delete(ctx, nar); err != nil { - logger.Error(err, "Failed to call Delete on the NonAdminRestore object") - return false, err - } - requeueRequired = true // Requeue to allow deletion to proceed - } - return requeueRequired, nil + return false, nil } func (r *NonAdminRestoreReconciler) deleteVeleroRestore(ctx context.Context, logger logr.Logger, nar *nacv1alpha1.NonAdminRestore) (bool, error) { @@ -167,9 +156,7 @@ func (r *NonAdminRestoreReconciler) deleteVeleroRestore(ctx context.Context, log veleroRestoreNACUUID := nar.Status.VeleroRestore.NACUUID veleroRestore, err := function.GetVeleroRestoreByLabel(ctx, r.Client, r.OADPNamespace, veleroRestoreNACUUID) - if err != nil { - // Case in which more then one VeleroRestore is found with the same label NACUUID logger.Error(err, findSingleVRError, uuidString, veleroRestoreNACUUID) return false, err } @@ -185,32 +172,8 @@ func (r *NonAdminRestoreReconciler) deleteVeleroRestore(ctx context.Context, log logger.V(1).Info("VeleroRestore deletion initiated", nameString, veleroRestore.Name) } else { logger.V(1).Info("VeleroRestore already deleted") - } - return false, nil // Continue -} - -func (r *NonAdminRestoreReconciler) removeNarFinalizerUponVeleroRestoreDeletion(ctx context.Context, logger logr.Logger, nar *nacv1alpha1.NonAdminRestore) (bool, error) { - if !nar.ObjectMeta.DeletionTimestamp.IsZero() { - if nar.Status.VeleroRestore != nil && nar.Status.VeleroRestore.NACUUID != constant.EmptyString { - veleroRestoreNACUUID := nar.Status.VeleroRestore.NACUUID - - veleroRestore, err := function.GetVeleroRestoreByLabel(ctx, r.Client, r.OADPNamespace, veleroRestoreNACUUID) - if err != nil { - // Case in which more then one VeleroRestore is found with the same label UUID - logger.Error(err, findSingleVRError, uuidString, veleroRestoreNACUUID) - return false, err - } - - if veleroRestore != nil { - logger.V(1).Info("Waiting for VeleroRestore to be deleted", nameString, veleroRestoreNACUUID) - return true, nil // Requeue - } - } - // VeleroRestore is deleted, proceed with deleting the NonAdminRestore - logger.V(1).Info("VeleroRestore deleted, removing NonAdminRestore finalizer") controllerutil.RemoveFinalizer(nar, constant.NonAdminRestoreFinalizerName) - if err := r.Update(ctx, nar); err != nil { logger.Error(err, "Failed to remove finalizer from NonAdminRestore") return false, err @@ -277,14 +240,6 @@ func (r *NonAdminRestoreReconciler) validateSpec(ctx context.Context, logger log } func (r *NonAdminRestoreReconciler) setUUID(ctx context.Context, logger logr.Logger, nar *nacv1alpha1.NonAdminRestore) (bool, error) { - // Get the latest version of the NAR object just before checking if the NACUUID is set - // to ensure we do not miss any updates to the NAR object - narOriginal := nar.DeepCopy() - if err := r.Get(ctx, types.NamespacedName{Name: narOriginal.Name, Namespace: narOriginal.Namespace}, nar); err != nil { - logger.Error(err, "Failed to re-fetch NonAdminRestore") - return false, err - } - if nar.Status.VeleroRestore == nil || nar.Status.VeleroRestore.NACUUID == constant.EmptyString { veleroRestoreNACUUID := function.GenerateNacObjectUUID(nar.Namespace, nar.Name) nar.Status.VeleroRestore = &nacv1alpha1.VeleroRestore{ @@ -318,14 +273,9 @@ func (r *NonAdminRestoreReconciler) setFinalizer(ctx context.Context, logger log } func (r *NonAdminRestoreReconciler) createVeleroRestore(ctx context.Context, logger logr.Logger, nar *nacv1alpha1.NonAdminRestore) (bool, error) { - if nar.Status.VeleroRestore == nil || nar.Status.VeleroRestore.NACUUID == constant.EmptyString { - return false, errors.New("unable to get Velero Restore UUID from NonAdminRestore Status") - } - veleroRestoreNACUUID := nar.Status.VeleroRestore.NACUUID veleroRestore, err := function.GetVeleroRestoreByLabel(ctx, r.Client, r.OADPNamespace, veleroRestoreNACUUID) - if err != nil { // Case in which more then one VeleroBackup is found with the same label UUID logger.Error(err, findSingleVRError, uuidString, veleroRestoreNACUUID) @@ -334,6 +284,23 @@ func (r *NonAdminRestoreReconciler) createVeleroRestore(ctx context.Context, log if veleroRestore == nil { logger.Info("VeleroRestore with label not found, creating one", uuidString, veleroRestoreNACUUID) + if nar.Status.Phase == nacv1alpha1.NonAdminPhaseCreated { + updatedCondition := meta.SetStatusCondition(&nar.Status.Conditions, + metav1.Condition{ + Type: "Restorable", + Status: metav1.ConditionFalse, + Reason: "RestoreDeleted", + Message: "Deleted Velero Restore object", + }, + ) + if updatedCondition { + if updateErr := r.Status().Update(ctx, nar); updateErr != nil { + logger.Error(updateErr, nonAdminRestoreStatusUpdateFailureMessage) + return false, updateErr + } + } + return false, reconcile.TerminalError(fmt.Errorf("VeleroRestore was deleted")) + } nab := &nacv1alpha1.NonAdminBackup{} err = r.Get(ctx, types.NamespacedName{Name: nar.Spec.RestoreSpec.BackupName, Namespace: nar.Namespace}, nab) if err != nil { diff --git a/internal/controller/nonadminrestore_controller_test.go b/internal/controller/nonadminrestore_controller_test.go index aaa3bcf6..20d9f5a9 100644 --- a/internal/controller/nonadminrestore_controller_test.go +++ b/internal/controller/nonadminrestore_controller_test.go @@ -30,9 +30,9 @@ import ( apierrors "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/types" - "k8s.io/apimachinery/pkg/util/wait" "k8s.io/utils/ptr" ctrl "sigs.k8s.io/controller-runtime" + "sigs.k8s.io/controller-runtime/pkg/cache" nacv1alpha1 "github.com/migtools/oadp-non-admin/api/v1alpha1" "github.com/migtools/oadp-non-admin/internal/common/constant" @@ -47,6 +47,7 @@ type nonAdminRestoreFullReconcileScenario struct { spec nacv1alpha1.NonAdminRestoreSpec status nacv1alpha1.NonAdminRestoreStatus backupStatus nacv1alpha1.NonAdminBackupStatus + errorLogs int } func buildTestNonAdminRestore(nonAdminNamespace string, nonAdminName string, spec nacv1alpha1.NonAdminRestoreSpec) *nacv1alpha1.NonAdminRestore { @@ -168,8 +169,12 @@ var _ = ginkgo.Describe("Test full reconcile loop of NonAdminRestore Controller" gomega.Expect(deleteTestNamespaces(ctx, nonAdminRestoreNamespace, oadpNamespace)).To(gomega.Succeed()) cancel() - // wait cancel - time.Sleep(1 * time.Second) + // wait manager shutdown + gomega.Eventually(func() (bool, error) { + logOutput := ginkgo.CurrentSpecReport().CapturedGinkgoWriterOutput + shutdownlog := "INFO Wait completed, proceeding to shutdown the manager" + return strings.Contains(logOutput, shutdownlog) && strings.Count(logOutput, shutdownlog) == 1, nil + }, 5*time.Second, 1*time.Second).Should(gomega.BeTrue()) }) ginkgo.DescribeTable("Reconcile triggered by NonAdminRestore Create event", @@ -185,6 +190,12 @@ var _ = ginkgo.Describe("Test full reconcile loop of NonAdminRestore Controller" k8sManager, err := ctrl.NewManager(cfg, ctrl.Options{ Scheme: k8sClient.Scheme(), + Cache: cache.Options{ + DefaultNamespaces: map[string]cache.Config{ + nonAdminRestoreNamespace: {}, + oadpNamespace: {}, + }, + }, }) gomega.Expect(err).ToNot(gomega.HaveOccurred()) @@ -206,53 +217,44 @@ var _ = ginkgo.Describe("Test full reconcile loop of NonAdminRestore Controller" gomega.Expect(err).ToNot(gomega.HaveOccurred(), "failed to run manager") }() // wait manager start - managerStartTimeout := 10 * time.Second - pollInterval := 100 * time.Millisecond - ctxTimeout, cancel := context.WithTimeout(ctx, managerStartTimeout) - defer cancel() - - err = wait.PollUntilContextTimeout(ctxTimeout, pollInterval, managerStartTimeout, true, func(ctx context.Context) (done bool, err error) { - select { - case <-ctx.Done(): - return false, ctx.Err() - default: - // Check if the manager has started by verifying if the client is initialized - return k8sManager.GetClient() != nil, nil - } - }) - // Check if the context timeout or another error occurred - gomega.Expect(err).ToNot(gomega.HaveOccurred(), "Manager failed to start within the timeout period") + gomega.Eventually(func() (bool, error) { + logOutput := ginkgo.CurrentSpecReport().CapturedGinkgoWriterOutput + startUplog := `INFO Starting workers {"controller": "nonadminrestore", "controllerGroup": "oadp.openshift.io", "controllerKind": "NonAdminRestore", "worker count": 1}` + return strings.Contains(logOutput, startUplog) && + strings.Count(logOutput, startUplog) == 1 && + !strings.Contains(logOutput, "DEBUG Accepted Create event"), nil + }, 5*time.Second, 1*time.Second).Should(gomega.BeTrue()) ginkgo.By("Waiting Reconcile of create event") nonAdminRestore := buildTestNonAdminRestore(nonAdminRestoreNamespace, nonAdminRestoreName, scenario.spec) - gomega.Expect(k8sClient.Create(ctxTimeout, nonAdminRestore)).To(gomega.Succeed()) + gomega.Expect(k8sClient.Create(ctx, nonAdminRestore)).To(gomega.Succeed()) // wait NonAdminRestore reconcile - time.Sleep(2 * time.Second) - - ginkgo.By("Fetching NonAdminRestore after Reconcile") - gomega.Expect(k8sClient.Get( - ctxTimeout, - types.NamespacedName{ - Name: nonAdminRestoreName, - Namespace: nonAdminRestoreNamespace, - }, - nonAdminRestore, - )).To(gomega.Succeed()) - - ginkgo.By("Validating NonAdminRestore Status") + gomega.Eventually(func() (bool, error) { + err := k8sClient.Get( + ctx, + types.NamespacedName{ + Name: nonAdminRestoreName, + Namespace: nonAdminRestoreNamespace, + }, + nonAdminRestore, + ) + if err != nil { + return false, err + } + err = checkTestNonAdminRestoreStatus(nonAdminRestore, scenario.status) + return err == nil, err + }, 5*time.Second, 1*time.Second).Should(gomega.BeTrue()) - gomega.Expect(checkTestNonAdminRestoreStatus(nonAdminRestore, scenario.status)).To(gomega.Succeed()) + ginkgo.By("Checking if NonAdminRestore Spec was not changed") + gomega.Expect(reflect.DeepEqual( + nonAdminRestore.Spec, + scenario.spec, + )).To(gomega.BeTrue()) veleroRestore := &velerov1.Restore{} if scenario.status.VeleroRestore != nil && len(nonAdminRestore.Status.VeleroRestore.NACUUID) > 0 { - ginkgo.By("Checking if NonAdminRestore Spec was not changed") - gomega.Expect(reflect.DeepEqual( - nonAdminRestore.Spec, - scenario.spec, - )).To(gomega.BeTrue()) - gomega.Expect(k8sClient.Get( - ctxTimeout, + ctx, types.NamespacedName{ Name: nonAdminRestore.Status.VeleroRestore.Name, Namespace: oadpNamespace, @@ -268,19 +270,16 @@ var _ = ginkgo.Describe("Test full reconcile loop of NonAdminRestore Controller" } ginkgo.By("Simulating Velero Restore update to finished state") - veleroRestore.Status = velerov1.RestoreStatus{ Phase: velerov1.RestorePhaseCompleted, } // can not call .Status().Update() for veleroRestore object https://github.com/vmware-tanzu/velero/issues/8285 - gomega.Expect(k8sClient.Update(ctxTimeout, veleroRestore)).To(gomega.Succeed()) - - ginkgo.By("Velero Restore updated") + gomega.Expect(k8sClient.Update(ctx, veleroRestore)).To(gomega.Succeed()) // wait NonAdminRestore reconcile gomega.Eventually(func() (bool, error) { err := k8sClient.Get( - ctxTimeout, + ctx, types.NamespacedName{ Name: nonAdminRestoreName, Namespace: nonAdminRestoreNamespace, @@ -298,25 +297,11 @@ var _ = ginkgo.Describe("Test full reconcile loop of NonAdminRestore Controller" } ginkgo.By("Waiting NonAdminRestore deletion") - gomega.Expect(k8sClient.Delete(ctxTimeout, nonAdminRestore)).To(gomega.Succeed()) - gomega.Eventually(func() (bool, error) { - err := k8sClient.Get( - ctxTimeout, - types.NamespacedName{ - Name: nonAdminRestoreName, - Namespace: nonAdminRestoreNamespace, - }, - nonAdminRestore, - ) - if apierrors.IsNotFound(err) { - return true, nil - } - return false, err - }, 10*time.Second, 1*time.Second).Should(gomega.BeTrue()) + gomega.Expect(k8sClient.Delete(ctx, nonAdminRestore)).To(gomega.Succeed()) if scenario.status.VeleroRestore != nil && len(nonAdminRestore.Status.VeleroRestore.NACUUID) > 0 { gomega.Eventually(func() (bool, error) { err := k8sClient.Get( - ctxTimeout, + ctx, types.NamespacedName{ Name: nonAdminRestore.Status.VeleroRestore.Name, Namespace: oadpNamespace, @@ -329,6 +314,26 @@ var _ = ginkgo.Describe("Test full reconcile loop of NonAdminRestore Controller" return false, err }, 10*time.Second, 1*time.Second).Should(gomega.BeTrue()) } + gomega.Eventually(func() (bool, error) { + err := k8sClient.Get( + ctx, + types.NamespacedName{ + Name: nonAdminRestoreName, + Namespace: nonAdminRestoreNamespace, + }, + nonAdminRestore, + ) + if apierrors.IsNotFound(err) { + return true, nil + } + return false, err + }, 10*time.Second, 1*time.Second).Should(gomega.BeTrue()) + gomega.Eventually(func() (bool, error) { + logOutput := ginkgo.CurrentSpecReport().CapturedGinkgoWriterOutput + deletelog := `DEBUG Accepted Delete event {"NonAdminRestorePredicate"` + return strings.Contains(logOutput, deletelog) && strings.Count(logOutput, deletelog) == 1, nil + }, 5*time.Second, 1*time.Second).Should(gomega.BeTrue()) + gomega.Expect(strings.Count(ginkgo.CurrentSpecReport().CapturedGinkgoWriterOutput, "ERROR")).Should(gomega.Equal(scenario.errorLogs)) }, ginkgo.Entry("Should update NonAdminRestore until Velero Restore completes and then delete it", nonAdminRestoreFullReconcileScenario{ spec: nacv1alpha1.NonAdminRestoreSpec{ @@ -417,6 +422,7 @@ var _ = ginkgo.Describe("Test full reconcile loop of NonAdminRestore Controller" }, }, }, + errorLogs: 1, }), ) })