From 0e876d91a8503345df2f5b4dc533f9d8ffc9c0c3 Mon Sep 17 00:00:00 2001 From: Raj Date: Fri, 31 Jan 2025 09:20:40 +0100 Subject: [PATCH] test: Watcher Zero Downtime E2E Test --- .../deploy-lifecycle-manager-e2e/action.yml | 3 +- .../action.yml | 3 +- .../test-e2e-with-modulereleasemeta.yml | 1 + pkg/testutils/kyma.go | 4 + pkg/testutils/utils.go | 8 ++ scripts/tests/create_test_clusters.sh | 1 + tests/e2e/Makefile | 3 + tests/e2e/commontestutils/metrics.go | 45 ++++++-- ...nance_windows_initial_installation_test.go | 3 +- tests/e2e/maintenance_windows_skip_test.go | 3 +- tests/e2e/maintenance_windows_wait_test.go | 3 +- tests/e2e/module_deletion_test.go | 2 +- .../e2e/module_upgrade_channel_switch_test.go | 2 +- tests/e2e/watcher_test.go | 2 +- tests/e2e/watcher_zero_downtime_test.go | 100 ++++++++++++++++++ .../kyma/kyma_module_channel_test.go | 2 - 16 files changed, 161 insertions(+), 24 deletions(-) create mode 100644 tests/e2e/watcher_zero_downtime_test.go diff --git a/.github/actions/deploy-lifecycle-manager-e2e/action.yml b/.github/actions/deploy-lifecycle-manager-e2e/action.yml index ae6aaaf286..e660935cc3 100644 --- a/.github/actions/deploy-lifecycle-manager-e2e/action.yml +++ b/.github/actions/deploy-lifecycle-manager-e2e/action.yml @@ -82,7 +82,8 @@ runs: popd - name: Patch CA certificate renewBefore if: ${{matrix.e2e-test == 'ca-certificate-rotation' || - matrix.e2e-test == 'istio-gateway-secret-rotation'}} + matrix.e2e-test == 'istio-gateway-secret-rotation' || + matrix.e2e-test == 'watcher-zero-downtime'}} working-directory: lifecycle-manager shell: bash run: | diff --git a/.github/actions/deploy-template-operator-with-modulereleasemeta/action.yml b/.github/actions/deploy-template-operator-with-modulereleasemeta/action.yml index 4afbbd5738..ebd77d34e5 100644 --- a/.github/actions/deploy-template-operator-with-modulereleasemeta/action.yml +++ b/.github/actions/deploy-template-operator-with-modulereleasemeta/action.yml @@ -89,7 +89,8 @@ runs: matrix.e2e-test == 'modulereleasemeta-not-allowed-installation' || matrix.e2e-test == 'maintenance-windows' || matrix.e2e-test == 'maintenance-windows-initial-installation' || - matrix.e2e-test == 'maintenance-windows-skip' + matrix.e2e-test == 'maintenance-windows-skip' || + matrix.e2e-test == 'watcher-zero-downtime' }} shell: bash run: | diff --git a/.github/workflows/test-e2e-with-modulereleasemeta.yml b/.github/workflows/test-e2e-with-modulereleasemeta.yml index 89b49ef491..1622048550 100644 --- a/.github/workflows/test-e2e-with-modulereleasemeta.yml +++ b/.github/workflows/test-e2e-with-modulereleasemeta.yml @@ -70,6 +70,7 @@ jobs: - maintenance-windows - maintenance-windows-initial-installation - maintenance-windows-skip + - watcher-zero-downtime runs-on: ubuntu-latest timeout-minutes: 20 diff --git a/pkg/testutils/kyma.go b/pkg/testutils/kyma.go index d430ff615e..53c169bb52 100644 --- a/pkg/testutils/kyma.go +++ b/pkg/testutils/kyma.go @@ -28,6 +28,10 @@ var ( ErrModuleMessageInStatusIsIncorrect = errors.New("status.modules.message is incorrect") ) +const ( + FastChannel = "fast" +) + func NewTestKyma(name string) *v1beta2.Kyma { return NewKymaWithSyncLabel(name, ControlPlaneNamespace, v1beta2.DefaultChannel) } diff --git a/pkg/testutils/utils.go b/pkg/testutils/utils.go index c260bc0227..91dca36018 100644 --- a/pkg/testutils/utils.go +++ b/pkg/testutils/utils.go @@ -7,6 +7,7 @@ import ( "fmt" "io" "os" + "os/exec" "path/filepath" "time" @@ -180,3 +181,10 @@ func parseResourcesFromYAML(yamlFilePath string, clnt client.Client) ([]*unstruc } return resources, nil } + +func PatchServiceToTypeLoadBalancer(kubeconfigPath string, serviceName, namespace string) error { + kubeCtl := exec.Command("kubectl", "patch", "service", serviceName, "-n", namespace, + "-p", `{"spec": {"type": "LoadBalancer"}}`) + kubeCtl.Env = append(os.Environ(), "KUBECONFIG="+kubeconfigPath) + return kubeCtl.Run() +} diff --git a/scripts/tests/create_test_clusters.sh b/scripts/tests/create_test_clusters.sh index 68a3145316..35a24184c1 100755 --- a/scripts/tests/create_test_clusters.sh +++ b/scripts/tests/create_test_clusters.sh @@ -54,6 +54,7 @@ else k3d cluster create skr \ -p 10080:80@loadbalancer \ -p 10443:443@loadbalancer \ + -p 2112:2112@loadbalancer \ --k3s-arg --tls-san="skr.cluster.local@server:*" \ --image rancher/k3s:v${K8S_VERSION}-k3s1 \ --k3s-arg --disable="traefik@server:*" \ diff --git a/tests/e2e/Makefile b/tests/e2e/Makefile index bda03c58d1..1a85bc46dc 100644 --- a/tests/e2e/Makefile +++ b/tests/e2e/Makefile @@ -180,3 +180,6 @@ maintenance-windows-initial-installation: maintenance-windows-skip: go test -timeout 20m -ginkgo.v -ginkgo.focus "Maintenance Windows - No Wait for Maintenance Widnow on Skip" + +watcher-zero-downtime: + go test -timeout 20m -ginkgo.v -ginkgo.focus "Watcher Zero Downtime" diff --git a/tests/e2e/commontestutils/metrics.go b/tests/e2e/commontestutils/metrics.go index bab02dd0d9..886855dfe6 100644 --- a/tests/e2e/commontestutils/metrics.go +++ b/tests/e2e/commontestutils/metrics.go @@ -13,10 +13,15 @@ import ( "github.com/kyma-project/lifecycle-manager/internal/pkg/metrics" ) +const ( + kcpMetricsPort = 9081 + skrMetricsPort = 2112 +) + var ErrMetricNotFound = errors.New("metric was not found") func GetKymaStateMetricCount(ctx context.Context, kymaName string, state shared.State) (int, error) { - bodyString, err := getMetricsBody(ctx) + bodyString, err := getKCPMetricsBody(ctx) if err != nil { return 0, err } @@ -32,7 +37,7 @@ func getKymaStateMetricRegex(kymaName string, state shared.State) *regexp.Regexp } func AssertKymaStateMetricNotFound(ctx context.Context, kymaName string, state shared.State) error { - bodyString, err := getMetricsBody(ctx) + bodyString, err := getKCPMetricsBody(ctx) if err != nil { return err } @@ -49,7 +54,7 @@ func AssertKymaStateMetricNotFound(ctx context.Context, kymaName string, state s func GetRequeueReasonCount(ctx context.Context, requeueReason, requeueType string, ) (int, error) { - bodyString, err := getMetricsBody(ctx) + bodyString, err := getKCPMetricsBody(ctx) if err != nil { return 0, err } @@ -63,7 +68,7 @@ func GetRequeueReasonCount(ctx context.Context, func IsManifestRequeueReasonCountIncreased(ctx context.Context, requeueReason, requeueType string) (bool, error, ) { - bodyString, err := getMetricsBody(ctx) + bodyString, err := getKCPMetricsBody(ctx) if err != nil { return false, err } @@ -76,7 +81,7 @@ func IsManifestRequeueReasonCountIncreased(ctx context.Context, requeueReason, r } func GetModuleStateMetricCount(ctx context.Context, kymaName, moduleName string, state shared.State) (int, error) { - bodyString, err := getMetricsBody(ctx) + bodyString, err := getKCPMetricsBody(ctx) if err != nil { return 0, err } @@ -93,7 +98,7 @@ func PurgeMetricsAreAsExpected(ctx context.Context, ) bool { correctCount := false correctTime := false - bodyString, err := getMetricsBody(ctx) + bodyString, err := getKCPMetricsBody(ctx) if err != nil { return false } @@ -121,7 +126,7 @@ func PurgeMetricsAreAsExpected(ctx context.Context, } func GetSelfSignedCertNotRenewMetricsGauge(ctx context.Context, kymaName string) (int, error) { - bodyString, err := getMetricsBody(ctx) + bodyString, err := getKCPMetricsBody(ctx) if err != nil { return 0, err } @@ -133,7 +138,7 @@ func GetSelfSignedCertNotRenewMetricsGauge(ctx context.Context, kymaName string) } func GetMandatoryModuleTemplateCountMetric(ctx context.Context) (int, error) { - bodyString, err := getMetricsBody(ctx) + bodyString, err := getKCPMetricsBody(ctx) if err != nil { return 0, err } @@ -143,7 +148,7 @@ func GetMandatoryModuleTemplateCountMetric(ctx context.Context) (int, error) { } func GetMandatoryModuleStateMetric(ctx context.Context, kymaName, moduleName, state string) (int, error) { - bodyString, err := getMetricsBody(ctx) + bodyString, err := getKCPMetricsBody(ctx) if err != nil { return 0, err } @@ -153,9 +158,27 @@ func GetMandatoryModuleStateMetric(ctx context.Context, kymaName, moduleName, st return parseCount(re, bodyString) } -func getMetricsBody(ctx context.Context) (string, error) { +func GetWatcherFailedKcpTotalMetric(ctx context.Context) (int, error) { + metricsBody, err := getSKRMetricsBody(ctx) + if err != nil { + return 0, err + } + regex := regexp.MustCompile(`watcher_failed_kcp_total{error_reason="failed-request"} (\d+)`) + return parseCount(regex, metricsBody) +} + +func getKCPMetricsBody(ctx context.Context) (string, error) { + return getMetricsBody(ctx, kcpMetricsPort) +} + +func getSKRMetricsBody(ctx context.Context) (string, error) { + return getMetricsBody(ctx, skrMetricsPort) +} + +func getMetricsBody(ctx context.Context, port int) (string, error) { clnt := &http.Client{} - request, err := http.NewRequestWithContext(ctx, http.MethodGet, "http://localhost:9081/metrics", nil) + url := fmt.Sprintf("http://localhost:%d/metrics", port) + request, err := http.NewRequestWithContext(ctx, http.MethodGet, url, nil) if err != nil { return "", fmt.Errorf("request to metrics endpoint :%w", err) } diff --git a/tests/e2e/maintenance_windows_initial_installation_test.go b/tests/e2e/maintenance_windows_initial_installation_test.go index 60d78721db..4ddeef495d 100644 --- a/tests/e2e/maintenance_windows_initial_installation_test.go +++ b/tests/e2e/maintenance_windows_initial_installation_test.go @@ -17,7 +17,6 @@ Maintenance Windows are defined as such: */ var _ = Describe("Maintenance Windows - No Wait for Maintenance Window on Initial Installation", Ordered, func() { - const fastChannel = "fast" const europe = "europe" kyma := NewKymaWithSyncLabel("kyma-sample", ControlPlaneNamespace, v1beta2.DefaultChannel) @@ -32,7 +31,7 @@ var _ = Describe("Maintenance Windows - No Wait for Maintenance Window on Initia Context("Given SKR Cluster; Kyma CR .spec.skipMaintenanceWindows=false; NO active maintenance window", func() { It("When module in fast channel is enabled (requiresDowntime=true)", func() { - module.Channel = fastChannel + module.Channel = FastChannel Eventually(EnableModule). WithContext(ctx). WithArguments(skrClient, defaultRemoteKymaName, RemoteNamespace, module). diff --git a/tests/e2e/maintenance_windows_skip_test.go b/tests/e2e/maintenance_windows_skip_test.go index f776e351aa..a3bd8007b6 100644 --- a/tests/e2e/maintenance_windows_skip_test.go +++ b/tests/e2e/maintenance_windows_skip_test.go @@ -17,7 +17,6 @@ Maintenance Windows are defined as such: */ var _ = Describe("Maintenance Windows - No Wait for Maintenance Widnow on Skip", Ordered, func() { - const fastChannel = "fast" const europe = "europe" kyma := NewKymaWithSyncLabel("kyma-sample", ControlPlaneNamespace, v1beta2.DefaultChannel) @@ -64,7 +63,7 @@ var _ = Describe("Maintenance Windows - No Wait for Maintenance Widnow on Skip", }) It("When module channel is changed to fast (requiresDowntime=true)", func() { - module.Channel = fastChannel + module.Channel = FastChannel Eventually(UpdateKymaModuleChannel). WithContext(ctx). WithArguments(skrClient, shared.DefaultRemoteKymaName, shared.DefaultRemoteNamespace, module.Channel). diff --git a/tests/e2e/maintenance_windows_wait_test.go b/tests/e2e/maintenance_windows_wait_test.go index 5c580bd320..0897ff135f 100644 --- a/tests/e2e/maintenance_windows_wait_test.go +++ b/tests/e2e/maintenance_windows_wait_test.go @@ -17,7 +17,6 @@ Maintenance Windows are defined as such: */ var _ = Describe("Maintenance Windows - Wait for Maintenance Window", Ordered, func() { - const fastChannel = "fast" const europe = "europe" const asia = "asia" @@ -65,7 +64,7 @@ var _ = Describe("Maintenance Windows - Wait for Maintenance Window", Ordered, f }) It("When module channel is changed to fast (requiresDowntime=true)", func() { - module.Channel = fastChannel + module.Channel = FastChannel Eventually(UpdateKymaModuleChannel). WithContext(ctx). WithArguments(skrClient, shared.DefaultRemoteKymaName, shared.DefaultRemoteNamespace, module.Channel). diff --git a/tests/e2e/module_deletion_test.go b/tests/e2e/module_deletion_test.go index 8387af78c6..8bbd524fe4 100644 --- a/tests/e2e/module_deletion_test.go +++ b/tests/e2e/module_deletion_test.go @@ -97,7 +97,7 @@ var _ = Describe("Non Blocking Kyma Module Deletion", Ordered, func() { }) It("When Kyma Module is re-enabled in different Module Distribution Channel", func() { - module.Channel = "fast" + module.Channel = FastChannel Eventually(EnableModule). WithContext(ctx). WithArguments(skrClient, defaultRemoteKymaName, RemoteNamespace, module). diff --git a/tests/e2e/module_upgrade_channel_switch_test.go b/tests/e2e/module_upgrade_channel_switch_test.go index eeaec78526..6c96db811a 100644 --- a/tests/e2e/module_upgrade_channel_switch_test.go +++ b/tests/e2e/module_upgrade_channel_switch_test.go @@ -53,7 +53,7 @@ var _ = Describe("Module Upgrade By Channel Switch", Ordered, func() { It("When upgrade version by switch Channel", func() { Eventually(UpdateKymaModuleChannel). WithContext(ctx). - WithArguments(skrClient, defaultRemoteKymaName, RemoteNamespace, "fast"). + WithArguments(skrClient, defaultRemoteKymaName, RemoteNamespace, FastChannel). Should(Succeed()) }) diff --git a/tests/e2e/watcher_test.go b/tests/e2e/watcher_test.go index c00cf7cf5a..35d15435e6 100644 --- a/tests/e2e/watcher_test.go +++ b/tests/e2e/watcher_test.go @@ -83,7 +83,7 @@ var _ = Describe("Enqueue Event from Watcher", Ordered, func() { timeNow := &apimetav1.Time{Time: time.Now()} It("When spec of SKR Kyma CR is changed", func() { GinkgoWriter.Println(fmt.Sprintf("Spec watching logs since %s: ", timeNow)) - switchedChannel := "fast" + switchedChannel := FastChannel Eventually(changeRemoteKymaChannel). WithContext(ctx). WithArguments(RemoteNamespace, switchedChannel, skrClient). diff --git a/tests/e2e/watcher_zero_downtime_test.go b/tests/e2e/watcher_zero_downtime_test.go new file mode 100644 index 0000000000..0adb2445d1 --- /dev/null +++ b/tests/e2e/watcher_zero_downtime_test.go @@ -0,0 +1,100 @@ +package e2e_test + +import ( + "context" + "errors" + "os" + "time" + + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" + "sigs.k8s.io/controller-runtime/pkg/client" + + "github.com/kyma-project/lifecycle-manager/api/shared" + "github.com/kyma-project/lifecycle-manager/api/v1beta2" + . "github.com/kyma-project/lifecycle-manager/pkg/testutils" + . "github.com/kyma-project/lifecycle-manager/tests/e2e/commontestutils" +) + +var _ = Describe("Watcher Zero Downtime", Ordered, func() { + kyma := NewKymaWithSyncLabel("kyma-sample", ControlPlaneNamespace, v1beta2.DefaultChannel) + module := NewTemplateOperator(v1beta2.DefaultChannel) + moduleCR := NewTestModuleCR(RemoteNamespace) + + InitEmptyKymaBeforeAll(kyma) + CleanupKymaAfterAll(kyma) + + Context("Given SKR Cluster", func() { + It("When Kyma Module is enabled on SKR Kyma CR", func() { + Eventually(EnableModule). + WithContext(ctx). + WithArguments(skrClient, defaultRemoteKymaName, RemoteNamespace, module). + Should(Succeed()) + }) + + It("Then Module Resources are deployed on SKR cluster", func() { + By("And Module CR exists") + Eventually(ModuleCRExists). + WithContext(ctx). + WithArguments(skrClient, moduleCR). + Should(Succeed()) + By("And Module Operator Deployment is ready") + Eventually(DeploymentIsReady). + WithContext(ctx). + WithArguments(skrClient, ModuleDeploymentNameInOlderVersion, TestModuleResourceNamespace). + Should(Succeed()) + + By("And KCP Kyma CR is in \"Ready\" State") + Eventually(KymaIsInState). + WithContext(ctx). + WithArguments(kyma.GetName(), kyma.GetNamespace(), kcpClient, shared.StateReady). + Should(Succeed()) + }) + + It("When SKR metrics service is exposed", func() { + Expect(PatchServiceToTypeLoadBalancer(os.Getenv(skrConfigEnvVar), + "skr-webhook-metrics", "kyma-system")). + To(Succeed()) + }) + + It("Then no downtime errors can be observed", func() { + // Eventually because exposed metrics are not immediately available + Eventually(triggerWatcherAndCheckDowntime). + WithContext(ctx). + WithArguments(skrClient, defaultRemoteKymaName, RemoteNamespace). + Should(Succeed()) + Consistently(triggerWatcherAndCheckDowntime). + WithContext(ctx). + WithArguments(skrClient, defaultRemoteKymaName, RemoteNamespace). + WithTimeout(4 * time.Minute). + Should(Succeed()) + }) + }) +}) + +func triggerWatcherAndCheckDowntime(ctx context.Context, skrClient client.Client, kymaName, kymaNamespace string) error { + // Triggering watcher request + kyma, err := GetKyma(ctx, skrClient, kymaName, kymaNamespace) + if err != nil { + return err + } + if kyma.Spec.Modules[0].Channel == v1beta2.DefaultChannel { + kyma.Spec.Modules[0].Channel = FastChannel + } else { + kyma.Spec.Modules[0].Channel = v1beta2.DefaultChannel + } + err = skrClient.Update(ctx, kyma) + if err != nil { + return err + } + + // Checking if failed KCP error metrics is not increasing + count, err := GetWatcherFailedKcpTotalMetric(ctx) + if err != nil { + return err + } + if count > 0 { + return errors.New("watcher is experiencing downtime") + } + return nil +} diff --git a/tests/integration/controller/kyma/kyma_module_channel_test.go b/tests/integration/controller/kyma/kyma_module_channel_test.go index 3208e96a7c..9e93a5a518 100644 --- a/tests/integration/controller/kyma/kyma_module_channel_test.go +++ b/tests/integration/controller/kyma/kyma_module_channel_test.go @@ -20,9 +20,7 @@ import ( ) const ( - FastChannel = "fast" ValidChannel = "valid" - InvalidNoneChannel = string(shared.NoneChannel) InValidChannel = "Invalid01" // lower case characters from a to z InValidMinLengthChannel = "ch" // minlength = 3 InValidMaxLengthChannel = "averylongchannelwhichlargerthanallowedmaxlength" // maxlength = 32