From 97d5ec38596990fcd0c00e3369d26f026cb7ebdf Mon Sep 17 00:00:00 2001
From: Jason Deal
Date: Wed, 28 Feb 2024 12:24:14 -0500
Subject: [PATCH] ci: don't remove cluster with unhealthy mng

---
 .github/workflows/e2e.yaml | 22 +++++++++++++++++++++-
 1 file changed, 21 insertions(+), 1 deletion(-)

diff --git a/.github/workflows/e2e.yaml b/.github/workflows/e2e.yaml
index 95cc85246409..0bcd897c0651 100644
--- a/.github/workflows/e2e.yaml
+++ b/.github/workflows/e2e.yaml
@@ -172,9 +172,29 @@ jobs:
           role: ${{ vars.CI_ROLE_NAME }}
           region: ${{ inputs.region }}
           cluster_name: ${{ steps.generate-cluster-name.outputs.CLUSTER_NAME }}
+      # On failure or cancellation, check whether any managed node group node reports
+      # KubeletNotReady. If so, the cleanup step below is skipped so the cluster is
+      # preserved for further investigation.
+      # TODO: @jmdeal remove after investigation is complete
+      - name: detect unhealthy mng
+        id: detect-unhealthy-mng
+        shell: bash
+        if: failure() || cancelled()
+        run: |
+          # Capture the node conditions before searching them. Piping straight into
+          # `grep -q` lets grep exit on the first match, which can kill the upstream
+          # commands with SIGPIPE; `shell: bash` runs with pipefail, so the pipeline
+          # would then report failure and an unhealthy node group would be recorded
+          # as healthy. A failed lookup still falls through to UNHEALTHY=false.
+          conditions="$(kubectl get nodes -l eks.amazonaws.com/nodegroup -oyaml | yq ".items[].status.conditions" || true)"
+          if grep -q "KubeletNotReady" <<< "$conditions"; then
+            echo UNHEALTHY="true" >> "$GITHUB_OUTPUT"
+          else
+            echo UNHEALTHY="false" >> "$GITHUB_OUTPUT"
+          fi
       - name: cleanup karpenter and cluster '${{ steps.generate-cluster-name.outputs.CLUSTER_NAME }}' resources
         uses: ./.github/actions/e2e/cleanup
-        if: always() && inputs.cleanup
+        if: always() && inputs.cleanup && (steps.detect-unhealthy-mng.conclusion == 'skipped' || steps.detect-unhealthy-mng.outputs.UNHEALTHY == 'false')
         with:
           account_id: ${{ vars.CI_ACCOUNT_ID }}
           role: ${{ vars.CI_ROLE_NAME }}