diff --git a/.github/actions/e2e/setup-cluster/action.yaml b/.github/actions/e2e/setup-cluster/action.yaml index c829662294f0..a3a6aac80809 100644 --- a/.github/actions/e2e/setup-cluster/action.yaml +++ b/.github/actions/e2e/setup-cluster/action.yaml @@ -30,7 +30,7 @@ inputs: default: "1.29" eksctl_version: description: "Version of eksctl to install" - default: v0.169.0 + default: v0.175.0 ip_family: description: "IP Family of the cluster. Valid values are IPv4 or IPv6" default: "IPv4" @@ -152,11 +152,9 @@ runs: minSize: 2 maxSize: 2 iam: + withAddonPolicies: + cloudWatch: true instanceRolePermissionsBoundary: "arn:aws:iam::$ACCOUNT_ID:policy/GithubActionsPermissionsBoundary" - taints: - - key: CriticalAddonsOnly - value: "true" - effect: NoSchedule cloudWatch: clusterLogging: enableTypes: ["*"] @@ -175,6 +173,8 @@ runs: $KARPENTER_IAM withOIDC: true addons: + - name: amazon-cloudwatch-observability + permissionsBoundary: "arn:aws:iam::$ACCOUNT_ID:policy/GithubActionsPermissionsBoundary" - name: vpc-cni permissionsBoundary: "arn:aws:iam::$ACCOUNT_ID:policy/GithubActionsPermissionsBoundary" - name: coredns @@ -211,6 +211,11 @@ runs: else eksctl ${cmd} cluster -f clusterconfig.yaml fi + + # Add the taints only after all necessary pods have been scheduled to the managed node group nodes: + # amazon-cloudwatch-observability pods do not tolerate CriticalAddonsOnly=true:NoSchedule, and + # EKS addons do not allow adding tolerations to the addon pods as part of the advanced configuration. + kubectl taint nodes CriticalAddonsOnly=true:NoSchedule --all --overwrite # --overwrite: do not fail if a node already has the taint - name: tag oidc provider of the cluster if: always() shell: bash diff --git a/.github/workflows/e2e-upgrade.yaml b/.github/workflows/e2e-upgrade.yaml index 0ad5a6e3f3e0..195bc4362694 100644 --- a/.github/workflows/e2e-upgrade.yaml +++ b/.github/workflows/e2e-upgrade.yaml @@ -90,7 +90,7 @@ jobs: region: ${{ inputs.region }} cluster_name: ${{ steps.generate-cluster-name.outputs.CLUSTER_NAME }} k8s_version: ${{ inputs.k8s_version }} - 
eksctl_version: v0.169.0 + eksctl_version: v0.175.0 ip_family: IPv4 # Set the value to IPv6 if IPv6 suite, else IPv4 git_ref: ${{ inputs.from_git_ref }} ecr_account_id: ${{ vars.SNAPSHOT_ACCOUNT_ID }} @@ -135,6 +135,15 @@ jobs: url: ${{ secrets.SLACK_WEBHOOK_URL }} suite: Upgrade git_ref: ${{ inputs.to_git_ref }} + - name: add log retention policy + if: ${{ inputs.workflow_trigger != 'private_cluster' }} + env: + CLUSTER_NAME: ${{ steps.generate-cluster-name.outputs.CLUSTER_NAME }} # generated cluster name, as passed to the cluster setup step + run: | + aws logs put-retention-policy --log-group-name /aws/containerinsights/"$CLUSTER_NAME"/application --retention-in-days 30 + aws logs put-retention-policy --log-group-name /aws/containerinsights/"$CLUSTER_NAME"/dataplane --retention-in-days 30 + aws logs put-retention-policy --log-group-name /aws/containerinsights/"$CLUSTER_NAME"/host --retention-in-days 30 + aws logs put-retention-policy --log-group-name /aws/containerinsights/"$CLUSTER_NAME"/performance --retention-in-days 30 - name: dump logs on failure uses: ./.github/actions/e2e/dump-logs if: failure() || cancelled() diff --git a/.github/workflows/e2e.yaml b/.github/workflows/e2e.yaml index dc120754be4a..920a789132c3 100644 --- a/.github/workflows/e2e.yaml +++ b/.github/workflows/e2e.yaml @@ -132,7 +132,7 @@ jobs: region: ${{ inputs.region }} cluster_name: ${{ steps.generate-cluster-name.outputs.CLUSTER_NAME }} k8s_version: ${{ inputs.k8s_version }} - eksctl_version: v0.169.0 + eksctl_version: v0.175.0 ip_family: ${{ contains(inputs.suite, 'IPv6') && 'IPv6' || 'IPv4' }} # Set the value to IPv6 if IPv6 suite, else IPv4 private_cluster: ${{ inputs.workflow_trigger == 'private_cluster' }} git_ref: ${{ inputs.git_ref }} @@ -187,6 +187,15 @@ jobs: suite: ${{ inputs.suite }} git_ref: ${{ inputs.git_ref }} workflow_trigger: ${{ inputs.workflow_trigger }} + - name: add log retention policy + if: ${{ inputs.workflow_trigger != 'private_cluster' }} + env: + CLUSTER_NAME: ${{ steps.generate-cluster-name.outputs.CLUSTER_NAME }} # generated cluster name, as passed to the cluster setup step + run: | + aws logs put-retention-policy --log-group-name 
/aws/containerinsights/"$CLUSTER_NAME"/application --retention-in-days 30 + aws logs put-retention-policy --log-group-name /aws/containerinsights/"$CLUSTER_NAME"/dataplane --retention-in-days 30 + aws logs put-retention-policy --log-group-name /aws/containerinsights/"$CLUSTER_NAME"/host --retention-in-days 30 + aws logs put-retention-policy --log-group-name /aws/containerinsights/"$CLUSTER_NAME"/performance --retention-in-days 30 - name: dump logs on failure uses: ./.github/actions/e2e/dump-logs if: (failure() || cancelled()) && inputs.workflow_trigger != 'private_cluster'