-
Notifications
You must be signed in to change notification settings - Fork 0
190 lines (171 loc) · 8.21 KB
/
aws_nightly_cleanup.yml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
---
# Cleans up the AWS regions used for testing.
name: AWS Scheduled Cleanup of test regions

on:
  schedule:
    # Single daily schedule at 5:00
    - cron: '0 5 * * *'

  # Manual run: the region and the minimum resource age can be overridden.
  workflow_dispatch:
    inputs:
      region:
        description: AWS Region to clean up
        default: eu-west-2
      cleanup_older_than:
        description: Minimum age of the resources to cleanup
        default: 12h

  # Also run for pull requests that touch this workflow or its cleanup scripts.
  pull_request:
    paths:
      - .github/workflows/aws_nightly_cleanup.yml
      - .github/workflows/scripts/aws_global_cleanup.sh
      - .github/workflows/scripts/aws_regional_cleanup.sh
# Workflow-level environment, referenced by the jobs below.
env:
  # Name of the AWS CLI profile the cleanup job stores its credentials under.
  AWS_PROFILE: infex
  # renovate: datasource=github-tags depName=gruntwork-io/cloud-nuke
  CLOUD_NUKE_VERSION: v0.38.1
# Limit the workflow to a single execution per ref:
# a newly started run cancels the one still in progress for the same ref.
concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}
  cancel-in-progress: true
jobs:
  # Runs once per matrix entry: resolves the target region and age threshold,
  # decides whether this entry is due, then runs the cleanup scripts and
  # cloud-nuke against the region.
  aws-cleanup:
    runs-on: ubuntu-latest
    strategy:
      fail-fast: false # don't propagate failing jobs
      matrix:
        # Define regions and types of cleanup based on day
        # Please also update the README.md of the project
        config:
          - region: eu-west-2
            day: All
            cleanup_older_than: 12h
          - region: eu-west-3
            day: All
            cleanup_older_than: 12h
          - region: eu-north-1
            day: Saturday
            cleanup_older_than: 0h
          - region: us-east-1
            day: Saturday
            cleanup_older_than: 0h
    steps:
      - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4

      # Manual runs take the values from the dispatch inputs; every other
      # event takes them from the matrix entry. Both are exported through
      # GITHUB_ENV for the following steps.
      - name: Determine AWS_REGION and CLEANUP_OLDER_THAN
        id: determine-values
        env:
          # Values are handed over as environment variables instead of being
          # templated into the script, so user-supplied input is never parsed
          # as shell code.
          EVENT_NAME: ${{ github.event_name }}
          DISPATCH_REGION: ${{ github.event.inputs.region }}
          DISPATCH_CLEANUP_OLDER_THAN: ${{ github.event.inputs.cleanup_older_than }}
          MATRIX_REGION: ${{ matrix.config.region }}
          MATRIX_CLEANUP_OLDER_THAN: ${{ matrix.config.cleanup_older_than }}
        run: |
          if [[ "$EVENT_NAME" == "workflow_dispatch" ]]; then
            AWS_REGION="$DISPATCH_REGION"
            CLEANUP_OLDER_THAN="$DISPATCH_CLEANUP_OLDER_THAN"
          else
            AWS_REGION="$MATRIX_REGION"
            CLEANUP_OLDER_THAN="$MATRIX_CLEANUP_OLDER_THAN"
          fi
          echo "AWS_REGION=$AWS_REGION" | tee -a "$GITHUB_ENV"
          echo "CLEANUP_OLDER_THAN=$CLEANUP_OLDER_THAN" | tee -a "$GITHUB_ENV"

      # Exports should_run=true|false; every later step is gated on it.
      - name: Check if job should run based on the day
        id: day-check
        env:
          EVENT_NAME: ${{ github.event_name }}
          MATRIX_REGION: ${{ matrix.config.region }}
          MATRIX_DAY: ${{ matrix.config.day }}
        run: |
          # Initialize a variable to determine if the job should continue
          should_run=true
          # If it's a workflow_dispatch, only allow the first region (eu-west-2) of the matrix to run
          if [[ "$EVENT_NAME" == "workflow_dispatch" ]]; then
            if [[ "$MATRIX_REGION" != "eu-west-2" ]]; then
              echo "Skipping job for this region because only the first region is allowed to run for workflow_dispatch."
              should_run=false
            else
              echo "Running job for the first region only since it's a workflow_dispatch event."
            fi
          else
            # Otherwise, check if the day matches the one in the matrix.
            # LC_ALL=C pins the weekday name to English, which is what the
            # matrix entries are written in.
            current_day=$(LC_ALL=C date +%A)
            if [[ "$MATRIX_DAY" != "All" && "$MATRIX_DAY" != "$current_day" ]]; then
              echo "Skipping job for region $AWS_REGION as it’s intended for $MATRIX_DAY only."
              should_run=false
            else
              echo "Running job for region $AWS_REGION on the correct day: $current_day."
            fi
          fi
          echo "should_run=$should_run" | tee -a "$GITHUB_ENV"

      - name: Import Secrets
        id: secrets
        if: ${{ env.should_run == 'true' }}
        uses: hashicorp/vault-action@d1720f055e0635fd932a1d2a48f87a666a57906c # v3
        with:
          url: ${{ secrets.VAULT_ADDR }}
          method: approle
          roleId: ${{ secrets.VAULT_ROLE_ID }}
          secretId: ${{ secrets.VAULT_SECRET_ID }}
          exportEnv: false
          secrets: |
            secret/data/products/infrastructure-experience/ci/common AWS_ACCESS_KEY;
            secret/data/products/infrastructure-experience/ci/common AWS_SECRET_KEY;

      # Official action does not support profiles
      - name: Add profile credentials to ~/.aws/credentials
        if: ${{ env.should_run == 'true' }}
        env:
          # Scoped to this step only; quoted in the script so the secret values
          # are passed to the CLI verbatim.
          PROFILE_ACCESS_KEY_ID: ${{ steps.secrets.outputs.AWS_ACCESS_KEY }}
          PROFILE_SECRET_ACCESS_KEY: ${{ steps.secrets.outputs.AWS_SECRET_KEY }}
        run: |
          aws configure set aws_access_key_id "$PROFILE_ACCESS_KEY_ID" --profile "$AWS_PROFILE"
          aws configure set aws_secret_access_key "$PROFILE_SECRET_ACCESS_KEY" --profile "$AWS_PROFILE"
          aws configure set region "$AWS_REGION" --profile "$AWS_PROFILE"

      - name: Install Cloud Nuke
        if: ${{ env.should_run == 'true' }}
        run: |
          # --fail makes curl exit non-zero on an HTTP error instead of saving
          # the error page as the binary.
          curl -LO \
            --fail \
            --retry 5 \
            --max-time 15 \
            --retry-delay 30 \
            "https://github.com/gruntwork-io/cloud-nuke/releases/download/${CLOUD_NUKE_VERSION}/cloud-nuke_linux_amd64"
          chmod +x cloud-nuke_linux_amd64

      - name: Delete additional regional AWS resources
        timeout-minutes: 15
        if: ${{ env.should_run == 'true' }}
        run: .github/workflows/scripts/aws_regional_cleanup.sh "$AWS_REGION"

      - name: Delete additional global AWS resources
        # Only run a single time per week (tied to the eu-north-1 entry)
        if: ${{ env.should_run == 'true' && env.AWS_REGION == 'eu-north-1' }}
        timeout-minutes: 15
        run: .github/workflows/scripts/aws_global_cleanup.sh

      # This is likely to fail, therefore we ignore the error
      # We're ignoring ec2_dhcp_option as they couldn't be deleted
      # cloudtrail is managed by IT and can't be deleted either
      - name: Run Cloud Nuke (first pass, errors ignored)
        timeout-minutes: 90
        if: ${{ env.should_run == 'true' }}
        env:
          DISABLE_TELEMETRY: 'true'
        run: |
          ./cloud-nuke_linux_amd64 aws \
            --region "$AWS_REGION" \
            --force \
            --older-than "$CLEANUP_OLDER_THAN" \
            --exclude-resource-type ec2_dhcp_option \
            --exclude-resource-type ec2-keypairs \
            --exclude-resource-type s3 \
            --exclude-resource-type cloudtrail || true

      # The second run should remove the remaining resources (VPCs) and fail if there's anything left
      - name: Run Cloud Nuke (second pass)
        timeout-minutes: 90
        if: ${{ env.should_run == 'true' }}
        env:
          DISABLE_TELEMETRY: 'true'
        run: |
          ./cloud-nuke_linux_amd64 aws \
            --region "$AWS_REGION" \
            --force \
            --older-than "$CLEANUP_OLDER_THAN" \
            --exclude-resource-type ec2_dhcp_option \
            --exclude-resource-type cloudtrail \
            --exclude-resource-type ec2-keypairs \
            --exclude-resource-type s3

  # Runs when the cleanup job failed; the Slack report itself is only sent for
  # scheduled runs.
  notify-on-failure:
    runs-on: ubuntu-latest
    if: failure()
    needs:
      - aws-cleanup
    steps:
      - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4

      - name: Notify in Slack in case of failure
        id: slack-notification
        if: github.event_name == 'schedule'
        uses: ./.github/actions/report-failure-on-slack
        with:
          vault_addr: ${{ secrets.VAULT_ADDR }}
          vault_role_id: ${{ secrets.VAULT_ROLE_ID }}
          vault_secret_id: ${{ secrets.VAULT_SECRET_ID }}