# update-data.yml
---
# Workflow header: display name and the events that start a run.
name: update-data

on:
  # Any push to one of the two listed branches starts a run.
  push:
    branches: ["dev", "main"]
  # Lets the workflow be started manually.
  workflow_dispatch:
  # Scheduled runs are temporarily disabled: geocoding without a cache on
  # every run is expensive.
  # schedule:
  #   - cron: "5 7 * * 1-5"
jobs:
  # Runs `make archive_all` inside the Docker Compose environment.
  # Only runs for "schedule" and "workflow_dispatch" events; it is skipped on push.
  archive:
    if: github.event_name == 'schedule' || github.event_name == 'workflow_dispatch'
    runs-on: ubuntu-latest
    steps:
      # This will checkout the main branch on "schedule" and the specified branch on "workflow_dispatch"
      # The archiver should probably be pulled out into its own repo so archive code and data don't diverge
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Who owns the workspace?
        run: ls -ld "$GITHUB_WORKSPACE"

      - uses: "google-github-actions/auth@v2"
        with:
          credentials_json: "${{ secrets.DGM_GITHUB_ACTION_CREDENTIALS }}"

      # One echo per line. Trailing backslashes would join the two commands
      # into a single echo and print the word "echo" in the middle of the output.
      - name: Display env variables
        run: |
          echo "Workspace directory: $GITHUB_WORKSPACE"
          echo "Google credentials path: $GOOGLE_GHA_CREDS_PATH"

      # Give the dbcp user ownership of the workspace
      # So it can read and write files to the workspace
      - name: Give the dbcp user ownership of the workspace
        run: sudo chown -R 1000:1000 "$GITHUB_WORKSPACE"

      - name: Set up Docker Compose
        run: |
          sudo apt-get update
          sudo apt-get install -y docker-compose

      - name: Build and run Docker Compose services
        run: |
          docker-compose up -d

      - name: Run the archive
        env:
          AIRTABLE_API_KEY: ${{ secrets.AIRTABLE_API_KEY }}
        run: |
          make archive_all

      # The google-github-actions/auth step is run as runner:docker,
      # so we need to give the workspace back to runner:docker.
      # Runs even when an earlier step failed.
      - name: Give ownership of the workspace back to the runner
        if: always()
        run: sudo chown -R runner:docker "$GITHUB_WORKSPACE"

      - name: Who owns the workspace?
        if: always()
        run: ls -ld "$GITHUB_WORKSPACE"
# Job `matrix_prep` (an entry of the `jobs:` mapping): builds the branch matrix consumed by `etl`.
  matrix_prep:
    needs: archive  # Ensure archive job finishes first
    # Only run if the archive job is successful or is skipped.
    # always() is required because this job would otherwise not run when the
    # archive job is skipped (archive is skipped on push, but matrix_prep is not).
    if: ${{ always() && (needs.archive.result == 'success' || needs.archive.result == 'skipped') }}
    runs-on: ubuntu-latest
    outputs:
      # JSON string of the form {"include":[{"branch":"..."}, ...]}
      matrix: ${{ steps.set-matrix.outputs.matrix }}
    steps:
      # On push: a one-entry matrix for the pushed branch.
      # On any other event: one entry each for main and dev.
      # Context values are passed through env instead of being pasted into
      # the script text, so the shell never parses them as code.
      - name: Set branch dynamically
        id: set-matrix
        env:
          EVENT_NAME: ${{ github.event_name }}
          REF_NAME: ${{ github.ref_name }}
        run: |
          if [ "$EVENT_NAME" = "push" ]; then
            echo "matrix={\"include\":[{\"branch\":\"$REF_NAME\"}]}" >> "$GITHUB_OUTPUT"
          else
            echo "matrix={\"include\":[{\"branch\":\"main\"},{\"branch\":\"dev\"}]}" >> "$GITHUB_OUTPUT"
          fi

      # Quote the JSON when printing it: unquoted, the shell applies brace
      # expansion and quote removal, so the log would not show the real value.
      - name: echo matrix
        env:
          MATRIX: ${{ steps.set-matrix.outputs.matrix }}
        run: echo "$MATRIX"
# Job `etl` (an entry of the `jobs:` mapping): per-branch ETL, tests, settings commit and publish.
  etl:
    needs: matrix_prep  # Ensure the matrix_prep job finishes first
    runs-on: ubuntu-latest
    # always() keeps this job eligible when the upstream archive job was skipped.
    if: ${{ always() && needs.matrix_prep.result == 'success' }}
    strategy:
      # One job instance per {"branch": ...} entry produced by matrix_prep.
      matrix: ${{ fromJSON(needs.matrix_prep.outputs.matrix) }}
    env:
      API_KEY_GOOGLE_MAPS: ${{ secrets.API_KEY_GOOGLE_MAPS }}
      # Original note: "This is changed to dev if running on a schedule".
      # NOTE(review): GitHub documents that default GITHUB_* variables cannot
      # be overridden, so this assignment may have no effect - confirm.
      GITHUB_REF: ${{ github.ref_name }}
    steps:
      - name: print matrix
        run: echo "${{ matrix.branch }}"

      - name: Checkout Repository
        id: checkout
        uses: actions/checkout@v4
        with:
          ref: ${{ matrix.branch }}

      - name: Who owns the workspace?
        run: ls -ld "$GITHUB_WORKSPACE"

      - uses: "google-github-actions/auth@v2"
        with:
          credentials_json: "${{ secrets.DGM_GITHUB_ACTION_CREDENTIALS }}"

      # One echo per line. Trailing backslashes would join the two commands
      # into a single echo and print the word "echo" in the middle of the output.
      - name: Display env variables
        run: |
          echo "Workspace directory: $GITHUB_WORKSPACE"
          echo "Google credentials path: $GOOGLE_GHA_CREDS_PATH"

      # Give the dbcp user ownership of the workspace
      # So it can read and write files to the workspace
      - name: Give the dbcp user ownership of the workspace
        run: sudo chown -R 1000:1000 "$GITHUB_WORKSPACE"

      # NOTE(review): this job starts and stops services with `docker-compose`
      # but publishes with `docker compose`; confirm both CLIs are intended.
      - name: Set up Docker Compose
        run: |
          sudo apt-get update
          sudo apt-get install -y docker-compose

      - name: Build and run Docker Compose services
        run: |
          docker-compose up -d

      - name: Run full ETL
        if: ${{ success() }}
        run: |
          make all

      - name: Run all test
        if: ${{ success() }}
        run: |
          make test

      - name: Write settings.yaml
        id: write_settings
        if: ${{ success() }}
        run: |
          make save_settings

      # Hand the workspace back to runner:docker before the commit step runs.
      - name: Give ownership of the workspace back to the runner
        run: sudo chown -R runner:docker "$GITHUB_WORKSPACE"

      # Commit the settings file.
      # NOTE(review): the author_email value looks like a redacted placeholder;
      # confirm the real address.
      - uses: EndBug/add-and-commit@v9
        id: commit_settings_file
        if: steps.write_settings.outcome == 'success'
        with:
          add: "src/dbcp/settings.yaml"
          author_name: "DBCP Bot"
          author_email: "[email protected]"
          message: "Update settings.yaml"
          push: true

      # Give the dbcp user ownership of the workspace
      # So it can read and write files to the workspace
      - name: Give the dbcp user ownership of the workspace
        run: sudo chown -R 1000:1000 "$GITHUB_WORKSPACE"

      # Publish the outputs, tagged with the checkout SHA and the SHA of the
      # settings commit. When the commit step had nothing to commit its SHA
      # output is empty; in that case settings.yaml is unchanged from the
      # checked-out commit, so that SHA is passed instead. Every value is
      # quoted so an empty expansion can never swallow the following flag.
      - name: Publish outputs
        run: |
          docker compose run --rm app python dbcp/cli.py publish-outputs \
            -bq \
            --build-ref "${{ matrix.branch }}" \
            --code-git-sha "${{ steps.checkout.outputs.commit }}" \
            --settings-file-git-sha "${{ steps.commit_settings_file.outputs.commit_long_sha || steps.checkout.outputs.commit }}" \
            --github-action-run-id "${{ github.run_id }}"

      - name: Stop Docker Compose services
        if: always()
        run: |
          docker-compose down

      # The google-github-actions/auth step is run as runner:docker,
      # so we need to give the workspace back to runner:docker.
      # Runs even when an earlier step failed.
      - name: Give ownership of the workspace back to the runner
        if: always()
        run: sudo chown -R runner:docker "$GITHUB_WORKSPACE"

      - name: Who owns the workspace?
        if: always()
        run: ls -ld "$GITHUB_WORKSPACE"