Skip to content

update-data

update-data #42

Workflow file for this run

---
name: update-data
on:
  # Pushes to either long-lived branch trigger the workflow.
  push:
    branches: [dev, main]
  # Allow the workflow to be started manually.
  workflow_dispatch:
  # Scheduled runs are temporarily disabled because geocoding without a
  # cache on every run is expensive.
  # schedule:
  #   - cron: 5 7 * * 1-5
jobs:
archive:
  # Archive job: starts the Docker Compose services and runs
  # `make archive_all`. Only runs for scheduled or manually dispatched
  # events, so it is skipped on push.
  if: github.event_name == 'schedule' || github.event_name == 'workflow_dispatch'
  runs-on: ubuntu-latest
  steps:
    # This will checkout the main branch on "schedule" and the specified
    # branch on "workflow_dispatch".
    # The archiver should probably be pulled out into its own repo so
    # archive code and data don't diverge.
    - name: Checkout code
      uses: actions/checkout@v4
    - name: Who owns the workspace?
      run: ls -ld "$GITHUB_WORKSPACE"
    - uses: "google-github-actions/auth@v2"
      with:
        credentials_json: "${{ secrets.DGM_GITHUB_ACTION_CREDENTIALS }}"
    # Each echo is its own command. Trailing backslashes would join the
    # lines and print the second command as text of the first.
    - name: Display env variables
      run: |
        echo "Workspace directory: $GITHUB_WORKSPACE"
        echo "Google credentials path: $GOOGLE_GHA_CREDS_PATH"
    # Give the dbcp user ownership of the workspace
    # so it can read and write files to the workspace.
    - name: Give the dbcp user ownership of the workspace
      run: sudo chown -R 1000:1000 "$GITHUB_WORKSPACE"
    - name: Set up Docker Compose
      run: |
        sudo apt-get update
        sudo apt-get install -y docker-compose
    - name: Build and run Docker Compose services
      run: |
        docker-compose up -d
    - name: Run the archive
      env:
        AIRTABLE_API_KEY: ${{ secrets.AIRTABLE_API_KEY }}
      run: |
        make archive_all
    # The google-github-actions/auth step is run as runner:docker,
    # so we need to give the workspace back to runner:docker.
    # Runs even when an earlier step failed.
    - name: Give ownership of the workspace back to runner:docker
      if: always()
      run: sudo chown -R runner:docker "$GITHUB_WORKSPACE"
    - name: Who owns the workspace?
      if: always()
      run: ls -ld "$GITHUB_WORKSPACE"
matrix_prep:
  # Builds the branch matrix consumed by the etl job:
  #   - on push: only the branch that was pushed
  #   - otherwise: both main and dev
  needs: archive  # Ensure archive job finishes first
  # Only run if the archive job is successful or is skipped.
  # always() is needed because this job would otherwise not run when the
  # archive job is skipped, which is the case on push.
  if: ${{ always() && (needs.archive.result == 'success' || needs.archive.result == 'skipped') }}
  runs-on: ubuntu-latest
  outputs:
    matrix: ${{ steps.set-matrix.outputs.matrix }}
  steps:
    - name: Set branch dynamically
      id: set-matrix
      # Context values are passed through env instead of being pasted into
      # the script text, so the shell never parses them as code.
      env:
        EVENT_NAME: ${{ github.event_name }}
        BRANCH: ${{ github.ref_name }}
      run: |
        if [ "$EVENT_NAME" = "push" ]; then
          echo "matrix={\"include\":[{\"branch\":\"$BRANCH\"}]}" >> "$GITHUB_OUTPUT"
        else
          echo "matrix={\"include\":[{\"branch\":\"main\"},{\"branch\":\"dev\"}]}" >> "$GITHUB_OUTPUT"
        fi
    - name: echo matrix
      # Quoted via env so the JSON is printed verbatim; unquoted, the shell
      # strips the double quotes and brace-expands the object.
      env:
        MATRIX: ${{ steps.set-matrix.outputs.matrix }}
      run: echo "$MATRIX"
etl:
  # ETL job: for every branch in the matrix, check out that branch, run the
  # full ETL and tests inside Docker Compose, commit the generated
  # settings.yaml, and publish the outputs.
  needs: matrix_prep  # Ensure matrix_prep job finishes first
  runs-on: ubuntu-latest
  # NOTE(review): always() is presumably here so this job is not skipped
  # when the upstream archive job was skipped - confirm.
  if: ${{ always() && needs.matrix_prep.result == 'success' }}
  strategy:
    matrix: ${{ fromJSON(needs.matrix_prep.outputs.matrix) }}
  env:
    API_KEY_GOOGLE_MAPS: ${{ secrets.API_KEY_GOOGLE_MAPS }}
    # This is changed to dev if running on a schedule
    # NOTE(review): GitHub documents that default GITHUB_* variables cannot
    # be overwritten - confirm this assignment has the intended effect.
    GITHUB_REF: ${{ github.ref_name }}
  steps:
    - name: print matrix
      run: echo "${{ matrix.branch }}"
    - name: Checkout Repository
      id: checkout
      uses: actions/checkout@v4
      with:
        ref: ${{ matrix.branch }}
    - name: Who owns the workspace?
      run: ls -ld "$GITHUB_WORKSPACE"
    - uses: "google-github-actions/auth@v2"
      with:
        credentials_json: "${{ secrets.DGM_GITHUB_ACTION_CREDENTIALS }}"
    # Each echo is its own command. Trailing backslashes would join the
    # lines and print the second command as text of the first.
    - name: Display env variables
      run: |
        echo "Workspace directory: $GITHUB_WORKSPACE"
        echo "Google credentials path: $GOOGLE_GHA_CREDS_PATH"
    # Give the dbcp user ownership of the workspace
    # so it can read and write files to the workspace.
    - name: Give the dbcp user ownership of the workspace
      run: sudo chown -R 1000:1000 "$GITHUB_WORKSPACE"
    - name: Set up Docker Compose
      run: |
        sudo apt-get update
        sudo apt-get install -y docker-compose
    - name: Build and run Docker Compose services
      run: |
        docker-compose up -d
    # Steps run only if all previous steps succeeded by default, so no
    # explicit success() condition is needed on the next three steps.
    - name: Run full ETL
      run: |
        make all
    - name: Run all tests
      run: |
        make test
    - name: Write settings.yaml
      id: write_settings
      run: |
        make save_settings
    # Hand the workspace back to runner:docker before the commit step
    # touches the repository.
    - name: Give ownership of the workspace back to runner:docker
      run: sudo chown -R runner:docker "$GITHUB_WORKSPACE"
    # Commit the settings file.
    - uses: EndBug/add-and-commit@v9
      id: commit_settings_file
      if: steps.write_settings.outcome == 'success'
      with:
        add: "src/dbcp/settings.yaml"
        author_name: "DBCP Bot"
        author_email: "[email protected]"
        message: "Update settings.yaml"
        push: true
    # Give the dbcp user ownership of the workspace
    # so it can read and write files to the workspace.
    - name: Give the dbcp user ownership of the workspace
      run: sudo chown -R 1000:1000 "$GITHUB_WORKSPACE"
    # Publish the outputs, grabbing the git sha of the commit step.
    # Every value is quoted so that an empty one (e.g. no settings commit
    # was created) is passed as an empty argument instead of letting the
    # option swallow the next flag.
    # NOTE(review): this step uses the `docker compose` plugin while the
    # other steps use the standalone `docker-compose` binary - confirm the
    # mix is intentional.
    - name: Publish outputs
      run: |
        docker compose run --rm app python dbcp/cli.py publish-outputs \
          -bq \
          --build-ref "${{ matrix.branch }}" \
          --code-git-sha "${{ steps.checkout.outputs.commit }}" \
          --settings-file-git-sha "${{ steps.commit_settings_file.outputs.commit_long_sha }}" \
          --github-action-run-id "${{ github.run_id }}"
    - name: Stop Docker Compose services
      if: always()
      run: |
        docker-compose down
    # The google-github-actions/auth step is run as runner:docker,
    # so we need to give the workspace back to runner:docker.
    # Runs even when an earlier step failed.
    - name: Give ownership of the workspace back to runner:docker
      if: always()
      run: sudo chown -R runner:docker "$GITHUB_WORKSPACE"
    - name: Who owns the workspace?
      if: always()
      run: ls -ld "$GITHUB_WORKSPACE"