first draft workflow for getting nhsn data into s3 bucket #6

Merged
merged 3 commits on Dec 10, 2024
48 changes: 48 additions & 0 deletions .github/workflows/snapshot-nhsn-data.yml
@@ -0,0 +1,48 @@
name: Snapshot NHSN data and upload to S3

on:
  schedule:
    - cron: "45 17 * * 3" # every Wednesday at 5:45 PM UTC == 12:45 PM EST
  workflow_dispatch:

env:
  # Reich Lab AWS account number
  AWS_ACCOUNT: 312560106906

jobs:
  snapshot-nhsn-data:
    runs-on: ubuntu-latest
    steps:
      - name: Set up R 📊
        uses: r-lib/actions/setup-r@v2
        with:
          r-version: 4.4.1
          install-r: true
          use-public-rspm: true
          extra-repositories: 'https://hubverse-org.r-universe.dev'

      - name: Install R packages
        run: |
          Rscript -e "install.packages('remotes')"
          Rscript -e "remotes::install_github('Chicago/[email protected]')"

      - name: Get file name
        run: echo "FILE_NAME=nhsn-$(date +'%Y-%m-%d').csv" >> $GITHUB_ENV

      - name: Snapshot NHSN data
        run: |
          Rscript -e "nhsn_data <- RSocrata::read.socrata('https://data.cdc.gov/resource/mpgq-jmmr.csv');
                      write.csv(nhsn_data, file = Sys.getenv('FILE_NAME'), row.names = FALSE)"
        env:
          FILE_NAME: ${{ env.FILE_NAME }}

      - name: Configure AWS credentials
        # request credentials to assume the hub's AWS role via OpenID Connect
        uses: aws-actions/configure-aws-credentials@v4
        with:
          role-to-assume: arn:aws:iam::${{ env.AWS_ACCOUNT }}:role/iddata-github-action
          aws-region: us-east-1

      - name: Copy files to cloud storage
        run: |
          aws s3 cp "./$FILE_NAME" "s3://infectious-disease-data/data-raw/influenza-nhsn" --dryrun
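
For reference, the snapshot step can be exercised locally before the S3 upload is enabled. A minimal R sketch, assuming the RSocrata package is installed and mirroring the workflow's dated `FILE_NAME`:

```r
# Fetch the same CDC Socrata endpoint used by the "Snapshot NHSN data" step.
# Assumes RSocrata is installed, e.g. remotes::install_github('Chicago/[email protected]').
nhsn_data <- RSocrata::read.socrata("https://data.cdc.gov/resource/mpgq-jmmr.csv")

# Mirror the workflow's dated file name, e.g. "nhsn-2024-12-10.csv".
file_name <- sprintf("nhsn-%s.csv", format(Sys.Date(), "%Y-%m-%d"))
write.csv(nhsn_data, file = file_name, row.names = FALSE)
```

The resulting CSV is the file the final step would copy to s3://infectious-disease-data/data-raw/influenza-nhsn once the `--dryrun` flag is removed.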