Merge pull request #26 from ACCESS-NRI/23-add-repro-infra

Addition of generic workflows from `ACCESS-NRI/reproducibility`
ACCESS-NRI · Jun 11, 2024 · c025d99 · c025d99
2 parents 8f5a421 + 9d3c1b1
commit c025d99
Show file tree

Hide file tree

Showing 2 changed files with 316 additions and 0 deletions.
diff --git a/.github/workflows/config-generate-checksums.yml b/.github/workflows/config-generate-checksums.yml
@@ -0,0 +1,193 @@
+name: Initial Checksums
+# Reusable workflow (`workflow_call`): runs the checksum-marked tests of a configuration branch
+# over SSH, uploads the generated checksums as an artifact and optionally commits, tags and
+# releases them.
+on:
+  workflow_call:
+    inputs:
+      model-name:
+        description: Name of the model that is having its checksums generated.
+        type: string
+        required: true
+      config-branch-name:
+        description: The configuration branch that will be run that will generate the checksums.
+        type: string
+        required: true
+      commit-checksums:
+        description: Whether to commit the checksums to the config branch once generated.
+        type: boolean
+        required: true
+      committed-checksum-location:
+        description: "If checksums are committed: Where in the repository the generated checksums should be committed to."
+        type: string
+        required: false
+        default: ./testing/checksum
+      committed-checksum-tag:
+        description: "If checksums are committed: An optional tag to attach to the committed checksums."
+        type: string
+        required: false
+        # Explicit empty default: the tag/release steps are skipped when this input equals ''.
+        default: ""
+      environment-name:
+        description: The name of a GitHub Environment that is inherited from the caller.
+        type: string
+        required: true
+      model-config-tests-version:
+        description: A version of the model-config-tests package
+        type: string
+        required: true
+      python-version:
+        description: The python module version used to create test virtual environment
+        type: string
+        required: true
+    outputs:
+      checksum-location:
+        description: Location of the checksums on the deployment target (deployment target given by the `environment-name` input).
+        value: ${{ jobs.generate-checksum.outputs.checksum-location }}
+      artifact-name:
+        description: Name of the artifact containing the checksums and test report for this repro run
+        value: ${{ jobs.generate-checksum.outputs.artifact-name }}
+env:
+  # Directory on the runner that checksums are copied into, uploaded from and downloaded to.
+  OUTPUT_LOCAL_LOCATION: /opt/checksum-output
+jobs:
+  # Runs the checksum tests of `config-branch-name` on the SSH host, copies the resulting
+  # `checksum` directory back to the runner and uploads it as an artifact.
+  generate-checksum:
+    name: Generate
+    runs-on: ubuntu-latest
+    environment: ${{ inputs.environment-name }}
+    env:
+      EXPERIMENT_LOCATION: ${{ vars.EXPERIMENTS_LOCATION }}/${{ inputs.model-name }}/${{ inputs.config-branch-name }}
+    outputs:
+      artifact-name: ${{ steps.artifact.outputs.name }}
+      checksum-location: ${{ steps.run.outputs.checksum-location }}
+    steps:
+      - name: Setup SSH
+        id: ssh
+        uses: access-nri/actions/.github/actions/setup-ssh@main
+        with:
+          hosts: |
+            ${{ secrets.SSH_HOST }}
+            ${{ secrets.SSH_HOST_DATA }}
+          private-key: ${{ secrets.SSH_KEY }}
+
+      - name: Run model on ${{ inputs.environment-name }}
+        id: run
+        env:
+          BASE_EXPERIMENT_LOCATION: ${{ env.EXPERIMENT_LOCATION }}/base-experiment
+          TEST_VENV_LOCATION: ${{ env.EXPERIMENT_LOCATION }}/test-venv
+        run: |
+          # The heredoc delimiter is quoted so the runner's shell sends the script verbatim.
+          # Unquoted, the backticked words in the comments below would be executed locally as
+          # command substitutions. GitHub expressions are substituted before the shell starts,
+          # so they are unaffected by the quoting.
+          ssh ${{ secrets.SSH_USER }}@${{ secrets.SSH_HOST }} -i ${{ steps.ssh.outputs.private-key-path }} /bin/bash<<'EOT'
+
+          # NOTE(review): this remote shell is not started with `-e`, so a failure in the setup
+          # commands below does not abort the script either - confirm that this is intended.
+
+          # Remove base experiment if it already exists
+          if [ -d "${{ env.BASE_EXPERIMENT_LOCATION }}" ]; then
+            rm -rf "${{ env.BASE_EXPERIMENT_LOCATION }}"
+          fi
+
+          # Setup a base experiment
+          git clone ${{ github.event.repository.clone_url }} "${{ env.BASE_EXPERIMENT_LOCATION }}"
+          cd "${{ env.BASE_EXPERIMENT_LOCATION }}"
+          git checkout ${{ inputs.config-branch-name }}
+
+          # Load Python module
+          module load python3/${{ inputs.python-version }}
+
+          # Create and activate virtual environment
+          python3 -m venv "${{ env.TEST_VENV_LOCATION }}"
+          source "${{ env.TEST_VENV_LOCATION }}/bin/activate"
+
+          # Install model-config-tests
+          pip install model-config-tests==${{ inputs.model-config-tests-version }}
+
+          # In this case, we expect the pytests in model-config-tests
+          # to fail because there are no checksums to compare
+          # against. But we still want the side-effect of creating the initial checksums.
+          set +e
+
+          # Run pytests - this also generates checksums files
+          model-config-tests -s \
+            -m "checksum" \
+            --output-path "${{ env.EXPERIMENT_LOCATION }}"
+
+          # Deactivate and remove the test virtual environment
+          deactivate
+          rm -rf "${{ env.TEST_VENV_LOCATION }}"
+
+          # In this case, we want the exit code post-`pytest` to be 0 so the overall `ssh` call succeeds
+          # after the expected `pytest` error.
+          exit 0
+          EOT
+
+          echo "experiment-location=${{ env.EXPERIMENT_LOCATION }}" >> "$GITHUB_OUTPUT"
+          # Feeds the job/workflow output `checksum-location`; this is the same remote
+          # directory that the next step copies back.
+          echo "checksum-location=${{ env.EXPERIMENT_LOCATION }}/checksum" >> "$GITHUB_OUTPUT"
+          echo "::notice::Checksums generated on ${{ vars.DEPLOYMENT_TARGET }} at ${{ env.EXPERIMENT_LOCATION }}"
+
+      - name: Copy Back Checksums
+        # Copies the remote `checksum` directory into OUTPUT_LOCAL_LOCATION on the runner.
+        run: |
+          rsync --recursive -e 'ssh -i ${{ steps.ssh.outputs.private-key-path }}' \
+              '${{ secrets.SSH_USER }}@${{ secrets.SSH_HOST_DATA }}:${{ env.EXPERIMENT_LOCATION }}/checksum' \
+              ${{ env.OUTPUT_LOCAL_LOCATION }}
+
+      - name: Generate Output Artifact Name
+        id: artifact
+        run: echo "name=${{ inputs.model-name }}-${{ inputs.config-branch-name }}" >> "$GITHUB_OUTPUT"
+
+      - name: Upload Output
+        uses: actions/upload-artifact@v4
+        with:
+          name: ${{ steps.artifact.outputs.name }}
+          if-no-files-found: error
+          path: ${{ env.OUTPUT_LOCAL_LOCATION }}
+
+  # Only runs when `commit-checksums` is true: commits the generated checksums to the config
+  # branch and, when a tag was supplied, updates metadata.yaml, tags the commit and creates a release.
+  commit-checksum-to-branch:
+    name: Commit Checksum To ${{ inputs.config-branch-name }}
+    needs:
+      - generate-checksum
+    if: inputs.commit-checksums
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+        with:
+          ref: ${{ inputs.config-branch-name }}
+          fetch-depth: 0
+          token: ${{ secrets.GH_FORCE_PUSH_TOKEN }}
+
+      - name: Download Checksums
+        uses: actions/download-artifact@v4
+        with:
+          name: ${{ needs.generate-checksum.outputs.artifact-name }}
+          path: ${{ env.OUTPUT_LOCAL_LOCATION }}
+
+      - name: Move Checksums to Repo
+        # Values are handed to the script through `env` and quoted, so paths containing
+        # spaces or shell metacharacters are treated as a single argument.
+        env:
+          CHECKSUM_SOURCE: ${{ env.OUTPUT_LOCAL_LOCATION }}/checksum
+          CHECKSUM_DESTINATION: ${{ inputs.committed-checksum-location }}
+        run: |
+          mkdir -p "${CHECKSUM_DESTINATION}"
+          mv "${CHECKSUM_SOURCE}"/* "${CHECKSUM_DESTINATION}"
+
+      - name: Update version in metadata.yaml
+        if: inputs.committed-checksum-tag != ''
+        env:
+          FULL_TAG: ${{ inputs.committed-checksum-tag }}
+        run: |
+          # The version is whatever follows the last '-' in the tag
+          # (the whole tag when it contains no '-').
+          version="${FULL_TAG##*-}"
+          yq -i ".version = \"${version}\"" metadata.yaml
+
+      - name: Commit Checksums to Repo
+        # NOTE: Regarding the config user.name/user.email, see https://github.com/actions/checkout/pull/1184
+        # The bot identity is quoted so names containing spaces or glob characters are
+        # passed to `git config` as one argument.
+        env:
+          BOT_USER_NAME: ${{ vars.GH_ACTIONS_BOT_GIT_USER_NAME }}
+          BOT_USER_EMAIL: ${{ vars.GH_ACTIONS_BOT_GIT_USER_EMAIL }}
+          CONFIG_BRANCH: ${{ inputs.config-branch-name }}
+        run: |
+          git config user.name "${BOT_USER_NAME}"
+          git config user.email "${BOT_USER_EMAIL}"
+          git add .
+          git commit -m "Added initial checksums generated from ${CONFIG_BRANCH}"
+          git push
+          echo "::notice::Committed and pushed checksums generated from ${CONFIG_BRANCH}"
+
+      - name: Tag Checksums in Repo
+        if: inputs.committed-checksum-tag != ''
+        env:
+          TAG: ${{ inputs.committed-checksum-tag }}
+        run: |
+          git tag "${TAG}"
+          git push --tags
+          echo "::notice::Pushed new tag ${TAG}"
+
+      - name: Create Release
+        if: inputs.committed-checksum-tag != ''
+        env:
+          TAG: ${{ inputs.committed-checksum-tag }}
+          # Evaluates to 'DOES' when the tag ends in '.0', otherwise to 'does not'.
+          IS_REPRO_BREAK: ${{ endsWith(inputs.committed-checksum-tag, '.0') && 'DOES' || 'does not' }}
+        uses: softprops/action-gh-release@de2c0eb89ae2a093876385947365aca7b0e5f844  #v0.1.15
+        with:
+          tag_name: ${{ env.TAG }}
+          name: Configuration ${{ env.TAG }}
+          body: |
+            This released configuration ${{ env.IS_REPRO_BREAK }} break reproducibility with released configurations before it. See the 'Config Tags' section in the `README.md` for more information.
+          generate_release_notes: true
diff --git a/.github/workflows/config-pr-checks.yml b/.github/workflows/config-pr-checks.yml
@@ -0,0 +1,123 @@
+name: Repro Checks
+# Reusable workflow (`workflow_call`): runs the selected model-config-tests for a tagged
+# configuration and publishes the checksums and test report as an artifact.
+on:
+  workflow_call:
+    inputs:
+      model-name:
+        description: The name of the model to check for reproducibility
+        type: string
+        required: true
+      config-tag:
+        description: A tag on an associated config branch to use for the reproducibility run
+        type: string
+        required: true
+      environment-name:
+        description: The name of a GitHub Deployment Environment that is inherited from the caller
+        type: string
+        required: true
+      test-markers:
+        description: A python expression of markers to pass to the reproducibility pytests
+        type: string
+        required: true
+      model-config-tests-version:
+        description: A version of the model-config-tests package
+        type: string
+        required: true
+      python-version:
+        description: The python module version used to create test virtual environment
+        type: string
+        required: true
+    outputs:
+      artifact-name:
+        description: Name of the artifact containing the checksums and test report for this repro run
+        value: ${{ jobs.repro.outputs.artifact-name }}
+      experiment-location:
+        description: Location of the experiment on the target environment
+        value: ${{ jobs.repro.outputs.experiment-location }}
+env:
+  # Directory on the runner that the checksums and test report are copied into and uploaded from.
+  TEST_OUTPUT_LOCAL_LOCATION: /opt/test-output
+jobs:
+  # Runs the requested model-config-tests for `config-tag` on the SSH host, copies the
+  # `checksum` directory (including the JUnit report) back and uploads it as an artifact.
+  repro:
+    # NOTE: A lot of these `vars` and `secrets` are not found in this repository. Instead, they are inherited
+    # from the calling workflow (for example, `ACCESS-NRI/access-om2-configs`)
+    name: Run ${{ inputs.config-tag }}
+    runs-on: ubuntu-latest
+    environment: ${{ inputs.environment-name }}
+    outputs:
+      artifact-name: ${{ steps.artifact.outputs.name }}
+      experiment-location: ${{ steps.run.outputs.experiment-location }}
+    env:
+      EXPERIMENT_LOCATION: ${{ vars.EXPERIMENTS_LOCATION }}/${{ inputs.model-name }}/${{ inputs.config-tag }}
+    steps:
+      - name: Setup SSH
+        id: ssh
+        uses: access-nri/actions/.github/actions/setup-ssh@main
+        with:
+          hosts: |
+            ${{ secrets.SSH_HOST }}
+            ${{ secrets.SSH_HOST_DATA }}
+          private-key: ${{ secrets.SSH_KEY }}
+
+      - name: Run configuration
+        id: run
+        env:
+          BASE_EXPERIMENT_LOCATION: ${{ env.EXPERIMENT_LOCATION }}/base-experiment
+          TEST_VENV_LOCATION: ${{ env.EXPERIMENT_LOCATION }}/test-venv
+        run: |
+          # Quoted heredoc delimiter: the script below is sent to the remote shell verbatim.
+          ssh ${{ secrets.SSH_USER }}@${{ secrets.SSH_HOST }} -i ${{ steps.ssh.outputs.private-key-path }} /bin/bash<<'EOT'
+
+          # NOTE(review): this remote shell is not started with `-e`, so a failure in the setup
+          # commands below does not abort the script either - confirm that this is intended.
+
+          # Remove base experiment if it already exists
+          if [ -d "${{ env.BASE_EXPERIMENT_LOCATION }}" ]; then
+            rm -rf "${{ env.BASE_EXPERIMENT_LOCATION }}"
+          fi
+
+          # Setup a base experiment
+          git clone ${{ github.event.repository.clone_url }} "${{ env.BASE_EXPERIMENT_LOCATION }}"
+          cd "${{ env.BASE_EXPERIMENT_LOCATION }}"
+          git checkout ${{ inputs.config-tag }}
+
+          # Load Python module
+          module load python3/${{ inputs.python-version }}
+
+          # Create and activate virtual environment
+          python3 -m venv "${{ env.TEST_VENV_LOCATION }}"
+          source "${{ env.TEST_VENV_LOCATION }}/bin/activate"
+
+          # Install model-config-tests
+          pip install model-config-tests==${{ inputs.model-config-tests-version }}
+
+          # The pytests in model-config-tests might fail in this command,
+          # but that is okay. We still want to run the rest of the commands
+          # after this step.
+          set +e
+
+          # Run model-config-tests pytests - this also generates checksums files
+          model-config-tests -s -m "${{ inputs.test-markers }}" \
+            --output-path "${{ env.EXPERIMENT_LOCATION }}" \
+            --junitxml="${{ env.EXPERIMENT_LOCATION }}/checksum/test_report.xml"
+
+          # Deactivate and remove the test virtual environment
+          deactivate
+          rm -rf "${{ env.TEST_VENV_LOCATION }}"
+
+          # We want the exit code post-`pytest` to be 0 so the overall `ssh` call succeeds
+          # after a potential `pytest` error.
+          exit 0
+          EOT
+          echo "experiment-location=${{ env.EXPERIMENT_LOCATION }}" >> "$GITHUB_OUTPUT"
+
+      - name: Copy Back Checksums and Test Report
+        # Copies the remote `checksum` directory into TEST_OUTPUT_LOCAL_LOCATION on the runner.
+        run: |
+          rsync --recursive -e 'ssh -i ${{ steps.ssh.outputs.private-key-path }}' \
+              '${{ secrets.SSH_USER }}@${{ secrets.SSH_HOST_DATA }}:${{ env.EXPERIMENT_LOCATION }}/checksum' \
+              ${{ env.TEST_OUTPUT_LOCAL_LOCATION }}
+
+      - name: Generate Test Output Artifact Name
+        id: artifact
+        run: echo "name=${{ inputs.model-name }}-${{ inputs.config-tag }}" >> "$GITHUB_OUTPUT"
+
+      - name: Upload Test Output
+        # v4 matches the artifact actions used by the checksum-generation workflow; v3 is deprecated.
+        uses: actions/upload-artifact@v4
+        with:
+          name: ${{ steps.artifact.outputs.name }}
+          if-no-files-found: error
+          path: ${{ env.TEST_OUTPUT_LOCAL_LOCATION }}