Skip to content

Commit

Permalink
Merge pull request #26 from ACCESS-NRI/23-add-repro-infra
Browse files Browse the repository at this point in the history
Addition of generic workflows from `ACCESS-NRI/reproducibility`
  • Loading branch information
CodeGat authored Jun 11, 2024
2 parents 8f5a421 + 9d3c1b1 commit c025d99
Show file tree
Hide file tree
Showing 2 changed files with 316 additions and 0 deletions.
193 changes: 193 additions & 0 deletions .github/workflows/config-generate-checksums.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,193 @@
# Reusable workflow (`workflow_call`) that runs a configuration branch on the
# deployment target to generate its initial checksums, uploads them as an
# artifact, and optionally commits/tags them on the config branch.
name: Initial Checksums
on:
  workflow_call:
    inputs:
      model-name:
        type: string
        required: true
        description: Name of the model that is having its checksums generated.
      config-branch-name:
        type: string
        required: true
        description: The configuration branch that will be run that will generate the checksums.
      commit-checksums:
        type: boolean
        required: true
        description: Whether to commit the checksums to the config branch once generated.
      committed-checksum-location:
        type: string
        required: false
        default: ./testing/checksum
        description: "If checksums are committed: Where in the repository the generated checksums should be committed to."
      # Steps that tag/release are guarded by `inputs.committed-checksum-tag != ''`.
      committed-checksum-tag:
        type: string
        required: false
        description: "If checksums are committed: An optional tag to attach to the committed checksums."
      environment-name:
        type: string
        required: true
        description: The name of a GitHub Environment that is inherited from the caller.
      model-config-tests-version:
        type: string
        required: true
        description: A version of the model-config-tests package
      python-version:
        type: string
        required: true
        description: The python module version used to create test virtual environment
    outputs:
      checksum-location:
        value: ${{ jobs.generate-checksum.outputs.checksum-location }}
        description: Location of the checksums on the deployment target (deployment target given by the `environment-name` input).
      artifact-name:
        value: ${{ jobs.generate-checksum.outputs.artifact-name }}
        description: Name of the artifact containing the checksums and test report for this repro run
env:
  # Runner-local directory the checksums are rsynced into before upload, and
  # downloaded into again by the commit job.
  OUTPUT_LOCAL_LOCATION: /opt/checksum-output
jobs:
  # Runs the configuration over SSH on the deployment target, copies the
  # generated checksums back to the runner and uploads them as an artifact.
  generate-checksum:
    name: Generate
    runs-on: ubuntu-latest
    environment: ${{ inputs.environment-name }}
    env:
      EXPERIMENT_LOCATION: ${{ vars.EXPERIMENTS_LOCATION }}/${{ inputs.model-name }}/${{ inputs.config-branch-name }}
    outputs:
      artifact-name: ${{ steps.artifact.outputs.name }}
      checksum-location: ${{ steps.run.outputs.checksum-location }}
    steps:
      - name: Setup SSH
        id: ssh
        uses: access-nri/actions/.github/actions/setup-ssh@main
        with:
          hosts: |
            ${{ secrets.SSH_HOST }}
            ${{ secrets.SSH_HOST_DATA }}
          private-key: ${{ secrets.SSH_KEY }}

      - name: Run model on ${{ inputs.environment-name }}
        id: run
        env:
          BASE_EXPERIMENT_LOCATION: ${{ env.EXPERIMENT_LOCATION }}/base-experiment
          TEST_VENV_LOCATION: ${{ env.EXPERIMENT_LOCATION }}/test-venv
        # The heredoc delimiter is quoted ('EOT') so the runner's shell passes
        # the script to the remote host verbatim: no local `$` expansion and no
        # command substitution of backticks that appear in the script text.
        run: |
          ssh ${{ secrets.SSH_USER }}@${{ secrets.SSH_HOST }} -i ${{ steps.ssh.outputs.private-key-path }} /bin/bash<<'EOT'
          # Remove base experiment if it already exists
          if [ -d "${{ env.BASE_EXPERIMENT_LOCATION }}" ]; then
            rm -rf ${{ env.BASE_EXPERIMENT_LOCATION }}
          fi
          # Setup a base experiment
          git clone ${{ github.event.repository.clone_url }} ${{ env.BASE_EXPERIMENT_LOCATION }}
          cd ${{ env.BASE_EXPERIMENT_LOCATION }}
          git checkout ${{ inputs.config-branch-name }}
          # Load Python module
          module load python3/${{ inputs.python-version }}
          # Create and activate virtual environment
          python3 -m venv ${{ env.TEST_VENV_LOCATION }}
          source ${{ env.TEST_VENV_LOCATION }}/bin/activate
          # Install model-config-tests
          pip install model-config-tests==${{ inputs.model-config-tests-version }}
          # In this case, we expect the pytests in model-config-tests
          # to fail because there are no checksums to compare
          # against. But we still want the side-effect of creating the initial checksums.
          set +e
          # Run pytests - this also generates checksums files
          model-config-tests -s \
            -m "checksum" \
            --output-path ${{ env.EXPERIMENT_LOCATION }}
          # Deactivate and remove the test virtual environment
          deactivate
          rm -rf ${{ env.TEST_VENV_LOCATION }}
          # In this case, we want the exit code after pytest to be 0 so the overall ssh call succeeds
          # after the expected pytest error.
          exit 0
          EOT
          echo "experiment-location=${{ env.EXPERIMENT_LOCATION }}" >> "$GITHUB_OUTPUT"
          # Feeds the job/workflow `checksum-location` output; this is the same
          # remote directory the next step copies back.
          echo "checksum-location=${{ env.EXPERIMENT_LOCATION }}/checksum" >> "$GITHUB_OUTPUT"
          echo "::notice::Checksums generated on ${{ vars.DEPLOYMENT_TARGET }} at ${{ env.EXPERIMENT_LOCATION }}"

      - name: Copy Back Checksums
        run: |
          rsync --recursive -e 'ssh -i ${{ steps.ssh.outputs.private-key-path }}' \
            '${{ secrets.SSH_USER }}@${{ secrets.SSH_HOST_DATA }}:${{ env.EXPERIMENT_LOCATION }}/checksum' \
            ${{ env.OUTPUT_LOCAL_LOCATION }}

      - name: Generate Output Artifact Name
        id: artifact
        run: echo "name=${{ inputs.model-name }}-${{ inputs.config-branch-name }}" >> "$GITHUB_OUTPUT"

      - name: Upload Output
        uses: actions/upload-artifact@v4
        with:
          name: ${{ steps.artifact.outputs.name }}
          if-no-files-found: error
          path: ${{ env.OUTPUT_LOCAL_LOCATION }}

  # Only runs when `commit-checksums` is true: commits the generated checksums
  # to the config branch and, when a tag was supplied, bumps the version in
  # metadata.yaml, tags the commit and creates a release.
  commit-checksum-to-branch:
    name: Commit Checksum To ${{ inputs.config-branch-name }}
    needs:
      - generate-checksum
    if: inputs.commit-checksums
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
        with:
          ref: ${{ inputs.config-branch-name }}
          fetch-depth: 0
          token: ${{ secrets.GH_FORCE_PUSH_TOKEN }}

      - name: Download Checksums
        uses: actions/download-artifact@v4
        with:
          name: ${{ needs.generate-checksum.outputs.artifact-name }}
          path: ${{ env.OUTPUT_LOCAL_LOCATION }}

      - name: Move Checksums to Repo
        run: |
          mkdir -p ${{ inputs.committed-checksum-location }}
          mv ${{ env.OUTPUT_LOCAL_LOCATION }}/checksum/* ${{ inputs.committed-checksum-location }}

      - name: Update version in metadata.yaml
        if: inputs.committed-checksum-tag != ''
        env:
          # Passed via the environment rather than interpolated into the
          # script, so the tag text is never parsed as shell code.
          FULL_TAG: ${{ inputs.committed-checksum-tag }}
        run: |
          # The version is everything after the last '-' in the tag
          # (the whole tag when it contains no '-').
          version="${FULL_TAG##*-}"
          yq -i ".version = \"${version}\"" metadata.yaml

      - name: Commit Checksums to Repo
        # NOTE: Regarding the config user.name/user.email, see https://github.com/actions/checkout/pull/1184
        env:
          BOT_GIT_USER_NAME: ${{ vars.GH_ACTIONS_BOT_GIT_USER_NAME }}
          BOT_GIT_USER_EMAIL: ${{ vars.GH_ACTIONS_BOT_GIT_USER_EMAIL }}
        run: |
          # Quoted so a user name containing spaces stays a single argument.
          git config user.name "$BOT_GIT_USER_NAME"
          git config user.email "$BOT_GIT_USER_EMAIL"
          git add .
          git commit -m "Added initial checksums generated from ${{ inputs.config-branch-name }}"
          git push
          echo "::notice::Committed and pushed checksums generated from ${{ inputs.config-branch-name }}"

      - name: Tag Checksums in Repo
        if: inputs.committed-checksum-tag != ''
        env:
          TAG: ${{ inputs.committed-checksum-tag }}
        run: |
          git tag "$TAG"
          git push --tags
          echo "::notice::Pushed new tag $TAG"

      - name: Create Release
        if: inputs.committed-checksum-tag != ''
        env:
          TAG: ${{ inputs.committed-checksum-tag }}
          # Tags ending in '.0' are reported as breaking reproducibility.
          IS_REPRO_BREAK: ${{ endsWith(inputs.committed-checksum-tag, '.0') && 'DOES' || 'does not' }}
        uses: softprops/action-gh-release@de2c0eb89ae2a093876385947365aca7b0e5f844  # v0.1.15
        with:
          tag_name: ${{ env.TAG }}
          name: Configuration ${{ env.TAG }}
          body: |
            This released configuration ${{ env.IS_REPRO_BREAK }} break reproducibility with released configurations before it. See the 'Config Tags' section in the `README.md` for more information.
          generate_release_notes: true
123 changes: 123 additions & 0 deletions .github/workflows/config-pr-checks.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,123 @@
# Reusable workflow (`workflow_call`) that runs the reproducibility tests for
# a tagged configuration over SSH and uploads the checksums and test report
# as an artifact.
name: Repro Checks
on:
  workflow_call:
    inputs:
      model-name:
        description: The name of the model to check for reproducibility
        required: true
        type: string
      config-tag:
        description: A tag on an associated config branch to use for the reproducibility run
        required: true
        type: string
      environment-name:
        description: The name of a GitHub Deployment Environment that is inherited from the caller
        required: true
        type: string
      test-markers:
        description: A python expression of markers to pass to the reproducibility pytests
        required: true
        type: string
      model-config-tests-version:
        description: A version of the model-config-tests package
        required: true
        type: string
      python-version:
        description: The python module version used to create test virtual environment
        required: true
        type: string
    outputs:
      artifact-name:
        description: Name of the artifact containing the checksums and test report for this repro run
        value: ${{ jobs.repro.outputs.artifact-name }}
      experiment-location:
        description: Location of the experiment on the target environment
        value: ${{ jobs.repro.outputs.experiment-location }}
env:
  # Runner-local directory that the remote checksums and test report are
  # copied into before being uploaded.
  TEST_OUTPUT_LOCAL_LOCATION: /opt/test-output
jobs:
  # Checks out the tagged configuration on the deployment target, runs the
  # selected model-config-tests pytests there, then copies the checksums and
  # JUnit report back to the runner and uploads them as an artifact.
  repro:
    # NOTE: A lot of these `vars` and `secrets` are not found in this repository. Instead, they are inherited
    # from the calling workflow (for example, `ACCESS-NRI/access-om2-configs`)
    name: Run ${{ inputs.config-tag }}
    runs-on: ubuntu-latest
    environment: ${{ inputs.environment-name }}
    outputs:
      artifact-name: ${{ steps.artifact.outputs.name }}
      experiment-location: ${{ steps.run.outputs.experiment-location }}
    env:
      EXPERIMENT_LOCATION: ${{ vars.EXPERIMENTS_LOCATION }}/${{ inputs.model-name }}/${{ inputs.config-tag }}
    steps:
      - name: Setup SSH
        id: ssh
        uses: access-nri/actions/.github/actions/setup-ssh@main
        with:
          hosts: |
            ${{ secrets.SSH_HOST }}
            ${{ secrets.SSH_HOST_DATA }}
          private-key: ${{ secrets.SSH_KEY }}

      - name: Run configuration
        id: run
        env:
          BASE_EXPERIMENT_LOCATION: ${{ env.EXPERIMENT_LOCATION }}/base-experiment
          TEST_VENV_LOCATION: ${{ env.EXPERIMENT_LOCATION }}/test-venv
        # The quoted heredoc delimiter ('EOT') sends the script to the remote
        # shell verbatim, with no expansion by the runner's shell.
        run: |
          ssh ${{ secrets.SSH_USER }}@${{ secrets.SSH_HOST }} -i ${{ steps.ssh.outputs.private-key-path }} /bin/bash<<'EOT'
          # Remove base experiment if it already exists
          if [ -d "${{ env.BASE_EXPERIMENT_LOCATION }}" ]; then
            rm -rf ${{ env.BASE_EXPERIMENT_LOCATION }}
          fi
          # Setup a base experiment
          git clone ${{ github.event.repository.clone_url }} ${{ env.BASE_EXPERIMENT_LOCATION }}
          cd ${{ env.BASE_EXPERIMENT_LOCATION }}
          git checkout ${{ inputs.config-tag }}
          # Load Python module
          module load python3/${{ inputs.python-version }}
          # Create and activate virtual environment
          python3 -m venv ${{ env.TEST_VENV_LOCATION }}
          source ${{ env.TEST_VENV_LOCATION }}/bin/activate
          # Install model-config-tests
          pip install model-config-tests==${{ inputs.model-config-tests-version }}
          # The pytests in model-config-tests might fail in this command,
          # but that is okay. We still want to run the rest of the commands
          # after this step.
          set +e
          # Run model-config-tests pytests - this also generates checksums files
          model-config-tests -s -m "${{ inputs.test-markers }}" \
            --output-path ${{ env.EXPERIMENT_LOCATION }} \
            --junitxml=${{ env.EXPERIMENT_LOCATION }}/checksum/test_report.xml
          # Deactivate and remove the test virtual environment
          deactivate
          rm -rf ${{ env.TEST_VENV_LOCATION }}
          # We want the exit code post-`pytest` to be 0 so the overall `ssh` call succeeds
          # after a potential `pytest` error.
          exit 0
          EOT
          echo "experiment-location=${{ env.EXPERIMENT_LOCATION }}" >> "$GITHUB_OUTPUT"

      - name: Copy Back Checksums and Test Report
        run: |
          rsync --recursive -e 'ssh -i ${{ steps.ssh.outputs.private-key-path }}' \
            '${{ secrets.SSH_USER }}@${{ secrets.SSH_HOST_DATA }}:${{ env.EXPERIMENT_LOCATION }}/checksum' \
            ${{ env.TEST_OUTPUT_LOCAL_LOCATION }}

      - name: Generate Test Output Artifact Name
        id: artifact
        run: echo "name=${{ inputs.model-name }}-${{ inputs.config-tag }}" >> "$GITHUB_OUTPUT"

      - name: Upload Test Output
        # v4 matches the version used by the checksum-generation workflow; v3
        # of the artifact actions has been deprecated by GitHub.
        # NOTE(review): callers that download this artifact need
        # actions/download-artifact@v4 — confirm against the calling workflows.
        uses: actions/upload-artifact@v4
        with:
          name: ${{ steps.artifact.outputs.name }}
          if-no-files-found: error
          path: ${{ env.TEST_OUTPUT_LOCAL_LOCATION }}

0 comments on commit c025d99

Please sign in to comment.