Skip to content

Commit

Permalink
Merge branch 'main' into doc
Browse files Browse the repository at this point in the history
  • Loading branch information
erhoo82 authored Feb 5, 2025
2 parents b9182b3 + c95981a commit a14004c
Show file tree
Hide file tree
Showing 119 changed files with 6,148 additions and 1,674 deletions.
8 changes: 8 additions & 0 deletions .flake8.other
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
[flake8]
select =
F541, # f-string without any placeholders
F841, # local variable 'x' is assigned to but never used
F401, # 'x' imported but unused
E741, # ambiguous variable name 'l'
F821, # undefined name 'x'
E266, # too many leading '#' for block comment
8 changes: 8 additions & 0 deletions .flake8.speech
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
[flake8]
select =
F541, # f-string without any placeholders
F841, # local variable 'x' is assigned to but never used
F401, # 'x' imported but unused
E741, # ambiguous variable name 'l'
F821, # undefined name 'x'
E266, # too many leading '#' for block comment
2 changes: 2 additions & 0 deletions .github/CODEOWNERS
Original file line number Diff line number Diff line change
@@ -1,2 +1,4 @@
.github/ @pablo-garay @ko3n1g @thomasdhc @chtruong814
Dockerfile.ci @pablo-garay @ko3n1g @thomasdhc @chtruong814
.pylintrc.* @pablo-garay @ko3n1g @thomasdhc @chtruong814
.flake8.* @pablo-garay @ko3n1g @thomasdhc @chtruong814
100 changes: 76 additions & 24 deletions .github/workflows/_test_template.yml
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,8 @@ jobs:
log: ${{ steps.main.outputs.log }}
potential_infra_failure: ${{ steps.main.outputs.potential_infra_failure }}
coverage_report: ${{ steps.main.outputs.coverage_report }}
env:
DIR: ${{ github.run_id }}
steps:
- name: Docker system cleanup
run: |
Expand All @@ -63,12 +65,19 @@ jobs:
docker pull nemoci.azurecr.io/nemo_container:${{ github.run_id }}
- name: Start container
env:
DIR: ${{ github.run_id }}
run: |
mkdir -p $DIR
ARG=("")
if [[ "${{ inputs.RUNNER }}" != *cpu* ]]; then
ARG=("--runtime=nvidia --gpus all")
fi
cmd=$(cat <<RUN_TEST_EOF
#!/bin/bash
docker container rm -f nemo_container_${{ github.run_id }} || true
docker run \
--rm \
-d \
Expand All @@ -79,47 +88,91 @@ jobs:
--env HF_HOME=/home/TestData/HF_HOME \
--volume /mnt/datadrive/TestData:/home/TestData nemoci.azurecr.io/nemo_container:${{ github.run_id }} \
bash -c "sleep $(( ${{ inputs.TIMEOUT }} * 60 + 60 ))"
- id: main
name: Run main script
timeout-minutes: ${{ inputs.TIMEOUT }}
RUN_TEST_EOF
)
echo "$cmd" | tee "$DIR/retry_job.sh"
bash $DIR/retry_job.sh
- name: Create run-script
env:
DIR: ${{ github.run_id }}
SCRIPT: ${{ inputs.SCRIPT }}
id: create
run: |
mkdir -p ${{ github.run_id }}
cd ${{ github.run_id }}/
rm .coverage || true
set +e
(
SCRIPT=$(echo "$SCRIPT" | grep -v '^#')
SCRIPT=$(perl -pe 'chomp if eof' <<< "$SCRIPT")
mkdir -p $DIR
rm $DIR/.coverage || true
rm $DIR/err.log || true
cmd=$(cat <<RUN_TEST_EOF
#!/bin/bash
(
set -e
docker exec nemo_container_${{ github.run_id }} bash -c '${{ inputs.SCRIPT }}'
) 2> >(tee err.log)
docker exec nemo_container_${{ github.run_id }} bash -c '$SCRIPT && echo "Finished successfully." || echo "Did not finish."'
) 2>&1 | tee $DIR/err.log
EXIT_CODE=$?
RUN_TEST_EOF
)
set -x
echo "timeout_in_seconds=$(( ${{ inputs.TIMEOUT }} * 60 ))" | tee -a "$GITHUB_OUTPUT"
echo "$cmd" | tee "$DIR/job.sh"
log=$(tail -c 2000 err.log | base64 -w 0)
echo "log=$log" >> "$GITHUB_OUTPUT"
- name: Run main script
uses: nick-fields/retry@v3
with:
timeout_seconds: ${{ steps.create.outputs.timeout_in_seconds }}
max_attempts: 3
shell: bash
retry_on: timeout
command: /bin/bash ${{ github.run_id }}/job.sh
on_retry_command: /bin/bash ${{ github.run_id }}/retry_job.sh

- name: Check result
id: check
env:
SAVE_COVERAGE_REPORT: ${{ inputs.SAVE_COVERAGE_REPORT }}
run: |
cat $DIR/err.log
potential_infra_failure=$(cat err.log | grep -Eqiw "device" && echo true || echo false)
log=$(tail -c 2000 $DIR/err.log | base64 -w 0)
echo "log=$log" >> "$GITHUB_OUTPUT"
potential_infra_failure=$(cat $DIR/err.log | grep -Eqiw "device" && echo true || echo false)
echo "potential_infra_failure=$potential_infra_failure" >> "$GITHUB_OUTPUT"
coverage_report=coverage-${{ github.run_id }}-$(uuidgen)
echo "coverage_report=$coverage_report" >> "$GITHUB_OUTPUT"
docker exec nemo_container_${{ github.run_id }} bash -c 'ls -al'
docker cp nemo_container_${{ github.run_id }}:/workspace/.coverage .coverage
if [[ "$SAVE_COVERAGE_REPORT" == "true" ]]; then
docker cp nemo_container_${{ github.run_id }}:/workspace/.coverage $DIR/.coverage
docker cp nemo_container_${{ github.run_id }}:/workspace/coverage.xml $DIR/coverage.xml
fi
IS_SUCCESS=$(tail -n 1 $DIR/err.log | grep -q "Finished successfully." && echo "true" || echo "false")
if [[ "$IS_SUCCESS" == "false" ]]; then
echo Test did not finish successfully.
exit 1
fi
exit $EXIT_CODE
- name: Upload artifacts
uses: actions/upload-artifact@v4
if: inputs.SAVE_COVERAGE_REPORT == true
with:
name: ${{ steps.main.outputs.coverage_report }}
path: ${{ github.run_id }}/.coverage
name: ${{ steps.check.outputs.coverage_report }}
path: |
${{ github.run_id }}/coverage.xml
${{ github.run_id }}/.coverage
include-hidden-files: true

- uses: "NVIDIA/NeMo/.github/actions/cancel-workflow@main"
if: failure() && inputs.IS_OPTIONAL == false && !contains(github.event.pull_request.labels.*.name, 'no-fail-fast')
- name: after_script
Expand All @@ -130,5 +183,4 @@ jobs:
- name: Container shutdown
if: always()
run: |
docker container stop nemo_container_${{ github.run_id }} || true
docker container rm nemo_container_${{ github.run_id }} || true
docker container rm -f nemo_container_${{ github.run_id }} || true
10 changes: 2 additions & 8 deletions .github/workflows/build-test-publish-wheel.yml
Original file line number Diff line number Diff line change
Expand Up @@ -26,17 +26,11 @@ defaults:

jobs:
build-test-publish-wheel:
uses: NVIDIA/NeMo-FW-CI-templates/.github/workflows/_build_test_publish_wheel.yml@v0.7.0
uses: NVIDIA/NeMo-FW-CI-templates/.github/workflows/_build_test_publish_wheel.yml@v0.20.0
with:
image-name: nemo_container
dockerfile: Dockerfile.ci
image-label: nemo-core
build-args: |
IMAGE_LABEL=nemo-core
prune-filter-timerange: 24h
dry-run: true
python-package: nemo
container-workdir: /workspace
python-version: '3.10'
environment: public
secrets:
TWINE_USERNAME: ${{ secrets.TWINE_USERNAME }}
Expand Down
Loading

0 comments on commit a14004c

Please sign in to comment.