add workflow file

huggingface · Jan 11, 2024 · a2dd624 · a2dd624
1 parent 41a6839
commit a2dd624
Showing 1 changed file with 99 additions and 0 deletions.
diff --git a/.github/workflows/slow-tests.yml b/.github/workflows/slow-tests.yml
@@ -0,0 +1,99 @@
+name: Slow tests (on push)
+
+on:
+  push:
+    branches: [ main ]
+env:
+  RUN_SLOW: "yes"
+  IS_GITHUB_CI: "1"
+  # To be able to run tests on CUDA 12.2
+  NVIDIA_DISABLE_REQUIRE: "1"
+  SLACK_API_TOKEN: ${{ secrets.SLACK_API_TOKEN }}
+
+
+jobs:
+  run_all_tests_single_gpu:
+    strategy:
+      fail-fast: false
+      matrix:
+        docker-image-name: ["huggingface/trl-latest-gpu:latest", "huggingface/trl-source-gpu:latest"]
+    runs-on: [self-hosted, single-gpu, nvidia-gpu, t4, ci]
+    env:
+      CUDA_VISIBLE_DEVICES: "0"
+      TEST_TYPE: "single_gpu_${{ matrix.docker-image-name }}"
+    container:
+     image: ${{ matrix.docker-image-name }}
+      options: --gpus all --shm-size "16gb" -e NVIDIA_DISABLE_REQUIRE=true
+    defaults:
+      run:
+        shell: bash
+    steps:
+      - uses: actions/checkout@v3
+      - name: Pip install
+        run: |
+          source activate trl
+          pip install -e . --no-deps
+          pip install pytest-reportlog
+      
+      - name: Run common tests on single GPU
+        run: |
+          source activate trl
+          make tests_common_gpu
+
+      - name: Run slow tests on single GPU
+        run: |
+          source activate trl
+          make slow_tests_single_gpu
+      
+      - name: Generate Report
+        if: always()
+        run: |
+          pip install slack_sdk tabulate
+          python scripts/log_reports.py >> $GITHUB_STEP_SUMMARY
+
+
+  run_all_tests_multi_gpu:
+    strategy:
+      fail-fast: false
+      matrix:
+        docker-image-name: ["huggingface/trl-latest-gpu:latest", "huggingface/trl-source-gpu:latest"]
+    runs-on: [self-hosted, multi-gpu, nvidia-gpu, t4, ci]
+    env:
+      CUDA_VISIBLE_DEVICES: "0,1"
+      TEST_TYPE: "multi_gpu_${{ matrix.docker-image-name }}"
+    container:
+     image: ${{ matrix.docker-image-name }}
+      options: --gpus all --shm-size "16gb" -e NVIDIA_DISABLE_REQUIRE=true
+    defaults:
+      run:
+        shell: bash
+    steps:
+      - uses: actions/checkout@v3
+      - name: Pip install
+        run: |
+          source activate trl
+          pip install -e . --no-deps
+          pip install pytest-reportlog
+      
+      - name: Run common tests on single GPU
+        run: |
+          source activate trl
+          make tests_common_gpu
+
+      - name: Run slow tests on multi GPU
+        run: |
+          source activate trl
+          make slow_tests_multi_gpu
+
+      - name: Run end-to-end SFT examples tests on multi GPU
+        run: |
+          source activate trl
+          make run_sft_examples
+      
+      - name: Generate Reports
+        if: always()
+        run: |
+          pip install slack_sdk tabulate
+          python scripts/log_reports.py >> $GITHUB_STEP_SUMMARY
+          python scripts/log_example_reports.py >> $GITHUB_STEP_SUMMARY
+          rm *.txt