Skip to content

Commit

Permalink
Troubleshooting the "slow" tests group in Azure DevOps CI.
Browse files Browse the repository at this point in the history
  • Loading branch information
alexsherstinsky committed Oct 20, 2023
1 parent 5b955fc commit 531b61a
Show file tree
Hide file tree
Showing 2 changed files with 0 additions and 144 deletions.
143 changes: 0 additions & 143 deletions .github/workflows/pytest.yml
Original file line number Diff line number Diff line change
Expand Up @@ -573,146 +573,3 @@ jobs:
# github-token: ${{ secrets.GH_PERSONAL_ACCESS_TOKEN }}
# label: ${{ needs.start-runner.outputs.label }}
# ec2-instance-id: ${{ needs.start-runner.outputs.ec2-instance-id }}


# This job installs Python dependencies and runs all tests marked as `slow` on a single Python version.
# The tests run on a high-memory AWS compute instance to accommodate memory-intensive workloads.

slow-pytest:
  # Runs the tests selected by the `slow` marker on the runner label
  # published by the `start-runner` job, then uploads the JUnit report.
  strategy:
    fail-fast: false
    matrix:
      os: [ubuntu-latest]
      # Version numbers are quoted so YAML never reads them as floats.
      python-version: ["3.8"]
      test-markers: ["slow"]
      # NOTE(review): the include entry below sets python-version 3.9 while
      # the matrix axis lists only 3.8. Under GitHub's matrix `include` rules
      # an entry that would overwrite an original matrix value is not merged
      # into that combination and becomes its own combination instead, so the
      # 3.8 combination may end up without pytorch/torchscript/ray versions.
      # Confirm which Python version is intended and align the two.
      include:
        - python-version: "3.9"
          pytorch-version: 2.1.0
          torchscript-version: 1.10.2
          ray-version: 2.3.1
  env:
    # Matrix values are re-exported so the shell steps can read them.
    PYTORCH: ${{ matrix.pytorch-version }}
    MARKERS: ${{ matrix.test-markers }}
    NEUROPOD_BASE_DIR: "/usr/local/lib/neuropod"
    NEUROPOD_VERSION: "0.3.0-rc6"
    TORCHSCRIPT_VERSION: ${{ matrix.torchscript-version }}
    RAY_VERSION: ${{ matrix.ray-version }}
    AWS_ACCESS_KEY_ID: ${{ secrets.LUDWIG_TESTS_AWS_ACCESS_KEY_ID }}
    AWS_SECRET_ACCESS_KEY: ${{ secrets.LUDWIG_TESTS_AWS_SECRET_ACCESS_KEY }}
    # Marker expression excluded from the pytest selection in the Tests step.
    EXCLUDED_MARKERS: "benchmark"
    # Environment variables are strings; quoted to make that explicit.
    TOKENIZERS_PARALLELISM: "false"

  name: py${{ matrix.python-version }}, torch-${{ matrix.pytorch-version }}, ${{ matrix.test-markers }}, ${{ matrix.os }}, ray ${{ matrix.ray-version }}
  if: needs.start-runner.result != 'skipped'
  needs: start-runner  # required to start the main job when the runner is ready
  runs-on: ${{ needs.start-runner.outputs.label }}  # run the job on the newly created runners

  timeout-minutes: 60
  steps:
    - uses: actions/checkout@v2
    - name: Set up Python ${{ matrix.python-version }}
      uses: actions/setup-python@v2
      with:
        python-version: ${{ matrix.python-version }}

    - name: Setup Linux
      if: runner.os == 'linux'
      run: |
        sudo apt-get update && sudo apt-get install -y libsndfile1 cmake ccache build-essential g++-8 gcc-8
        cmake --version

    - name: Setup macOS
      if: runner.os == 'macOS'
      run: |
        brew install libuv

    # The cache key combines OS, Python version, torch version, test markers
    # and a hash of the requirements files.
    - name: pip cache
      if: ${{ !env.ACT }}
      uses: actions/cache@v2
      with:
        path: ~/.cache/pip
        key: ${{ runner.os }}-pip-py${{ matrix.python-version }}-torch${{ matrix.pytorch-version }}-${{ matrix.test-markers }}-${{ hashFiles('requirements*.txt') }}
        restore-keys: |
          ${{ runner.os }}-pip-py${{ matrix.python-version }}-torch${{ matrix.pytorch-version }}-${{ matrix.test-markers }}-

    # Installs torch and ray first, records their versions, installs the
    # project with its test extras, then asserts that the project install did
    # not change either version.
    - name: Install dependencies
      run: |
        python --version
        pip --version
        python -m pip install -U pip
        cmake --version
        echo "MARKERS:" $MARKERS
        if [ "$PYTORCH" == "nightly" ]; then
          cat requirements.txt | sed '/^torch[>=<]/d' | sed '/^torchtext[>=<]/d' | sed '/^torchvision[>=<]/d' | sed '/^torchaudio[>=<]/d' > requirements-temp && mv requirements-temp requirements.txt
          extra_index_url=https://download.pytorch.org/whl/nightly/cpu
          pip install --pre torch torchtext torchvision torchaudio --extra-index-url $extra_index_url
        else
          extra_index_url=https://download.pytorch.org/whl/cpu
          pip install torch==$PYTORCH torchtext torchvision torchaudio --extra-index-url $extra_index_url
        fi
        if [ "$RAY_VERSION" == "nightly" ]; then
          # NOTE: hardcoded for python 3.9 on Linux
          pip install https://s3-us-west-2.amazonaws.com/ray-wheels/latest/ray-3.0.0.dev0-cp39-cp39-manylinux2014_x86_64.whl
        else
          # installing `six` early resolves ModuleNotFound error in ray==2.1.0
          pip install six
          pip install ray==$RAY_VERSION
        fi
        ray_expected=$(python -c "import ray; print(ray.__version__)")
        torch_expected=$(python -c "import torch; print(torch.__version__)")
        pip install '.[test]' --extra-index-url $extra_index_url
        pip list
        python -c "import torch; assert torch.__version__ == \"$torch_expected\", f\"torch {torch.__version__} != $torch_expected\""
        python -c "import ray; assert ray.__version__ == \"$ray_expected\", f\"ray {ray.__version__} != $ray_expected\""
      shell: bash

    # Downloads the Neuropod backend tarball for the configured Neuropod and
    # TorchScript versions and unpacks it into NEUROPOD_BASE_DIR.
    - name: Install Neuropod backend
      run: |
        sudo mkdir -p "$NEUROPOD_BASE_DIR"
        curl -L https://github.com/uber/neuropod/releases/download/v${{ env.NEUROPOD_VERSION }}/libneuropod-cpu-linux-v${{ env.NEUROPOD_VERSION }}-torchscript-${{ env.TORCHSCRIPT_VERSION }}-backend.tar.gz | sudo tar -xz -C "$NEUROPOD_BASE_DIR"
      shell: bash

    # Selects tests matching MARKERS but not EXCLUDED_MARKERS and writes the
    # JUnit report to pytest_slow.xml.
    - name: Tests
      env:
        TRANSFORMERS_CACHE: "/root/huggingface_cache"
      run: |
        RUN_PRIVATE=1 LUDWIG_TEST_SUITE_TIMEOUT_S=6000 pytest -vs --timeout 450 --durations 100 -m "($MARKERS) and (not $EXCLUDED_MARKERS)" --junitxml pytest_slow.xml tests

    - name: Upload Unit Test Results
      if: ${{ always() && !env.ACT }}
      uses: actions/upload-artifact@v2
      with:
        name: Unit Test Results (Python ${{ matrix.python-version }} ${{ matrix.test-markers }})
        # Must match the --junitxml file written by the Tests step.
        path: pytest_slow.xml

stop-runner:
  # Stops the EC2 runner identified by the outputs of the `start-runner` job
  # once the `slow-pytest` job has finished.
  name: Stop self-hosted EC2 runner

  # required to stop the runner even if the error happened in the previous job
  if: always() && needs.start-runner.result != 'skipped'
  needs:
    - start-runner  # required to get output from the start-runner job
    - slow-pytest  # required to wait when the main job is done
  runs-on: ubuntu-latest

  steps:
    - name: Configure AWS credentials
      uses: aws-actions/configure-aws-credentials@v1
      with:
        aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
        aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
        aws-region: ${{ secrets.AWS_REGION }}

    - name: Stop EC2 runner
      # NOTE(review): the action reference was garbled in this copy of the
      # file (the text after `machulav/` was replaced by an email-protection
      # placeholder). Restored to the v2 major tag; confirm the exact version
      # pin against the start-runner job.
      uses: machulav/ec2-github-runner@v2
      with:
        mode: stop
        github-token: ${{ secrets.GH_PERSONAL_ACCESS_TOKEN }}
        label: ${{ needs.start-runner.outputs.label }}
        ec2-instance-id: ${{ needs.start-runner.outputs.ec2-instance-id }}
1 change: 0 additions & 1 deletion ludwig/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -332,7 +332,6 @@ def __init__(
# online training state
self._online_trainer = None

# TODO: <Alex>ALEX</Alex>
def train(
self,
dataset: Optional[Union[str, dict, pd.DataFrame]] = None,
Expand Down

0 comments on commit 531b61a

Please sign in to comment.