Skip to content

[BUGFIX] Making integration tests in the group "integration_tests_e" pass in Azure CI #11367

[BUGFIX] Making integration tests in the group "integration_tests_e" pass in Azure CI

[BUGFIX] Making integration tests in the group "integration_tests_e" pass in Azure CI #11367

Workflow file for this run

# This workflow will install Python dependencies, run tests and lint with a variety of Python versions
# For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions
name: pytest

# Run on pushes to, and pull requests targeting, master and release branches.
on:
  push:
    branches:
      - "master"
      - "release-*"
  pull_request:
    branches:
      - "master"
      - "release-*"

# A newer run cancels an in-progress run that belongs to the same group.
# The group is keyed on github.head_ref when it is set and falls back to the
# commit SHA otherwise (head_ref is only populated for pull_request events).
concurrency:
  group: pytest-${{ github.head_ref || github.sha }}
  cancel-in-progress: true

jobs:
# TODO: <Alex>ALEX</Alex>
# pytest:
# runs-on: ${{ matrix.os }}
# strategy:
# fail-fast: false
# matrix:
# os: [ubuntu-latest]
# python-version: ["3.8", "3.9", "3.10"]
# test-markers: ["not distributed", "distributed"]
# include:
# - python-version: "3.8"
# pytorch-version: 1.13.0
# torchscript-version: 1.10.2
# ray-version: 2.2.0
# - python-version: "3.9"
# pytorch-version: 2.0.0
# torchscript-version: 1.10.2
# ray-version: 2.3.0
# - python-version: "3.10"
# pytorch-version: nightly
# torchscript-version: 1.10.2
# ray-version: 2.3.1
# env:
# PYTORCH: ${{ matrix.pytorch-version }}
# MARKERS: ${{ matrix.test-markers }}
# NEUROPOD_BASE_DIR: "/usr/local/lib/neuropod"
# NEUROPOD_VERISON: "0.3.0-rc6"
# TORCHSCRIPT_VERISON: ${{ matrix.torchscript-version }}
# RAY_VERSION: ${{ matrix.ray-version }}
# AWS_ACCESS_KEY_ID: ${{ secrets.LUDWIG_TESTS_AWS_ACCESS_KEY_ID }}
# AWS_SECRET_ACCESS_KEY: ${{ secrets.LUDWIG_TESTS_AWS_SECRET_ACCESS_KEY }}
# KAGGLE_USERNAME: ${{ secrets.KAGGLE_USERNAME }}
# KAGGLE_KEY: ${{ secrets.KAGGLE_KEY }}
# IS_NOT_FORK: ${{ !(github.event.pull_request.base.repo.full_name == 'ludwig-ai/ludwig' && github.event.pull_request.head.repo.fork) }}
#
# name: py${{ matrix.python-version }}, torch-${{ matrix.pytorch-version }}, ${{ matrix.test-markers }}, ${{ matrix.os }}, ray ${{ matrix.ray-version }}
# services:
# minio:
# image: fclairamb/minio-github-actions
# env:
# MINIO_ACCESS_KEY: minio
# MINIO_SECRET_KEY: minio123
# ports:
# - 9000:9000
#
# timeout-minutes: 150
# steps:
# - name: Setup ludwigai/ludwig-ray container for local testing with act.
# if: ${{ env.ACT }}
# run: |
# curl -fsSL https://deb.nodesource.com/setup_16.x | sudo -E bash -
# sudo apt-get install -y nodejs
# sudo mkdir -p /opt/hostedtoolcache/
# sudo chmod 777 -R /opt/hostedtoolcache/
# - uses: actions/checkout@v2
# - name: Set up Python ${{ matrix.python-version }}
# uses: actions/setup-python@v2
# with:
# python-version: ${{ matrix.python-version }}
#
# - name: Setup Linux
# if: runner.os == 'linux'
# run: |
# sudo apt-get update && sudo apt-get install -y cmake libsndfile1 wget libsox-dev
#
# - name: Setup macOS
# if: runner.os == 'macOS'
# run: |
# brew install libuv
#
# - name: pip cache
# if: ${{ !env.ACT }}
# uses: actions/cache@v2
# with:
# path: ~/.cache/pip
# key: ${{ runner.os }}-pip-py${{ matrix.python-version }}-torch${{ matrix.pytorch-version }}-${{ matrix.test-markers }}-${{ hashFiles('requirements*.txt', '.github/workflows/pytest.yml') }}
#
# - name: Debug out of space
# run: |
# du -h -d 1 ~
# df -h
#
# - name: Install dependencies
# run: |
# python --version
# pip --version
# python -m pip install -U pip
# cmake --version
#
# # remove torch and ray from the dependencies so we can add them depending on the matrix args for the job.
# cat requirements.txt | sed '/^torch[>=<\b]/d' | sed '/^torchtext/d' | sed '/^torchvision/d' | sed '/^torchaudio/d' > requirements-temp && mv requirements-temp requirements.txt
# cat requirements_distributed.txt | sed '/^ray[\[]/d'
#
# if [ "$MARKERS" != "distributed" ]; then
# # Skip distributed and hyperopt requirements to test optional imports
# echo > requirements-temp && mv requirements-temp requirements_distributed.txt
# echo > requirements-temp && mv requirements-temp requirements_hyperopt.txt
#
# # Skip distributed tree requirement (lightgbm-ray)
# cat requirements_tree.txt | sed '/^lightgbm-ray/d' > requirements-temp && mv requirements-temp requirements_tree.txt
# else
# if [ "$RAY_VERSION" == "nightly" ]; then
# # NOTE: hardcoded for python 3.10 on Linux
# echo "ray[default,data,serve,tune] @ https://s3-us-west-2.amazonaws.com/ray-wheels/latest/ray-3.0.0.dev0-cp310-cp310-manylinux2014_x86_64.whl" >> requirements_distributed.txt
# else
# echo "ray[default,data,serve,tune]==$RAY_VERSION" >> requirements_distributed.txt
# fi
# fi
#
# if [ "$PYTORCH" == "nightly" ]; then
# extra_index_url=https://download.pytorch.org/whl/nightly/cpu
# pip install --pre torch torchtext torchvision torchaudio --index-url $extra_index_url
#
# else
# extra_index_url=https://download.pytorch.org/whl/cpu
# pip install torch==$PYTORCH torchtext torchvision torchaudio --extra-index-url $extra_index_url
# fi
#
# pip install '.[test]' --extra-index-url $extra_index_url
# pip list
#
# if [ "$PYTORCH" == "nightly" ]; then
# python -c "from packaging import version; import torch; assert version.parse(torch.__version__).release >= version.parse(\"2.0.0\").release, f\"torch {version.parse(torch.__version__).release} < version.parse(\'2.0.0\').release\""
# else
# python -c "from packaging import version; import torch; assert version.parse(torch.__version__).release == version.parse(\"$PYTORCH\").release, f\"torch {version.parse(torch.__version__).release} != version.parse(\'$PYTORCH\').release\""
# fi
#
# if [ "$MARKERS" == "distributed" ]; then
# python -c "from packaging import version; import ray; assert version.parse(ray.__version__).release == version.parse(\"$RAY_VERSION\").release, f\"ray {version.parse(ray.__version__).release} != version.parse(\'$RAY_VERSION\').release\""
# else
# python -c "import importlib.util; assert importlib.util.find_spec('ray') is None, \"found ray but expected it to not be installed\""
# fi
# shell: bash
#
# - name: Install Neuropod backend
# run: |
# sudo mkdir -p "$NEUROPOD_BASE_DIR"
# curl -L https://github.com/uber/neuropod/releases/download/v${{ env.NEUROPOD_VERISON }}/libneuropod-cpu-linux-v${{ env.NEUROPOD_VERISON }}-torchscript-${{ env.TORCHSCRIPT_VERISON }}-backend.tar.gz | sudo tar -xz -C "$NEUROPOD_BASE_DIR"
# shell: bash
#
# - name: Unit Tests
# run: |
# RUN_PRIVATE=$IS_NOT_FORK LUDWIG_TEST_SUITE_TIMEOUT_S=5400 pytest -v --timeout 300 --durations 100 -m "$MARKERS and not slow and not combinatorial and not horovod and not llm" --junitxml pytest.xml tests/ludwig
#
# - name: Regression Tests
# run: |
# RUN_PRIVATE=$IS_NOT_FORK LUDWIG_TEST_SUITE_TIMEOUT_S=5400 pytest -v --timeout 300 --durations 100 -m "$MARKERS and not slow and not combinatorial and not horovod or benchmark and not llm" --junitxml pytest.xml tests/regression_tests
#
# # Skip Horovod and replace with DDP.
# # https://github.com/ludwig-ai/ludwig/issues/3468
# # - name: Install Horovod if necessary
# # if: matrix.test-markers == 'distributed' && matrix.pytorch-version != 'nightly'
# # env:
# # HOROVOD_WITH_PYTORCH: 1
# # HOROVOD_WITHOUT_MPI: 1
# # HOROVOD_WITHOUT_TENSORFLOW: 1
# # HOROVOD_WITHOUT_MXNET: 1
# # run: |
# # pip install -r requirements_extra.txt
# # HOROVOD_BUILT=$(python -c "import horovod.torch; horovod.torch.nccl_built(); print('SUCCESS')" || true)
# # if [[ $HOROVOD_BUILT != "SUCCESS" ]]; then
# # pip uninstall -y horovod
# # pip install --no-cache-dir git+https://github.com/horovod/horovod.git@master
# # fi
# # horovodrun --check-build
# # shell: bash
#
# # Skip Horovod tests and replace with DDP.
# # https://github.com/ludwig-ai/ludwig/issues/3468
# # - name: Horovod Tests
# # if: matrix.test-markers == 'distributed' && matrix.pytorch-version != 'nightly'
# # run: |
# # RUN_PRIVATE=$IS_NOT_FORK LUDWIG_TEST_SUITE_TIMEOUT_S=5400 pytest -v --timeout 300 --durations 100 -m "$MARKERS and horovod and not slow and not combinatorial and not llm" --junitxml pytest.xml tests/
#
# - name: Upload Unit Test Results
# if: ${{ always() && !env.ACT }}
# uses: actions/upload-artifact@v2
# with:
# name: Unit Test Results (Python ${{ matrix.python-version }} ${{ matrix.test-markers }})
# path: pytest.xml
# TODO: <Alex>ALEX</Alex>
integration-tests:
  # Runs tests/integration_tests once per pytest marker group in the matrix;
  # the job name shown in the checks UI is the marker itself.
  name: ${{ matrix.test-markers }}
  runs-on: ubuntu-latest
  strategy:
    # TODO(Alex): this looks like a temporary debugging override; the previous
    # setting is kept commented out below -- confirm which one should be merged.
    fail-fast: false
    # fail-fast: true
    matrix:
      test-markers:
        # TODO(Alex): groups a-d appear to be disabled only while group "e" is
        # being debugged -- re-enable them before merging.
        # - "integration_tests_a"
        # - "integration_tests_b"
        # - "integration_tests_c"
        # - "integration_tests_d"
        - "integration_tests_e"
  env:
    AWS_ACCESS_KEY_ID: ${{ secrets.LUDWIG_TESTS_AWS_ACCESS_KEY_ID }}
    AWS_SECRET_ACCESS_KEY: ${{ secrets.LUDWIG_TESTS_AWS_SECRET_ACCESS_KEY }}
    KAGGLE_USERNAME: ${{ secrets.KAGGLE_USERNAME }}
    KAGGLE_KEY: ${{ secrets.KAGGLE_KEY }}
    # False only for a pull request into ludwig-ai/ludwig whose head repository
    # is a fork; the test step passes this value to pytest as RUN_PRIVATE.
    IS_NOT_FORK: ${{ !(github.event.pull_request.base.repo.full_name == 'ludwig-ai/ludwig' && github.event.pull_request.head.repo.fork) }}
    # Marker expression fragment appended to the pytest -m filter below.
    MARKERS: ${{ matrix.test-markers }}
  services:
    # MinIO service container published on host port 9000.
    minio:
      image: fclairamb/minio-github-actions
      env:
        MINIO_ACCESS_KEY: minio
        MINIO_SECRET_KEY: minio123
      ports:
        - "9000:9000"
  timeout-minutes: 90
  steps:
    - uses: actions/checkout@v4
    - name: Set up Python 3.9
      uses: actions/setup-python@v5
      with:
        # Quoted so the version is always passed as a string, never a float.
        python-version: "3.9"
    - name: Setup Linux
      if: runner.os == 'linux'
      run: |
        sudo apt-get update && sudo apt-get install -y cmake libsndfile1
    # This job is pinned to ubuntu-latest, so this step is skipped there.
    - name: Setup macOS
      if: runner.os == 'macOS'
      run: |
        brew install libuv
    - name: Install dependencies
      # NOTE(review): the `cat requirements_distributed.txt | sed ...` line only
      # prints the filtered file to the log. Unlike the requirements.txt line
      # above it, the result is never written back, so
      # requirements_distributed.txt is left unchanged. Left as-is on purpose:
      # writing it back would change which ray requirement `.[test]` resolves.
      # Confirm the intended behavior before changing it.
      run: |
        python --version
        pip --version
        python -m pip install -U pip
        # remove torch and ray from the dependencies so we can add them depending on the matrix args for the job.
        cat requirements.txt | sed '/^torch[>=<\b]/d' | sed '/^torchtext/d' | sed '/^torchvision/d' | sed '/^torchaudio/d' > requirements-temp && mv requirements-temp requirements.txt
        cat requirements_distributed.txt | sed '/^ray[\[]/d'
        pip install torch==2.0.0 torchtext torchvision torchaudio
        pip install ray==2.3.0
        pip install '.[test]'
        pip list
      shell: bash
    - name: Integration Tests
      # Selects tests carrying the matrix marker while excluding the slow,
      # combinatorial, horovod and llm markers; writes a JUnit report to
      # pytest.xml.
      run: |
        RUN_PRIVATE=$IS_NOT_FORK LUDWIG_TEST_SUITE_TIMEOUT_S=7200 pytest -v --timeout 300 --durations 100 -m "not slow and not combinatorial and not horovod and not llm and $MARKERS" --junitxml pytest.xml tests/integration_tests
# TODO: <Alex>ALEX</Alex>
# llm-tests:
# name: LLM Tests
# runs-on: ubuntu-latest
#
# timeout-minutes: 60
# steps:
# - uses: actions/checkout@v2
# - name: Set up Python 3.9
# uses: actions/setup-python@v2
# with:
# python-version: 3.9
#
# - name: Setup Linux
# if: runner.os == 'linux'
# run: |
# sudo apt-get update && sudo apt-get install -y cmake libsndfile1
#
# - name: Setup macOS
# if: runner.os == 'macOS'
# run: |
# brew install libuv
#
# - name: Install dependencies
# run: |
# python --version
# pip --version
# python -m pip install -U pip
#
# # remove torch and ray from the dependencies so we can add them depending on the matrix args for the job.
# cat requirements.txt | sed '/^torch[>=<\b]/d' | sed '/^torchtext/d' | sed '/^torchvision/d' | sed '/^torchaudio/d' > requirements-temp && mv requirements-temp requirements.txt
# cat requirements_distributed.txt | sed '/^ray[\[]/d'
# pip install torch==2.0.0 torchtext torchvision torchaudio
# pip install ray==2.3.0
# pip install '.[test]'
# pip list
# shell: bash
#
# - name: LLM Tests
# run: |
# pytest -vs --durations 100 -m "llm" --junitxml pytest.xml tests
# TODO: <Alex>ALEX</Alex>
# TODO: <Alex>ALEX</Alex>
# combinatorial-tests:
# name: Combinatorial Tests
# runs-on: ubuntu-latest
#
# timeout-minutes: 60
# steps:
# - uses: actions/checkout@v2
# - name: Set up Python 3.8
# uses: actions/setup-python@v2
# with:
# python-version: 3.8
#
# - name: Setup Linux
# if: runner.os == 'linux'
# run: |
# sudo apt-get update && sudo apt-get install -y cmake libsndfile1
#
# - name: Setup macOS
# if: runner.os == 'macOS'
# run: |
# brew install libuv
#
# - name: Install dependencies
# run: |
# python --version
# pip --version
# python -m pip install -U pip
# pip install '.[test]'
# pip list
# shell: bash
#
# - name: Testing combinatorial config generation code
# run: |
# pytest -vs --durations 100 -m "combinatorial" --junitxml pytest.xml tests/ludwig/config_sampling
#
# - name: Combinatorial Tests
# run: |
# pytest -rx --durations 100 -m "combinatorial" --junitxml pytest.xml tests/training_success
# TODO: <Alex>ALEX</Alex>
# TODO: <Alex>ALEX</Alex>
# test-minimal-install:
# name: Test Minimal Install
# runs-on: ubuntu-latest
#
# timeout-minutes: 15
# steps:
# - uses: actions/checkout@v2
# - name: Set up Python 3.8
# uses: actions/setup-python@v2
# with:
# python-version: 3.8
#
# - name: Setup Linux
# if: runner.os == 'linux'
# run: |
# sudo apt-get update && sudo apt-get install -y cmake libsndfile1
#
# - name: Setup macOS
# if: runner.os == 'macOS'
# run: |
# brew install libuv
#
# - name: Install dependencies
# run: |
# python --version
# pip --version
# python -m pip install -U pip
# pip install torch==2.0.0 torchtext
# pip install ray==2.3.0
# pip install '.'
# pip list
# shell: bash
# - name: Check Install
# run: |
# ludwig check_install
# shell: bash
#
# - name: Test Getting Started
# run: |
# cd examples/getting_started && sh ./run.sh
# shell: bash
# TODO: <Alex>ALEX</Alex>
# start-runner:
# name: Start self-hosted EC2 runner
# if: >
# always() && needs.pytest.result != 'failure' && (
# github.event_name == 'schedule' && github.repository == 'ludwig-ai/ludwig' ||
# github.event_name == 'push' && github.repository == 'ludwig-ai/ludwig' ||
# github.event_name == 'pull_request' && github.event.pull_request.base.repo.full_name == 'ludwig-ai/ludwig' && !github.event.pull_request.head.repo.fork)
# needs: pytest
# runs-on: ubuntu-latest
# outputs:
# label: ${{ steps.start-ec2-runner.outputs.label }}
# ec2-instance-id: ${{ steps.start-ec2-runner.outputs.ec2-instance-id }}
# steps:
# - name: Configure AWS credentials
# uses: aws-actions/configure-aws-credentials@v1
# with:
# aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
# aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
# aws-region: ${{ secrets.AWS_REGION }}
# - name: Start EC2 runner
# id: start-ec2-runner
# uses: machulav/[email protected]
# with:
# mode: start
# github-token: ${{ secrets.GH_PERSONAL_ACCESS_TOKEN }}
# ec2-image-id: ami-0759580dedc953d1f
# ec2-instance-type: g4dn.xlarge
# subnet-id: subnet-0983be43
# security-group-id: sg-4cba0d08
# aws-resource-tags: >
# [
# {"Key": "Name", "Value": "ludwig-github-${{ github.head_ref || github.sha }}"},
# {"Key": "GitHubRepository", "Value": "${{ github.repository }}"},
# {"Key": "GitHubHeadRef", "Value": "${{ github.head_ref }}"},
# {"Key": "GitHubSHA", "Value": "${{ github.sha }}"}
# ]
# pytest-gpu:
# if: needs.start-runner.result != 'skipped'
# needs: start-runner # required to start the main job when the runner is ready
# runs-on: ${{ needs.start-runner.outputs.label }} # run the job on the newly created runners
# strategy:
# fail-fast: false
# matrix:
# python-version: [3.7]
# include:
# - python-version: 3.7
# pytorch-version: 1.10.0
# torchscript-version: 1.10.2
# env:
# PYTORCH: ${{ matrix.pytorch-version }}
# NEUROPOD_BASE_DIR: "/usr/local/lib/neuropod"
# NEUROPOD_VERISON: "0.3.0-rc6"
# TORCHSCRIPT_VERISON: ${{ matrix.torchscript-version }}
# name: py${{ matrix.python-version }}, torch-${{ matrix.pytorch-version }}, gpu
# timeout-minutes: 70
# steps:
# - uses: actions/checkout@v2
# - name: Set up Python ${{ matrix.python-version }}
# uses: actions/setup-python@v2
# with:
# python-version: ${{ matrix.python-version }}
# - name: Setup Linux
# if: runner.os == 'linux'
# run: |
# sudo apt-get update && sudo apt-get install -y libsndfile1 cmake ccache build-essential g++-8 gcc-8
# cmake --version
# - name: Install CUDA drivers
# run: |
# wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/cuda-ubuntu2004.pin
# sudo mv cuda-ubuntu2004.pin /etc/apt/preferences.d/cuda-repository-pin-600
# wget https://developer.download.nvidia.com/compute/cuda/11.5.1/local_installers/cuda-repo-ubuntu2004-11-5-local_11.5.1-495.29.05-1_amd64.deb
# sudo dpkg -i cuda-repo-ubuntu2004-11-5-local_11.5.1-495.29.05-1_amd64.deb
# sudo apt-key add /var/cuda-repo-ubuntu2004-11-5-local/7fa2af80.pub
# sudo apt-get update
# sudo apt-get -y install cuda
# shell: bash
# - name: pip cache
# uses: actions/cache@v2
# with:
# path: ~/.cache/pip
# key: ${{ runner.os }}-pip-py${{ matrix.python-version }}-torch${{ matrix.pytorch-version }}-${{ hashFiles('requirements*.txt') }}
# restore-keys: |
# ${{ runner.os }}-pip-py${{ matrix.python-version }}-torch${{ matrix.pytorch-version }}-
# - name: Install dependencies
# env:
# HOROVOD_WITH_PYTORCH: 1
# HOROVOD_WITHOUT_MPI: 1
# HOROVOD_WITHOUT_TENSORFLOW: 1
# HOROVOD_WITHOUT_MXNET: 1
# run: |
# python --version
# pip --version
# python -m pip install -U pip
# if [ $PYTORCH == "nightly" ]; then
# cat requirements.txt | sed '/^torch[>=<]/d' > requirements-temp && mv requirements-temp requirements.txt
# pip install --pre torch torchvision -f https://download.pytorch.org/whl/torch_stable.html
# else
# pip install torch==${PYTORCH}+cu111 -f https://download.pytorch.org/whl/torch_stable.html
# fi
# # pip install --no-cache-dir git+https://github.com/horovod/horovod.git@master
# pip install dulwich==0.20.26 # workaround for `/usr/bin/ld: cannot find -lpython3.7m`
# pip install '.[test]'
# pip list
# shell: bash
# - name: Install Neuropod backend
# run: |
# sudo mkdir -p "$NEUROPOD_BASE_DIR"
# curl -L https://github.com/uber/neuropod/releases/download/v${{ env.NEUROPOD_VERISON }}/libneuropod-cpu-linux-v${{ env.NEUROPOD_VERISON }}-torchscript-${{ env.TORCHSCRIPT_VERISON }}-backend.tar.gz | sudo tar -xz -C "$NEUROPOD_BASE_DIR"
# shell: bash
# - name: Reinstall Horovod if necessary
# env:
# HOROVOD_WITH_PYTORCH: 1
# HOROVOD_WITHOUT_MPI: 1
# HOROVOD_WITHOUT_TENSORFLOW: 1
# HOROVOD_WITHOUT_MXNET: 1
# run: |
# HOROVOD_BUILT=$(python -c "import horovod.torch; horovod.torch.nccl_built(); print('SUCCESS')" || true)
# if [[ $HOROVOD_BUILT != "SUCCESS" ]]; then
# pip uninstall -y horovod
# pip install --no-cache-dir git+https://github.com/horovod/horovod.git@master
# fi
# horovodrun --check-build
# shell: bash
# - name: Check CUDA is available
# run: |
# python -c "import torch; assert torch.cuda.is_available()"
# - name: Tests
# run: |
# pytest -v --timeout 300 --durations 10 --junitxml pytest.xml tests
# - name: Upload Unit Test Results
# if: always()
# uses: actions/upload-artifact@v2
# with:
# name: Unit Test Results (Python ${{ matrix.python-version }} gpu)
# path: pytest.xml
event_file:
  # Publishes the payload file of the triggering event (github.event_path) as
  # an artifact named "Event File".
  name: "Event File"
  runs-on: ubuntu-latest
  steps:
    - name: Upload
      # Skipped when the ACT environment variable is set (local runs with act).
      if: ${{ !env.ACT }}
      # upload-artifact v1/v2 are no longer executed by GitHub; v4 accepts the
      # same `name` and `path` inputs used here.
      uses: actions/upload-artifact@v4
      with:
        name: Event File
        path: ${{ github.event_path }}
# stop-runner:
# name: Stop self-hosted EC2 runner
# # required to stop the runner even if the error happened in the previous job
# if: always() && needs.start-runner.result != 'skipped'
# needs:
# - start-runner # required to get output from the start-runner job
# - pytest-gpu # required to wait when the main job is done
# runs-on: ubuntu-latest
# steps:
# - name: Configure AWS credentials
# uses: aws-actions/configure-aws-credentials@v1
# with:
# aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
# aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
# aws-region: ${{ secrets.AWS_REGION }}
# - name: Stop EC2 runner
# uses: machulav/[email protected]
# with:
# mode: stop
# github-token: ${{ secrets.GH_PERSONAL_ACCESS_TOKEN }}
# label: ${{ needs.start-runner.outputs.label }}
# ec2-instance-id: ${{ needs.start-runner.outputs.ec2-instance-id }}