From d3e42ec92e7a4ad86820e9818b0efc139b6bc8ff Mon Sep 17 00:00:00 2001 From: Alex Sherstinsky Date: Tue, 17 Oct 2023 19:48:03 -0700 Subject: [PATCH] Troubleshooting -- trying to find which tests in the "integration_tests_e" group are failing. --- .github/workflows/pytest.yml | 364 +++++----- tests/integration_tests/test_torchscript.py | 703 ++++++++++---------- 2 files changed, 545 insertions(+), 522 deletions(-) diff --git a/.github/workflows/pytest.yml b/.github/workflows/pytest.yml index 876df21e051..ac9272b0b92 100644 --- a/.github/workflows/pytest.yml +++ b/.github/workflows/pytest.yml @@ -16,195 +16,197 @@ concurrency: cancel-in-progress: true jobs: - pytest: - runs-on: ${{ matrix.os }} - strategy: - fail-fast: false - matrix: - os: [ubuntu-latest] - python-version: ["3.8", "3.9", "3.10"] - test-markers: ["not distributed", "distributed"] - include: - - python-version: "3.8" - pytorch-version: 1.13.0 - torchscript-version: 1.10.2 - ray-version: 2.2.0 - - python-version: "3.9" - pytorch-version: 2.0.0 - torchscript-version: 1.10.2 - ray-version: 2.3.0 - - python-version: "3.10" - pytorch-version: nightly - torchscript-version: 1.10.2 - ray-version: 2.3.1 - env: - PYTORCH: ${{ matrix.pytorch-version }} - MARKERS: ${{ matrix.test-markers }} - NEUROPOD_BASE_DIR: "/usr/local/lib/neuropod" - NEUROPOD_VERISON: "0.3.0-rc6" - TORCHSCRIPT_VERISON: ${{ matrix.torchscript-version }} - RAY_VERSION: ${{ matrix.ray-version }} - AWS_ACCESS_KEY_ID: ${{ secrets.LUDWIG_TESTS_AWS_ACCESS_KEY_ID }} - AWS_SECRET_ACCESS_KEY: ${{ secrets.LUDWIG_TESTS_AWS_SECRET_ACCESS_KEY }} - KAGGLE_USERNAME: ${{ secrets.KAGGLE_USERNAME }} - KAGGLE_KEY: ${{ secrets.KAGGLE_KEY }} - IS_NOT_FORK: ${{ !(github.event.pull_request.base.repo.full_name == 'ludwig-ai/ludwig' && github.event.pull_request.head.repo.fork) }} - - name: py${{ matrix.python-version }}, torch-${{ matrix.pytorch-version }}, ${{ matrix.test-markers }}, ${{ matrix.os }}, ray ${{ matrix.ray-version }} - services: - minio: - image: 
fclairamb/minio-github-actions - env: - MINIO_ACCESS_KEY: minio - MINIO_SECRET_KEY: minio123 - ports: - - 9000:9000 - - timeout-minutes: 150 - steps: - - name: Setup ludwigai/ludwig-ray container for local testing with act. - if: ${{ env.ACT }} - run: | - curl -fsSL https://deb.nodesource.com/setup_16.x | sudo -E bash - - sudo apt-get install -y nodejs - sudo mkdir -p /opt/hostedtoolcache/ - sudo chmod 777 -R /opt/hostedtoolcache/ - - uses: actions/checkout@v2 - - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v2 - with: - python-version: ${{ matrix.python-version }} - - - name: Setup Linux - if: runner.os == 'linux' - run: | - sudo apt-get update && sudo apt-get install -y cmake libsndfile1 wget libsox-dev - - - name: Setup macOS - if: runner.os == 'macOS' - run: | - brew install libuv - - - name: pip cache - if: ${{ !env.ACT }} - uses: actions/cache@v2 - with: - path: ~/.cache/pip - key: ${{ runner.os }}-pip-py${{ matrix.python-version }}-torch${{ matrix.pytorch-version }}-${{ matrix.test-markers }}-${{ hashFiles('requirements*.txt', '.github/workflows/pytest.yml') }} - - - name: Debug out of space - run: | - du -h -d 1 ~ - df -h - - - name: Install dependencies - run: | - python --version - pip --version - python -m pip install -U pip - cmake --version - - # remove torch and ray from the dependencies so we can add them depending on the matrix args for the job. 
- cat requirements.txt | sed '/^torch[>=<\b]/d' | sed '/^torchtext/d' | sed '/^torchvision/d' | sed '/^torchaudio/d' > requirements-temp && mv requirements-temp requirements.txt - cat requirements_distributed.txt | sed '/^ray[\[]/d' - - if [ "$MARKERS" != "distributed" ]; then - # Skip distributed and hyperopt requirements to test optional imports - echo > requirements-temp && mv requirements-temp requirements_distributed.txt - echo > requirements-temp && mv requirements-temp requirements_hyperopt.txt - - # Skip distributed tree requirement (lightgbm-ray) - cat requirements_tree.txt | sed '/^lightgbm-ray/d' > requirements-temp && mv requirements-temp requirements_tree.txt - else - if [ "$RAY_VERSION" == "nightly" ]; then - # NOTE: hardcoded for python 3.10 on Linux - echo "ray[default,data,serve,tune] @ https://s3-us-west-2.amazonaws.com/ray-wheels/latest/ray-3.0.0.dev0-cp310-cp310-manylinux2014_x86_64.whl" >> requirements_distributed.txt - else - echo "ray[default,data,serve,tune]==$RAY_VERSION" >> requirements_distributed.txt - fi - fi - - if [ "$PYTORCH" == "nightly" ]; then - extra_index_url=https://download.pytorch.org/whl/nightly/cpu - pip install --pre torch torchtext torchvision torchaudio --index-url $extra_index_url - - else - extra_index_url=https://download.pytorch.org/whl/cpu - pip install torch==$PYTORCH torchtext torchvision torchaudio --extra-index-url $extra_index_url - fi - - pip install '.[test]' --extra-index-url $extra_index_url - pip list - - if [ "$PYTORCH" == "nightly" ]; then - python -c "from packaging import version; import torch; assert version.parse(torch.__version__).release >= version.parse(\"2.0.0\").release, f\"torch {version.parse(torch.__version__).release} < version.parse(\'2.0.0\').release\"" - else - python -c "from packaging import version; import torch; assert version.parse(torch.__version__).release == version.parse(\"$PYTORCH\").release, f\"torch {version.parse(torch.__version__).release} != 
version.parse(\'$PYTORCH\').release\"" - fi - - if [ "$MARKERS" == "distributed" ]; then - python -c "from packaging import version; import ray; assert version.parse(ray.__version__).release == version.parse(\"$RAY_VERSION\").release, f\"ray {version.parse(ray.__version__).release} != version.parse(\'$RAY_VERSION\').release\"" - else - python -c "import importlib.util; assert importlib.util.find_spec('ray') is None, \"found ray but expected it to not be installed\"" - fi - shell: bash - - - name: Install Neuropod backend - run: | - sudo mkdir -p "$NEUROPOD_BASE_DIR" - curl -L https://github.com/uber/neuropod/releases/download/v${{ env.NEUROPOD_VERISON }}/libneuropod-cpu-linux-v${{ env.NEUROPOD_VERISON }}-torchscript-${{ env.TORCHSCRIPT_VERISON }}-backend.tar.gz | sudo tar -xz -C "$NEUROPOD_BASE_DIR" - shell: bash - - - name: Unit Tests - run: | - RUN_PRIVATE=$IS_NOT_FORK LUDWIG_TEST_SUITE_TIMEOUT_S=5400 pytest -v --timeout 300 --durations 100 -m "$MARKERS and not slow and not combinatorial and not horovod and not llm" --junitxml pytest.xml tests/ludwig - - - name: Regression Tests - run: | - RUN_PRIVATE=$IS_NOT_FORK LUDWIG_TEST_SUITE_TIMEOUT_S=5400 pytest -v --timeout 300 --durations 100 -m "$MARKERS and not slow and not combinatorial and not horovod or benchmark and not llm" --junitxml pytest.xml tests/regression_tests - - # Skip Horovod and replace with DDP. 
- # https://github.com/ludwig-ai/ludwig/issues/3468 - # - name: Install Horovod if necessary - # if: matrix.test-markers == 'distributed' && matrix.pytorch-version != 'nightly' - # env: - # HOROVOD_WITH_PYTORCH: 1 - # HOROVOD_WITHOUT_MPI: 1 - # HOROVOD_WITHOUT_TENSORFLOW: 1 - # HOROVOD_WITHOUT_MXNET: 1 - # run: | - # pip install -r requirements_extra.txt - # HOROVOD_BUILT=$(python -c "import horovod.torch; horovod.torch.nccl_built(); print('SUCCESS')" || true) - # if [[ $HOROVOD_BUILT != "SUCCESS" ]]; then - # pip uninstall -y horovod - # pip install --no-cache-dir git+https://github.com/horovod/horovod.git@master - # fi - # horovodrun --check-build - # shell: bash - - # Skip Horovod tests and replace with DDP. - # https://github.com/ludwig-ai/ludwig/issues/3468 - # - name: Horovod Tests - # if: matrix.test-markers == 'distributed' && matrix.pytorch-version != 'nightly' - # run: | - # RUN_PRIVATE=$IS_NOT_FORK LUDWIG_TEST_SUITE_TIMEOUT_S=5400 pytest -v --timeout 300 --durations 100 -m "$MARKERS and horovod and not slow and not combinatorial and not llm" --junitxml pytest.xml tests/ - - - name: Upload Unit Test Results - if: ${{ always() && !env.ACT }} - uses: actions/upload-artifact@v2 - with: - name: Unit Test Results (Python ${{ matrix.python-version }} ${{ matrix.test-markers }}) - path: pytest.xml + # TODO: ALEX +# pytest: +# runs-on: ${{ matrix.os }} +# strategy: +# fail-fast: false +# matrix: +# os: [ubuntu-latest] +# python-version: ["3.8", "3.9", "3.10"] +# test-markers: ["not distributed", "distributed"] +# include: +# - python-version: "3.8" +# pytorch-version: 1.13.0 +# torchscript-version: 1.10.2 +# ray-version: 2.2.0 +# - python-version: "3.9" +# pytorch-version: 2.0.0 +# torchscript-version: 1.10.2 +# ray-version: 2.3.0 +# - python-version: "3.10" +# pytorch-version: nightly +# torchscript-version: 1.10.2 +# ray-version: 2.3.1 +# env: +# PYTORCH: ${{ matrix.pytorch-version }} +# MARKERS: ${{ matrix.test-markers }} +# NEUROPOD_BASE_DIR: 
"/usr/local/lib/neuropod" +# NEUROPOD_VERISON: "0.3.0-rc6" +# TORCHSCRIPT_VERISON: ${{ matrix.torchscript-version }} +# RAY_VERSION: ${{ matrix.ray-version }} +# AWS_ACCESS_KEY_ID: ${{ secrets.LUDWIG_TESTS_AWS_ACCESS_KEY_ID }} +# AWS_SECRET_ACCESS_KEY: ${{ secrets.LUDWIG_TESTS_AWS_SECRET_ACCESS_KEY }} +# KAGGLE_USERNAME: ${{ secrets.KAGGLE_USERNAME }} +# KAGGLE_KEY: ${{ secrets.KAGGLE_KEY }} +# IS_NOT_FORK: ${{ !(github.event.pull_request.base.repo.full_name == 'ludwig-ai/ludwig' && github.event.pull_request.head.repo.fork) }} +# +# name: py${{ matrix.python-version }}, torch-${{ matrix.pytorch-version }}, ${{ matrix.test-markers }}, ${{ matrix.os }}, ray ${{ matrix.ray-version }} +# services: +# minio: +# image: fclairamb/minio-github-actions +# env: +# MINIO_ACCESS_KEY: minio +# MINIO_SECRET_KEY: minio123 +# ports: +# - 9000:9000 +# +# timeout-minutes: 150 +# steps: +# - name: Setup ludwigai/ludwig-ray container for local testing with act. +# if: ${{ env.ACT }} +# run: | +# curl -fsSL https://deb.nodesource.com/setup_16.x | sudo -E bash - +# sudo apt-get install -y nodejs +# sudo mkdir -p /opt/hostedtoolcache/ +# sudo chmod 777 -R /opt/hostedtoolcache/ +# - uses: actions/checkout@v2 +# - name: Set up Python ${{ matrix.python-version }} +# uses: actions/setup-python@v2 +# with: +# python-version: ${{ matrix.python-version }} +# +# - name: Setup Linux +# if: runner.os == 'linux' +# run: | +# sudo apt-get update && sudo apt-get install -y cmake libsndfile1 wget libsox-dev +# +# - name: Setup macOS +# if: runner.os == 'macOS' +# run: | +# brew install libuv +# +# - name: pip cache +# if: ${{ !env.ACT }} +# uses: actions/cache@v2 +# with: +# path: ~/.cache/pip +# key: ${{ runner.os }}-pip-py${{ matrix.python-version }}-torch${{ matrix.pytorch-version }}-${{ matrix.test-markers }}-${{ hashFiles('requirements*.txt', '.github/workflows/pytest.yml') }} +# +# - name: Debug out of space +# run: | +# du -h -d 1 ~ +# df -h +# +# - name: Install dependencies +# run: | +# 
python --version +# pip --version +# python -m pip install -U pip +# cmake --version +# +# # remove torch and ray from the dependencies so we can add them depending on the matrix args for the job. +# cat requirements.txt | sed '/^torch[>=<\b]/d' | sed '/^torchtext/d' | sed '/^torchvision/d' | sed '/^torchaudio/d' > requirements-temp && mv requirements-temp requirements.txt +# cat requirements_distributed.txt | sed '/^ray[\[]/d' +# +# if [ "$MARKERS" != "distributed" ]; then +# # Skip distributed and hyperopt requirements to test optional imports +# echo > requirements-temp && mv requirements-temp requirements_distributed.txt +# echo > requirements-temp && mv requirements-temp requirements_hyperopt.txt +# +# # Skip distributed tree requirement (lightgbm-ray) +# cat requirements_tree.txt | sed '/^lightgbm-ray/d' > requirements-temp && mv requirements-temp requirements_tree.txt +# else +# if [ "$RAY_VERSION" == "nightly" ]; then +# # NOTE: hardcoded for python 3.10 on Linux +# echo "ray[default,data,serve,tune] @ https://s3-us-west-2.amazonaws.com/ray-wheels/latest/ray-3.0.0.dev0-cp310-cp310-manylinux2014_x86_64.whl" >> requirements_distributed.txt +# else +# echo "ray[default,data,serve,tune]==$RAY_VERSION" >> requirements_distributed.txt +# fi +# fi +# +# if [ "$PYTORCH" == "nightly" ]; then +# extra_index_url=https://download.pytorch.org/whl/nightly/cpu +# pip install --pre torch torchtext torchvision torchaudio --index-url $extra_index_url +# +# else +# extra_index_url=https://download.pytorch.org/whl/cpu +# pip install torch==$PYTORCH torchtext torchvision torchaudio --extra-index-url $extra_index_url +# fi +# +# pip install '.[test]' --extra-index-url $extra_index_url +# pip list +# +# if [ "$PYTORCH" == "nightly" ]; then +# python -c "from packaging import version; import torch; assert version.parse(torch.__version__).release >= version.parse(\"2.0.0\").release, f\"torch {version.parse(torch.__version__).release} < version.parse(\'2.0.0\').release\"" +# else +# 
python -c "from packaging import version; import torch; assert version.parse(torch.__version__).release == version.parse(\"$PYTORCH\").release, f\"torch {version.parse(torch.__version__).release} != version.parse(\'$PYTORCH\').release\"" +# fi +# +# if [ "$MARKERS" == "distributed" ]; then +# python -c "from packaging import version; import ray; assert version.parse(ray.__version__).release == version.parse(\"$RAY_VERSION\").release, f\"ray {version.parse(ray.__version__).release} != version.parse(\'$RAY_VERSION\').release\"" +# else +# python -c "import importlib.util; assert importlib.util.find_spec('ray') is None, \"found ray but expected it to not be installed\"" +# fi +# shell: bash +# +# - name: Install Neuropod backend +# run: | +# sudo mkdir -p "$NEUROPOD_BASE_DIR" +# curl -L https://github.com/uber/neuropod/releases/download/v${{ env.NEUROPOD_VERISON }}/libneuropod-cpu-linux-v${{ env.NEUROPOD_VERISON }}-torchscript-${{ env.TORCHSCRIPT_VERISON }}-backend.tar.gz | sudo tar -xz -C "$NEUROPOD_BASE_DIR" +# shell: bash +# +# - name: Unit Tests +# run: | +# RUN_PRIVATE=$IS_NOT_FORK LUDWIG_TEST_SUITE_TIMEOUT_S=5400 pytest -v --timeout 300 --durations 100 -m "$MARKERS and not slow and not combinatorial and not horovod and not llm" --junitxml pytest.xml tests/ludwig +# +# - name: Regression Tests +# run: | +# RUN_PRIVATE=$IS_NOT_FORK LUDWIG_TEST_SUITE_TIMEOUT_S=5400 pytest -v --timeout 300 --durations 100 -m "$MARKERS and not slow and not combinatorial and not horovod or benchmark and not llm" --junitxml pytest.xml tests/regression_tests +# +# # Skip Horovod and replace with DDP. 
+# # https://github.com/ludwig-ai/ludwig/issues/3468 +# # - name: Install Horovod if necessary +# # if: matrix.test-markers == 'distributed' && matrix.pytorch-version != 'nightly' +# # env: +# # HOROVOD_WITH_PYTORCH: 1 +# # HOROVOD_WITHOUT_MPI: 1 +# # HOROVOD_WITHOUT_TENSORFLOW: 1 +# # HOROVOD_WITHOUT_MXNET: 1 +# # run: | +# # pip install -r requirements_extra.txt +# # HOROVOD_BUILT=$(python -c "import horovod.torch; horovod.torch.nccl_built(); print('SUCCESS')" || true) +# # if [[ $HOROVOD_BUILT != "SUCCESS" ]]; then +# # pip uninstall -y horovod +# # pip install --no-cache-dir git+https://github.com/horovod/horovod.git@master +# # fi +# # horovodrun --check-build +# # shell: bash +# +# # Skip Horovod tests and replace with DDP. +# # https://github.com/ludwig-ai/ludwig/issues/3468 +# # - name: Horovod Tests +# # if: matrix.test-markers == 'distributed' && matrix.pytorch-version != 'nightly' +# # run: | +# # RUN_PRIVATE=$IS_NOT_FORK LUDWIG_TEST_SUITE_TIMEOUT_S=5400 pytest -v --timeout 300 --durations 100 -m "$MARKERS and horovod and not slow and not combinatorial and not llm" --junitxml pytest.xml tests/ +# +# - name: Upload Unit Test Results +# if: ${{ always() && !env.ACT }} +# uses: actions/upload-artifact@v2 +# with: +# name: Unit Test Results (Python ${{ matrix.python-version }} ${{ matrix.test-markers }}) +# path: pytest.xml + # TODO: ALEX integration-tests: name: ${{ matrix.test-markers }} runs-on: ubuntu-latest strategy: # TODO: ALEX - # fail-fast: false + fail-fast: false # TODO: ALEX # TODO: ALEX - fail-fast: true + # fail-fast: true # TODO: ALEX matrix: test-markers: diff --git a/tests/integration_tests/test_torchscript.py b/tests/integration_tests/test_torchscript.py index 52da8d2a7c1..c6d599cc530 100644 --- a/tests/integration_tests/test_torchscript.py +++ b/tests/integration_tests/test_torchscript.py @@ -13,11 +13,16 @@ # limitations under the License. 
# ============================================================================== import os -import shutil -from copy import deepcopy + +# TODO: ALEX +# import shutil +# from copy import deepcopy +# TODO: ALEX from typing import List -import numpy as np +# TODO: ALEX +# import numpy as np +# TODO: ALEX import pandas as pd import pytest import torch @@ -25,12 +30,27 @@ from ludwig.api import LudwigModel from ludwig.backend import RAY -from ludwig.constants import BATCH_SIZE, COMBINER, EVAL_BATCH_SIZE, LOGITS, NAME, PREDICTIONS, PROBABILITIES, TRAINER + +# TODO: ALEX +# from ludwig.constants import BATCH_SIZE, COMBINER, EVAL_BATCH_SIZE, LOGITS, NAME, PREDICTIONS, PROBABILITIES, TRAINER +# TODO: ALEX +# TODO: ALEX +from ludwig.constants import BATCH_SIZE, EVAL_BATCH_SIZE, LOGITS, NAME, PREDICTIONS, PROBABILITIES, TRAINER + +# TODO: ALEX from ludwig.data.preprocessing import preprocess_for_prediction -from ludwig.features.number_feature import numeric_transformation_registry -from ludwig.globals import TRAIN_SET_METADATA_FILE_NAME + +# TODO: ALEX +# from ludwig.features.number_feature import numeric_transformation_registry +# TODO: ALEX +# TODO: ALEX +# from ludwig.globals import TRAIN_SET_METADATA_FILE_NAME +# TODO: ALEX from ludwig.models.inference import to_inference_module_input_from_dataframe -from ludwig.utils import output_feature_utils + +# TODO: ALEX +# from ludwig.utils import output_feature_utils +# TODO: ALEX from ludwig.utils.tokenizers import TORCHSCRIPT_COMPATIBLE_TOKENIZERS from tests.integration_tests import utils from tests.integration_tests.utils import ( @@ -51,340 +71,341 @@ vector_feature, ) - -@pytest.mark.parametrize("should_load_model", [True, False]) -@pytest.mark.parametrize("model_type", ["ecd", "gbm"]) -@pytest.mark.integration_tests_e -def test_torchscript(tmpdir, csv_filename, should_load_model, model_type): - ####### - # Setup - ####### - dir_path = tmpdir - data_csv_path = os.path.join(tmpdir, csv_filename) - - # Single sequence input, 
single category output - input_features = [ - binary_feature(), - number_feature(), - category_feature(encoder={"type": "passthrough", "vocab_size": 3}), - category_feature(encoder={"type": "onehot", "vocab_size": 3}), - ] - if model_type == "ecd": - image_dest_folder = os.path.join(tmpdir, "generated_images") - audio_dest_folder = os.path.join(tmpdir, "generated_audio") - input_features.extend( - [ - category_feature(encoder={"type": "dense", "vocab_size": 3}), - sequence_feature(encoder={"vocab_size": 3}), - text_feature(encoder={"vocab_size": 3}), - vector_feature(), - image_feature(image_dest_folder), - audio_feature(audio_dest_folder), - timeseries_feature(), - date_feature(), - date_feature(), - h3_feature(), - set_feature(encoder={"vocab_size": 3}), - bag_feature(encoder={"vocab_size": 3}), - ] - ) - - output_features = [ - category_feature(decoder={"vocab_size": 3}), - ] - if model_type == "ecd": - output_features.extend( - [ - binary_feature(), - number_feature(), - set_feature(decoder={"vocab_size": 3}), - vector_feature(), - sequence_feature(decoder={"vocab_size": 3}), - text_feature(decoder={"vocab_size": 3}), - ] - ) - - predictions_column_name = "{}_predictions".format(output_features[0]["name"]) - - # Generate test data - data_csv_path = generate_data(input_features, output_features, data_csv_path) - - ############# - # Train model - ############# - backend = LocalTestBackend() - config = { - "model_type": model_type, - "input_features": input_features, - "output_features": output_features, - } - if model_type == "ecd": - config[TRAINER] = {"epochs": 2} - else: - # Disable feature filtering to avoid having no features due to small test dataset, - # see https://stackoverflow.com/a/66405983/5222402 - config[TRAINER] = {"num_boost_round": 2, "feature_pre_filter": False} - ludwig_model = LudwigModel(config, backend=backend) - ludwig_model.train( - dataset=data_csv_path, - skip_save_training_description=True, - skip_save_training_statistics=True, - 
skip_save_model=True, - skip_save_progress=True, - skip_save_log=True, - skip_save_processed_input=True, - ) - - ################### - # save Ludwig model - ################### - ludwigmodel_path = os.path.join(dir_path, "ludwigmodel") - shutil.rmtree(ludwigmodel_path, ignore_errors=True) - ludwig_model.save(ludwigmodel_path) - - ################### - # load Ludwig model - ################### - if should_load_model: - ludwig_model = LudwigModel.load(ludwigmodel_path, backend=backend) - - ############################## - # collect weight tensors names - ############################## - original_predictions_df, _ = ludwig_model.predict(dataset=data_csv_path) - original_weights = deepcopy(list(ludwig_model.model.parameters())) - original_weights = [t.cpu() for t in original_weights] - - # Move the model to CPU for tracing - ludwig_model.model.cpu() - - ################# - # save torchscript - ################# - torchscript_path = os.path.join(dir_path, "torchscript") - shutil.rmtree(torchscript_path, ignore_errors=True) - ludwig_model.model.save_torchscript(torchscript_path) - - ################################################### - # load Ludwig model, obtain predictions and weights - ################################################### - ludwig_model = LudwigModel.load(ludwigmodel_path, backend=backend) - loaded_prediction_df, _ = ludwig_model.predict(dataset=data_csv_path) - loaded_weights = deepcopy(list(ludwig_model.model.parameters())) - loaded_weights = [t.cpu() for t in loaded_weights] - - ##################################################### - # restore torchscript, obtain predictions and weights - ##################################################### - training_set_metadata_json_fp = os.path.join(ludwigmodel_path, TRAIN_SET_METADATA_FILE_NAME) - - dataset, training_set_metadata = preprocess_for_prediction( - ludwig_model.config_obj.to_dict(), - dataset=data_csv_path, - training_set_metadata=training_set_metadata_json_fp, - include_outputs=False, - 
backend=backend, - ) - - restored_model = torch.jit.load(torchscript_path) - - # Check the outputs for one of the features for correctness - # Here we choose the first output feature (categorical) - of_name = list(ludwig_model.model.output_features.keys())[0] - - data_to_predict = { - name: torch.from_numpy(dataset.dataset[feature.proc_column]) - for name, feature in ludwig_model.model.input_features.items() - } - - # Get predictions from restored torchscript. - logits = restored_model(data_to_predict) - restored_predictions = torch.argmax(output_feature_utils.get_output_feature_tensor(logits, of_name, "logits"), -1) - - restored_predictions = [training_set_metadata[of_name]["idx2str"][idx] for idx in restored_predictions] - - restored_weights = deepcopy(list(restored_model.parameters())) - restored_weights = [t.cpu() for t in restored_weights] - - ############################################### - # Check if weights and predictions are the same - ############################################### - - # Check to weight values match the original model. - assert utils.is_all_close(original_weights, loaded_weights) - assert utils.is_all_close(original_weights, restored_weights) - - # Check that predictions are identical to the original model. 
- assert np.all(original_predictions_df[predictions_column_name] == loaded_prediction_df[predictions_column_name]) - - assert np.all(original_predictions_df[predictions_column_name] == restored_predictions) - - -@pytest.mark.integration_tests_e -def test_torchscript_e2e_tabular(csv_filename, tmpdir): - data_csv_path = os.path.join(tmpdir, csv_filename) - # Configure features to be tested: - bin_str_feature_input_feature = binary_feature() - bin_str_feature_output_feature = binary_feature(output_feature=True) - transformed_number_features = [ - number_feature(preprocessing={"normalization": numeric_transformer}) - for numeric_transformer in numeric_transformation_registry.keys() - ] - input_features = [ - bin_str_feature_input_feature, - binary_feature(), - *transformed_number_features, - number_feature(preprocessing={"outlier_strategy": "fill_with_mean"}), - category_feature(encoder={"vocab_size": 3}), - bag_feature(encoder={"vocab_size": 3}), - set_feature(encoder={"vocab_size": 3}), - vector_feature(), - # TODO: future support - # date_feature(), - # h3_feature(), - ] - output_features = [ - bin_str_feature_output_feature, - binary_feature(output_feature=True), - number_feature(), - category_feature(decoder={"vocab_size": 3}), - set_feature(decoder={"vocab_size": 3}), - vector_feature(), - sequence_feature(decoder={"vocab_size": 3}), - text_feature(decoder={"vocab_size": 3}), - ] - backend = LocalTestBackend() - config = { - "input_features": input_features, - "output_features": output_features, - TRAINER: {"epochs": 2, BATCH_SIZE: 128}, - } - - # Generate training data - training_data_csv_path = generate_data(input_features, output_features, data_csv_path) - - # Convert bool values to strings, e.g., {'Yes', 'No'} - df = pd.read_csv(training_data_csv_path) - false_value, true_value = "No", "Yes" - df[bin_str_feature_input_feature[NAME]] = df[bin_str_feature_input_feature[NAME]].map( - lambda x: true_value if x else false_value - ) - 
df[bin_str_feature_output_feature[NAME]] = df[bin_str_feature_output_feature[NAME]].map( - lambda x: true_value if x else false_value - ) - df.to_csv(training_data_csv_path) - - validate_torchscript_outputs(tmpdir, config, backend, training_data_csv_path) - - -@pytest.mark.integration_tests_e -def test_torchscript_e2e_binary_only(csv_filename, tmpdir): - data_csv_path = os.path.join(tmpdir, csv_filename) - - input_features = [ - binary_feature(), - ] - output_features = [ - binary_feature(), - ] - backend = LocalTestBackend() - config = { - "input_features": input_features, - "output_features": output_features, - TRAINER: {"epochs": 2, BATCH_SIZE: 128}, - } - - # Generate training data - training_data_csv_path = generate_data(input_features, output_features, data_csv_path) - - validate_torchscript_outputs(tmpdir, config, backend, training_data_csv_path) - - -@pytest.mark.integration_tests_e -def test_torchscript_e2e_tabnet_combiner(csv_filename, tmpdir): - data_csv_path = os.path.join(tmpdir, csv_filename) - # Configure features to be tested: - input_features = [ - binary_feature(), - number_feature(), - category_feature(encoder={"vocab_size": 3}), - bag_feature(encoder={"vocab_size": 3}), - set_feature(encoder={"vocab_size": 3}), - ] - output_features = [ - binary_feature(), - number_feature(), - category_feature(decoder={"vocab_size": 3}), - ] - backend = LocalTestBackend() - config = { - "input_features": input_features, - "output_features": output_features, - COMBINER: { - "type": "tabnet", - "num_total_blocks": 2, - "num_shared_blocks": 2, - }, - TRAINER: {"epochs": 2, BATCH_SIZE: 128}, - } - - # Generate training data - training_data_csv_path = generate_data(input_features, output_features, data_csv_path) - - validate_torchscript_outputs(tmpdir, config, backend, training_data_csv_path) - - -@pytest.mark.integration_tests_e -def test_torchscript_e2e_audio(csv_filename, tmpdir): - data_csv_path = os.path.join(tmpdir, csv_filename) - audio_dest_folder = 
os.path.join(tmpdir, "generated_audio") - - input_features = [ - audio_feature(audio_dest_folder), - ] - output_features = [ - binary_feature(), - ] - backend = LocalTestBackend() - config = { - "input_features": input_features, - "output_features": output_features, - TRAINER: {"epochs": 2, BATCH_SIZE: 128}, - } - training_data_csv_path = generate_data(input_features, output_features, data_csv_path) - - # NOTE: audio preprocessing mismatches by very small margins ~O(1e-6) but causes flakiness in e2e test. - # Increasing tolerance is a workaround to reduce flakiness for now. - # TODO: remove this workaround when audio preprocessing is fixed. - validate_torchscript_outputs(tmpdir, config, backend, training_data_csv_path, tolerance=1e-6) - - -@pytest.mark.parametrize( - "kwargs", - [ - {"encoder": {"type": "stacked_cnn"}}, # Ludwig custom encoder - {"encoder": {"type": "alexnet", "use_pretrained": False}}, # TorchVision pretrained model encoder - ], -) -@pytest.mark.integration_tests_e -def test_torchscript_e2e_image(tmpdir, csv_filename, kwargs): - data_csv_path = os.path.join(tmpdir, csv_filename) - image_dest_folder = os.path.join(tmpdir, "generated_images") - input_features = [ - image_feature(image_dest_folder, **kwargs), - ] - output_features = [ - binary_feature(), - ] - backend = LocalTestBackend() - config = { - "input_features": input_features, - "output_features": output_features, - TRAINER: {"epochs": 2, BATCH_SIZE: 128}, - } - training_data_csv_path = generate_data(input_features, output_features, data_csv_path) - - validate_torchscript_outputs(tmpdir, config, backend, training_data_csv_path) +# TODO: ALEX +# @pytest.mark.parametrize("should_load_model", [True, False]) +# @pytest.mark.parametrize("model_type", ["ecd", "gbm"]) +# @pytest.mark.integration_tests_e +# def test_torchscript(tmpdir, csv_filename, should_load_model, model_type): +# ####### +# # Setup +# ####### +# dir_path = tmpdir +# data_csv_path = os.path.join(tmpdir, csv_filename) +# +# # 
Single sequence input, single category output +# input_features = [ +# binary_feature(), +# number_feature(), +# category_feature(encoder={"type": "passthrough", "vocab_size": 3}), +# category_feature(encoder={"type": "onehot", "vocab_size": 3}), +# ] +# if model_type == "ecd": +# image_dest_folder = os.path.join(tmpdir, "generated_images") +# audio_dest_folder = os.path.join(tmpdir, "generated_audio") +# input_features.extend( +# [ +# category_feature(encoder={"type": "dense", "vocab_size": 3}), +# sequence_feature(encoder={"vocab_size": 3}), +# text_feature(encoder={"vocab_size": 3}), +# vector_feature(), +# image_feature(image_dest_folder), +# audio_feature(audio_dest_folder), +# timeseries_feature(), +# date_feature(), +# date_feature(), +# h3_feature(), +# set_feature(encoder={"vocab_size": 3}), +# bag_feature(encoder={"vocab_size": 3}), +# ] +# ) +# +# output_features = [ +# category_feature(decoder={"vocab_size": 3}), +# ] +# if model_type == "ecd": +# output_features.extend( +# [ +# binary_feature(), +# number_feature(), +# set_feature(decoder={"vocab_size": 3}), +# vector_feature(), +# sequence_feature(decoder={"vocab_size": 3}), +# text_feature(decoder={"vocab_size": 3}), +# ] +# ) +# +# predictions_column_name = "{}_predictions".format(output_features[0]["name"]) +# +# # Generate test data +# data_csv_path = generate_data(input_features, output_features, data_csv_path) +# +# ############# +# # Train model +# ############# +# backend = LocalTestBackend() +# config = { +# "model_type": model_type, +# "input_features": input_features, +# "output_features": output_features, +# } +# if model_type == "ecd": +# config[TRAINER] = {"epochs": 2} +# else: +# # Disable feature filtering to avoid having no features due to small test dataset, +# # see https://stackoverflow.com/a/66405983/5222402 +# config[TRAINER] = {"num_boost_round": 2, "feature_pre_filter": False} +# ludwig_model = LudwigModel(config, backend=backend) +# ludwig_model.train( +# 
dataset=data_csv_path, +# skip_save_training_description=True, +# skip_save_training_statistics=True, +# skip_save_model=True, +# skip_save_progress=True, +# skip_save_log=True, +# skip_save_processed_input=True, +# ) +# +# ################### +# # save Ludwig model +# ################### +# ludwigmodel_path = os.path.join(dir_path, "ludwigmodel") +# shutil.rmtree(ludwigmodel_path, ignore_errors=True) +# ludwig_model.save(ludwigmodel_path) +# +# ################### +# # load Ludwig model +# ################### +# if should_load_model: +# ludwig_model = LudwigModel.load(ludwigmodel_path, backend=backend) +# +# ############################## +# # collect weight tensors names +# ############################## +# original_predictions_df, _ = ludwig_model.predict(dataset=data_csv_path) +# original_weights = deepcopy(list(ludwig_model.model.parameters())) +# original_weights = [t.cpu() for t in original_weights] +# +# # Move the model to CPU for tracing +# ludwig_model.model.cpu() +# +# ################# +# # save torchscript +# ################# +# torchscript_path = os.path.join(dir_path, "torchscript") +# shutil.rmtree(torchscript_path, ignore_errors=True) +# ludwig_model.model.save_torchscript(torchscript_path) +# +# ################################################### +# # load Ludwig model, obtain predictions and weights +# ################################################### +# ludwig_model = LudwigModel.load(ludwigmodel_path, backend=backend) +# loaded_prediction_df, _ = ludwig_model.predict(dataset=data_csv_path) +# loaded_weights = deepcopy(list(ludwig_model.model.parameters())) +# loaded_weights = [t.cpu() for t in loaded_weights] +# +# ##################################################### +# # restore torchscript, obtain predictions and weights +# ##################################################### +# training_set_metadata_json_fp = os.path.join(ludwigmodel_path, TRAIN_SET_METADATA_FILE_NAME) +# +# dataset, training_set_metadata = preprocess_for_prediction( 
+# ludwig_model.config_obj.to_dict(), +# dataset=data_csv_path, +# training_set_metadata=training_set_metadata_json_fp, +# include_outputs=False, +# backend=backend, +# ) +# +# restored_model = torch.jit.load(torchscript_path) +# +# # Check the outputs for one of the features for correctness +# # Here we choose the first output feature (categorical) +# of_name = list(ludwig_model.model.output_features.keys())[0] +# +# data_to_predict = { +# name: torch.from_numpy(dataset.dataset[feature.proc_column]) +# for name, feature in ludwig_model.model.input_features.items() +# } +# +# # Get predictions from restored torchscript. +# logits = restored_model(data_to_predict) +# restored_predictions = torch.argmax(output_feature_utils.get_output_feature_tensor(logits, of_name, "logits"), -1) +# +# restored_predictions = [training_set_metadata[of_name]["idx2str"][idx] for idx in restored_predictions] +# +# restored_weights = deepcopy(list(restored_model.parameters())) +# restored_weights = [t.cpu() for t in restored_weights] +# +# ############################################### +# # Check if weights and predictions are the same +# ############################################### +# +# # Check that weight values match the original model. +# assert utils.is_all_close(original_weights, loaded_weights) +# assert utils.is_all_close(original_weights, restored_weights) +# +# # Check that predictions are identical to the original model. 
+# assert np.all(original_predictions_df[predictions_column_name] == loaded_prediction_df[predictions_column_name]) +# +# assert np.all(original_predictions_df[predictions_column_name] == restored_predictions) +# +# +# @pytest.mark.integration_tests_e +# def test_torchscript_e2e_tabular(csv_filename, tmpdir): +# data_csv_path = os.path.join(tmpdir, csv_filename) +# # Configure features to be tested: +# bin_str_feature_input_feature = binary_feature() +# bin_str_feature_output_feature = binary_feature(output_feature=True) +# transformed_number_features = [ +# number_feature(preprocessing={"normalization": numeric_transformer}) +# for numeric_transformer in numeric_transformation_registry.keys() +# ] +# input_features = [ +# bin_str_feature_input_feature, +# binary_feature(), +# *transformed_number_features, +# number_feature(preprocessing={"outlier_strategy": "fill_with_mean"}), +# category_feature(encoder={"vocab_size": 3}), +# bag_feature(encoder={"vocab_size": 3}), +# set_feature(encoder={"vocab_size": 3}), +# vector_feature(), +# # TODO: future support +# # date_feature(), +# # h3_feature(), +# ] +# output_features = [ +# bin_str_feature_output_feature, +# binary_feature(output_feature=True), +# number_feature(), +# category_feature(decoder={"vocab_size": 3}), +# set_feature(decoder={"vocab_size": 3}), +# vector_feature(), +# sequence_feature(decoder={"vocab_size": 3}), +# text_feature(decoder={"vocab_size": 3}), +# ] +# backend = LocalTestBackend() +# config = { +# "input_features": input_features, +# "output_features": output_features, +# TRAINER: {"epochs": 2, BATCH_SIZE: 128}, +# } +# +# # Generate training data +# training_data_csv_path = generate_data(input_features, output_features, data_csv_path) +# +# # Convert bool values to strings, e.g., {'Yes', 'No'} +# df = pd.read_csv(training_data_csv_path) +# false_value, true_value = "No", "Yes" +# df[bin_str_feature_input_feature[NAME]] = df[bin_str_feature_input_feature[NAME]].map( +# lambda x: true_value if 
x else false_value +# ) +# df[bin_str_feature_output_feature[NAME]] = df[bin_str_feature_output_feature[NAME]].map( +# lambda x: true_value if x else false_value +# ) +# df.to_csv(training_data_csv_path) +# +# validate_torchscript_outputs(tmpdir, config, backend, training_data_csv_path) +# +# +# @pytest.mark.integration_tests_e +# def test_torchscript_e2e_binary_only(csv_filename, tmpdir): +# data_csv_path = os.path.join(tmpdir, csv_filename) +# +# input_features = [ +# binary_feature(), +# ] +# output_features = [ +# binary_feature(), +# ] +# backend = LocalTestBackend() +# config = { +# "input_features": input_features, +# "output_features": output_features, +# TRAINER: {"epochs": 2, BATCH_SIZE: 128}, +# } +# +# # Generate training data +# training_data_csv_path = generate_data(input_features, output_features, data_csv_path) +# +# validate_torchscript_outputs(tmpdir, config, backend, training_data_csv_path) +# +# +# @pytest.mark.integration_tests_e +# def test_torchscript_e2e_tabnet_combiner(csv_filename, tmpdir): +# data_csv_path = os.path.join(tmpdir, csv_filename) +# # Configure features to be tested: +# input_features = [ +# binary_feature(), +# number_feature(), +# category_feature(encoder={"vocab_size": 3}), +# bag_feature(encoder={"vocab_size": 3}), +# set_feature(encoder={"vocab_size": 3}), +# ] +# output_features = [ +# binary_feature(), +# number_feature(), +# category_feature(decoder={"vocab_size": 3}), +# ] +# backend = LocalTestBackend() +# config = { +# "input_features": input_features, +# "output_features": output_features, +# COMBINER: { +# "type": "tabnet", +# "num_total_blocks": 2, +# "num_shared_blocks": 2, +# }, +# TRAINER: {"epochs": 2, BATCH_SIZE: 128}, +# } +# +# # Generate training data +# training_data_csv_path = generate_data(input_features, output_features, data_csv_path) +# +# validate_torchscript_outputs(tmpdir, config, backend, training_data_csv_path) +# +# +# @pytest.mark.integration_tests_e +# def 
test_torchscript_e2e_audio(csv_filename, tmpdir): +# data_csv_path = os.path.join(tmpdir, csv_filename) +# audio_dest_folder = os.path.join(tmpdir, "generated_audio") +# +# input_features = [ +# audio_feature(audio_dest_folder), +# ] +# output_features = [ +# binary_feature(), +# ] +# backend = LocalTestBackend() +# config = { +# "input_features": input_features, +# "output_features": output_features, +# TRAINER: {"epochs": 2, BATCH_SIZE: 128}, +# } +# training_data_csv_path = generate_data(input_features, output_features, data_csv_path) +# +# # NOTE: audio preprocessing mismatches by very small margins ~O(1e-6) but causes flakiness in e2e test. +# # Increasing tolerance is a workaround to reduce flakiness for now. +# # TODO: remove this workaround when audio preprocessing is fixed. +# validate_torchscript_outputs(tmpdir, config, backend, training_data_csv_path, tolerance=1e-6) +# +# +# @pytest.mark.parametrize( +# "kwargs", +# [ +# {"encoder": {"type": "stacked_cnn"}}, # Ludwig custom encoder +# {"encoder": {"type": "alexnet", "use_pretrained": False}}, # TorchVision pretrained model encoder +# ], +# ) +# @pytest.mark.integration_tests_e +# def test_torchscript_e2e_image(tmpdir, csv_filename, kwargs): +# data_csv_path = os.path.join(tmpdir, csv_filename) +# image_dest_folder = os.path.join(tmpdir, "generated_images") +# input_features = [ +# image_feature(image_dest_folder, **kwargs), +# ] +# output_features = [ +# binary_feature(), +# ] +# backend = LocalTestBackend() +# config = { +# "input_features": input_features, +# "output_features": output_features, +# TRAINER: {"epochs": 2, BATCH_SIZE: 128}, +# } +# training_data_csv_path = generate_data(input_features, output_features, data_csv_path) +# +# validate_torchscript_outputs(tmpdir, config, backend, training_data_csv_path) +# TODO: ALEX @pytest.mark.integration_tests_e