Skip to content

Commit

Permalink
fix: remove deprecated distutils (#4837)
Browse files Browse the repository at this point in the history
* fix: remove deprecated distutils imports by local mode

* fix: remove deprecated distutils api calls in workflows

Also remove from tests.

Closes #4534

* fix pytorch

* use latest py_version

* fix format

* use latest default sklearn

* unit test

* unit test

---------

Co-authored-by: Justin Mahlik <[email protected]>
  • Loading branch information
benieric and jmahlik authored Sep 30, 2024
1 parent ec89f7d commit 92e493c
Show file tree
Hide file tree
Showing 11 changed files with 26 additions and 34 deletions.
2 changes: 1 addition & 1 deletion .pylintrc
Original file line number Diff line number Diff line change
Expand Up @@ -310,7 +310,7 @@ ignore-mixin-members=yes
# (useful for modules/projects where namespaces are manipulated during runtime
# and thus existing member attributes cannot be deduced by static analysis. It
# supports qualified module names, as well as Unix pattern matching.
ignored-modules=distutils
ignored-modules=

# List of class names for which member attributes should not be checked (useful
# for classes with dynamically set attributes). This supports the use of
Expand Down
3 changes: 1 addition & 2 deletions src/sagemaker/local/image.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,6 @@
import tarfile
import tempfile

from distutils.spawn import find_executable
from threading import Thread
from typing import Dict, List
from six.moves.urllib.parse import urlparse
Expand Down Expand Up @@ -170,7 +169,7 @@ def _get_compose_cmd_prefix():
compose_cmd_prefix.extend(["docker", "compose"])
return compose_cmd_prefix

if find_executable("docker-compose") is not None:
if shutil.which("docker-compose") is not None:
logger.info("'Docker Compose' found using Docker Compose CLI.")
compose_cmd_prefix.extend(["docker-compose"])
return compose_cmd_prefix
Expand Down
5 changes: 2 additions & 3 deletions src/sagemaker/local/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,6 @@
import re
import errno

from distutils.dir_util import copy_tree
from six.moves.urllib.parse import urlparse

from sagemaker import s3
Expand Down Expand Up @@ -102,7 +101,7 @@ def move_to_destination(source, destination, job_name, sagemaker_session, prefix


def recursive_copy(source, destination):
"""A wrapper around distutils.dir_util.copy_tree.
"""A wrapper around shutil.copy_tree.
This won't throw any exception when the source directory does not exist.
Expand All @@ -111,7 +110,7 @@ def recursive_copy(source, destination):
destination (str): destination path
"""
if os.path.isdir(source):
copy_tree(source, destination)
shutil.copytree(source, destination, dirs_exist_ok=True)


def kill_child_processes(pid):
Expand Down
9 changes: 1 addition & 8 deletions src/sagemaker/workflow/_repack_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,13 +27,6 @@
# is unpacked for inference, the custom entry point will be used.
# Reference: https://docs.aws.amazon.com/sagemaker/latest/dg/amazon-sagemaker-toolkits.html

# distutils.dir_util.copy_tree works way better than the half-baked
# shutil.copytree which bombs on previously existing target dirs...
# alas ... https://bugs.python.org/issue10948
# we'll go ahead and use the copy_tree function anyways because this
# repacking is some short-lived hackery, right??
from distutils.dir_util import copy_tree

from os.path import abspath, realpath, dirname, normpath, join as joinpath

logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -188,7 +181,7 @@ def repack(inference_script, model_archive, dependencies=None, source_dir=None):

# copy the "src" dir, which includes the previous training job's model and the
# custom inference script, to the output of this training job
copy_tree(src_dir, "/opt/ml/model")
shutil.copytree(src_dir, "/opt/ml/model", dirs_exist_ok=True)


if __name__ == "__main__": # pragma: no cover
Expand Down
2 changes: 1 addition & 1 deletion src/sagemaker/workflow/_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@

logger = logging.getLogger(__name__)

FRAMEWORK_VERSION = "0.23-1"
FRAMEWORK_VERSION = "1.2-1"
INSTANCE_TYPE = "ml.m5.large"
REPACK_SCRIPT = "_repack_model.py"
REPACK_SCRIPT_LAUNCHER = "_repack_script_launcher.sh"
Expand Down
9 changes: 1 addition & 8 deletions tests/data/_repack_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,13 +26,6 @@
# is unpacked for inference, the custom entry point will be used.
# Reference: https://docs.aws.amazon.com/sagemaker/latest/dg/amazon-sagemaker-toolkits.html

# distutils.dir_util.copy_tree works way better than the half-baked
# shutil.copytree which bombs on previously existing target dirs...
# alas ... https://bugs.python.org/issue10948
# we'll go ahead and use the copy_tree function anyways because this
# repacking is some short-lived hackery, right??
from distutils.dir_util import copy_tree


def repack(inference_script, model_archive, dependencies=None, source_dir=None): # pragma: no cover
"""Repack custom dependencies and code into an existing model TAR archive
Expand Down Expand Up @@ -92,7 +85,7 @@ def repack(inference_script, model_archive, dependencies=None, source_dir=None):

# copy the "src" dir, which includes the previous training job's model and the
# custom inference script, to the output of this training job
copy_tree(src_dir, "/opt/ml/model")
shutil.copytree(src_dir, "/opt/ml/model", dirs_exist_ok=True)


if __name__ == "__main__": # pragma: no cover
Expand Down
6 changes: 4 additions & 2 deletions tests/integ/sagemaker/workflow/test_retry.py
Original file line number Diff line number Diff line change
Expand Up @@ -148,6 +148,8 @@ def test_model_registration_with_model_repack(
role,
pipeline_name,
region_name,
pytorch_training_latest_version,
pytorch_training_latest_py_version,
):
base_dir = os.path.join(DATA_DIR, "pytorch_mnist")
entry_point = os.path.join(base_dir, "mnist.py")
Expand All @@ -166,8 +168,8 @@ def test_model_registration_with_model_repack(
pytorch_estimator = PyTorch(
entry_point=entry_point,
role=role,
framework_version="1.5.0",
py_version="py3",
framework_version=pytorch_training_latest_version,
py_version=pytorch_training_latest_py_version,
instance_count=instance_count,
instance_type=instance_type,
sagemaker_session=pipeline_session,
Expand Down
4 changes: 2 additions & 2 deletions tests/unit/sagemaker/local/test_local_image.py
Original file line number Diff line number Diff line change
Expand Up @@ -160,7 +160,7 @@ def test_get_compose_cmd_prefix_with_docker_cli():
"subprocess.check_output",
side_effect=subprocess.CalledProcessError(returncode=1, cmd="docker compose version"),
)
@patch("sagemaker.local.image.find_executable", Mock(return_value="/usr/bin/docker-compose"))
@patch("sagemaker.local.image.shutil.which", Mock(return_value="/usr/bin/docker-compose"))
def test_get_compose_cmd_prefix_with_docker_compose_cli(check_output):
compose_cmd_prefix = _SageMakerContainer._get_compose_cmd_prefix()
assert compose_cmd_prefix == ["docker-compose"]
Expand All @@ -170,7 +170,7 @@ def test_get_compose_cmd_prefix_with_docker_compose_cli(check_output):
"subprocess.check_output",
side_effect=subprocess.CalledProcessError(returncode=1, cmd="docker compose version"),
)
@patch("sagemaker.local.image.find_executable", Mock(return_value=None))
@patch("sagemaker.local.image.shutil.which", Mock(return_value=None))
def test_get_compose_cmd_prefix_raises_import_error(check_output):
with pytest.raises(ImportError) as e:
_SageMakerContainer._get_compose_cmd_prefix()
Expand Down
4 changes: 2 additions & 2 deletions tests/unit/sagemaker/local/test_local_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,11 +85,11 @@ def test_move_to_destination_illegal_destination():


@patch("sagemaker.local.utils.os.path")
@patch("sagemaker.local.utils.copy_tree")
@patch("sagemaker.local.utils.shutil.copytree")
def test_recursive_copy(copy_tree, m_os_path):
m_os_path.isdir.return_value = True
sagemaker.local.utils.recursive_copy("source", "destination")
copy_tree.assert_called_with("source", "destination")
copy_tree.assert_called_with("source", "destination", dirs_exist_ok=True)


@patch("sagemaker.local.utils.os")
Expand Down
5 changes: 2 additions & 3 deletions tests/unit/sagemaker/workflow/test_step_collections.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@

MODEL_NAME = "gisele"
MODEL_REPACKING_IMAGE_URI = (
"246618743249.dkr.ecr.us-west-2.amazonaws.com/sagemaker-scikit-learn:0.23-1-cpu-py3"
"246618743249.dkr.ecr.us-west-2.amazonaws.com/sagemaker-scikit-learn:1.2-1-cpu-py3"
)


Expand Down Expand Up @@ -1219,8 +1219,7 @@ def test_estimator_transformer_with_model_repack_with_estimator(estimator, sourc
assert arguments == {
"AlgorithmSpecification": {
"TrainingInputMode": "File",
"TrainingImage": "246618743249.dkr.ecr.us-west-2.amazonaws.com/"
+ "sagemaker-scikit-learn:0.23-1-cpu-py3",
"TrainingImage": MODEL_REPACKING_IMAGE_URI,
},
"ProfilerConfig": {"DisableProfiler": True},
"OutputDataConfig": {"S3OutputPath": "s3://my-bucket/"},
Expand Down
11 changes: 9 additions & 2 deletions tests/unit/test_chainer.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@
import logging
import json
import os
from distutils.util import strtobool

import pytest
from mock import MagicMock, Mock, ANY
Expand Down Expand Up @@ -174,7 +173,15 @@ def test_additional_hyperparameters(sagemaker_session, chainer_version, chainer_
framework_version=chainer_version,
py_version=chainer_py_version,
)
assert bool(strtobool(chainer.hyperparameters()["sagemaker_use_mpi"]))

assert chainer.hyperparameters()["sagemaker_use_mpi"].lower() in (
"y",
"yes",
"t",
"true",
"on",
"1",
)
assert int(chainer.hyperparameters()["sagemaker_num_processes"]) == 4
assert int(chainer.hyperparameters()["sagemaker_process_slots_per_host"]) == 10
assert (
Expand Down

0 comments on commit 92e493c

Please sign in to comment.