Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merge multiple Dockerfiles into a single one #2167

Open
wants to merge 23 commits into
base: staging
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
57 changes: 32 additions & 25 deletions .devcontainer/devcontainer.json
Original file line number Diff line number Diff line change
@@ -1,22 +1,21 @@
{
"name": "Recommenders",
// Version list: https://github.com/devcontainers/images/tree/main/src/base-ubuntu
// Includes: curl, wget, ca-certificates, git, Oh My Zsh!,
"image": "mcr.microsoft.com/devcontainers/base:ubuntu-24.04",
"hostRequirements": {
"cpus": 4,
"memory": "16gb",
"storage": "32gb"
},
"features": {
// https://github.com/devcontainers/features/blob/main/src/anaconda/devcontainer-feature.json
"ghcr.io/devcontainers/features/anaconda:1": {
"version": "2024.06-1"
"build": {
"dockerfile": "../tools/docker/Dockerfile",
"context": "..",
"target": "deps",
"args": {
"COMPUTE": "cpu",
"EXTRAS": "[dev,spark]",
"GIT_REF": "",
"JDK_VERSION": "21",
"PYTHON_VERSION": "3.11"
}
},
"customizations": {
"vscode": {
// Set *default* container specific settings.json values on container create.
// Set default container specific settings.json values on container
// create
"settings": {
"[python]": {
"editor.defaultFormatter": "ms-python.black-formatter",
Expand All @@ -27,24 +26,32 @@
},
"isort.args": ["--profile", "black"],
"python.analysis.autoImportCompletions": true,
"python.defaultInterpreterPath": "/usr/local/conda/envs/Recommenders/bin/python",
// Conda env name *must* align with ENV_HOME in the Dockerfile
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think, as Miguel suggested, that it would be good to have the instructions in a SETUP page somewhere as well.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sure

"python.defaultInterpreterPath": "/root/conda/envs/Recommenders/bin/python",
"python.testing.pytestEnabled": true,
// set the directory where all tests are
// Test directory
"python.testing.pytestArgs": ["tests"]
},
// Add the IDs of extensions you want installed when the container is created.
// VS Code extensions to install on container create
"extensions": [
"ms-python.black-formatter", // https://marketplace.visualstudio.com/items?itemName=ms-python.black-formatter
"ms-python.isort", // https://marketplace.visualstudio.com/items?itemName=ms-python.isort
"ms-python.mypy-type-checker", // https://marketplace.visualstudio.com/items?itemName=ms-python.mypy-type-checker
"ms-python.pylint", // https://marketplace.visualstudio.com/items?itemName=ms-python.pylint
"ms-python.python", // https://marketplace.visualstudio.com/items?itemName=ms-python.python
"ms-toolsai.datawrangler", // https://marketplace.visualstudio.com/items?itemName=ms-toolsai.datawrangler
"ms-toolsai.jupyter" // https://marketplace.visualstudio.com/items?itemName=ms-toolsai.jupyter
// https://marketplace.visualstudio.com/items?itemName=ms-python.black-formatter
"ms-python.black-formatter",
// https://marketplace.visualstudio.com/items?itemName=ms-python.isort
"ms-python.isort",
// https://marketplace.visualstudio.com/items?itemName=ms-python.mypy-type-checker
"ms-python.mypy-type-checker",
// https://marketplace.visualstudio.com/items?itemName=ms-python.pylint
"ms-python.pylint",
// https://marketplace.visualstudio.com/items?itemName=ms-python.python
"ms-python.python",
// https://marketplace.visualstudio.com/items?itemName=ms-toolsai.datawrangler
SimonYansenZhao marked this conversation as resolved.
Show resolved Hide resolved
"ms-toolsai.datawrangler",
// https://marketplace.visualstudio.com/items?itemName=ms-toolsai.jupyter
"ms-toolsai.jupyter"
]
}
},

// Use 'postCreateCommand' to run commands after the container is created.
"postCreateCommand": "conda create -n Recommenders -c conda-forge -y python=3.10 openjdk=21 pip && conda init bash && bash -c -i 'conda activate Recommenders && pip install -e .[dev,spark]' && conda config --set auto_activate_base false"
// Install Recommenders in development mode after container create
"postCreateCommand": "pip install -e .[dev,spark]"
}
110 changes: 34 additions & 76 deletions tests/ci/azureml_tests/aml_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,14 +8,15 @@
* https://learn.microsoft.com/en-us/azure/machine-learning/reference-migrate-sdk-v1-mlflow-tracking?view=azureml-api-2&tabs=aml%2Ccli%2Cmlflow
"""
import pathlib
import tempfile
import re

from azure.ai.ml import MLClient, command
from azure.ai.ml.entities import AmlCompute, BuildContext, Environment, Workspace
from azure.ai.ml.exceptions import JobException
from azure.core.exceptions import ResourceExistsError
from azure.identity import DefaultAzureCredential


def get_client(subscription_id, resource_group, workspace_name):
"""
Get the client with specified AzureML workspace, or create one if not existing.
Expand Down Expand Up @@ -61,9 +62,8 @@ def get_or_create_environment(
environment_name,
use_gpu,
use_spark,
conda_pkg_jdk,
conda_openjdk_version,
python_version,
commit_sha,
):
"""
AzureML requires the run environment to be set up prior to submission.
Expand All @@ -77,81 +77,39 @@ def get_or_create_environment(
added to the conda environment, else False
use_spark (bool): True if PySpark packages should be
added to the conda environment, else False
conda_pkg_jdk (str): "openjdk=8" by default
python_version (str): python version, such as "3.9"
commit_sha (str): the commit that triggers the workflow
conda_openjdk_version (str): "21" by default
python_version (str): python version, such as "3.11"
"""
conda_env_name = "reco"
conda_env_yml = "environment.yml"
condafile = fr"""
name: {conda_env_name}
channels:
- conda-forge
dependencies:
- python={python_version}
- {conda_pkg_jdk}
- pip
- pip:
- recommenders[dev{",gpu" if use_gpu else ""}{",spark" if use_spark else ""}]@git+https://github.com/recommenders-team/recommenders.git@{commit_sha}
"""
# See https://github.com/Azure/AzureML-Containers/blob/master/base/cpu/openmpi4.1.0-ubuntu22.04
image = "mcr.microsoft.com/azureml/openmpi4.1.0-ubuntu22.04"
# See https://github.com/Azure/AzureML-Containers/blob/master/base/gpu/openmpi4.1.0-cuda11.8-cudnn8-ubuntu22.04
dockerfile = fr"""# syntax=docker/dockerfile:1
FROM nvcr.io/nvidia/cuda:12.5.1-devel-ubuntu22.04
SHELL ["/bin/bash", "-c"]
USER root:root
ENV NVIDIA_VISIBLE_DEVICES all
ENV NVIDIA_DRIVER_CAPABILITIES compute,utility
ENV LANG=C.UTF-8 LC_ALL=C.UTF-8
ENV DEBIAN_FRONTEND noninteractive
RUN apt-get update && \
apt-get install -y wget git-all && \
apt-get clean -y && \
rm -rf /var/lib/apt/lists/*

# Install Conda
ENV CONDA_PREFIX /opt/miniconda
RUN wget -qO /tmp/miniconda.sh https://repo.anaconda.com/miniconda/Miniconda3-py311_24.5.0-0-Linux-x86_64.sh && \
bash /tmp/miniconda.sh -bf -p ${{CONDA_PREFIX}} && \
${{CONDA_PREFIX}}/bin/conda update --all -c conda-forge -y && \
${{CONDA_PREFIX}}/bin/conda clean -ay && \
rm -rf ${{CONDA_PREFIX}}/pkgs && \
rm /tmp/miniconda.sh && \
find / -type d -name __pycache__ | xargs rm -rf

# Create Conda environment
COPY {conda_env_yml} /tmp/{conda_env_yml}
RUN ${{CONDA_PREFIX}}/bin/conda env create -f /tmp/{conda_env_yml}

# Activate Conda environment
ENV CONDA_DEFAULT_ENV {conda_env_name}
ENV CONDA_PREFIX ${{CONDA_PREFIX}}/envs/${{CONDA_DEFAULT_ENV}}
ENV PATH="${{CONDA_PREFIX}}/bin:${{PATH}}" LD_LIBRARY_PATH="${{CONDA_PREFIX}}/lib:$LD_LIBRARY_PATH"
"""

with tempfile.TemporaryDirectory() as tmpdir:
tmpdir = pathlib.Path(tmpdir)
dockerfile_path = tmpdir / "Dockerfile"
condafile_path = tmpdir / conda_env_yml
build = BuildContext(path=tmpdir, dockerfile_path=dockerfile_path.name)

with open(dockerfile_path, "w") as file:
file.write(dockerfile)
with open(condafile_path, "w") as file:
file.write(condafile)

try:
client.environments.create_or_update(
Environment(
name=environment_name,
image=None if use_gpu else image,
build=build if use_gpu else None,
conda_file=None if use_gpu else condafile_path,
)
compute = "gpu" if use_gpu else "cpu"
extras = (
"[dev" + (",gpu" if use_gpu else "") + (",spark" if use_spark else "") + "]"
)
dockerfile = pathlib.Path("tools/docker/Dockerfile")

# Docker's --build-arg option is not supported by the AzureML Python SDK v2, as
# shown in [issue #33902](https://github.com/Azure/azure-sdk-for-python/issues/33902),
# so the build args are configured by regex substitution instead
text = dockerfile.read_text()
text = re.sub(r"(ARG\sCOMPUTE=).*", rf'\1"{compute}"', text)
text = re.sub(r"(ARG\sGIT_REF=).*", r'\1""', text)
text = re.sub(r"(ARG\sEXTRAS=).*", rf'\1"{extras}"', text)
text = re.sub(r"(ARG\sPYTHON_VERSION=).*", rf'\1"{python_version}"', text)
text = re.sub(r"(ARG\sJDK_VERSION=).*", rf'\1"{conda_openjdk_version}"', text)
dockerfile.write_text(text)

try:
client.environments.create_or_update(
Environment(
name=environment_name,
build=BuildContext(
# Set path for Docker to access to Recommenders root
path=".",
dockerfile_path=dockerfile,
),
)
except ResourceExistsError:
pass
)
except ResourceExistsError:
pass


def run_tests(
Expand Down
28 changes: 12 additions & 16 deletions tests/ci/azureml_tests/submit_groupwise_azureml_pytest.py
Original file line number Diff line number Diff line change
Expand Up @@ -108,15 +108,15 @@ def parse_args():
help="Environment name on AzureML",
)
parser.add_argument(
"--conda_pkg_jdk",
"--conda-openjdk-version",
action="store",
default="openjdk=8",
help="Conda package for JDK",
default="21",
help="Conda OpenJDK package version",
)
parser.add_argument(
"--python-version",
action="store",
default="3.8",
default="3.11",
help="Python version",
)
parser.add_argument(
Expand All @@ -133,19 +133,16 @@ def parse_args():
logger = logging.getLogger("submit_groupwise_azureml_pytest.py")
args = parse_args()

logger.info(f"Setting up workspace {args.ws}")
logger.info("Setting up workspace %s", args.ws)
miguelgfierro marked this conversation as resolved.
Show resolved Hide resolved
client = get_client(
subscription_id=args.subid,
resource_group=args.rg,
workspace_name=args.ws,
)

logger.info(f"Setting up compute {args.cluster}")
logger.info("Setting up compute %s", args.cluster)
create_or_start_compute(
client=client,
name=args.cluster,
size=args.vmsize,
max_instances=args.maxnodes
client=client, name=args.cluster, size=args.vmsize, max_instances=args.maxnodes
)

# TODO: Unlike Azure DevOps pipelines, GitHub Actions only has simple
Expand All @@ -159,19 +156,18 @@ def parse_args():
# * on AzureML
# recommenders-unit-group_cpu_001-python3_8-c8adeafabc011b549f875dc145313ffbe3fc53a8
environment_name = correct_resource_name(args.envname)
logger.info(f"Setting up environment {environment_name}")
logger.info("Setting up environment %s", environment_name)
get_or_create_environment(
client=client,
environment_name=environment_name,
use_gpu=True if "gpu" in args.testgroup else False,
use_spark=True if "spark" in args.testgroup else False,
conda_pkg_jdk=args.conda_pkg_jdk,
use_gpu="gpu" in args.testgroup,
use_spark="spark" in args.testgroup,
conda_openjdk_version=args.conda_openjdk_version,
python_version=args.python_version,
commit_sha=args.sha,
)

experiment_name = correct_resource_name(args.expname)
logger.info(f"Running experiment {experiment_name}")
logger.info("Running experiment %s", experiment_name)
run_tests(
client=client,
compute=args.cluster,
Expand Down
Loading
Loading