Skip to content

Commit

Permalink
Update (base update)
Browse files Browse the repository at this point in the history
[ghstack-poisoned]
  • Loading branch information
wconstab committed May 18, 2024
0 parents commit 513dd94
Show file tree
Hide file tree
Showing 75 changed files with 52,890 additions and 0 deletions.
22 changes: 22 additions & 0 deletions .ci/docker/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
# Docker images for TorchTitan CI

This directory contains everything needed to build the Docker images
that are used in TorchTitan CI. The contents of this directory are copied
from PyTorch CI (https://github.com/pytorch/pytorch/tree/main/.ci/docker).
It also uses the same directory structure as PyTorch.

## Contents

* `build.sh` -- dispatch script to launch all builds
* `common` -- scripts used to execute individual Docker build stages
* `ubuntu` -- Dockerfile for Ubuntu image for CPU build and test jobs

## Usage

```bash
# Generic usage
./build.sh "${IMAGE_NAME}" "${DOCKER_BUILD_PARAMETERS}"

# Build a specific image
./build.sh torchtitan-ubuntu-20.04-clang12 -t myimage:latest
```
39 changes: 39 additions & 0 deletions .ci/docker/build.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
#!/bin/bash
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

# Build one of the TorchTitan CI Docker images.
#
# Usage: ./build.sh IMAGE_NAME [DOCKER_BUILD_ARGS...]
#
# IMAGE_NAME selects the toolchain configuration (see the case statement
# below); every remaining argument is forwarded verbatim to `docker build`.
# The Dockerfile path and the build context are relative paths, so the
# script has to be started from the directory holding the `ubuntu/` folder.

set -exu

# Print a usage hint instead of the bare "unbound variable" error that
# `set -u` would otherwise raise when "$1" is missing.
if [ "$#" -lt 1 ]; then
  echo "Usage: $0 IMAGE_NAME [DOCKER_BUILD_ARGS...]" >&2
  exit 1
fi

IMAGE_NAME="$1"
shift

echo "Building ${IMAGE_NAME} Docker image"

# Defaults shared by every image; the case statement overrides per image.
OS=ubuntu
OS_VERSION=20.04
CLANG_VERSION=""
PYTHON_VERSION=3.11
MINICONDA_VERSION=24.3.0-0

case "${IMAGE_NAME}" in
  torchtitan-ubuntu-20.04-clang12)
    CLANG_VERSION=12
    ;;
  *)
    # Diagnostics belong on stderr so they are not mistaken for build output
    echo "Invalid image name ${IMAGE_NAME}" >&2
    exit 1
    ;;
esac

# "$@" carries the caller's extra docker options (for example -t name:tag).
docker build \
  --no-cache \
  --progress=plain \
  --build-arg "OS_VERSION=${OS_VERSION}" \
  --build-arg "CLANG_VERSION=${CLANG_VERSION}" \
  --build-arg "PYTHON_VERSION=${PYTHON_VERSION}" \
  --build-arg "MINICONDA_VERSION=${MINICONDA_VERSION}" \
  -f "${OS}"/Dockerfile \
  "$@" \
  .
44 changes: 44 additions & 0 deletions .ci/docker/common/install_base.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
#!/bin/bash
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

# Abort on the first failing command (-e) and echo each command as it runs (-x).
set -ex

#######################################
# Install the base build tools and command-line utilities through apt and
# then drop the package-manager caches and temporary files.
# Arguments: none
#######################################
install_ubuntu() {
  apt-get update

  # One package per line, each listed exactly once.
  apt-get install -y --no-install-recommends \
    build-essential \
    ca-certificates \
    curl \
    git \
    wget \
    sudo \
    vim \
    jq \
    unzip \
    gdb \
    rsync \
    libssl-dev \
    zip

  # Cleanup package manager
  apt-get autoclean && apt-get clean
  rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*
}

# Pick the installer matching the distribution ID reported by
# /etc/os-release; anything other than Ubuntu is rejected.
ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"')
if [ "$ID" = "ubuntu" ]; then
  install_ubuntu
else
  echo "Unable to determine OS..."
  exit 1
fi
42 changes: 42 additions & 0 deletions .ci/docker/common/install_clang.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
#!/bin/bash
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

# Abort on the first failing command (-e) and echo each command as it runs (-x).
set -ex

#######################################
# Install clang/LLVM from apt and register it as the default C/C++ compiler.
# Globals:   CLANG_VERSION (read) - major version of the packages to install
# Arguments: none
#######################################
install_ubuntu() {
  local ver="$CLANG_VERSION"
  local pkg tool

  apt-get update

  for pkg in clang llvm; do
    apt-get install -y --no-install-recommends "${pkg}-${ver}"
  done
  # The LLD linker and libomp are also required to build PyTorch from source
  apt-get install -y lld "libomp-${ver}-dev"

  # Make the versioned binaries the default clang / clang++ ...
  for tool in clang clang++; do
    update-alternatives --install "/usr/bin/${tool}" "${tool}" "/usr/bin/${tool}-${ver}" 50
  done
  # ... and route the generic cc / c++ names to clang as well
  update-alternatives --install /usr/bin/cc cc /usr/bin/clang 50
  update-alternatives --install /usr/bin/c++ c++ /usr/bin/clang++ 50

  # Drop package-manager caches and temporary files
  apt-get autoclean && apt-get clean
  rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*
}

# Nothing is installed unless a clang version was requested.
if [ -n "$CLANG_VERSION" ]; then
  # Pick the installer matching the distribution ID from /etc/os-release
  ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"')
  if [ "$ID" = "ubuntu" ]; then
    install_ubuntu
  else
    echo "Unable to determine OS..."
    exit 1
  fi
fi
64 changes: 64 additions & 0 deletions .ci/docker/common/install_conda.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
#!/bin/bash
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

# Abort on the first failing command (-e) and echo each command as it runs (-x).
set -ex

# Load utils.sh from the directory this script lives in; the functions below
# call as_ci_user and pip_install, which this file does not define itself.
# shellcheck source=/dev/null
source "$(dirname "${BASH_SOURCE[0]}")/utils.sh"

#######################################
# Download the Miniconda installer and install it into /opt/conda as ci-user,
# then put /opt/conda/bin first on PATH (in /etc/environment and in the
# current shell).
# Globals:   PYTHON_VERSION, MINICONDA_VERSION (read)
#            BASE_URL, CONDA_FILE, PATH (written)
# Arguments: none
#######################################
install_miniconda() {
  local conda_root="/opt/conda"
  # The installer file name carries the Python version without the dot
  local py_tag="py${PYTHON_VERSION//./}"

  BASE_URL="https://repo.anaconda.com/miniconda"
  CONDA_FILE="Miniconda3-${py_tag}_${MINICONDA_VERSION}-Linux-x86_64.sh"

  # The install prefix must belong to ci-user because the installer runs as it
  mkdir -p "${conda_root}"
  chown ci-user:ci-user "${conda_root}"

  pushd /tmp
  wget -q "${BASE_URL}/${CONDA_FILE}"
  # Run the installer, then discard the downloaded file
  as_ci_user bash "${CONDA_FILE}" -b -f -p "${conda_root}"
  rm "${CONDA_FILE}"
  popd

  sed -e 's|PATH="\(.*\)"|PATH="/opt/conda/bin:\1"|g' -i /etc/environment
  export PATH="${conda_root}/bin:$PATH"
}

#######################################
# Create the conda environment "py_<PYTHON_VERSION>" with the requested
# Python plus the packages listed in /opt/conda/conda-env-ci.txt.
# Globals:   PYTHON_VERSION (read)
# Arguments: none
#######################################
install_python() {
  local env_name="py_${PYTHON_VERSION}"

  pushd /opt/conda
  as_ci_user conda create -n "${env_name}" -y \
    --file /opt/conda/conda-env-ci.txt \
    "python=${PYTHON_VERSION}"
  popd
}

#######################################
# pip-install the development and runtime requirement files that were
# copied to /opt/conda, in that order.
# Arguments: none
#######################################
install_pip_dependencies() {
  local req_file

  pushd /opt/conda
  for req_file in dev-requirements.txt requirements.txt; do
    pip_install -r "/opt/conda/${req_file}"
  done
  popd
}

#######################################
# Remove the libstdc++.so.6 that conda placed in the py_<PYTHON_VERSION>
# environment when /etc/issue reports an Ubuntu 20.04 / 22.04 release.
#
# WARNING: this is a HACK taken from PyTorch core so that PyTorch can be built
# on 22.04. Ubuntu 20+ ships a libstdc++ newer than 3.30, while anaconda is
# stuck with 3.29, so the copy installed by
# https://anaconda.org/anaconda/libstdcxx-ng/files?version=11.2.0 is deleted.
#
# PyTorch sev: https://github.com/pytorch/pytorch/issues/105248
# Ref: https://github.com/pytorch/pytorch/blob/main/.ci/docker/common/install_conda.sh
#
# Globals:   PYTHON_VERSION (read)
# Arguments: none
#######################################
fix_conda_ubuntu_libstdcxx() {
  local bundled_lib="/opt/conda/envs/py_${PYTHON_VERSION}/lib/libstdc++.so.6"

  # Show the release banner that the check below inspects
  cat /etc/issue

  # Any other release keeps the bundled library untouched
  if ! grep -e "2[02].04." /etc/issue >/dev/null; then
    return 0
  fi

  rm "${bundled_lib}"
}

# Entry sequence. The order matters: conda has to exist before the
# py_<PYTHON_VERSION> environment can be created, the pip packages are
# installed into that environment, and the libstdc++ fix removes a file
# from that environment's lib directory.
install_miniconda
install_python
install_pip_dependencies
fix_conda_ubuntu_libstdcxx
22 changes: 22 additions & 0 deletions .ci/docker/common/install_gcc.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
#!/bin/bash
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

# Optionally install GCC and make it the default gcc / g++ / gcov.
# Nothing happens when GCC_VERSION is empty or unset.
set -ex

if [ -n "$GCC_VERSION" ]; then
  apt-get update
  apt-get install -y "g++-${GCC_VERSION}"

  # Point the unversioned tool names at the versioned binaries
  for tool in gcc g++ gcov; do
    update-alternatives --install "/usr/bin/${tool}" "${tool}" "/usr/bin/${tool}-${GCC_VERSION}" 50
  done

  # Drop package-manager caches and temporary files
  apt-get autoclean && apt-get clean
  rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*
fi
24 changes: 24 additions & 0 deletions .ci/docker/common/install_user.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
#!/bin/bash
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

# Register the "ci-user" account (uid/gid 1000) by appending entries to the
# account files directly, create its home directory and grant it
# password-less sudo.
set -ex

# Same as ec2-user
printf '%s\n' "ci-user:x:1000:1000::/var/lib/ci-user:" >> /etc/passwd
printf '%s\n' "ci-user:x:1000:" >> /etc/group
# Needed on Focal or newer
printf '%s\n' "ci-user:*:19110:0:99999:7:::" >> /etc/shadow

# Create $HOME and hand it over to the new user
mkdir -p /var/lib/ci-user
chown ci-user:ci-user /var/lib/ci-user

# Allow sudo without a password
printf '%s\n' 'ci-user ALL=(ALL) NOPASSWD:ALL' > /etc/sudoers.d/ci-user

# Fail the build right here if the sudo setup does not work for ci-user
sudo -u ci-user sudo -v
29 changes: 29 additions & 0 deletions .ci/docker/common/utils.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
#!/bin/bash
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

#######################################
# Run a command as ci-user through sudo.
# NB: the SUDO_* variables are removed from the environment to work around
#     a conda bug: https://github.com/conda/conda/issues/6576
# NB: PATH and LD_LIBRARY_PATH are handed over to the sudo invocation
# NB: must be run from a directory that the user has access to
# Arguments: the command and its arguments
#######################################
as_ci_user() {
  local -a scrub=()
  local var_name

  for var_name in SUDO_UID SUDO_GID SUDO_COMMAND SUDO_USER; do
    scrub+=(-u "${var_name}")
  done

  sudo -E -H -u ci-user \
    env "${scrub[@]}" \
    env "PATH=${PATH}" "LD_LIBRARY_PATH=${LD_LIBRARY_PATH:-}" \
    "$@"
}

#######################################
# Install conda packages into the py_<PYTHON_VERSION> environment. The
# Python version is passed along with the packages so that the install
# command does not upgrade or downgrade Python.
# Usage:     conda_install pkg1 pkg2 ... [-c channel]
# Globals:   PYTHON_VERSION (read)
#######################################
conda_install() {
  local py_ver="${PYTHON_VERSION}"

  as_ci_user conda install -q -n "py_${py_ver}" -y "python=${py_ver}" "$@"
}

#######################################
# Run a command inside the py_<PYTHON_VERSION> conda environment as ci-user,
# passing --no-capture-output to `conda run`.
# Globals:   PYTHON_VERSION (read)
# Arguments: the command and its arguments
#######################################
conda_run() {
  local -a runner=(conda run -n "py_${PYTHON_VERSION}" --no-capture-output)

  as_ci_user "${runner[@]}" "$@"
}

#######################################
# pip-install packages inside the py_<PYTHON_VERSION> conda environment as
# ci-user, with pip's progress bar switched off.
# Globals:   PYTHON_VERSION (read)
# Arguments: arguments forwarded to `pip install`
#######################################
pip_install() {
  local -a pip_cmd=(conda run -n "py_${PYTHON_VERSION}" pip install --progress-bar off)

  as_ci_user "${pip_cmd[@]}" "$@"
}
2 changes: 2 additions & 0 deletions .ci/docker/conda-env-ci.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
cmake=3.22.1
ninja=1.10.2
3 changes: 3 additions & 0 deletions .ci/docker/dev-requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
pytest
pytest-cov
pre-commit
7 changes: 7 additions & 0 deletions .ci/docker/requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
torch >= 2.2.0.dev
datasets
tomli >= 1.1.0 ; python_version < "3.11"
tensorboard
sentencepiece
tiktoken
blobfile
40 changes: 40 additions & 0 deletions .ci/docker/ubuntu/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
# Ubuntu release that selects the base image tag. No default is declared,
# so it has to be passed with --build-arg.
ARG OS_VERSION

FROM nvidia/cuda:12.1.0-cudnn8-runtime-ubuntu${OS_VERSION}

# An ARG declared before FROM is not visible inside the build stage;
# declare it again to bring it into scope here.
ARG OS_VERSION

ENV DEBIAN_FRONTEND=noninteractive

# Install common dependencies
COPY ./common/install_base.sh install_base.sh
RUN bash ./install_base.sh && rm install_base.sh

# Install clang (the script is a no-op when CLANG_VERSION is empty)
ARG CLANG_VERSION
COPY ./common/install_clang.sh install_clang.sh
RUN bash ./install_clang.sh && rm install_clang.sh

# Install gcc (the script is a no-op when GCC_VERSION is empty)
ARG GCC_VERSION
COPY ./common/install_gcc.sh install_gcc.sh
RUN bash ./install_gcc.sh && rm install_gcc.sh

# Setup user
COPY ./common/install_user.sh install_user.sh
RUN bash ./install_user.sh && rm install_user.sh

# Install conda and other dependencies
ARG MINICONDA_VERSION
ARG PYTHON_VERSION
# Persist the Python version and put the conda environment first on PATH
ENV PYTHON_VERSION=$PYTHON_VERSION
ENV PATH=/opt/conda/envs/py_${PYTHON_VERSION}/bin:/opt/conda/bin:${PATH}
COPY dev-requirements.txt /opt/conda/
COPY requirements.txt /opt/conda/
COPY conda-env-ci.txt /opt/conda/
COPY ./common/install_conda.sh install_conda.sh
# install_conda.sh sources utils.sh from its own directory
COPY ./common/utils.sh utils.sh
RUN bash ./install_conda.sh && rm install_conda.sh utils.sh /opt/conda/dev-requirements.txt /opt/conda/requirements.txt /opt/conda/conda-env-ci.txt

# Containers started from this image run as the unprivileged CI user
USER ci-user
CMD ["bash"]
28 changes: 28 additions & 0 deletions .flake8
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
[flake8]
# Suggested config from pytorch that we can adapt
select = B,C,E,F,N,P,T4,W,B9,TOR0,TOR1,TOR2
max-line-length = 120
# C408 ignored because we like the dict keyword argument syntax
# E501 is not flexible enough, we're using B950 instead
# N812 ignored because import torch.nn.functional as F is PyTorch convention
# N817 ignored because importing using acronyms is convention (DistributedDataParallel as DDP)
# E731 allow usage of assigning lambda expressions
# N803,N806 allow caps and mixed case in function params. This is to work with Triton kernel coding style.
ignore =
E203,E305,E402,E501,E721,E741,F405,F821,F841,F999,W503,W504,C408,E302,W291,E303,N812,N817,E731,N803,N806
# shebang has extra meaning in fbcode lints, so I think it's not worth trying
# to line this up with executable bit
EXE001,
# these ignores are from flake8-bugbear; please fix!
B007,B008,
optional-ascii-coding = True
exclude =
./.git,
./docs
./build
./scripts,
./venv,
*.pyi
.pre-commit-config.yaml
*.md
.flake8
Loading

0 comments on commit 513dd94

Please sign in to comment.