Skip to content

Commit

Permalink
ROCm support
Browse files Browse the repository at this point in the history
  • Loading branch information
nazar-pc committed Oct 8, 2024
1 parent c43e2a3 commit d1eb6c0
Show file tree
Hide file tree
Showing 15 changed files with 855 additions and 16 deletions.
54 changes: 53 additions & 1 deletion .github/workflows/rust.yml
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ jobs:
cargo-clippy:
strategy:
matrix:
os: ${{ fromJson(github.repository_owner == 'autonomys' && '[["self-hosted", "ubuntu-20.04-x86-64"], ["self-hosted", "macos-14-arm64"], ["self-hosted", "windows-server-2022-x86-64"]]' || '["ubuntu-22.04", "macos-14", "windows-2022"]') }}
os: ${{ fromJson(github.repository_owner == 'autonomys' && '[["self-hosted", "ubuntu-20.04-x86-64"], ["self-hosted", "macos-14-arm64"], ["self-hosted", "windows-server-2022-x86-64"]]' || '["ubuntu-20.04", "macos-14", "windows-2022"]') }}

runs-on: ${{ matrix.os }}

Expand Down Expand Up @@ -104,6 +104,42 @@ jobs:
sub-packages: '["nvcc", "cudart"]'
if: runner.os == 'Linux' || runner.os == 'Windows'

# TODO: ROCm compilation on Windows doesn't work in CI right now, so the Windows-only step below stays commented out until it is fixed
# - name: Configure ROCm cache (Windows)
# uses: actions/cache@0c45773b623bea8c8e75f6c82b208c3cf94ea4f9 # v4.0.2
# id: rocm-cache
# with:
# path: C:\Program Files\AMD\ROCm
# key: ${{ runner.os }}-rocm
# if: runner.os == 'Windows'

# Installs the HIP development runtime from AMD's apt repository (`focal` suite) and registers
# `/opt/rocm/lib` with the dynamic linker. Linux only.
- name: ROCm toolchain
  run: |
    # pipefail is not guaranteed to be enabled for `run` scripts; without it a failed key
    # download would be hidden by the `gpg`/`tee` stages of the pipeline
    set -o pipefail
    ROCM_VERSION=6.2.2
    sudo mkdir -p --mode=0755 /etc/apt/keyrings
    # `-f` makes curl fail on HTTP errors instead of piping an error page into gpg
    curl -fsSL https://repo.radeon.com/rocm/rocm.gpg.key | gpg --dearmor | sudo tee /etc/apt/keyrings/rocm.gpg > /dev/null
    echo "deb [arch=amd64 signed-by=/etc/apt/keyrings/rocm.gpg] https://repo.radeon.com/rocm/apt/$ROCM_VERSION focal main" | sudo tee /etc/apt/sources.list.d/rocm.list > /dev/null
    # `printf` instead of `echo`: bash's builtin `echo` does not expand `\n`, which would leave the
    # pin file as a single line containing literal backslash-n sequences
    printf 'Package: *\nPin: release o=repo.radeon.com\nPin-Priority: 600\n' | sudo tee /etc/apt/preferences.d/rocm-pin-600 > /dev/null
    sudo apt-get update
    # Variable is set through `env` on the far side of `sudo`, since sudo may reset the environment
    sudo env DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends rocm-hip-runtime-dev
    echo "/opt/rocm/lib" | sudo tee /etc/ld.so.conf.d/rocm.conf > /dev/null
    sudo ldconfig
  if: runner.os == 'Linux'

# TODO: ROCm compilation on Windows doesn't work in CI right now, so the Windows-only steps below stay commented out until it is fixed
# - name: ROCm toolchain
# run: |
# $ErrorActionPreference = "Stop"
# Invoke-WebRequest -Uri https://download.amd.com/developer/eula/rocm-hub/AMD-Software-PRO-Edition-24.Q3-WinSvr2022-For-HIP.exe -OutFile "${env:RUNNER_TEMP}\HIP-SDK-Installer.exe"
# Start-Process "${env:RUNNER_TEMP}\HIP-SDK-Installer.exe" -ArgumentList '-install' -NoNewWindow -Wait
# Remove-Item "${env:RUNNER_TEMP}\HIP-SDK-Installer.exe"
# if: runner.os == 'Windows' && steps.rocm-cache.outputs.cache-hit != 'true'
#
# - name: ROCm toolchain environment (Windows)
# run: |
# Add-Content $env:GITHUB_PATH "C:\Program Files\AMD\ROCm\6.1\bin"
# if: runner.os == 'Windows'

- name: Configure cache
uses: actions/cache@0c45773b623bea8c8e75f6c82b208c3cf94ea4f9 # v4.0.2
with:
Expand All @@ -124,6 +160,22 @@ jobs:
cargo -Zgitoxide -Zgit clippy --locked --all-targets --features runtime-benchmarks,cuda -- -D warnings
if: runner.os == 'Linux' || runner.os == 'Windows'

# Lints the workspace with the `rocm` feature enabled. Linux only.
- name: cargo clippy (ROCm)
  env:
    # `NVCC=off` is a hack needed until `sppark` has proper features for CUDA and ROCm.
    # Quoted so that no YAML parser can coerce the value to a boolean; it must stay the string `off`.
    NVCC: 'off'
  run: |
    cargo -Zgitoxide -Zgit clippy --locked --all-targets --features rocm -- -D warnings
  if: runner.os == 'Linux'

# TODO: ROCm compilation on Windows doesn't work in CI right now, so the Windows-only step below stays commented out until it is fixed
# - name: cargo clippy (ROCm)
# env:
# NVCC: off
# HIPCC: hipcc.bin.exe
# run: |
# cargo -Zgitoxide -Zgit clippy --locked --all-targets --features rocm -- -D warnings
# if: runner.os == 'Windows'

cargo-docs:
runs-on: ${{ fromJson(github.repository_owner == 'autonomys' && '["self-hosted", "ubuntu-20.04-x86-64"]' || '"ubuntu-22.04"') }}
steps:
Expand Down
69 changes: 69 additions & 0 deletions .github/workflows/snapshot-build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -178,6 +178,43 @@ jobs:
sub-packages: '["nvcc", "cudart"]'
if: runner.os == 'Linux' || runner.os == 'Windows'

# TODO: ROCm compilation on Windows doesn't work in CI right now, so the Windows-only step below stays commented out until it is fixed
# - name: Configure ROCm cache (Windows)
# uses: actions/cache@0c45773b623bea8c8e75f6c82b208c3cf94ea4f9 # v4.0.2
# id: rocm-cache
# with:
# path: C:\Program Files\AMD\ROCm
# key: ${{ runner.os }}-rocm
# if: runner.os == 'Windows'

# Installs the HIP development runtime from AMD's apt repository (`focal` suite) and registers
# `/opt/rocm/lib` with the dynamic linker. Only runs for x86-64 Linux build targets.
- name: ROCm toolchain
  run: |
    # pipefail is not guaranteed to be enabled for `run` scripts; without it a failed key
    # download would be hidden by the `gpg`/`tee` stages of the pipeline
    set -o pipefail
    ROCM_VERSION=6.2.2
    sudo mkdir -p --mode=0755 /etc/apt/keyrings
    # `-f` makes curl fail on HTTP errors instead of piping an error page into gpg
    curl -fsSL https://repo.radeon.com/rocm/rocm.gpg.key | gpg --dearmor | sudo tee /etc/apt/keyrings/rocm.gpg > /dev/null
    echo "deb [arch=amd64 signed-by=/etc/apt/keyrings/rocm.gpg] https://repo.radeon.com/rocm/apt/$ROCM_VERSION focal main" | sudo tee /etc/apt/sources.list.d/rocm.list > /dev/null
    # `printf` instead of `echo`: bash's builtin `echo` does not expand `\n`, which would leave the
    # pin file as a single line containing literal backslash-n sequences
    printf 'Package: *\nPin: release o=repo.radeon.com\nPin-Priority: 600\n' | sudo tee /etc/apt/preferences.d/rocm-pin-600 > /dev/null
    sudo apt-get update
    # Variable is set through `env` on the far side of `sudo`, since sudo may reset the environment
    sudo env DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends rocm-hip-runtime-dev
    echo "/opt/rocm/lib" | sudo tee /etc/ld.so.conf.d/rocm.conf > /dev/null
    sudo ldconfig
  # TODO: ROCm packages are only available for x86-64 for now
  if: runner.os == 'Linux' && startsWith(matrix.build.target, 'x86_64')

# TODO: ROCm compilation on Windows doesn't work in CI right now, so the Windows-only steps below stay commented out until it is fixed
# - name: ROCm toolchain
# run: |
# $ErrorActionPreference = "Stop"
# Invoke-WebRequest -Uri https://download.amd.com/developer/eula/rocm-hub/AMD-Software-PRO-Edition-24.Q3-WinSvr2022-For-HIP.exe -OutFile "${env:RUNNER_TEMP}\HIP-SDK-Installer.exe"
# Start-Process "${env:RUNNER_TEMP}\HIP-SDK-Installer.exe" -ArgumentList '-install' -NoNewWindow -Wait
# Remove-Item "${env:RUNNER_TEMP}\HIP-SDK-Installer.exe"
# if: runner.os == 'Windows' && steps.rocm-cache.outputs.cache-hit != 'true'
#
# - name: ROCm toolchain environment (Windows)
# run: |
# Add-Content $env:GITHUB_PATH "C:\Program Files\AMD\ROCm\6.1\bin"
# if: runner.os == 'Windows'

- name: Configure cache
uses: actions/cache@0c45773b623bea8c8e75f6c82b208c3cf94ea4f9 # v4.0.2
with:
Expand All @@ -193,6 +230,28 @@ jobs:
cargo -Zgitoxide -Zgit build --locked -Z build-std --target ${{ matrix.build.target }} --profile production --bin subspace-farmer
if: runner.os == 'macOS' || !startsWith(matrix.build.target, 'x86_64')

# TODO: ROCm compilation on Windows doesn't work in CI right now, so the Windows-only step below stays commented out until it is fixed
# # ROCm can't be enabled together with CUDA for now
# - name: Build farmer (ROCm, Windows)
# env:
# NVCC: off
# HIPCC: hipcc.bin.exe
# run: |
# cargo -Zgitoxide -Zgit build --locked -Z build-std --target ${{ matrix.build.target }} --profile production --bin subspace-farmer --features rocm
# move ${{ env.PRODUCTION_TARGET }}/subspace-farmer.exe ${{ env.PRODUCTION_TARGET }}/subspace-farmer-rocm.exe
# # TODO: ROCm packages are only available for x86-64 for now
# if: runner.os == 'Windows' && startsWith(matrix.build.target, 'x86_64')

# ROCm can't be enabled together with CUDA for now, so the ROCm flavour of the farmer is built
# separately, with only the `rocm` feature
- name: Build farmer (ROCm, Ubuntu)
  env:
    # `NVCC=off` is a hack needed until `sppark` has proper features for CUDA and ROCm.
    # Quoted so that no YAML parser can coerce the value to a boolean; it must stay the string `off`.
    NVCC: 'off'
  run: |
    cargo -Zgitoxide -Zgit build --locked -Z build-std --target ${{ matrix.build.target }} --profile production --bin subspace-farmer --features rocm
    # Rename so the later `Build farmer` step, which builds the same binary name for the same
    # target and profile, doesn't replace the ROCm build
    mv ${{ env.PRODUCTION_TARGET }}/subspace-farmer ${{ env.PRODUCTION_TARGET }}/subspace-farmer-rocm
  # TODO: ROCm packages are only available for x86-64 for now
  if: runner.os == 'Linux' && startsWith(matrix.build.target, 'x86_64')

- name: Build farmer
run: |
cargo -Zgitoxide -Zgit build --locked -Z build-std --target ${{ matrix.build.target }} --profile production --bin subspace-farmer --features cuda
Expand Down Expand Up @@ -240,6 +299,8 @@ jobs:
- name: Sign Application (Windows)
run: |
AzureSignTool sign --azure-key-vault-url "${{ secrets.AZURE_KEY_VAULT_URI }}" --azure-key-vault-client-id "${{ secrets.AZURE_CLIENT_ID }}" --azure-key-vault-client-secret "${{ secrets.AZURE_CLIENT_SECRET }}" --azure-key-vault-tenant-id "${{ secrets.AZURE_TENANT_ID }}" --azure-key-vault-certificate "${{ secrets.AZURE_CERT_NAME }}" --file-digest sha512 --timestamp-rfc3161 http://timestamp.digicert.com -v "${{ env.PRODUCTION_TARGET }}/subspace-farmer.exe"
# TODO: ROCm compilation doesn't work in CI right now, good luck fixing it
# AzureSignTool sign --azure-key-vault-url "${{ secrets.AZURE_KEY_VAULT_URI }}" --azure-key-vault-client-id "${{ secrets.AZURE_CLIENT_ID }}" --azure-key-vault-client-secret "${{ secrets.AZURE_CLIENT_SECRET }}" --azure-key-vault-tenant-id "${{ secrets.AZURE_TENANT_ID }}" --azure-key-vault-certificate "${{ secrets.AZURE_CERT_NAME }}" --file-digest sha512 --timestamp-rfc3161 http://timestamp.digicert.com -v "${{ env.PRODUCTION_TARGET }}/subspace-farmer-rocm.exe"
AzureSignTool sign --azure-key-vault-url "${{ secrets.AZURE_KEY_VAULT_URI }}" --azure-key-vault-client-id "${{ secrets.AZURE_CLIENT_ID }}" --azure-key-vault-client-secret "${{ secrets.AZURE_CLIENT_SECRET }}" --azure-key-vault-tenant-id "${{ secrets.AZURE_TENANT_ID }}" --azure-key-vault-certificate "${{ secrets.AZURE_CERT_NAME }}" --file-digest sha512 --timestamp-rfc3161 http://timestamp.digicert.com -v "${{ env.PRODUCTION_TARGET }}/subspace-node.exe"
# Allow code signing to fail on non-release builds and in non-subspace repos (forks)
continue-on-error: ${{ github.repository_owner != 'autonomys' || github.event_name != 'push' || github.ref_type != 'tag' }}
Expand All @@ -252,6 +313,12 @@ jobs:
mv ${{ env.PRODUCTION_TARGET }}/subspace-node executables/subspace-node-${{ matrix.build.suffix }}
if: runner.os == 'Linux'

# Moves the ROCm farmer binary (renamed to `subspace-farmer-rocm` by the ROCm build step) into
# `executables/`, appending the matrix build suffix to the file name.
# NOTE(review): presumably relies on `executables/` having been created by the preceding Linux
# "Prepare executables" step - confirm the step order if this is ever moved.
- name: Prepare executables for uploading (Ubuntu, ROCm)
run: |
mv ${{ env.PRODUCTION_TARGET }}/subspace-farmer-rocm executables/subspace-farmer-rocm-${{ matrix.build.suffix }}
# TODO: ROCm packages are only available for x86-64 for now
if: runner.os == 'Linux' && startsWith(matrix.build.target, 'x86_64')

- name: Prepare executables for uploading (macOS)
run: |
mkdir executables
Expand All @@ -268,6 +335,8 @@ jobs:
run: |
mkdir executables
move ${{ env.PRODUCTION_TARGET }}/subspace-farmer.exe executables/subspace-farmer-${{ matrix.build.suffix }}.exe
# TODO: ROCm compilation doesn't work in CI right now, good luck fixing it
# move ${{ env.PRODUCTION_TARGET }}/subspace-farmer-rocm.exe executables/subspace-farmer-rocm-${{ matrix.build.suffix }}.exe
move ${{ env.PRODUCTION_TARGET }}/subspace-node.exe executables/subspace-node-${{ matrix.build.suffix }}.exe
if: runner.os == 'Windows'

Expand Down
3 changes: 1 addition & 2 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

53 changes: 50 additions & 3 deletions Dockerfile-farmer
Original file line number Diff line number Diff line change
Expand Up @@ -46,11 +46,39 @@ RUN \
curl -OL https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/$CUDA_ARCH/cuda-ubuntu2004.pin && \
mv cuda-ubuntu2004.pin /etc/apt/preferences.d/cuda-repository-pin-600 && \
apt-get update && \
DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends cuda-minimal-build-12-4
DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends cuda-minimal-build-12-4 && \
echo "/usr/local/cuda/lib64" > /etc/ld.so.conf.d/cuda.conf && \
ldconfig

# ROCm is only used on x86-64 since they don't have other packages
# Installs the HIP development runtime from AMD's apt repository into the builder image and
# registers `/opt/rocm/lib` with the dynamic linker; a no-op on other architectures.
ARG ROCM_VERSION=6.2.2
# Notes on the commands below:
# * `$(uname -p)` is quoted so the test stays well-formed even if the output is empty
# * the signing key is downloaded to a file first, so that a failed download aborts the build
#   instead of being masked by the exit status of `gpg` at the end of a pipe
# * `printf` is used for the pin file because `echo` only expands `\n` in some shells
RUN \
    if [ "$(uname -p)" = "x86_64" ]; then \
        DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends gpg && \
        mkdir -p --mode=0755 /etc/apt/keyrings && \
        curl -fsSL -o /tmp/rocm.gpg.key https://repo.radeon.com/rocm/rocm.gpg.key && \
        gpg --dearmor < /tmp/rocm.gpg.key > /etc/apt/keyrings/rocm.gpg && \
        rm /tmp/rocm.gpg.key && \
        echo "deb [arch=amd64 signed-by=/etc/apt/keyrings/rocm.gpg] https://repo.radeon.com/rocm/apt/$ROCM_VERSION focal main" > /etc/apt/sources.list.d/rocm.list && \
        printf 'Package: *\nPin: release o=repo.radeon.com\nPin-Priority: 600\n' > /etc/apt/preferences.d/rocm-pin-600 && \
        apt-get update && \
        DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends rocm-hip-runtime-dev && \
        echo "/opt/rocm/lib" > /etc/ld.so.conf.d/rocm.conf && \
        ldconfig \
    ; fi

# TODO: Remove `NVCC=off` hack once `sppark` has proper features for CUDA and ROCm
# ROCm is only used on x86-64 since they don't have other packages
RUN \
export PATH=/usr/local/cuda/bin${PATH:+:${PATH}} && \
export LD_LIBRARY_PATH=/usr/local/cuda/lib64${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}} && \
if [ $(uname -p) = "x86_64" ]; then \
NVCC=off /root/.cargo/bin/cargo -Zgitoxide -Zgit build \
--locked \
-Z build-std \
--profile $PROFILE \
--bin subspace-farmer \
--features rocm \
--target $(uname -p)-unknown-linux-gnu && \
mv target/*/*/subspace-farmer subspace-farmer-rocm \
; fi && \
/root/.cargo/bin/cargo -Zgitoxide -Zgit build \
--locked \
-Z build-std \
Expand All @@ -63,7 +91,26 @@ RUN \

FROM ubuntu:20.04

COPY --from=0 /code/subspace-farmer /subspace-farmer
# Next block is for ROCm support
# ROCm is only used on x86-64 since they don't have other packages
# Installs only the HIP runtime (`hip-runtime-amd`) into the final image, then purges the tools
# that were needed just to set up the repository (curl, ca-certificates, gpg) and the apt lists.
ARG ROCM_VERSION=6.2.2
# Notes on the commands below:
# * `$(uname -p)` is quoted so the test stays well-formed even if the output is empty
# * the signing key is downloaded to a file first, so that a failed download aborts the build
#   instead of being masked by the exit status of `gpg` at the end of a pipe
# * `printf` is used for the pin file because `echo` only expands `\n` in some shells
RUN \
    if [ "$(uname -p)" = "x86_64" ]; then \
        apt-get update && \
        DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends curl ca-certificates gpg && \
        mkdir -p --mode=0755 /etc/apt/keyrings && \
        curl -fsSL -o /tmp/rocm.gpg.key https://repo.radeon.com/rocm/rocm.gpg.key && \
        gpg --dearmor < /tmp/rocm.gpg.key > /etc/apt/keyrings/rocm.gpg && \
        rm /tmp/rocm.gpg.key && \
        echo "deb [arch=amd64 signed-by=/etc/apt/keyrings/rocm.gpg] https://repo.radeon.com/rocm/apt/$ROCM_VERSION focal main" > /etc/apt/sources.list.d/rocm.list && \
        printf 'Package: *\nPin: release o=repo.radeon.com\nPin-Priority: 600\n' > /etc/apt/preferences.d/rocm-pin-600 && \
        apt-get update && \
        DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends hip-runtime-amd && \
        DEBIAN_FRONTEND=noninteractive apt-get remove -y --purge --autoremove curl ca-certificates gpg && \
        rm -rf /var/lib/apt/lists/* && \
        echo "/opt/rocm/lib" > /etc/ld.so.conf.d/rocm.conf && \
        ldconfig \
    ; fi

COPY --from=0 /code/subspace-farmer* /

RUN mkdir /var/subspace && chown nobody:nogroup /var/subspace

Expand Down
5 changes: 4 additions & 1 deletion crates/subspace-farmer/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -77,8 +77,11 @@ zeroize = "1.8.1"
default = ["default-library", "binary"]
cluster = ["dep:async-nats"]
numa = ["dep:hwlocality"]
# Only Volta+ architectures are supported (GeForce RTX 20xx consumer GPUs and newer)
# Only Volta+ architectures are supported (GeForce RTX 16xx consumer GPUs and newer)
cuda = ["_gpu", "subspace-proof-of-space-gpu/cuda"]
# TODO: ROCm can't be enabled at the same time as `cuda` feature at the moment
# Seems to support RDNA 2+, at least on Linux
rocm = ["_gpu", "subspace-proof-of-space-gpu/rocm"]
# Internal feature, shouldn't be used directly
_gpu = []

Expand Down
Loading

0 comments on commit d1eb6c0

Please sign in to comment.