From d1eb6c0549cc396d687a00c8591109f783927e0d Mon Sep 17 00:00:00 2001 From: Nazar Mokrynskyi Date: Mon, 16 Sep 2024 00:08:57 +0300 Subject: [PATCH] ROCm support --- .github/workflows/rust.yml | 54 ++++- .github/workflows/snapshot-build.yml | 69 +++++++ Cargo.lock | 3 +- Dockerfile-farmer | 53 ++++- crates/subspace-farmer/Cargo.toml | 5 +- .../commands/cluster/plotter.rs | 123 +++++++++++ .../src/bin/subspace-farmer/commands/farm.rs | 123 +++++++++++ crates/subspace-farmer/src/plotter/gpu.rs | 2 + .../subspace-farmer/src/plotter/gpu/rocm.rs | 108 ++++++++++ shared/subspace-proof-of-space-gpu/Cargo.toml | 10 +- shared/subspace-proof-of-space-gpu/README.md | 5 + shared/subspace-proof-of-space-gpu/build.rs | 39 +++- shared/subspace-proof-of-space-gpu/src/lib.rs | 2 + .../subspace-proof-of-space-gpu/src/rocm.rs | 193 ++++++++++++++++++ .../src/rocm/tests.rs | 82 ++++++++ 15 files changed, 855 insertions(+), 16 deletions(-) create mode 100644 crates/subspace-farmer/src/plotter/gpu/rocm.rs create mode 100644 shared/subspace-proof-of-space-gpu/src/rocm.rs create mode 100644 shared/subspace-proof-of-space-gpu/src/rocm/tests.rs diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml index 86197593a9..042877ec7e 100644 --- a/.github/workflows/rust.yml +++ b/.github/workflows/rust.yml @@ -54,7 +54,7 @@ jobs: cargo-clippy: strategy: matrix: - os: ${{ fromJson(github.repository_owner == 'autonomys' && '[["self-hosted", "ubuntu-20.04-x86-64"], ["self-hosted", "macos-14-arm64"], ["self-hosted", "windows-server-2022-x86-64"]]' || '["ubuntu-22.04", "macos-14", "windows-2022"]') }} + os: ${{ fromJson(github.repository_owner == 'autonomys' && '[["self-hosted", "ubuntu-20.04-x86-64"], ["self-hosted", "macos-14-arm64"], ["self-hosted", "windows-server-2022-x86-64"]]' || '["ubuntu-20.04", "macos-14", "windows-2022"]') }} runs-on: ${{ matrix.os }} @@ -104,6 +104,42 @@ jobs: sub-packages: '["nvcc", "cudart"]' if: runner.os == 'Linux' || runner.os == 'Windows' + # TODO: ROCm 
compilation doesn't work in CI right now, good luck fixing it + # - name: Configure ROCm cache (Windows) + # uses: actions/cache@0c45773b623bea8c8e75f6c82b208c3cf94ea4f9 # v4.0.2 + # id: rocm-cache + # with: + # path: C:\Program Files\AMD\ROCm + # key: ${{ runner.os }}-rocm + # if: runner.os == 'Windows' + + - name: ROCm toolchain + run: | + ROCM_VERSION=6.2.2 + sudo mkdir -p --mode=0755 /etc/apt/keyrings + curl -L https://repo.radeon.com/rocm/rocm.gpg.key | gpg --dearmor | sudo tee /etc/apt/keyrings/rocm.gpg > /dev/null + echo "deb [arch=amd64 signed-by=/etc/apt/keyrings/rocm.gpg] https://repo.radeon.com/rocm/apt/$ROCM_VERSION focal main" | sudo tee /etc/apt/sources.list.d/rocm.list > /dev/null + printf 'Package: *\nPin: release o=repo.radeon.com\nPin-Priority: 600\n' | sudo tee /etc/apt/preferences.d/rocm-pin-600 > /dev/null + sudo apt-get update + DEBIAN_FRONTEND=noninteractive sudo apt-get install -y --no-install-recommends rocm-hip-runtime-dev + echo "/opt/rocm/lib" | sudo tee /etc/ld.so.conf.d/rocm.conf > /dev/null + sudo ldconfig + if: runner.os == 'Linux' + + # TODO: ROCm compilation doesn't work in CI right now, good luck fixing it + # - name: ROCm toolchain + # run: | + # $ErrorActionPreference = "Stop" + # Invoke-WebRequest -Uri https://download.amd.com/developer/eula/rocm-hub/AMD-Software-PRO-Edition-24.Q3-WinSvr2022-For-HIP.exe -OutFile "${env:RUNNER_TEMP}\HIP-SDK-Installer.exe" + # Start-Process "${env:RUNNER_TEMP}\HIP-SDK-Installer.exe" -ArgumentList '-install' -NoNewWindow -Wait + # Remove-Item "${env:RUNNER_TEMP}\HIP-SDK-Installer.exe" + # if: runner.os == 'Windows' && steps.rocm-cache.outputs.cache-hit != 'true' + # + # - name: ROCm toolchain environment (Windows) + # run: | + # Add-Content $env:GITHUB_PATH "C:\Program Files\AMD\ROCm\6.1\bin" + # if: runner.os == 'Windows' + - name: Configure cache uses: actions/cache@0c45773b623bea8c8e75f6c82b208c3cf94ea4f9 # v4.0.2 with: @@ -124,6 +160,22 @@ jobs: cargo -Zgitoxide -Zgit clippy --locked --all-targets 
--features runtime-benchmarks,cuda -- -D warnings if: runner.os == 'Linux' || runner.os == 'Windows' + - name: cargo clippy (ROCm) + env: + NVCC: off + run: | + cargo -Zgitoxide -Zgit clippy --locked --all-targets --features rocm -- -D warnings + if: runner.os == 'Linux' + + # TODO: ROCm compilation doesn't work in CI right now, good luck fixing it + # - name: cargo clippy (ROCm) + # env: + # NVCC: off + # HIPCC: hipcc.bin.exe + # run: | + # cargo -Zgitoxide -Zgit clippy --locked --all-targets --features rocm -- -D warnings + # if: runner.os == 'Windows' + cargo-docs: runs-on: ${{ fromJson(github.repository_owner == 'autonomys' && '["self-hosted", "ubuntu-20.04-x86-64"]' || '"ubuntu-22.04"') }} steps: diff --git a/.github/workflows/snapshot-build.yml b/.github/workflows/snapshot-build.yml index 713802c4ad..185d09b44c 100644 --- a/.github/workflows/snapshot-build.yml +++ b/.github/workflows/snapshot-build.yml @@ -178,6 +178,43 @@ jobs: sub-packages: '["nvcc", "cudart"]' if: runner.os == 'Linux' || runner.os == 'Windows' + # TODO: ROCm compilation doesn't work in CI right now, good luck fixing it + # - name: Configure ROCm cache (Windows) + # uses: actions/cache@0c45773b623bea8c8e75f6c82b208c3cf94ea4f9 # v4.0.2 + # id: rocm-cache + # with: + # path: C:\Program Files\AMD\ROCm + # key: ${{ runner.os }}-rocm + # if: runner.os == 'Windows' + + - name: ROCm toolchain + run: | + ROCM_VERSION=6.2.2 + sudo mkdir -p --mode=0755 /etc/apt/keyrings + curl -L https://repo.radeon.com/rocm/rocm.gpg.key | gpg --dearmor | sudo tee /etc/apt/keyrings/rocm.gpg > /dev/null + echo "deb [arch=amd64 signed-by=/etc/apt/keyrings/rocm.gpg] https://repo.radeon.com/rocm/apt/$ROCM_VERSION focal main" | sudo tee /etc/apt/sources.list.d/rocm.list > /dev/null + printf 'Package: *\nPin: release o=repo.radeon.com\nPin-Priority: 600\n' | sudo tee /etc/apt/preferences.d/rocm-pin-600 > /dev/null + sudo apt-get update + DEBIAN_FRONTEND=noninteractive sudo apt-get install -y --no-install-recommends 
rocm-hip-runtime-dev + echo "/opt/rocm/lib" | sudo tee /etc/ld.so.conf.d/rocm.conf > /dev/null + sudo ldconfig + # TODO: ROCm packages are only available for x86-64 for now + if: runner.os == 'Linux' && startsWith(matrix.build.target, 'x86_64') + + # TODO: ROCm compilation doesn't work in CI right now, good luck fixing it + # - name: ROCm toolchain + # run: | + # $ErrorActionPreference = "Stop" + # Invoke-WebRequest -Uri https://download.amd.com/developer/eula/rocm-hub/AMD-Software-PRO-Edition-24.Q3-WinSvr2022-For-HIP.exe -OutFile "${env:RUNNER_TEMP}\HIP-SDK-Installer.exe" + # Start-Process "${env:RUNNER_TEMP}\HIP-SDK-Installer.exe" -ArgumentList '-install' -NoNewWindow -Wait + # Remove-Item "${env:RUNNER_TEMP}\HIP-SDK-Installer.exe" + # if: runner.os == 'Windows' && steps.rocm-cache.outputs.cache-hit != 'true' + # + # - name: ROCm toolchain environment (Windows) + # run: | + # Add-Content $env:GITHUB_PATH "C:\Program Files\AMD\ROCm\6.1\bin" + # if: runner.os == 'Windows' + - name: Configure cache uses: actions/cache@0c45773b623bea8c8e75f6c82b208c3cf94ea4f9 # v4.0.2 with: @@ -193,6 +230,28 @@ jobs: cargo -Zgitoxide -Zgit build --locked -Z build-std --target ${{ matrix.build.target }} --profile production --bin subspace-farmer if: runner.os == 'macOS' || !startsWith(matrix.build.target, 'x86_64') + # TODO: ROCm compilation doesn't work in CI right now, good luck fixing it + # # ROCm can't be enabled together with CUDA for now + # - name: Build farmer (ROCm, Windows) + # env: + # NVCC: off + # HIPCC: hipcc.bin.exe + # run: | + # cargo -Zgitoxide -Zgit build --locked -Z build-std --target ${{ matrix.build.target }} --profile production --bin subspace-farmer --features rocm + # move ${{ env.PRODUCTION_TARGET }}/subspace-farmer.exe ${{ env.PRODUCTION_TARGET }}/subspace-farmer-rocm.exe + # # TODO: ROCm packages are only available for x86-64 for now + # if: runner.os == 'Windows' && startsWith(matrix.build.target, 'x86_64') + + # ROCm can't be enabled together with CUDA 
for now + - name: Build farmer (ROCm, Ubuntu) + env: + NVCC: off + run: | + cargo -Zgitoxide -Zgit build --locked -Z build-std --target ${{ matrix.build.target }} --profile production --bin subspace-farmer --features rocm + mv ${{ env.PRODUCTION_TARGET }}/subspace-farmer ${{ env.PRODUCTION_TARGET }}/subspace-farmer-rocm + # TODO: ROCm packages are only available for x86-64 for now + if: runner.os == 'Linux' && startsWith(matrix.build.target, 'x86_64') + - name: Build farmer run: | cargo -Zgitoxide -Zgit build --locked -Z build-std --target ${{ matrix.build.target }} --profile production --bin subspace-farmer --features cuda @@ -240,6 +299,8 @@ jobs: - name: Sign Application (Windows) run: | AzureSignTool sign --azure-key-vault-url "${{ secrets.AZURE_KEY_VAULT_URI }}" --azure-key-vault-client-id "${{ secrets.AZURE_CLIENT_ID }}" --azure-key-vault-client-secret "${{ secrets.AZURE_CLIENT_SECRET }}" --azure-key-vault-tenant-id "${{ secrets.AZURE_TENANT_ID }}" --azure-key-vault-certificate "${{ secrets.AZURE_CERT_NAME }}" --file-digest sha512 --timestamp-rfc3161 http://timestamp.digicert.com -v "${{ env.PRODUCTION_TARGET }}/subspace-farmer.exe" + # TODO: ROCm compilation doesn't work in CI right now, good luck fixing it + # AzureSignTool sign --azure-key-vault-url "${{ secrets.AZURE_KEY_VAULT_URI }}" --azure-key-vault-client-id "${{ secrets.AZURE_CLIENT_ID }}" --azure-key-vault-client-secret "${{ secrets.AZURE_CLIENT_SECRET }}" --azure-key-vault-tenant-id "${{ secrets.AZURE_TENANT_ID }}" --azure-key-vault-certificate "${{ secrets.AZURE_CERT_NAME }}" --file-digest sha512 --timestamp-rfc3161 http://timestamp.digicert.com -v "${{ env.PRODUCTION_TARGET }}/subspace-farmer-rocm.exe" AzureSignTool sign --azure-key-vault-url "${{ secrets.AZURE_KEY_VAULT_URI }}" --azure-key-vault-client-id "${{ secrets.AZURE_CLIENT_ID }}" --azure-key-vault-client-secret "${{ secrets.AZURE_CLIENT_SECRET }}" --azure-key-vault-tenant-id "${{ secrets.AZURE_TENANT_ID }}" --azure-key-vault-certificate 
"${{ secrets.AZURE_CERT_NAME }}" --file-digest sha512 --timestamp-rfc3161 http://timestamp.digicert.com -v "${{ env.PRODUCTION_TARGET }}/subspace-node.exe" # Allow code signing to fail on non-release builds and in non-subspace repos (forks) continue-on-error: ${{ github.repository_owner != 'autonomys' || github.event_name != 'push' || github.ref_type != 'tag' }} @@ -252,6 +313,12 @@ jobs: mv ${{ env.PRODUCTION_TARGET }}/subspace-node executables/subspace-node-${{ matrix.build.suffix }} if: runner.os == 'Linux' + - name: Prepare executables for uploading (Ubuntu, ROCm) + run: | + mv ${{ env.PRODUCTION_TARGET }}/subspace-farmer-rocm executables/subspace-farmer-rocm-${{ matrix.build.suffix }} + # TODO: ROCm packages are only available for x86-64 for now + if: runner.os == 'Linux' && startsWith(matrix.build.target, 'x86_64') + - name: Prepare executables for uploading (macOS) run: | mkdir executables @@ -268,6 +335,8 @@ jobs: run: | mkdir executables move ${{ env.PRODUCTION_TARGET }}/subspace-farmer.exe executables/subspace-farmer-${{ matrix.build.suffix }}.exe + # TODO: ROCm compilation doesn't work in CI right now, good luck fixing it + # move ${{ env.PRODUCTION_TARGET }}/subspace-farmer-rocm.exe executables/subspace-farmer-rocm-${{ matrix.build.suffix }}.exe move ${{ env.PRODUCTION_TARGET }}/subspace-node.exe executables/subspace-node-${{ matrix.build.suffix }}.exe if: runner.os == 'Windows' diff --git a/Cargo.lock b/Cargo.lock index 1a5227689b..f0af35c7fb 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -12260,8 +12260,7 @@ dependencies = [ [[package]] name = "sppark" version = "0.1.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c55f3833d30846a26110dccb1d5366314c2c52516a9173b74238c16b24b1a9f9" +source = "git+https://github.com/autonomys/sppark?rev=71c49160d7aa24f92c20592d2d26ef16f5400a04#71c49160d7aa24f92c20592d2d26ef16f5400a04" dependencies = [ "cc", "which", diff --git a/Dockerfile-farmer b/Dockerfile-farmer index 
112f5b1db4..a53327616b 100644 --- a/Dockerfile-farmer +++ b/Dockerfile-farmer @@ -46,11 +46,39 @@ RUN \ curl -OL https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/$CUDA_ARCH/cuda-ubuntu2004.pin && \ mv cuda-ubuntu2004.pin /etc/apt/preferences.d/cuda-repository-pin-600 && \ apt-get update && \ - DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends cuda-minimal-build-12-4 + DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends cuda-minimal-build-12-4 && \ + echo "/usr/local/cuda/lib64" > /etc/ld.so.conf.d/cuda.conf && \ + ldconfig +# ROCm is only used on x86-64 since they don't have other packages +ARG ROCM_VERSION=6.2.2 +RUN \ + if [ $(uname -p) = "x86_64" ]; then \ + DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends gpg && \ + mkdir -p --mode=0755 /etc/apt/keyrings && \ + curl -L https://repo.radeon.com/rocm/rocm.gpg.key | gpg --dearmor > /etc/apt/keyrings/rocm.gpg && \ + echo "deb [arch=amd64 signed-by=/etc/apt/keyrings/rocm.gpg] https://repo.radeon.com/rocm/apt/$ROCM_VERSION focal main" > /etc/apt/sources.list.d/rocm.list && \ + echo "Package: *\nPin: release o=repo.radeon.com\nPin-Priority: 600" > /etc/apt/preferences.d/rocm-pin-600 && \ + apt-get update && \ + DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends rocm-hip-runtime-dev && \ + echo "/opt/rocm/lib" > /etc/ld.so.conf.d/rocm.conf && \ + ldconfig \ + ; fi + +# TODO: Remove `NVCC=off` hack once `sppark` has proper features for CUDA and ROCm +# ROCm is only used on x86-64 since they don't have other packages RUN \ export PATH=/usr/local/cuda/bin${PATH:+:${PATH}} && \ - export LD_LIBRARY_PATH=/usr/local/cuda/lib64${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}} && \ + if [ $(uname -p) = "x86_64" ]; then \ + NVCC=off /root/.cargo/bin/cargo -Zgitoxide -Zgit build \ + --locked \ + -Z build-std \ + --profile $PROFILE \ + --bin subspace-farmer \ + --features rocm \ + --target $(uname -p)-unknown-linux-gnu && \ + mv 
target/*/*/subspace-farmer subspace-farmer-rocm \ ; fi && \ /root/.cargo/bin/cargo -Zgitoxide -Zgit build \ --locked \ -Z build-std \ @@ -63,7 +91,26 @@ RUN \ FROM ubuntu:20.04 -COPY --from=0 /code/subspace-farmer /subspace-farmer +# Next block is for ROCm support +# ROCm is only used on x86-64 since they don't have other packages +ARG ROCM_VERSION=6.2.2 +RUN \ + if [ $(uname -p) = "x86_64" ]; then \ + apt-get update && \ + DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends curl ca-certificates gpg && \ + mkdir -p --mode=0755 /etc/apt/keyrings && \ + curl -L https://repo.radeon.com/rocm/rocm.gpg.key | gpg --dearmor > /etc/apt/keyrings/rocm.gpg && \ + echo "deb [arch=amd64 signed-by=/etc/apt/keyrings/rocm.gpg] https://repo.radeon.com/rocm/apt/$ROCM_VERSION focal main" > /etc/apt/sources.list.d/rocm.list && \ + echo "Package: *\nPin: release o=repo.radeon.com\nPin-Priority: 600" > /etc/apt/preferences.d/rocm-pin-600 && \ + apt-get update && \ + DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends hip-runtime-amd && \ + DEBIAN_FRONTEND=noninteractive apt-get remove -y --purge --autoremove curl ca-certificates gpg && \ + rm -rf /var/lib/apt/lists/* && \ + echo "/opt/rocm/lib" > /etc/ld.so.conf.d/rocm.conf && \ + ldconfig \ + ; fi + +COPY --from=0 /code/subspace-farmer* / RUN mkdir /var/subspace && chown nobody:nogroup /var/subspace diff --git a/crates/subspace-farmer/Cargo.toml b/crates/subspace-farmer/Cargo.toml index 5a0b4ef17f..d324efffd6 100644 --- a/crates/subspace-farmer/Cargo.toml +++ b/crates/subspace-farmer/Cargo.toml @@ -77,8 +77,11 @@ zeroize = "1.8.1" default = ["default-library", "binary"] cluster = ["dep:async-nats"] numa = ["dep:hwlocality"] -# Only Volta+ architectures are supported (GeForce RTX 20xx consumer GPUs and newer) +# Only Volta+ architectures are supported (GeForce GTX 16xx consumer GPUs and newer) cuda = ["_gpu", "subspace-proof-of-space-gpu/cuda"] +# TODO: ROCm can't be enabled at the same time 
as `cuda` feature at the moment +# Seems to support RDNA 2+, at least on Linux +rocm = ["_gpu", "subspace-proof-of-space-gpu/rocm"] # Internal feature, shouldn't be used directly _gpu = [] diff --git a/crates/subspace-farmer/src/bin/subspace-farmer/commands/cluster/plotter.rs b/crates/subspace-farmer/src/bin/subspace-farmer/commands/cluster/plotter.rs index 48c7812a8b..bb2e9a89c6 100644 --- a/crates/subspace-farmer/src/bin/subspace-farmer/commands/cluster/plotter.rs +++ b/crates/subspace-farmer/src/bin/subspace-farmer/commands/cluster/plotter.rs @@ -16,6 +16,8 @@ use subspace_farmer::cluster::plotter::plotter_service; use subspace_farmer::plotter::cpu::CpuPlotter; #[cfg(feature = "cuda")] use subspace_farmer::plotter::gpu::cuda::CudaRecordsEncoder; +#[cfg(feature = "rocm")] +use subspace_farmer::plotter::gpu::rocm::RocmRecordsEncoder; #[cfg(feature = "_gpu")] use subspace_farmer::plotter::gpu::GpuPlotter; use subspace_farmer::plotter::pool::PoolPlotter; @@ -100,6 +102,24 @@ struct CudaPlottingOptions { cuda_gpus: Option, } +#[cfg(feature = "rocm")] +#[derive(Debug, Parser)] +struct RocmPlottingOptions { + /// Defines how many sectors farmer will download concurrently during plotting with ROCm GPU, + /// allows to limit memory usage of the plotting process, defaults to number of ROCm GPUs found + /// + 1 to download future sector ahead of time. + /// + /// Increase will result in higher memory usage. + #[arg(long)] + rocm_sector_downloading_concurrency: Option, + /// Specify exact GPUs to be used for plotting instead of using all GPUs (default behavior). + /// + /// GPUs are comma-separated: `--rocm-gpus 0,1,3`. Empty string can be specified to disable ROCm + /// GPU usage. 
+ #[arg(long)] + rocm_gpus: Option, +} + /// Arguments for plotter #[derive(Debug, Parser)] pub(super) struct PlotterArgs { @@ -116,6 +136,10 @@ pub(super) struct PlotterArgs { #[cfg(feature = "cuda")] #[clap(flatten)] cuda_plotting_options: CudaPlottingOptions, + /// Plotting options only used by ROCm GPU plotter + #[cfg(feature = "rocm")] + #[clap(flatten)] + rocm_plotting_options: RocmPlottingOptions, /// Additional cluster components #[clap(raw = true)] pub(super) additional_components: Vec, @@ -134,6 +158,8 @@ where cpu_plotting_options, #[cfg(feature = "cuda")] cuda_plotting_options, + #[cfg(feature = "rocm")] + rocm_plotting_options, additional_components: _, } = plotter_args; @@ -164,6 +190,21 @@ where plotters.push(Box::new(cuda_plotter)); } } + #[cfg(feature = "rocm")] + { + let maybe_rocm_plotter = init_rocm_plotter( + rocm_plotting_options, + piece_getter.clone(), + Arc::clone(&global_mutex), + kzg.clone(), + erasure_coding.clone(), + registry, + )?; + + if let Some(rocm_plotter) = maybe_rocm_plotter { + plotters.push(Box::new(rocm_plotter)); + } + } { let cpu_sector_encoding_concurrency = cpu_plotting_options.cpu_sector_encoding_concurrency; let maybe_cpu_plotter = init_cpu_plotter::<_, PosTable>( @@ -376,3 +417,85 @@ where .map_err(|error| anyhow::anyhow!("Failed to initialize CUDA plotter: {error}"))?, )) } + +#[cfg(feature = "rocm")] +fn init_rocm_plotter( + rocm_plotting_options: RocmPlottingOptions, + piece_getter: PG, + global_mutex: Arc>, + kzg: Kzg, + erasure_coding: ErasureCoding, + registry: &mut Registry, +) -> anyhow::Result>> +where + PG: PieceGetter + Clone + Send + Sync + 'static, +{ + use std::collections::BTreeSet; + use subspace_proof_of_space_gpu::rocm::rocm_devices; + use tracing::{debug, warn}; + + let RocmPlottingOptions { + rocm_sector_downloading_concurrency, + rocm_gpus, + } = rocm_plotting_options; + + let mut rocm_devices = rocm_devices(); + let mut used_rocm_devices = (0..rocm_devices.len()).collect::>(); + + if let 
Some(rocm_gpus) = rocm_gpus { + if rocm_gpus.is_empty() { + info!("ROCm GPU plotting was explicitly disabled"); + return Ok(None); + } + + let mut rocm_gpus_to_use = rocm_gpus + .split(',') + .map(|gpu_index| gpu_index.parse()) + .collect::, _>>()?; + + (used_rocm_devices, rocm_devices) = rocm_devices + .into_iter() + .enumerate() + .filter(|(index, _rocm_device)| rocm_gpus_to_use.remove(index)) + .unzip(); + + if !rocm_gpus_to_use.is_empty() { + warn!( + ?rocm_gpus_to_use, + "Some ROCm GPUs were not found on the system" + ); + } + } + + if rocm_devices.is_empty() { + debug!("No ROCm GPU devices found"); + return Ok(None); + } + + info!(?used_rocm_devices, "Using ROCm GPUs"); + + let rocm_downloading_semaphore = Arc::new(Semaphore::new( + rocm_sector_downloading_concurrency + .map(|rocm_sector_downloading_concurrency| rocm_sector_downloading_concurrency.get()) + .unwrap_or(rocm_devices.len() + 1), + )); + + Ok(Some( + GpuPlotter::new( + piece_getter, + rocm_downloading_semaphore, + rocm_devices + .into_iter() + .map(|rocm_device| RocmRecordsEncoder::new(rocm_device, Arc::clone(&global_mutex))) + .collect::>() + .map_err(|error| { + anyhow::anyhow!("Failed to create ROCm records encoder: {error}") + })?, + global_mutex, + kzg, + erasure_coding, + Some(registry), + ) + .map_err(|error| anyhow::anyhow!("Failed to initialize ROCm plotter: {error}"))?, + )) +} diff --git a/crates/subspace-farmer/src/bin/subspace-farmer/commands/farm.rs b/crates/subspace-farmer/src/bin/subspace-farmer/commands/farm.rs index 4c61943eb6..44ac306070 100644 --- a/crates/subspace-farmer/src/bin/subspace-farmer/commands/farm.rs +++ b/crates/subspace-farmer/src/bin/subspace-farmer/commands/farm.rs @@ -32,6 +32,8 @@ use subspace_farmer::node_client::NodeClient; use subspace_farmer::plotter::cpu::CpuPlotter; #[cfg(feature = "cuda")] use subspace_farmer::plotter::gpu::cuda::CudaRecordsEncoder; +#[cfg(feature = "rocm")] +use subspace_farmer::plotter::gpu::rocm::RocmRecordsEncoder; #[cfg(feature = 
"_gpu")] use subspace_farmer::plotter::gpu::GpuPlotter; use subspace_farmer::plotter::pool::PoolPlotter; @@ -160,6 +162,24 @@ struct CudaPlottingOptions { cuda_gpus: Option, } +#[cfg(feature = "rocm")] +#[derive(Debug, Parser)] +struct RocmPlottingOptions { + /// Defines how many sectors farmer will download concurrently during plotting with ROCm GPU, + /// allows to limit memory usage of the plotting process, defaults to number of ROCm GPUs found + /// + 1 to download future sector ahead of time. + /// + /// Increase will result in higher memory usage. + #[arg(long)] + rocm_sector_downloading_concurrency: Option, + /// Specify exact GPUs to be used for plotting instead of using all GPUs (default behavior). + /// + /// GPUs are comma-separated: `--rocm-gpus 0,1,3`. Empty string can be specified to disable ROCm + /// GPU usage. + #[arg(long)] + rocm_gpus: Option, +} + /// Arguments for farmer #[derive(Debug, Parser)] pub(crate) struct FarmingArgs { @@ -233,6 +253,10 @@ pub(crate) struct FarmingArgs { #[cfg(feature = "cuda")] #[clap(flatten)] cuda_plotting_options: CudaPlottingOptions, + /// Plotting options only used by ROCm GPU plotter + #[cfg(feature = "rocm")] + #[clap(flatten)] + rocm_plotting_options: RocmPlottingOptions, /// Enable plot cache. 
/// /// Plot cache uses unplotted space as additional cache improving plotting speeds, especially @@ -291,6 +315,8 @@ where cpu_plotting_options, #[cfg(feature = "cuda")] cuda_plotting_options, + #[cfg(feature = "rocm")] + rocm_plotting_options, plot_cache, disable_farm_locking, create, @@ -485,6 +511,21 @@ where plotters.push(Box::new(cuda_plotter)); } } + #[cfg(feature = "rocm")] + { + let maybe_rocm_plotter = init_rocm_plotter( + rocm_plotting_options, + piece_getter.clone(), + Arc::clone(&global_mutex), + kzg.clone(), + erasure_coding.clone(), + &mut registry, + )?; + + if let Some(rocm_plotter) = maybe_rocm_plotter { + plotters.push(Box::new(rocm_plotter)); + } + } { let cpu_sector_encoding_concurrency = cpu_plotting_options.cpu_sector_encoding_concurrency; let maybe_cpu_plotter = init_cpu_plotter::<_, PosTable>( @@ -1024,3 +1065,85 @@ where .map_err(|error| anyhow::anyhow!("Failed to initialize CUDA plotter: {error}"))?, )) } + +#[cfg(feature = "rocm")] +fn init_rocm_plotter( + rocm_plotting_options: RocmPlottingOptions, + piece_getter: PG, + global_mutex: Arc>, + kzg: Kzg, + erasure_coding: ErasureCoding, + registry: &mut Registry, +) -> anyhow::Result>> +where + PG: PieceGetter + Clone + Send + Sync + 'static, +{ + use std::collections::BTreeSet; + use subspace_proof_of_space_gpu::rocm::rocm_devices; + use tracing::debug; + + let RocmPlottingOptions { + rocm_sector_downloading_concurrency, + rocm_gpus, + } = rocm_plotting_options; + + let mut rocm_devices = rocm_devices(); + let mut used_rocm_devices = (0..rocm_devices.len()).collect::>(); + + if let Some(rocm_gpus) = rocm_gpus { + if rocm_gpus.is_empty() { + info!("ROCm GPU plotting was explicitly disabled"); + return Ok(None); + } + + let mut rocm_gpus_to_use = rocm_gpus + .split(',') + .map(|gpu_index| gpu_index.parse()) + .collect::, _>>()?; + + (used_rocm_devices, rocm_devices) = rocm_devices + .into_iter() + .enumerate() + .filter(|(index, _rocm_device)| rocm_gpus_to_use.remove(index)) + .unzip(); + + 
if !rocm_gpus_to_use.is_empty() { + warn!( + ?rocm_gpus_to_use, + "Some ROCm GPUs were not found on the system" + ); + } + } + + if rocm_devices.is_empty() { + debug!("No ROCm GPU devices found"); + return Ok(None); + } + + info!(?used_rocm_devices, "Using ROCm GPUs"); + + let rocm_downloading_semaphore = Arc::new(Semaphore::new( + rocm_sector_downloading_concurrency + .map(|rocm_sector_downloading_concurrency| rocm_sector_downloading_concurrency.get()) + .unwrap_or(rocm_devices.len() + 1), + )); + + Ok(Some( + GpuPlotter::new( + piece_getter, + rocm_downloading_semaphore, + rocm_devices + .into_iter() + .map(|rocm_device| RocmRecordsEncoder::new(rocm_device, Arc::clone(&global_mutex))) + .collect::>() + .map_err(|error| { + anyhow::anyhow!("Failed to create ROCm records encoder: {error}") + })?, + global_mutex, + kzg, + erasure_coding, + Some(registry), + ) + .map_err(|error| anyhow::anyhow!("Failed to initialize ROCm plotter: {error}"))?, + )) +} diff --git a/crates/subspace-farmer/src/plotter/gpu.rs b/crates/subspace-farmer/src/plotter/gpu.rs index fb3ecdacf2..68556d43b1 100644 --- a/crates/subspace-farmer/src/plotter/gpu.rs +++ b/crates/subspace-farmer/src/plotter/gpu.rs @@ -4,6 +4,8 @@ pub mod cuda; mod gpu_encoders_manager; pub mod metrics; +#[cfg(feature = "rocm")] +pub mod rocm; use crate::plotter::gpu::gpu_encoders_manager::GpuRecordsEncoderManager; use crate::plotter::gpu::metrics::GpuPlotterMetrics; diff --git a/crates/subspace-farmer/src/plotter/gpu/rocm.rs b/crates/subspace-farmer/src/plotter/gpu/rocm.rs new file mode 100644 index 0000000000..d5257d98b3 --- /dev/null +++ b/crates/subspace-farmer/src/plotter/gpu/rocm.rs @@ -0,0 +1,108 @@ +//! 
ROCm GPU records encoder + +use crate::plotter::gpu::GpuRecordsEncoder; +use async_lock::Mutex as AsyncMutex; +use parking_lot::Mutex; +use rayon::{ThreadPool, ThreadPoolBuildError, ThreadPoolBuilder}; +use std::sync::atomic::{AtomicBool, Ordering}; +use std::sync::Arc; +use subspace_core_primitives::pieces::{PieceOffset, Record}; +use subspace_core_primitives::sectors::SectorId; +use subspace_farmer_components::plotting::RecordsEncoder; +use subspace_farmer_components::sector::SectorContentsMap; +use subspace_proof_of_space_gpu::rocm::RocmDevice; + +/// ROCm implementation of [`GpuRecordsEncoder`] +#[derive(Debug)] +pub struct RocmRecordsEncoder { + rocm_device: RocmDevice, + thread_pool: ThreadPool, + global_mutex: Arc>, +} + +impl GpuRecordsEncoder for RocmRecordsEncoder { + const TYPE: &'static str = "rocm"; +} + +impl RecordsEncoder for RocmRecordsEncoder { + fn encode_records( + &mut self, + sector_id: &SectorId, + records: &mut [Record], + abort_early: &AtomicBool, + ) -> Result> { + let pieces_in_sector = records + .len() + .try_into() + .map_err(|error| format!("Failed to convert pieces in sector: {error}"))?; + let mut sector_contents_map = SectorContentsMap::new(pieces_in_sector); + + self.thread_pool.install(|| { + let iter = Mutex::new( + (PieceOffset::ZERO..) 
+ .zip(records.iter_mut()) + .zip(sector_contents_map.iter_record_bitfields_mut()), + ); + let plotting_error = Mutex::new(None::); + + rayon::scope(|scope| { + scope.spawn_broadcast(|_scope, _ctx| loop { + // Take mutex briefly to make sure encoding is allowed right now + self.global_mutex.lock_blocking(); + + // This instead of `while` above because otherwise mutex will be held for the + // duration of the loop and will limit concurrency to 1 record + let Some(((piece_offset, record), mut encoded_chunks_used)) = + iter.lock().next() + else { + return; + }; + let pos_seed = sector_id.derive_evaluation_seed(piece_offset); + + if let Err(error) = self.rocm_device.generate_and_encode_pospace( + &pos_seed, + record, + encoded_chunks_used.iter_mut(), + ) { + plotting_error.lock().replace(error); + return; + } + + if abort_early.load(Ordering::Relaxed) { + return; + } + }); + }); + + let plotting_error = plotting_error.lock().take(); + if let Some(error) = plotting_error { + return Err(error); + } + + Ok(()) + })?; + + Ok(sector_contents_map) + } +} + +impl RocmRecordsEncoder { + /// Create new instance + pub fn new( + rocm_device: RocmDevice, + global_mutex: Arc>, + ) -> Result { + let id = rocm_device.id(); + let thread_pool = ThreadPoolBuilder::new() + .thread_name(move |thread_index| format!("rocm-{id}.{thread_index}")) + // Make sure there is overlap between records, so GPU is almost always busy + .num_threads(2) + .build()?; + + Ok(Self { + rocm_device, + thread_pool, + global_mutex, + }) + } +} diff --git a/shared/subspace-proof-of-space-gpu/Cargo.toml b/shared/subspace-proof-of-space-gpu/Cargo.toml index 3888584b7d..b84322dc8e 100644 --- a/shared/subspace-proof-of-space-gpu/Cargo.toml +++ b/shared/subspace-proof-of-space-gpu/Cargo.toml @@ -15,9 +15,8 @@ include = [ [dependencies] blst = { version = "0.3.13", optional = true } rust-kzg-blst = { git = "https://github.com/grandinetech/rust-kzg", rev = "6c8fcc623df3d7e8c0f30951a49bfea764f90bf4", default-features = 
false, optional = true } -# TODO: This is `rocm` branch, it is needed for ROCm support -#sppark = { git = "https://github.com/dot-asm/sppark", rev = "8eeafe0f6cc0ca8211b1be93922df1b5a118bbd2", optional = true } -sppark = { version = "0.1.8", optional = true } +# TODO: Fork with ROCm support, switch to upstream once `rocm` branch from `https://github.com/dot-asm/sppark` is upstreamed +sppark = { version = "0.1.8", git = "https://github.com/autonomys/sppark", rev = "71c49160d7aa24f92c20592d2d26ef16f5400a04", optional = true } subspace-core-primitives = { version = "0.1.0", path = "../../crates/subspace-core-primitives", default-features = false, optional = true } subspace-kzg = { version = "0.1.0", path = "../subspace-kzg", optional = true } @@ -30,9 +29,10 @@ subspace-proof-of-space = { version = "0.1.0", path = "../../crates/subspace-pro cc = "1.1.23" [features] -# Only Volta+ architectures are supported (GeForce RTX 20xx consumer GPUs and newer) +# Only Volta+ architectures are supported (GeForce GTX 16xx consumer GPUs and newer) cuda = ["_gpu"] -# TODO: ROCm can't be enabled at the same time as `cuda` feature at the moment and is not exposed on library level +# TODO: ROCm can't be enabled at the same time as `cuda` feature at the moment +# Seems to support RDNA 2+, at least on Linux rocm = ["_gpu"] # Internal feature, shouldn't be used directly _gpu = [ diff --git a/shared/subspace-proof-of-space-gpu/README.md b/shared/subspace-proof-of-space-gpu/README.md index ed437f5e01..8149227dab 100644 --- a/shared/subspace-proof-of-space-gpu/README.md +++ b/shared/subspace-proof-of-space-gpu/README.md @@ -24,3 +24,8 @@ For other operating systems/platforms check official documentation: + +For compilation, the `NVCC=off` environment variable must additionally be set: +```bash +NVCC=off cargo build +``` diff --git a/shared/subspace-proof-of-space-gpu/build.rs b/shared/subspace-proof-of-space-gpu/build.rs index 
a/shared/subspace-proof-of-space-gpu/build.rs
+++ b/shared/subspace-proof-of-space-gpu/build.rs
@@ -3,6 +3,7 @@
 // SPDX-License-Identifier: Apache-2.0
 
 use std::env;
+use std::thread::available_parallelism;
 
 fn main() {
     // TODO: Lift this restriction
@@ -21,11 +22,37 @@ fn main() {
         hipcc.compiler(env::var("HIPCC").unwrap_or("hipcc".to_string()));
         hipcc.cpp(true);
         if cfg!(debug_assertions) {
-            hipcc.opt_level(1);
+            hipcc.opt_level(2);
         }
-        hipcc.flag("--offload-arch=native,gfx1100,gfx1030,gfx942,gfx90a,gfx908");
-        // 6 corresponds to the number of offload-arch
-        hipcc.flag("-parallel-jobs=6");
+        // Architectures: https://llvm.org/docs/AMDGPUUsage.html
+        // GCN GFX9 (CDNA)
+        hipcc.flag("--offload-arch=gfx908");
+        // GCN GFX9 (CDNA 2)
+        hipcc.flag("--offload-arch=gfx90a");
+        // GCN GFX9 (CDNA 3)
+        hipcc.flag("--offload-arch=gfx942");
+        // GCN GFX10.1 (RDNA 1) dGPU
+        hipcc.flag("--offload-arch=gfx1010,gfx1011,gfx1012");
+        // GCN GFX10.1 (RDNA 1) APU
+        hipcc.flag("--offload-arch=gfx1013");
+        // GCN GFX10.3 (RDNA 2) dGPU
+        hipcc.flag("--offload-arch=gfx1030,gfx1031,gfx1032,gfx1034");
+        // GCN GFX10.3 (RDNA 2) APU
+        hipcc.flag("--offload-arch=gfx1033,gfx1035,gfx1036");
+        // GCN GFX11 (RDNA 3) dGPU
+        hipcc.flag("--offload-arch=gfx1100,gfx1101,gfx1102");
+        // GCN GFX11 (RDNA 3) APU
+        hipcc.flag("--offload-arch=gfx1103,gfx1150,gfx1151");
+        // Architecture is too new for hipcc 5.7.1 in stock Ubuntu repos
+        hipcc.flag_if_supported("--offload-arch=gfx1152");
+        // GCN GFX12 (RDNA 4) dGPU
+        // Architecture is too new for hipcc 5.7.1 in stock Ubuntu repos
+        hipcc.flag_if_supported("--offload-arch=gfx1200,gfx1201");
+        // Flag is too new for hipcc in stock Ubuntu repos; 1 job if parallelism is unknown
+        hipcc.flag_if_supported(format!(
+            "-parallel-jobs={}",
+            available_parallelism().map_or(1, |jobs| jobs.get())
+        ));
         // This controls how error strings get handled in the FFI. When defined error strings get
         // returned from the FFI, and Rust must then free them. When not defined error strings are
         // not returned.
@@ -35,6 +62,9 @@ fn main() {
             hipcc.flag("-include").flag("util/cuda2hip.hpp");
         }
         hipcc.file("src/subspace_api.cu").compile("subspace_rocm");
+
+        // Doesn't link otherwise
+        println!("cargo::rustc-link-lib=amdhip64");
     }
 
     if cfg!(feature = "cuda") {
@@ -57,5 +87,6 @@ fn main() {
         nvcc.file("src/subspace_api.cu").compile("subspace_cuda");
     }
 
+    println!("cargo::rerun-if-changed=src");
     println!("cargo::rerun-if-env-changed=CXXFLAGS");
 }
diff --git a/shared/subspace-proof-of-space-gpu/src/lib.rs b/shared/subspace-proof-of-space-gpu/src/lib.rs
index 572db957bf..0b1269fb89 100644
--- a/shared/subspace-proof-of-space-gpu/src/lib.rs
+++ b/shared/subspace-proof-of-space-gpu/src/lib.rs
@@ -1,2 +1,4 @@
 #[cfg(feature = "cuda")]
 pub mod cuda;
+#[cfg(feature = "rocm")]
+pub mod rocm;
diff --git a/shared/subspace-proof-of-space-gpu/src/rocm.rs b/shared/subspace-proof-of-space-gpu/src/rocm.rs
new file mode 100644
index 0000000000..3f90350573
--- /dev/null
+++ b/shared/subspace-proof-of-space-gpu/src/rocm.rs
@@ -0,0 +1,198 @@
+// Copyright Supranational LLC
+// Licensed under the Apache License, Version 2.0, see LICENSE for details.
+// SPDX-License-Identifier: Apache-2.0
+
+#[cfg(test)]
+mod tests;
+
+use rust_kzg_blst::types::fr::FsFr;
+use std::ops::DerefMut;
+use subspace_core_primitives::pieces::Record;
+use subspace_core_primitives::pos::{PosProof, PosSeed};
+use subspace_core_primitives::ScalarBytes;
+use subspace_kzg::Scalar;
+
+extern "C" {
+    /// # Returns
+    /// * `usize` - The number of available GPUs.
+    fn gpu_count() -> usize;
+
+    /// # Parameters
+    /// * `k`: The size parameter for the table.
+    /// * `seed`: A pointer to the seed data.
+    /// * `lg_record_size`: The logarithm of the record size.
+    /// * `challenge_index`: A mutable pointer to store the index of the challenge.
+    /// * `record`: A pointer to the record data.
+    /// * `chunks_scratch`: A mutable pointer to a scratch space for chunk data.
+    /// * `proof_count`: A mutable pointer to store the count of proofs.
+    /// * `parity_record_chunks`: A mutable pointer to the parity record chunks.
+    /// * `gpu_id`: The ID of the GPU to use.
+    ///
+    /// # Returns
+    /// * `sppark::Error` - An error code indicating the result of the operation.
+    ///
+    /// # Assumptions
+    /// * `seed` must be a valid pointer to a 32-byte seed.
+    /// * `record` must be a valid pointer to the record data (`*const Record`), with a length of `1 << lg_record_size`.
+    /// * `parity_record_chunks` must be a valid mutable pointer to `Scalar` elements, with a length of `1 << lg_record_size`.
+    /// * `chunks_scratch` must be a valid mutable pointer where up to `challenges_count` 32-byte chunks of GPU-calculated data will be written.
+    /// * `gpu_id` must be a valid identifier of an available GPU. The available GPUs can be determined by using the `gpu_count` function.
+    fn generate_and_encode_pospace_dispatch(
+        k: u32,
+        seed: *const [u8; 32],
+        lg_record_size: u32,
+        challenge_index: *mut u32,
+        record: *const [u8; 32],
+        chunks_scratch: *mut [u8; 32],
+        proof_count: *mut u32,
+        parity_record_chunks: *mut FsFr,
+        gpu_id: i32,
+    ) -> sppark::Error;
+}
+
+/// Returns [`RocmDevice`] for each available device
+pub fn rocm_devices() -> Vec<RocmDevice> {
+    // SAFETY: FFI call that takes no arguments and only returns the number of devices
+    let num_devices = unsafe { gpu_count() };
+
+    (0i32..)
+        .take(num_devices)
+        .map(|gpu_id| RocmDevice { gpu_id })
+        .collect()
+}
+
+/// Wrapper data structure encapsulating a single ROCm-capable device
+#[derive(Debug)]
+pub struct RocmDevice {
+    gpu_id: i32,
+}
+
+impl RocmDevice {
+    /// ROCm device ID
+    pub fn id(&self) -> i32 {
+        self.gpu_id
+    }
+
+    /// Generates and encodes PoSpace on the GPU.
+    ///
+    /// The table size is taken from [`PosProof::K`] and the work is dispatched to the device
+    /// wrapped by this instance.
+    ///
+    /// # Parameters
+    ///
+    /// - `seed`: Seed handed to the GPU for table generation.
+    /// - `record`: Record to encode, its chunks are overwritten in place with the result.
+    /// - `encoded_chunks_used_output`: Receives one flag per challenge index in order, `true`
+    ///   where a GPU-generated chunk was written for that index.
+    ///
+    /// # Errors
+    ///
+    /// Returns the error reported by the GPU backend as a string, or an error if the backend
+    /// reports more proofs than there are challenges.
+    pub fn generate_and_encode_pospace(
+        &self,
+        seed: &PosSeed,
+        record: &mut Record,
+        encoded_chunks_used_output: impl ExactSizeIterator<Item = impl DerefMut<Target = bool>>,
+    ) -> Result<(), String> {
+        let record_len = Record::NUM_CHUNKS;
+        let challenge_len = Record::NUM_S_BUCKETS;
+        let lg_record_size = record_len.ilog2();
+
+        if challenge_len > u32::MAX as usize {
+            return Err(String::from("challenge_len is too large to fit in u32"));
+        }
+
+        let mut proof_count = 0u32;
+        let mut chunks_scratch_gpu =
+            Vec::<[u8; ScalarBytes::FULL_BYTES]>::with_capacity(challenge_len);
+        let mut challenge_index_gpu = Vec::<u32>::with_capacity(challenge_len);
+        let mut parity_record_chunks = Vec::<Scalar>::with_capacity(Record::NUM_CHUNKS);
+
+        // SAFETY: `seed` and `record` are valid references and the output buffers were allocated
+        // above with `challenge_len`/`Record::NUM_CHUNKS` capacity, matching the assumptions
+        // documented on `generate_and_encode_pospace_dispatch`
+        let error = unsafe {
+            generate_and_encode_pospace_dispatch(
+                u32::from(PosProof::K),
+                &**seed,
+                lg_record_size,
+                challenge_index_gpu.as_mut_ptr(),
+                record.as_ptr(),
+                chunks_scratch_gpu.as_mut_ptr(),
+                &mut proof_count,
+                Scalar::slice_mut_to_repr(&mut parity_record_chunks).as_mut_ptr(),
+                self.gpu_id,
+            )
+        };
+
+        if error.code != 0 {
+            return Err(error.to_string());
+        }
+
+        let proof_count = proof_count as usize;
+        // Guard `set_len` below: the GPU must not report more proofs than buffers can hold
+        if proof_count > challenge_len {
+            return Err(String::from("GPU reported more proofs than challenges"));
+        }
+
+        // SAFETY: `proof_count` does not exceed the allocated capacity (checked above) and the
+        // successful GPU call is expected to have initialized that many entries as well as all
+        // parity chunks
+        unsafe {
+            chunks_scratch_gpu.set_len(proof_count);
+            challenge_index_gpu.set_len(proof_count);
+            parity_record_chunks.set_len(Record::NUM_CHUNKS);
+        }
+
+        let mut encoded_chunks_used = vec![false; challenge_len];
+        let source_record_chunks = record.to_vec();
+
+        let mut chunks_scratch = challenge_index_gpu
+            .into_iter()
+            .zip(chunks_scratch_gpu)
+            .collect::<Vec<_>>();
+
+        chunks_scratch
+            .sort_unstable_by(|(a_out_index, _), (b_out_index, _)| a_out_index.cmp(b_out_index));
+
+        // We don't need all the proofs
+        chunks_scratch.truncate(proof_count.min(Record::NUM_CHUNKS));
+
+        for (out_index, _chunk) in &chunks_scratch {
+            encoded_chunks_used[*out_index as usize] = true;
+        }
+
+        encoded_chunks_used_output
+            .zip(&encoded_chunks_used)
+            .for_each(|(mut output, input)| *output = *input);
+
+        record
+            .iter_mut()
+            .zip(
+                chunks_scratch
+                    .into_iter()
+                    .map(|(_out_index, chunk)| chunk)
+                    .chain(
+                        source_record_chunks
+                            .into_iter()
+                            .zip(parity_record_chunks)
+                            .flat_map(|(a, b)| [a, b.to_bytes()])
+                            .zip(encoded_chunks_used.iter())
+                            // Skip chunks that were used previously
+                            .filter_map(|(record_chunk, encoded_chunk_used)| {
+                                if *encoded_chunk_used {
+                                    None
+                                } else {
+                                    Some(record_chunk)
+                                }
+                            }),
+                    ),
+            )
+            .for_each(|(output_chunk, input_chunk)| {
+                *output_chunk = input_chunk;
+            });
+
+        Ok(())
+    }
+}
diff --git a/shared/subspace-proof-of-space-gpu/src/rocm/tests.rs b/shared/subspace-proof-of-space-gpu/src/rocm/tests.rs
new file mode 100644
index 0000000000..3b690aaa71
--- /dev/null
+++ b/shared/subspace-proof-of-space-gpu/src/rocm/tests.rs
@@ -0,0 +1,82 @@
+use crate::rocm::rocm_devices;
+use std::num::NonZeroUsize;
+use std::slice;
+use subspace_core_primitives::hashes::{blake3_254_hash_to_scalar, blake3_hash};
+use subspace_core_primitives::pieces::{PieceOffset, Record};
+use subspace_core_primitives::sectors::SectorId;
+use subspace_core_primitives::segments::HistorySize;
+use subspace_erasure_coding::ErasureCoding;
+use subspace_farmer_components::plotting::{CpuRecordsEncoder, RecordsEncoder};
+use subspace_farmer_components::sector::SectorContentsMap;
+use subspace_proof_of_space::chia::ChiaTable;
+use subspace_proof_of_space::Table;
+
+type PosTable = ChiaTable;
+
+#[test]
+fn basic() {
+    let rocm_device = rocm_devices()
+        .into_iter()
+        .next()
+        .expect("Need ROCm device to run this test");
+
+    let mut table_generator = PosTable::generator();
+    let erasure_coding = ErasureCoding::new(
+        NonZeroUsize::new(Record::NUM_S_BUCKETS.next_power_of_two().ilog2() as usize)
+            .expect("Not zero; qed"),
+    )
+    .unwrap();
+    let global_mutex = Default::default();
+    let mut cpu_records_encoder = CpuRecordsEncoder::<PosTable>::new(
+        slice::from_mut(&mut table_generator),
+        &erasure_coding,
+        &global_mutex,
+    );
+
+    let history_size = HistorySize::ONE;
+    let sector_id = SectorId::new(blake3_hash(b"hello"), 500, history_size);
+    let mut record = Record::new_boxed();
+    record
+        .iter_mut()
+        .enumerate()
+        .for_each(|(index, chunk)| *chunk = *blake3_254_hash_to_scalar(&index.to_le_bytes()));
+
+    let mut cpu_encoded_records = Record::new_zero_vec(2);
+    for cpu_encoded_record in &mut cpu_encoded_records {
+        cpu_encoded_record.clone_from(&record);
+    }
+    let cpu_sector_contents_map = cpu_records_encoder
+        .encode_records(&sector_id, &mut cpu_encoded_records, &Default::default())
+        .unwrap();
+
+    let mut gpu_encoded_records = Record::new_zero_vec(2);
+    for gpu_encoded_record in &mut gpu_encoded_records {
+        gpu_encoded_record.clone_from(&record);
+    }
+    let mut gpu_sector_contents_map = SectorContentsMap::new(2);
+    rocm_device
+        .generate_and_encode_pospace(
+            &sector_id.derive_evaluation_seed(PieceOffset::ZERO),
+            &mut gpu_encoded_records[0],
+            gpu_sector_contents_map
+                .iter_record_bitfields_mut()
+                .next()
+                .unwrap()
+                .iter_mut(),
+        )
+        .unwrap();
+    rocm_device
+        .generate_and_encode_pospace(
+            &sector_id.derive_evaluation_seed(PieceOffset::ONE),
+            &mut gpu_encoded_records[1],
+            gpu_sector_contents_map
+                .iter_record_bitfields_mut()
+                .nth(1)
+                .unwrap()
+                .iter_mut(),
+        )
+        .unwrap();
+
+    assert!(cpu_sector_contents_map == gpu_sector_contents_map);
+    assert!(cpu_encoded_records == gpu_encoded_records);
+}