From 3c570a1ab95e7c175931d4978e70891c0b6c9114 Mon Sep 17 00:00:00 2001 From: Jacob Trombetta Date: Fri, 27 Sep 2024 13:41:28 -0400 Subject: [PATCH] build: downgrade CUDA toolkit (PROOF-916) (#182) * downgrade CUDA toolkit * make _VSTD workaround * fix format * remove extra line * fix format * add sxt/base/device/cub to isolate workaround --- README.md | 4 +- nix/cuda.nix | 5 +-- sxt/algorithm/block/BUILD | 2 +- sxt/algorithm/block/runlength_count.h | 2 +- sxt/base/device/BUILD | 8 ++++ sxt/base/device/cub.cc | 17 ++++++++ sxt/base/device/cub.h | 40 +++++++++++++++++++ sxt/multiexp/base/BUILD | 2 +- sxt/multiexp/base/scalar_array.cc | 2 +- sxt/multiexp/bucket_method2/BUILD | 2 +- .../bucket_method2/multiproduct_table.cc | 2 +- .../multiproduct_table_kernel.h | 2 +- 12 files changed, 76 insertions(+), 12 deletions(-) create mode 100644 sxt/base/device/cub.cc create mode 100644 sxt/base/device/cub.h diff --git a/README.md b/README.md index 2323dd6f..044c9ab8 100644 --- a/README.md +++ b/README.md @@ -44,7 +44,7 @@ - CUDA + CUDA @@ -182,7 +182,7 @@ See the [example](./example) folder for some examples. Prerequisites: * `x86_64` Linux instance. * Nix with flake support (check out [The Determinate Nix Installer](https://github.com/DeterminateSystems/nix-installer)) -* Nvidia GPU capable of running CUDA 12.6 code. +* Nvidia GPU capable of running CUDA 12.4.1 code. From your terminal, run the following command in the root of the source directory to set up a build environment. 
diff --git a/nix/cuda.nix b/nix/cuda.nix index 832e5f1a..dd277e74 100644 --- a/nix/cuda.nix +++ b/nix/cuda.nix @@ -7,9 +7,8 @@ with pkgs; pkgs.stdenvNoCC.mkDerivation { name = "cudatoolkit"; src = fetchurl { - url = "https://developer.download.nvidia.com/compute/cuda/12.6.0/local_installers/cuda_12.6.0_560.28.03_linux.run"; - hash = "sha256-MasEOU5psU3YZW4rRMKHfbGg6Jjf+KdUakxihDgQG5Q="; - + url = "https://developer.download.nvidia.com/compute/cuda/12.4.1/local_installers/cuda_12.4.1_550.54.15_linux.run"; + sha256 = "367d2299b3a4588ab487a6d27276ca5d9ead6e394904f18bccb9e12433b9c4fb"; }; patches = [ # patch host_defines.h to work with libc++ diff --git a/sxt/algorithm/block/BUILD b/sxt/algorithm/block/BUILD index 0fe468c0..41487c91 100644 --- a/sxt/algorithm/block/BUILD +++ b/sxt/algorithm/block/BUILD @@ -10,8 +10,8 @@ sxt_cc_component( "//sxt/memory/resource:managed_device_resource", ], deps = [ + "//sxt/base/device:cub", "//sxt/base/device:synchronization", "//sxt/base/macro:cuda_callable", - "@local_cuda//:cub", ], ) diff --git a/sxt/algorithm/block/runlength_count.h b/sxt/algorithm/block/runlength_count.h index 89cb1deb..13a8b869 100644 --- a/sxt/algorithm/block/runlength_count.h +++ b/sxt/algorithm/block/runlength_count.h @@ -16,7 +16,7 @@ */ #pragma once -#include "cub/cub.cuh" +#include "sxt/base/device/cub.h" #include "sxt/base/macro/cuda_callable.h" namespace sxt::algbk { diff --git a/sxt/base/device/BUILD b/sxt/base/device/BUILD index 786b1360..e5cf6dc8 100644 --- a/sxt/base/device/BUILD +++ b/sxt/base/device/BUILD @@ -14,6 +14,14 @@ sxt_cc_component( ], ) +sxt_cc_component( + name = "cub", + with_test = False, + deps = [ + "@local_cuda//:cub", + ], +) + sxt_cc_component( name = "pointer_attributes", with_test = False, diff --git a/sxt/base/device/cub.cc b/sxt/base/device/cub.cc new file mode 100644 index 00000000..11631ba7 --- /dev/null +++ b/sxt/base/device/cub.cc @@ -0,0 +1,17 @@ +/** Proofs GPU - Space and Time's cryptographic proof algorithms on the CPU 
and GPU. + * + * Copyright 2024-present Space and Time Labs, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include "sxt/base/device/cub.h" diff --git a/sxt/base/device/cub.h b/sxt/base/device/cub.h new file mode 100644 index 00000000..da62a43a --- /dev/null +++ b/sxt/base/device/cub.h @@ -0,0 +1,40 @@ +/** Proofs GPU - Space and Time's cryptographic proof algorithms on the CPU and GPU. + * + * Copyright 2024-present Space and Time Labs, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#pragma once + +/* + * This is a workaround to define _VSTD before including cub/cub.cuh. + * It should be removed when we can upgrade to a newer version of CUDA. + * + * We need to define _VSTD in order to use the clang version defined in + * clang.nix and the CUDA toolkit version defined in cuda.nix. + * + * _VSTD was deprecated and removed from the LLVM trunk. 
+ * NVIDIA: https://github.com/NVIDIA/cccl/pull/1331 + * LLVM: https://github.com/llvm/llvm-project/commit/683bc94e1637bd9bacc978f5dc3c79cfc8ff94b9 + * + * We cannot currently use any CUDA toolkit above 12.4.1 because the Kubernetes + * cluster currently cannot install a driver above 550. + * + * See CUDA toolkit and driver support: + * https://docs.nvidia.com/cuda/cuda-toolkit-release-notes/index.html + */ +#include <__config> + +#define _VSTD std::_LIBCPP_ABI_NAMESPACE + +#include "cub/cub.cuh" diff --git a/sxt/multiexp/base/BUILD b/sxt/multiexp/base/BUILD index 533c2cdc..5f294417 100644 --- a/sxt/multiexp/base/BUILD +++ b/sxt/multiexp/base/BUILD @@ -46,8 +46,8 @@ sxt_cc_component( sxt_cc_component( name = "scalar_array", impl_deps = [ - "@local_cuda//:cub", "//sxt/base/container:span_utility", + "//sxt/base/device:cub", "//sxt/base/device:memory_utility", "//sxt/base/device:stream", "//sxt/base/error:assert", diff --git a/sxt/multiexp/base/scalar_array.cc b/sxt/multiexp/base/scalar_array.cc index c0f95de7..d933505c 100644 --- a/sxt/multiexp/base/scalar_array.cc +++ b/sxt/multiexp/base/scalar_array.cc @@ -18,8 +18,8 @@ #include -#include "cub/cub.cuh" #include "sxt/base/container/span_utility.h" +#include "sxt/base/device/cub.h" #include "sxt/base/device/memory_utility.h" #include "sxt/base/device/stream.h" #include "sxt/base/num/ceil_log2.h" diff --git a/sxt/multiexp/bucket_method2/BUILD b/sxt/multiexp/bucket_method2/BUILD index f03c32c0..01dc8368 100644 --- a/sxt/multiexp/bucket_method2/BUILD +++ b/sxt/multiexp/bucket_method2/BUILD @@ -93,8 +93,8 @@ sxt_cc_component( deps = [ ":constants", "//sxt/algorithm/block:runlength_count", + "//sxt/base/device:cub", "//sxt/base/device:stream", - "@local_cuda//:cub", ], ) diff --git a/sxt/multiexp/bucket_method2/multiproduct_table.cc b/sxt/multiexp/bucket_method2/multiproduct_table.cc index 767c8225..720a5f42 100644 --- a/sxt/multiexp/bucket_method2/multiproduct_table.cc +++ 
b/sxt/multiexp/bucket_method2/multiproduct_table.cc @@ -16,8 +16,8 @@ */ #include "sxt/multiexp/bucket_method2/multiproduct_table.h" -#include "cub/cub.cuh" #include "sxt/algorithm/iteration/for_each.h" +#include "sxt/base/device/cub.h" #include "sxt/base/device/memory_utility.h" #include "sxt/base/device/stream.h" #include "sxt/base/log/log.h" diff --git a/sxt/multiexp/bucket_method2/multiproduct_table_kernel.h b/sxt/multiexp/bucket_method2/multiproduct_table_kernel.h index 9b828932..ac993035 100644 --- a/sxt/multiexp/bucket_method2/multiproduct_table_kernel.h +++ b/sxt/multiexp/bucket_method2/multiproduct_table_kernel.h @@ -18,8 +18,8 @@ #include -#include "cub/cub.cuh" #include "sxt/algorithm/block/runlength_count.h" +#include "sxt/base/device/cub.h" #include "sxt/base/device/stream.h" #include "sxt/base/num/constexpr_switch.h" #include "sxt/base/num/divide_up.h"