Skip to content
This repository has been archived by the owner on Aug 16, 2024. It is now read-only.

Commit

Permalink
build refactor
Browse files Browse the repository at this point in the history
  • Loading branch information
robik75 committed Aug 4, 2024
1 parent e217aa5 commit 5f301b7
Show file tree
Hide file tree
Showing 20 changed files with 274 additions and 343 deletions.
388 changes: 119 additions & 269 deletions Cargo.lock

Large diffs are not rendered by default.

8 changes: 4 additions & 4 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -13,21 +13,21 @@ description = "Boojum-CUDA is a library implementing GPU-accelerated cryptograph

[build-dependencies]
boojum = "=0.2.0"
cudart-sys = { version = "=0.1.0", package = "era_cudart_sys" }
cudart-sys = { git = "https://github.com/matter-labs/era-cuda.git", branch = "rr-build-refactor", package = "era_cudart_sys" }
cmake = "0.1"
itertools = "0.13"

[dependencies]
boojum = "=0.2.0"
cudart = { version = "=0.1.0", package = "era_cudart" }
cudart-sys = { version = "=0.1.0", package = "era_cudart_sys" }
cudart = { git = "https://github.com/matter-labs/era-cuda.git", branch = "rr-build-refactor", package = "era_cudart" }
cudart-sys = { git = "https://github.com/matter-labs/era-cuda.git", branch = "rr-build-refactor" , package = "era_cudart_sys" }
itertools = "0.13"
lazy_static = "1.4"

[dev-dependencies]
blake2 = "0.10"
criterion = "0.5"
criterion-cuda = { git = "https://github.com/matter-labs/era-cuda.git", branch = "main", package = "criterion-cuda" }
criterion-cuda = { git = "https://github.com/matter-labs/era-cuda.git", branch = "rr-build-refactor", package = "era_criterion_cuda" }
criterion-macro = "0.4"
itertools = "0.13"
rand = "0.8"
Expand Down
6 changes: 3 additions & 3 deletions build/gates.rs
Original file line number Diff line number Diff line change
Expand Up @@ -55,8 +55,8 @@ pub(super) fn generate() {
}

fn generate_cuda(descriptions: &[Description]) {
const TEMPLATE_PATH: &str = "native/gates_template.cu";
const RESULT_PATH: &str = "native/gates.cu";
const TEMPLATE_PATH: &str = "native/gate_kernels_template.cuh";
const RESULT_PATH: &str = "gate_kernels.cuh";
let mut code = String::new();
let s = &mut code;
new_line(s);
Expand Down Expand Up @@ -180,7 +180,7 @@ fn generate_cuda(descriptions: &[Description]) {

fn generate_rust(descriptions: &[Description]) {
const TEMPLATE_PATH: &str = "src/gates_data_template.rs";
const RESULT_PATH: &str = "src/gates_data.rs";
const RESULT_PATH: &str = "gates_data.rs";
let mut hash_map = String::new();
let mut bindings = String::new();
let mut mappings = String::new();
Expand Down
46 changes: 29 additions & 17 deletions build/main.rs
Original file line number Diff line number Diff line change
@@ -1,28 +1,40 @@
#![allow(incomplete_features)]
#![allow(unexpected_cfgs)]
#![feature(generic_const_exprs)]

use cudart_sys::{cuda_lib_path, cuda_path};

mod gates;
mod poseidon_constants;
mod template;

fn main() {
gates::generate();
poseidon_constants::generate();
#[cfg(target_os = "macos")]
std::process::exit(0);
let dst = cmake::Config::new("native")
.profile("Release")
.define(
"CMAKE_CUDA_ARCHITECTURES",
std::env::var("CUDAARCHS").unwrap_or("native".to_string()),
)
.build();
println!("cargo:rustc-link-search=native={}", dst.display());
println!("cargo:rustc-link-lib=static=boojum-cuda-native");
println!("cargo:rustc-link-search=native={}", cuda_lib_path!());
println!("cargo:rustc-link-lib=cudart");
#[cfg(target_os = "linux")]
println!("cargo:rustc-link-lib=stdc++");
println!("cargo::rustc-check-cfg=cfg(no_cuda)");
#[cfg(no_cuda)]
{
println!("cargo::warning={}", cudart_sys::no_cuda_message!());
}
#[cfg(not(no_cuda))]
{
use cudart_sys::{get_cuda_lib_path, get_cuda_version};
use std::env::var;
let cuda_version = get_cuda_version().expect("Failed to determine CUDA version");
if !cuda_version.starts_with("12.") {
println!("cargo::warning=CUDA version {cuda_version} detected. This crate is only tested with CUDA 12.*.");
}
let cudaarchs = var("CUDAARCHS").unwrap_or("native".to_string());
let dst = cmake::Config::new("native")
.profile("Release")
.define("CMAKE_CUDA_ARCHITECTURES", cudaarchs)
.build();
let boojum_lib_path = dst.to_str().unwrap();
println!("cargo:rustc-link-search=native={boojum_lib_path}");
println!("cargo:rustc-link-lib=static=boojum-cuda-native");
let cuda_lib_path = get_cuda_lib_path().unwrap();
let cuda_lib_path_str = cuda_lib_path.to_str().unwrap();
println!("cargo:rustc-link-search=native={cuda_lib_path_str}");
println!("cargo:rustc-link-lib=cudart");
#[cfg(target_os = "linux")]
println!("cargo:rustc-link-lib=stdc++");
}
}
2 changes: 1 addition & 1 deletion build/poseidon_constants.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ use boojum::implementations::poseidon_goldilocks_params::*;
// use itertools::Itertools;

const TEMPLATE_PATH: &str = "native/poseidon_constants_template.cuh";
const RESULT_PATH: &str = "native/poseidon_constants.cuh";
const RESULT_PATH: &str = "poseidon_constants.cuh";

fn split_u64(value: u64) -> (u32, u32) {
let lo = value as u32;
Expand Down
8 changes: 6 additions & 2 deletions build/template.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
use std::env::var;
use std::fs;
use std::path::Path;

const PREFIX: char = '%';
const SUFFIX: char = '%';
Expand All @@ -11,8 +13,10 @@ pub(crate) fn generate(replacements: &[(&str, String)], template_path: &str, res
from.push(SUFFIX);
text = text.replace(&from, value);
}
let current = fs::read_to_string(result_path).unwrap_or_default();
let out_dir = var("OUT_DIR").unwrap();
let result_path = Path::new(&out_dir).join(result_path);
let current = fs::read_to_string(&result_path).unwrap_or_default();
if !text.eq(&current) {
fs::write(result_path, text).unwrap();
fs::write(&result_path, text).unwrap();
}
}
2 changes: 0 additions & 2 deletions native/.gitignore
Original file line number Diff line number Diff line change
@@ -1,3 +1 @@
/cmake-build-*/
/gates.cu
/poseidon_constants.cuh
16 changes: 9 additions & 7 deletions native/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -5,41 +5,43 @@ if (NOT DEFINED CMAKE_CUDA_ARCHITECTURES)
set(CMAKE_CUDA_ARCHITECTURES native)
endif ()
add_library(boojum-cuda-native STATIC
ops_cub/common.cuh
ops_cub/device_radix_sort.cu
ops_cub/device_reduce.cu
ops_cub/device_run_length_encode.cu
ops_cub/device_scan.cu
barycentric.cu
blake2s.cu
carry_chain.cuh
common.cuh
context.cu
context.cuh
${CMAKE_INSTALL_PREFIX}/gate_kernels.cuh
gates.cu
gates.cuh
gates_poseidon.cuh
goldilocks.cuh
goldilocks_extension.cuh
goldilocks_extension.cu
goldilocks_extension.cuh
memory.cuh
ntt.cu
ntt_b2n.cuh
ntt_n2b.cuh
ops_complex.cu
ops_complex.cuh
ops_cub/common.cuh
ops_cub/device_radix_sort.cu
ops_cub/device_reduce.cu
ops_cub/device_run_length_encode.cu
ops_cub/device_scan.cu
ops_simple.cu
poseidon2_cooperative.cu
poseidon2_single_thread.cu
poseidon2_single_thread.cuh
poseidon_common.cu
poseidon_constants.cuh
${CMAKE_INSTALL_PREFIX}/poseidon_constants.cuh
poseidon_cooperative.cu
poseidon_single_thread.cu
poseidon_single_thread.cuh
poseidon_utils.cuh
ptx.cuh
)
target_include_directories(boojum-cuda-native PRIVATE ${CMAKE_INSTALL_PREFIX})
set_target_properties(boojum-cuda-native PROPERTIES CUDA_STANDARD 17)
set_target_properties(boojum-cuda-native PROPERTIES CUDA_SEPARABLE_COMPILATION ON)
set_target_properties(boojum-cuda-native PROPERTIES CUDA_RESOLVE_DEVICE_SYMBOLS ON)
Expand Down
3 changes: 3 additions & 0 deletions native/gate_kernels_template.cuh
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
namespace gates {
%CODE%
} // namespace gates
4 changes: 4 additions & 0 deletions native/gates.cu
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
#include "gates.cuh"
#include "gates_poseidon.cuh"
// do not reorder includes
#include "gate_kernels.cuh"
6 changes: 0 additions & 6 deletions native/gates_template.cu

This file was deleted.

2 changes: 0 additions & 2 deletions native/poseidon_constants_template.cuh
Original file line number Diff line number Diff line change
@@ -1,8 +1,6 @@
// clang-format off
#pragma once

#include "goldilocks.cuh"

namespace poseidon_common {

using namespace goldilocks;
Expand Down
1 change: 0 additions & 1 deletion src/.gitignore

This file was deleted.

20 changes: 11 additions & 9 deletions src/context.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ use boojum::field::{Field, PrimeField};
use cudart::memory::{memory_copy, DeviceAllocation};
use cudart::result::{CudaResult, CudaResultWrap};
use cudart::slice::DeviceSlice;
use cudart_sys::{cudaMemcpyToSymbol, CudaMemoryCopyKind};
use cudart_sys::{cudaMemcpyToSymbol, cuda_struct_and_stub, CudaMemoryCopyKind};
use std::mem::size_of;
use std::os::raw::c_void;

Expand All @@ -29,6 +29,8 @@ impl PowersLayerData {
}
}

unsafe impl Sync for PowersLayerData {}

#[repr(C)]
struct PowersData {
fine: PowersLayerData,
Expand All @@ -48,14 +50,14 @@ impl PowersData {
}
}

extern "C" {
static powers_data_w: PowersData;
static powers_data_w_bitrev_for_ntt: PowersData;
static powers_data_w_inv_bitrev_for_ntt: PowersData;
static powers_data_g_f: PowersData;
static powers_data_g_i: PowersData;
static inv_sizes: [GoldilocksField; OMEGA_LOG_ORDER as usize + 1];
}
unsafe impl Sync for PowersData {}

cuda_struct_and_stub! { static powers_data_w: PowersData; }
cuda_struct_and_stub! { static powers_data_w_bitrev_for_ntt: PowersData; }
cuda_struct_and_stub! { static powers_data_w_inv_bitrev_for_ntt: PowersData; }
cuda_struct_and_stub! { static powers_data_g_f: PowersData; }
cuda_struct_and_stub! { static powers_data_g_i: PowersData; }
cuda_struct_and_stub! { static inv_sizes: [GoldilocksField; OMEGA_LOG_ORDER as usize + 1]; }

unsafe fn copy_to_symbol<T>(symbol: &T, src: &T) -> CudaResult<()> {
cudaMemcpyToSymbol(
Expand Down
2 changes: 1 addition & 1 deletion src/gates.rs
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ struct GateData {
kernel: GateEvalSignature,
}

include!("gates_data.rs");
include!(concat!(env!("OUT_DIR"), "/gates_data.rs"));

pub fn find_gate_id_by_name(name: &str) -> Option<u32> {
HASH_MAP.get(name).copied()
Expand Down
2 changes: 1 addition & 1 deletion src/ops_complex.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1485,7 +1485,7 @@ mod tests {
let stream = CudaStream::default();
let base_ef =
ExtensionField::from_coeff_in_base([GoldilocksField(42), GoldilocksField(42)]);
let base_vf = unsafe { mem::transmute(base_ef) };
let base_vf = unsafe { mem::transmute::<ExtensionField, EF>(base_ef) };
let mut d_base = DeviceAllocation::alloc(1).unwrap();
memory_copy_async(&mut d_base, &[base_vf], &stream).unwrap();
let b = &d_base[0];
Expand Down
26 changes: 23 additions & 3 deletions src/ops_cub/device_radix_sort.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,9 @@ use boojum::field::goldilocks::GoldilocksField;
use cudart::result::{CudaResult, CudaResultWrap};
use cudart::slice::DeviceSlice;
use cudart::stream::CudaStream;
use cudart_sys::{cudaError_t, cudaStream_t};
use cudart_sys::{cudaError_t, cudaStream_t, cuda_fn_and_stub};

extern "C" {
cuda_fn_and_stub! {
fn sort_keys_u32(
d_temp_storage: *mut u8,
temp_storage_bytes: &mut usize,
Expand All @@ -18,7 +18,9 @@ extern "C" {
end_bit: i32,
stream: cudaStream_t,
) -> cudaError_t;
}

cuda_fn_and_stub! {
fn sort_keys_descending_u32(
d_temp_storage: *mut u8,
temp_storage_bytes: &mut usize,
Expand All @@ -29,7 +31,9 @@ extern "C" {
end_bit: i32,
stream: cudaStream_t,
) -> cudaError_t;
}

cuda_fn_and_stub! {
fn sort_keys_u64(
d_temp_storage: *mut u8,
temp_storage_bytes: &mut usize,
Expand All @@ -40,7 +44,9 @@ extern "C" {
end_bit: i32,
stream: cudaStream_t,
) -> cudaError_t;
}

cuda_fn_and_stub! {
fn sort_keys_descending_u64(
d_temp_storage: *mut u8,
temp_storage_bytes: &mut usize,
Expand Down Expand Up @@ -200,7 +206,7 @@ pub fn sort_keys<T: SortKeys>(
)
}

extern "C" {
cuda_fn_and_stub! {
fn sort_pairs_u32_by_u32(
d_temp_storage: *mut u8,
temp_storage_bytes: &mut usize,
Expand All @@ -213,7 +219,9 @@ extern "C" {
end_bit: i32,
stream: cudaStream_t,
) -> cudaError_t;
}

cuda_fn_and_stub! {
fn sort_pairs_descending_u32_by_u32(
d_temp_storage: *mut u8,
temp_storage_bytes: &mut usize,
Expand All @@ -226,7 +234,9 @@ extern "C" {
end_bit: i32,
stream: cudaStream_t,
) -> cudaError_t;
}

cuda_fn_and_stub! {
fn sort_pairs_u32_by_u64(
d_temp_storage: *mut u8,
temp_storage_bytes: &mut usize,
Expand All @@ -239,7 +249,9 @@ extern "C" {
end_bit: i32,
stream: cudaStream_t,
) -> cudaError_t;
}

cuda_fn_and_stub! {
fn sort_pairs_descending_u32_by_u64(
d_temp_storage: *mut u8,
temp_storage_bytes: &mut usize,
Expand All @@ -252,7 +264,9 @@ extern "C" {
end_bit: i32,
stream: cudaStream_t,
) -> cudaError_t;
}

cuda_fn_and_stub! {
fn sort_pairs_u64_by_u32(
d_temp_storage: *mut u8,
temp_storage_bytes: &mut usize,
Expand All @@ -265,7 +279,9 @@ extern "C" {
end_bit: i32,
stream: cudaStream_t,
) -> cudaError_t;
}

cuda_fn_and_stub! {
fn sort_pairs_descending_u64_by_u32(
d_temp_storage: *mut u8,
temp_storage_bytes: &mut usize,
Expand All @@ -278,7 +294,9 @@ extern "C" {
end_bit: i32,
stream: cudaStream_t,
) -> cudaError_t;
}

cuda_fn_and_stub! {
fn sort_pairs_u64_by_u64(
d_temp_storage: *mut u8,
temp_storage_bytes: &mut usize,
Expand All @@ -291,7 +309,9 @@ extern "C" {
end_bit: i32,
stream: cudaStream_t,
) -> cudaError_t;
}

cuda_fn_and_stub! {
fn sort_pairs_descending_u64_by_u64(
d_temp_storage: *mut u8,
temp_storage_bytes: &mut usize,
Expand Down
Loading

0 comments on commit 5f301b7

Please sign in to comment.