Skip to content
This repository has been archived by the owner on Aug 16, 2024. It is now read-only.

Commit

Permalink
improve device allocator memory allocation logic
Browse files Browse the repository at this point in the history
  • Loading branch information
robik75 committed Apr 29, 2024
1 parent f29d5be commit 79c5f91
Show file tree
Hide file tree
Showing 2 changed files with 43 additions and 24 deletions.
2 changes: 1 addition & 1 deletion src/context.rs
Original file line number Diff line number Diff line change
Expand Up @@ -121,7 +121,7 @@ impl ProverContext {
let cuda_ctx = CudaContext::create(12, 12)?;
// grab a small slice first, then consume everything
let small_device_alloc = SmallStaticDeviceAllocator::init()?;
let device_alloc = StaticDeviceAllocator::init(num_blocks, block_size)?;
let device_alloc = StaticDeviceAllocator::init(num_blocks, num_blocks, block_size)?;
let small_host_alloc = SmallStaticHostAllocator::init()?;
let host_alloc = StaticHostAllocator::init(1 << 8, block_size)?;
Self::create_internal(
Expand Down
65 changes: 42 additions & 23 deletions src/static_allocator/device.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ use std::ptr::NonNull;
use std::sync::{Arc, Mutex};

pub const FREE_MEMORY_SLACK: usize = 1 << 23; // 8 MB
pub const MIN_NUM_BLOCKS: usize = 512;
pub const SMALL_ALLOCATOR_BLOCKS_COUNT: usize = 1 << 10; // 256 KB

#[derive(Derivative)]
Expand Down Expand Up @@ -164,30 +165,44 @@ impl StaticDeviceAllocator {
self.block_size_in_bytes
}

pub fn init(num_blocks: usize, block_size: usize) -> CudaResult<Self> {
assert_ne!(num_blocks, 0);
pub fn init(
max_num_blocks: usize,
min_num_blocks: usize,
block_size: usize,
) -> CudaResult<Self> {
assert_ne!(min_num_blocks, 0);
assert!(max_num_blocks >= min_num_blocks);
assert!(block_size.is_power_of_two());
let memory_size = num_blocks * block_size;
let memory_size_in_bytes = memory_size * std::mem::size_of::<F>();
let block_size_in_bytes = block_size * std::mem::size_of::<F>();

let memory = DeviceAllocation::alloc(memory_size_in_bytes).expect(&format!(
"failed to allocate {} bytes",
memory_size_in_bytes
));
let mut num_blocks = max_num_blocks;
while num_blocks >= min_num_blocks {
let memory_size = num_blocks * block_size;
let memory_size_in_bytes = memory_size * std::mem::size_of::<F>();
let block_size_in_bytes = block_size * std::mem::size_of::<F>();

let result = DeviceAllocation::alloc(memory_size_in_bytes);
let memory = match result {
Ok(memory) => memory,
Err(CudaError::ErrorMemoryAllocation) => {
num_blocks -= 1;
continue;
}
Err(e) => return Err(e),
};

println!("allocated {memory_size_in_bytes} bytes on device");
println!("allocated {memory_size_in_bytes} bytes on device");

let alloc = StaticDeviceAllocator {
memory: Arc::new(memory),
memory_size: memory_size_in_bytes,
block_size_in_bytes,
bitmap: Arc::new(Mutex::new(Self::init_bitmap(num_blocks))),
#[cfg(feature = "allocator_stats")]
stats: Default::default(),
};
let alloc = StaticDeviceAllocator {
memory: Arc::new(memory),
memory_size: memory_size_in_bytes,
block_size_in_bytes,
bitmap: Arc::new(Mutex::new(Self::init_bitmap(num_blocks))),
#[cfg(feature = "allocator_stats")]
stats: Default::default(),
};

Ok(alloc)
return Ok(alloc);
}
Err(CudaError::ErrorMemoryAllocation)
}

pub fn init_all(block_size: usize) -> CudaResult<Self> {
Expand All @@ -196,8 +211,8 @@ impl StaticDeviceAllocator {
assert!(memory_size_in_bytes >= FREE_MEMORY_SLACK);
let free_memory_size_in_bytes = memory_size_in_bytes - FREE_MEMORY_SLACK;
assert!(free_memory_size_in_bytes >= block_size);
let num_blocks = free_memory_size_in_bytes / block_size_in_bytes;
Self::init(num_blocks, block_size)
let max_num_blocks = free_memory_size_in_bytes / block_size_in_bytes;
Self::init(max_num_blocks, MIN_NUM_BLOCKS, block_size)
}

fn find_free_block(&self) -> Option<usize> {
Expand Down Expand Up @@ -370,7 +385,11 @@ impl SmallStaticDeviceAllocator {
pub fn init() -> CudaResult<Self> {
// CUDA requires alignment to be a multiple of 32 Goldilocks elements
const BLOCK_SIZE: usize = 32;
let inner = StaticDeviceAllocator::init(SMALL_ALLOCATOR_BLOCKS_COUNT, BLOCK_SIZE)?;
let inner = StaticDeviceAllocator::init(
SMALL_ALLOCATOR_BLOCKS_COUNT,
SMALL_ALLOCATOR_BLOCKS_COUNT,
BLOCK_SIZE,
)?;
Ok(Self { inner })
}

Expand Down

0 comments on commit 79c5f91

Please sign in to comment.