Replace Aligned with CachePadded since it's specifically designed to use the correct architecture-dependent alignment
Pr0methean committed Jan 20, 2024
1 parent 6e05647 commit cc9be26
Showing 2 changed files with 6 additions and 10 deletions.
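
For context, here is a minimal sketch, not part of this commit, contrasting the two wrappers being swapped. It assumes the aligned 0.4 and crossbeam-utils 0.8 APIs that appear in the diff below; align_of_val is used just to make the alignments observable.

use aligned::{Aligned, A64};
use crossbeam_utils::CachePadded;

fn main() {
    // Old wrapper: alignment is hard-coded to 64 bytes on every target.
    let old: Aligned<A64, [u64; 8]> = Aligned([0u64; 8]);
    assert_eq!(core::mem::align_of_val(&old), 64);

    // New wrapper: crossbeam-utils picks the alignment per architecture
    // (for example 128 bytes on x86-64, where the prefetcher pulls cache
    // lines in adjacent pairs; smaller on various other targets).
    let new: CachePadded<[u64; 8]> = CachePadded::new([0u64; 8]);
    println!("CachePadded alignment: {}", core::mem::align_of_val(&new));
}

The commit message's point is that 64 bytes is only correct where the false-sharing granularity really is 64 bytes; CachePadded moves that per-target decision into crossbeam-utils.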
Cargo.toml (1 addition, 1 deletion)
@@ -10,9 +10,9 @@
 rand = "0.8.5"
 rand_core = "0.6.4"
 rand_chacha = "0.3.1"
 log = "0.4.20"
-aligned = "0.4.1"
 bytemuck = "1.14.0"
 crossbeam-channel = "0.5.11"
+crossbeam-utils = "0.8.19"
 thread-priority = "0.15.1"
 thread-local-object = "0.1.0"

src/lib.rs (5 additions, 9 deletions)
@@ -4,7 +4,6 @@
 #![feature(maybe_uninit_slice)]
 #![feature(maybe_uninit_as_bytes)]

-use aligned::{Aligned, A64};
 use bytemuck::{cast_slice_mut, Pod, Zeroable};
 use core::fmt::Debug;
 use core::mem::{MaybeUninit, replace, size_of};
@@ -18,26 +17,23 @@
 use rand_core::block::{BlockRng64, BlockRngCore};
 use rand_core::{CryptoRng, RngCore, SeedableRng};
 use std::sync::{Arc, OnceLock};
 use std::thread::{Builder};
+use crossbeam_utils::CachePadded;
 use thread_local_object::{ThreadLocal};
 use thread_priority::ThreadPriority;

-// Alignment is chosen to prevent "false sharing" (i.e. instance A and instance B being part of or straddling the same
-// cache line, which would prevent &mut A from being used concurrently with &B or &mut B because only one CPU core can
-// have a given cache line in the modified state). All modern x86, ARM, x86-64 and Aarch64 CPUs have 64-byte cache
-// lines. TODO: Find a future-proof way to choose the right alignment for obscure architectures.
 #[derive(Copy, Clone)]
 #[repr(transparent)] // may be necessary to make Bytemuck transmutation safe
-pub struct DefaultableAlignedArray<const N: usize, T>(Aligned<A64, [T; N]>);
+pub struct DefaultableAlignedArray<const N: usize, T>(CachePadded<[T; N]>);

 impl<const N: usize, T: Default + Copy> Default for DefaultableAlignedArray<N, T> {
     fn default() -> Self {
-        DefaultableAlignedArray(Aligned([T::default(); N]))
+        DefaultableAlignedArray(CachePadded::new([T::default(); N]))
     }
 }

 impl<const N: usize, T: Default + Copy> From<[T; N]> for DefaultableAlignedArray<N, T> {
     fn from(value: [T; N]) -> Self {
-        Self(Aligned(value))
+        Self(CachePadded::new(value))
     }
 }
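
Aside, not part of the diff: the four comment lines deleted above describe the false-sharing problem this alignment exists to prevent, and CachePadded preserves that guarantee. A small demonstration (the counters and counts are hypothetical) of two threads updating adjacent values without ever contending for a cache line:

use crossbeam_utils::CachePadded;
use std::sync::atomic::{AtomicU64, Ordering};
use std::thread;

// Each counter occupies its own cache line(s), so the two threads below
// never invalidate each other's cached copy while incrementing.
// CachePadded::new is a const fn, so it works in statics.
static A: CachePadded<AtomicU64> = CachePadded::new(AtomicU64::new(0));
static B: CachePadded<AtomicU64> = CachePadded::new(AtomicU64::new(0));

fn main() {
    let t1 = thread::spawn(|| for _ in 0..1_000_000 { A.fetch_add(1, Ordering::Relaxed); });
    let t2 = thread::spawn(|| for _ in 0..1_000_000 { B.fetch_add(1, Ordering::Relaxed); });
    t1.join().unwrap();
    t2.join().unwrap();
    assert_eq!(A.load(Ordering::Relaxed) + B.load(Ordering::Relaxed), 2_000_000);
}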

@@ -84,7 +80,7 @@ impl <T, U: From<T>> Drop for RecyclableVec<T, U> {

 /// An RNG that reads from a shared buffer that only one thread per buffer fills from a seed source. It will
 /// share the buffer with all of its clones. Once this and all clones have been dropped, the source-reading thread will
-/// detect this using a [std::sync::Weak] reference and terminate. Since this RNG is used to implement [BlockRngCore]
+/// detect this and terminate. Since this RNG is used to implement [BlockRngCore]
 /// for instances of [BlockRng64], it can produce seeds of any desired size, but a `[u64; WORDS_PER_SEED]` will be
 /// fastest.
 ///
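
The deleted doc line above names [std::sync::Weak] as the mechanism by which the source-reading thread notices that every clone of the RNG is gone. A generic sketch of that pattern, with hypothetical names (Buffer, spawn_filler) standing in for the crate's real types:

use std::sync::{Arc, Mutex, Weak};
use std::thread;
use std::time::Duration;

// Hypothetical stand-in for the shared seed buffer.
type Buffer = Mutex<Vec<u64>>;

fn spawn_filler(buffer: &Arc<Buffer>) -> thread::JoinHandle<()> {
    // The filler thread holds only a Weak reference, so the buffer's
    // lifetime is controlled entirely by the consumer-side Arcs.
    let weak: Weak<Buffer> = Arc::downgrade(buffer);
    thread::spawn(move || loop {
        match weak.upgrade() {
            // Consumers still alive: refill (here, a stand-in value).
            Some(buf) => buf.lock().unwrap().push(0xDEAD_BEEF),
            // Every Arc is gone: terminate the thread.
            None => return,
        }
        thread::sleep(Duration::from_millis(1));
    })
}

fn main() {
    let buffer = Arc::new(Mutex::new(Vec::new()));
    let filler = spawn_filler(&buffer);
    thread::sleep(Duration::from_millis(10));
    drop(buffer);           // last strong reference dropped...
    filler.join().unwrap(); // ...so the filler observes None and exits
}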
