diff --git a/lib/std/core/mem.c3 b/lib/std/core/mem.c3
index 305acf603..ce0533f9d 100644
--- a/lib/std/core/mem.c3
+++ b/lib/std/core/mem.c3
@@ -536,6 +536,7 @@ fn void* tmalloc(usz size, usz alignment = 0, usz offset = 0) @builtin @inline @
 /**
  * @require $vacount < 2 : "Too many arguments."
  * @require $or($vacount == 0, $assignable($vaexpr(0), $Type)) : "The second argument must be an initializer for the type"
+ * @require $Type.alignof <= DEFAULT_MEM_ALIGNMENT : "Types with alignment exceeding the default must use 'new_aligned' instead"
  **/
 macro new($Type, ...) @nodiscard
 {
@@ -548,11 +549,40 @@ macro new($Type, ...) @nodiscard
 	$endif
 }
 
+/**
+ * Allocate using an aligned allocation. This is necessary for types with a default memory alignment
+ * exceeding DEFAULT_MEM_ALIGNMENT. IMPORTANT! It must be freed using free_aligned.
+ * @require $vacount < 2 : "Too many arguments."
+ * @require $or($vacount == 0, $assignable($vaexpr(0), $Type)) : "The second argument must be an initializer for the type"
+ **/
+macro new_aligned($Type, ...) @nodiscard
+{
+	$if $vacount == 0:
+		return ($Type*)calloc_aligned($Type.sizeof, $Type.alignof);
+	$else
+		$Type* val = malloc_aligned($Type.sizeof, $Type.alignof);
+		*val = $vaexpr(0);
+		return val;
+	$endif
+}
+
+/**
+ * @require $Type.alignof <= DEFAULT_MEM_ALIGNMENT : "Types with alignment exceeding the default must use 'alloc_aligned' instead"
+ **/
 macro alloc($Type) @nodiscard
 {
 	return ($Type*)malloc($Type.sizeof);
 }
 
+/**
+ * Allocate using an aligned allocation. This is necessary for types with a default memory alignment
+ * exceeding DEFAULT_MEM_ALIGNMENT. IMPORTANT! It must be freed using free_aligned.
+ **/
+macro alloc_aligned($Type) @nodiscard
+{
+	return ($Type*)malloc_aligned($Type.sizeof, $Type.alignof);
+}
+
 macro new_clear($Type) @deprecated("Use mem::new")
 {
 	return new($Type);
@@ -588,16 +618,42 @@ macro new_temp_clear($Type) @deprecated("use mem::temp_new")
 	return tcalloc($Type.sizeof);
 }
 
+
+
+/**
+ * @require $Type.alignof <= DEFAULT_MEM_ALIGNMENT : "Types with alignment exceeding the default must use 'new_array_aligned' instead"
+ **/
 macro new_array($Type, usz elements) @nodiscard
 {
 	return allocator::new_array(allocator::heap(), $Type, elements);
 }
 
+/**
+ * Allocate using an aligned allocation. This is necessary for types with a default memory alignment
+ * exceeding DEFAULT_MEM_ALIGNMENT. IMPORTANT! It must be freed using free_aligned.
+ **/
+macro new_array_aligned($Type, usz elements) @nodiscard
+{
+	return allocator::new_array_aligned(allocator::heap(), $Type, elements);
+}
+
+/**
+ * @require $Type.alignof <= DEFAULT_MEM_ALIGNMENT : "Types with alignment exceeding the default must use 'alloc_array_aligned' instead"
+ **/
 macro alloc_array($Type, usz elements) @nodiscard
 {
 	return allocator::alloc_array(allocator::heap(), $Type, elements);
 }
 
+/**
+ * Allocate using an aligned allocation. This is necessary for types with a default memory alignment
+ * exceeding DEFAULT_MEM_ALIGNMENT. IMPORTANT! It must be freed using free_aligned.
+ **/
+macro alloc_array_aligned($Type, usz elements) @nodiscard
+{
+	return allocator::alloc_array_aligned(allocator::heap(), $Type, elements);
+}
+
 macro talloc_array($Type, usz elements) @nodiscard @deprecated("use mem::temp_alloc_array")
 {
 	return temp_alloc_array($Type, elements);
@@ -633,6 +689,11 @@ fn void* calloc(usz size) @builtin @inline @nodiscard
 	return allocator::calloc(allocator::heap(), size);
 }
 
+fn void* calloc_aligned(usz size, usz alignment) @builtin @inline @nodiscard
+{
+	return allocator::calloc_aligned(allocator::heap(), size, alignment)!!;
+}
+
 fn void* tcalloc(usz size, usz alignment = 0, usz offset = 0) @builtin @inline @nodiscard
 {
 	return allocator::temp().acquire(size, false, alignment, offset)!!;
@@ -643,11 +704,21 @@ fn void* realloc(void *ptr, usz new_size) @builtin @inline @nodiscard
 	return allocator::realloc(allocator::heap(), ptr, new_size);
 }
 
+fn void* realloc_aligned(void *ptr, usz new_size, usz alignment) @builtin @inline @nodiscard
+{
+	return allocator::realloc_aligned(allocator::heap(), ptr, new_size, alignment)!!;
+}
+
 fn void free(void* ptr) @builtin @inline
 {
 	return allocator::free(allocator::heap(), ptr);
 }
 
+fn void free_aligned(void* ptr) @builtin @inline
+{
+	return allocator::free_aligned(allocator::heap(), ptr);
+}
+
 fn void* trealloc(void* ptr, usz size, usz alignment = mem::DEFAULT_MEM_ALIGNMENT) @builtin @inline @nodiscard
 {
 	return allocator::temp().resize(ptr, size, alignment, 0)!!;
diff --git a/lib/std/core/mem_allocator.c3 b/lib/std/core/mem_allocator.c3
index 386da66d4..2a1bdc51d 100644
--- a/lib/std/core/mem_allocator.c3
+++ b/lib/std/core/mem_allocator.c3
@@ -166,11 +166,21 @@ macro new_array_try(Allocator* allocator, $Type, usz elements) @nodiscard
 	return (($Type*)calloc_try(allocator, $Type.sizeof * elements))[:elements];
 }
 
+macro new_array_aligned(Allocator* allocator, $Type, usz elements) @nodiscard
+{
+	return (($Type*)calloc_aligned(allocator, $Type.sizeof * elements, $Type.alignof))[:elements]!!;
+}
+
 macro alloc_array(Allocator* allocator, $Type, usz elements) @nodiscard
 {
 	return alloc_array_try(allocator, $Type, elements)!!;
 }
 
+macro alloc_array_aligned(Allocator* allocator, $Type, usz elements) @nodiscard
+{
+	return (($Type*)malloc_aligned(allocator, $Type.sizeof * elements, $Type.alignof))[:elements]!!;
+}
+
 macro alloc_array_try(Allocator* allocator, $Type, usz elements) @nodiscard
 {
 	return (($Type*)malloc_try(allocator, $Type.sizeof * elements))[:elements];
diff --git a/releasenotes.md b/releasenotes.md
index 18d49f850..07e6f8f68 100644
--- a/releasenotes.md
+++ b/releasenotes.md
@@ -10,7 +10,7 @@
 - `math::pow` will now correctly promote integer arguments.
 
 ### Stdlib changes
-None
+- Added `new_aligned`, `alloc_aligned`, `new_array_aligned` and `alloc_array_aligned` macros to prevent accidental under-alignment when allocating SIMD types.
 
 ## 0.5.4 Change list
 
diff --git a/src/compiler/target.c b/src/compiler/target.c
index 78619dc11..f6a6dc618 100644
--- a/src/compiler/target.c
+++ b/src/compiler/target.c
@@ -4,7 +4,7 @@
 #include "compiler_internal.h"
 
 extern void LLVMSetTargetMachineUseInitArray(LLVMTargetMachineRef ref, bool use_init_array);
-
+static bool x64features_contains(X86Features *cpu_features, X86Feature feature);
 static ObjectFormatType object_format_from_os(OsType os, ArchType arch_type);
 static unsigned arch_pointer_bit_width(OsType os, ArchType arch);
 static ArchType arch_from_llvm_string(StringSlice string);
@@ -47,7 +47,7 @@ bool arch_is_wasm(ArchType type)
 	return type == ARCH_TYPE_WASM32 || type == ARCH_TYPE_WASM64;
 }
 
-static AlignSize os_arch_max_alignment_of_vector(OsType os, ArchType arch, EnvironmentType type, ARMVariant variant)
+static AlignSize os_arch_max_alignment_of_vector(OsType os, ArchType arch, EnvironmentType type, ARMVariant variant, X86Features* features)
 {
 	switch (arch)
 	{
@@ -71,12 +71,14 @@ static AlignSize os_arch_max_alignment_of_vector(OsType os, ArchType arch, Envir
 		case ARCH_TYPE_X86:
 			if (os == OS_TYPE_WIN32) /* COFF */
 			{
-				return 8192;
+				return 8192 / 8;
 			}
 			if (os_is_apple(os))
 			{
-				// With AVX512 - 512, AVX - 256 otherwise AVX - 128
-				return 256;
+				// With AVX512 - 512 bits, with AVX - 256 bits, otherwise 128 bits (returned in bytes)
+				if (x64features_contains(features, X86_FEAT_AVX512F)) return 512 / 8;
+				if (x64features_contains(features, X86_FEAT_AVX)) return 256 / 8;
+				return 128 / 8;
 			}
 			break;
 		default:
@@ -393,6 +395,7 @@ static char *x86_feature_name[] = {
 	[X86_FEAT_AVXVNNIINT8] = "avxvnniint8",
 	[X86_FEAT_AVXVNNIINT16] = "avxvnniint16",
 };
+
 static X86Feature x86feature_from_string(const char *str)
 {
 	for (int i = 0; i <= X86_FEATURE_LAST; i++)
@@ -1928,7 +1931,11 @@ void target_setup(BuildTarget *target)
 			platform_target.abi = ABI_UNKNOWN;
 			break;
 	}
-	platform_target.align_max_vector = os_arch_max_alignment_of_vector(platform_target.os, platform_target.arch, platform_target.environment_type, platform_target.arm.variant);
+	platform_target.align_max_vector = os_arch_max_alignment_of_vector(platform_target.os,
+	                                                                   platform_target.arch,
+	                                                                   platform_target.environment_type,
+	                                                                   platform_target.arm.variant,
+	                                                                   &platform_target.x64.features);
 	platform_target.align_max_tls = os_arch_max_alignment_of_tls(platform_target.os,
 	                                                             platform_target.arch,
 	                                                             platform_target.environment_type);