Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Better to/from slice operations #571

Merged
merged 1 commit into from
Oct 21, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 2 additions & 3 deletions codegen/templates/vec.rs.tera
Original file line number Diff line number Diff line change
Expand Up @@ -483,6 +483,7 @@ impl {{ self_t }} {
#[inline]
#[must_use]
pub const fn from_slice(slice: &[{{ scalar_t }}]) -> Self {
assert!(slice.len() >= {{ dim }});
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

accessing slice by index will also check that the index is in range, are you suggesting that adding this here makes the compiler elide the other changes, is this one of the operations that you were looking at generated code for?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes! Range check is performed only once now. For this code compiler output before:

example::from_slice_slow::hd394bd4d83252b04:
        push    rax
        test    rdx, rdx
        je      .LBB2_4
        cmp     rdx, 1
        je      .LBB2_5
        cmp     rdx, 2
        jbe     .LBB2_3
        movss   xmm0, dword ptr [rsi]
        movss   dword ptr [rdi], xmm0
        movsd   xmm0, qword ptr [rsi + 4]
        movsd   qword ptr [rdi + 4], xmm0
        mov     rax, rdi
        pop     rcx
        ret
...

after:

example::from_slice_fast::h47cd007525b38d49:
        cmp     rdx, 2
        jbe     .LBB3_2
        movss   xmm0, dword ptr [rsi + 8]
        movsd   xmm1, qword ptr [rsi]
        movsd   qword ptr [rdi], xmm1
        movss   dword ptr [rdi + 8], xmm0
        mov     rax, rdi
        ret
...

Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sorry, I just noticed the godbolt link.

Godbolt might not tell the full story though as there are different implementations of glam vectors, using simd in some cases and floats in others. It's better to look at the generated assembly. You can use cargo asm, because most things are inlined it's usually necessary to add public test functions that call glam functions and look at the asm for those.

Copy link
Contributor Author

@stepancheg stepancheg Oct 21, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This PR does not change code for SIMD versions, only code for working with regular slice.

Even if code is inlined, the same branches stay, because it is observable behavior:

  • different error messages before this PR for different indices
  • on write to slice, data was partially written before panic

Anyway, looking at cargo asm glam::test_xxx for Vec3A on my Apple laptop.

Code:

pub fn test_write(a: Vec3A, target: &mut [f32]) {
    a.write_to_slice(target);
}

pub fn test_read(a: &[f32]) -> Vec3A {
    Vec3A::from_slice(a)
}

Before:

glam::test_write:
 stp     x29, x30, [sp, #-16]!
 mov     x29, sp
 cbz     x2, LBB98_4
 ldr     q0, [x0]
 str     s0, [x1]
 cmp     x2, #1
 b.eq    LBB98_5
 add     x8, x1, #4
 st1.s   {, v0, }[1], [x8]
 cmp     x2, #2
 b.ls    LBB98_6
 add     x8, x1, #8
 st1.s   {, v0, }[2], [x8]
 ldp     x29, x30, [sp], #16
 ret
LBB98_4:
Lloh662:
 adrp    x2, l___unnamed_75@PAGE
Lloh663:
 add     x2, x2, l___unnamed_75@PAGEOFF
 mov     x0, #0
 mov     x1, #0
 bl      __ZN4core9panicking18panic_bounds_check17h47e687fd4a18b35fE
LBB98_5:
Lloh664:
 adrp    x2, l___unnamed_76@PAGE
Lloh665:
 add     x2, x2, l___unnamed_76@PAGEOFF
 mov     w0, #1
 mov     w1, #1
 bl      __ZN4core9panicking18panic_bounds_check17h47e687fd4a18b35fE
LBB98_6:
Lloh666:
 adrp    x2, l___unnamed_77@PAGE
Lloh667:
 add     x2, x2, l___unnamed_77@PAGEOFF
 mov     w0, #2
 mov     w1, #2
 bl      __ZN4core9panicking18panic_bounds_check17h47e687fd4a18b35fE

glam::test_read:
 stp     x29, x30, [sp, #-16]!
 mov     x29, sp
 cbz     x1, LBB99_4
 cmp     x1, #1
 b.eq    LBB99_5
 cmp     x1, #2
 b.ls    LBB99_6
 ldr     s0, [x0]
 add     x9, x0, #4
 ld1.s   {, v0, }[1], [x9]
 ldr     s1, [x0, #8]
 mov.s   v0[2], v1[0]
 mov.s   v0[3], v1[0]
 str     q0, [x8]
 ldp     x29, x30, [sp], #16
 ret
LBB99_4:
Lloh668:
 adrp    x2, l___unnamed_78@PAGE
Lloh669:
 add     x2, x2, l___unnamed_78@PAGEOFF
 mov     x0, #0
 bl      __ZN4core9panicking18panic_bounds_check17h47e687fd4a18b35fE
LBB99_5:
Lloh670:
 adrp    x2, l___unnamed_79@PAGE
Lloh671:
 add     x2, x2, l___unnamed_79@PAGEOFF
 mov     w0, #1
 bl      __ZN4core9panicking18panic_bounds_check17h47e687fd4a18b35fE
LBB99_6:
Lloh672:
 adrp    x2, l___unnamed_80@PAGE
Lloh673:
 add     x2, x2, l___unnamed_80@PAGEOFF
 mov     w0, #2
 mov     w1, #2
 bl      __ZN4core9panicking18panic_bounds_check17h47e687fd4a18b35fE

After:

glam::test_write:
 cmp     x2, #3
 b.ne    LBB98_2
 ldr     q0, [x0]
 add     x8, x1, #8
 st1.s   {, v0, }[2], [x8]
 str     d0, [x1]
 ret
LBB98_2:
 stp     x29, x30, [sp, #-16]!
 mov     x29, sp
Lloh662:
 adrp    x8, l___unnamed_75@PAGE
Lloh663:
 add     x8, x8, l___unnamed_75@PAGEOFF
 mov     x0, x2
 mov     w1, #3
 mov     x2, x8
 bl      __ZN4core5slice29_$LT$impl$u20$$u5b$T$u5d$$GT$15copy_from_slice17len_mismatch_fail17h7212f66e1542c208E

glam::test_read:
 cmp     x1, #2
 b.ls    LBB99_2
 ldr     d0, [x0]
 ldr     s1, [x0, #8]
 mov.s   v0[2], v1[0]
 mov.s   v0[3], v1[0]
 str     q0, [x8]
 ret
LBB99_2:
 stp     x29, x30, [sp, #-16]!
 mov     x29, sp
Lloh664:
 adrp    x0, l___unnamed_76@PAGE
Lloh665:
 add     x0, x0, l___unnamed_76@PAGEOFF
Lloh666:
 adrp    x2, l___unnamed_77@PAGE
Lloh667:
 add     x2, x2, l___unnamed_77@PAGEOFF
 mov     w1, #34
 bl      __ZN4core9panicking5panic17hae814ca9668aac2fE

Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks for checking. I was mostly wondering if to_array resulted in anything weird with underlying simd types since it will be dereferencing all the vector elements, seems fine though.

Self::new(
{% for c in components %}
slice[{{ loop.index0 }}],
Expand All @@ -504,9 +505,7 @@ impl {{ self_t }} {
assert!(slice.len() >= 4);
unsafe { vst1q_f32(slice.as_mut_ptr(), self.0); }
{% else %}
{% for c in components %}
slice[{{ loop.index0 }}] = self.{{ c }};
{%- endfor %}
slice.copy_from_slice(&self.to_array());
{% endif %}
}

Expand Down
5 changes: 2 additions & 3 deletions src/f32/coresimd/vec3a.rs
Original file line number Diff line number Diff line change
Expand Up @@ -132,6 +132,7 @@ impl Vec3A {
#[inline]
#[must_use]
pub const fn from_slice(slice: &[f32]) -> Self {
assert!(slice.len() >= 3);
Self::new(slice[0], slice[1], slice[2])
}

Expand All @@ -142,9 +143,7 @@ impl Vec3A {
/// Panics if `slice` is less than 3 elements long.
#[inline]
pub fn write_to_slice(self, slice: &mut [f32]) {
slice[0] = self.x;
slice[1] = self.y;
slice[2] = self.z;
slice.copy_from_slice(&self.to_array());
}

/// Creates a [`Vec3A`] from the `x`, `y` and `z` elements of `self` discarding `w`.
Expand Down
6 changes: 2 additions & 4 deletions src/f32/coresimd/vec4.rs
Original file line number Diff line number Diff line change
Expand Up @@ -134,6 +134,7 @@ impl Vec4 {
#[inline]
#[must_use]
pub const fn from_slice(slice: &[f32]) -> Self {
assert!(slice.len() >= 4);
Self::new(slice[0], slice[1], slice[2], slice[3])
}

Expand All @@ -144,10 +145,7 @@ impl Vec4 {
/// Panics if `slice` is less than 4 elements long.
#[inline]
pub fn write_to_slice(self, slice: &mut [f32]) {
slice[0] = self.x;
slice[1] = self.y;
slice[2] = self.z;
slice[3] = self.w;
slice.copy_from_slice(&self.to_array());
}

/// Creates a 3D vector from the `x`, `y` and `z` elements of `self`, discarding `w`.
Expand Down
5 changes: 2 additions & 3 deletions src/f32/neon/vec3a.rs
Original file line number Diff line number Diff line change
Expand Up @@ -137,6 +137,7 @@ impl Vec3A {
#[inline]
#[must_use]
pub const fn from_slice(slice: &[f32]) -> Self {
assert!(slice.len() >= 3);
Self::new(slice[0], slice[1], slice[2])
}

Expand All @@ -147,9 +148,7 @@ impl Vec3A {
/// Panics if `slice` is less than 3 elements long.
#[inline]
pub fn write_to_slice(self, slice: &mut [f32]) {
slice[0] = self.x;
slice[1] = self.y;
slice[2] = self.z;
slice.copy_from_slice(&self.to_array());
}

/// Creates a [`Vec3A`] from the `x`, `y` and `z` elements of `self` discarding `w`.
Expand Down
1 change: 1 addition & 0 deletions src/f32/neon/vec4.rs
Original file line number Diff line number Diff line change
Expand Up @@ -139,6 +139,7 @@ impl Vec4 {
#[inline]
#[must_use]
pub const fn from_slice(slice: &[f32]) -> Self {
assert!(slice.len() >= 4);
Self::new(slice[0], slice[1], slice[2], slice[3])
}

Expand Down
5 changes: 2 additions & 3 deletions src/f32/scalar/vec3a.rs
Original file line number Diff line number Diff line change
Expand Up @@ -139,6 +139,7 @@ impl Vec3A {
#[inline]
#[must_use]
pub const fn from_slice(slice: &[f32]) -> Self {
assert!(slice.len() >= 3);
Self::new(slice[0], slice[1], slice[2])
}

Expand All @@ -149,9 +150,7 @@ impl Vec3A {
/// Panics if `slice` is less than 3 elements long.
#[inline]
pub fn write_to_slice(self, slice: &mut [f32]) {
slice[0] = self.x;
slice[1] = self.y;
slice[2] = self.z;
slice.copy_from_slice(&self.to_array());
}

/// Creates a [`Vec3A`] from the `x`, `y` and `z` elements of `self` discarding `w`.
Expand Down
6 changes: 2 additions & 4 deletions src/f32/scalar/vec4.rs
Original file line number Diff line number Diff line change
Expand Up @@ -158,6 +158,7 @@ impl Vec4 {
#[inline]
#[must_use]
pub const fn from_slice(slice: &[f32]) -> Self {
assert!(slice.len() >= 4);
Self::new(slice[0], slice[1], slice[2], slice[3])
}

Expand All @@ -168,10 +169,7 @@ impl Vec4 {
/// Panics if `slice` is less than 4 elements long.
#[inline]
pub fn write_to_slice(self, slice: &mut [f32]) {
slice[0] = self.x;
slice[1] = self.y;
slice[2] = self.z;
slice[3] = self.w;
slice.copy_from_slice(&self.to_array());
}

/// Creates a 3D vector from the `x`, `y` and `z` elements of `self`, discarding `w`.
Expand Down
5 changes: 2 additions & 3 deletions src/f32/sse2/vec3a.rs
Original file line number Diff line number Diff line change
Expand Up @@ -145,6 +145,7 @@ impl Vec3A {
#[inline]
#[must_use]
pub const fn from_slice(slice: &[f32]) -> Self {
assert!(slice.len() >= 3);
Self::new(slice[0], slice[1], slice[2])
}

Expand All @@ -155,9 +156,7 @@ impl Vec3A {
/// Panics if `slice` is less than 3 elements long.
#[inline]
pub fn write_to_slice(self, slice: &mut [f32]) {
slice[0] = self.x;
slice[1] = self.y;
slice[2] = self.z;
slice.copy_from_slice(&self.to_array());
}

/// Creates a [`Vec3A`] from the `x`, `y` and `z` elements of `self` discarding `w`.
Expand Down
1 change: 1 addition & 0 deletions src/f32/sse2/vec4.rs
Original file line number Diff line number Diff line change
Expand Up @@ -147,6 +147,7 @@ impl Vec4 {
#[inline]
#[must_use]
pub const fn from_slice(slice: &[f32]) -> Self {
assert!(slice.len() >= 4);
Self::new(slice[0], slice[1], slice[2], slice[3])
}

Expand Down
4 changes: 2 additions & 2 deletions src/f32/vec2.rs
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,7 @@ impl Vec2 {
#[inline]
#[must_use]
pub const fn from_slice(slice: &[f32]) -> Self {
assert!(slice.len() >= 2);
Self::new(slice[0], slice[1])
}

Expand All @@ -133,8 +134,7 @@ impl Vec2 {
/// Panics if `slice` is less than 2 elements long.
#[inline]
pub fn write_to_slice(self, slice: &mut [f32]) {
slice[0] = self.x;
slice[1] = self.y;
slice.copy_from_slice(&self.to_array());
}

/// Creates a 3D vector from `self` and the given `z` value.
Expand Down
5 changes: 2 additions & 3 deletions src/f32/vec3.rs
Original file line number Diff line number Diff line change
Expand Up @@ -130,6 +130,7 @@ impl Vec3 {
#[inline]
#[must_use]
pub const fn from_slice(slice: &[f32]) -> Self {
assert!(slice.len() >= 3);
Self::new(slice[0], slice[1], slice[2])
}

Expand All @@ -140,9 +141,7 @@ impl Vec3 {
/// Panics if `slice` is less than 3 elements long.
#[inline]
pub fn write_to_slice(self, slice: &mut [f32]) {
slice[0] = self.x;
slice[1] = self.y;
slice[2] = self.z;
slice.copy_from_slice(&self.to_array());
}

/// Internal method for creating a 3D vector from a 4D vector, discarding `w`.
Expand Down
5 changes: 2 additions & 3 deletions src/f32/wasm32/vec3a.rs
Original file line number Diff line number Diff line change
Expand Up @@ -131,6 +131,7 @@ impl Vec3A {
#[inline]
#[must_use]
pub const fn from_slice(slice: &[f32]) -> Self {
assert!(slice.len() >= 3);
Self::new(slice[0], slice[1], slice[2])
}

Expand All @@ -141,9 +142,7 @@ impl Vec3A {
/// Panics if `slice` is less than 3 elements long.
#[inline]
pub fn write_to_slice(self, slice: &mut [f32]) {
slice[0] = self.x;
slice[1] = self.y;
slice[2] = self.z;
slice.copy_from_slice(&self.to_array());
}

/// Creates a [`Vec3A`] from the `x`, `y` and `z` elements of `self` discarding `w`.
Expand Down
6 changes: 2 additions & 4 deletions src/f32/wasm32/vec4.rs
Original file line number Diff line number Diff line change
Expand Up @@ -133,6 +133,7 @@ impl Vec4 {
#[inline]
#[must_use]
pub const fn from_slice(slice: &[f32]) -> Self {
assert!(slice.len() >= 4);
Self::new(slice[0], slice[1], slice[2], slice[3])
}

Expand All @@ -143,10 +144,7 @@ impl Vec4 {
/// Panics if `slice` is less than 4 elements long.
#[inline]
pub fn write_to_slice(self, slice: &mut [f32]) {
slice[0] = self.x;
slice[1] = self.y;
slice[2] = self.z;
slice[3] = self.w;
slice.copy_from_slice(&self.to_array());
}

/// Creates a 3D vector from the `x`, `y` and `z` elements of `self`, discarding `w`.
Expand Down
4 changes: 2 additions & 2 deletions src/f64/dvec2.rs
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,7 @@ impl DVec2 {
#[inline]
#[must_use]
pub const fn from_slice(slice: &[f64]) -> Self {
assert!(slice.len() >= 2);
Self::new(slice[0], slice[1])
}

Expand All @@ -133,8 +134,7 @@ impl DVec2 {
/// Panics if `slice` is less than 2 elements long.
#[inline]
pub fn write_to_slice(self, slice: &mut [f64]) {
slice[0] = self.x;
slice[1] = self.y;
slice.copy_from_slice(&self.to_array());
}

/// Creates a 3D vector from `self` and the given `z` value.
Expand Down
5 changes: 2 additions & 3 deletions src/f64/dvec3.rs
Original file line number Diff line number Diff line change
Expand Up @@ -130,6 +130,7 @@ impl DVec3 {
#[inline]
#[must_use]
pub const fn from_slice(slice: &[f64]) -> Self {
assert!(slice.len() >= 3);
Self::new(slice[0], slice[1], slice[2])
}

Expand All @@ -140,9 +141,7 @@ impl DVec3 {
/// Panics if `slice` is less than 3 elements long.
#[inline]
pub fn write_to_slice(self, slice: &mut [f64]) {
slice[0] = self.x;
slice[1] = self.y;
slice[2] = self.z;
slice.copy_from_slice(&self.to_array());
}

/// Internal method for creating a 3D vector from a 4D vector, discarding `w`.
Expand Down
6 changes: 2 additions & 4 deletions src/f64/dvec4.rs
Original file line number Diff line number Diff line change
Expand Up @@ -149,6 +149,7 @@ impl DVec4 {
#[inline]
#[must_use]
pub const fn from_slice(slice: &[f64]) -> Self {
assert!(slice.len() >= 4);
Self::new(slice[0], slice[1], slice[2], slice[3])
}

Expand All @@ -159,10 +160,7 @@ impl DVec4 {
/// Panics if `slice` is less than 4 elements long.
#[inline]
pub fn write_to_slice(self, slice: &mut [f64]) {
slice[0] = self.x;
slice[1] = self.y;
slice[2] = self.z;
slice[3] = self.w;
slice.copy_from_slice(&self.to_array());
}

/// Creates a 3D vector from the `x`, `y` and `z` elements of `self`, discarding `w`.
Expand Down
4 changes: 2 additions & 2 deletions src/i16/i16vec2.rs
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,7 @@ impl I16Vec2 {
#[inline]
#[must_use]
pub const fn from_slice(slice: &[i16]) -> Self {
assert!(slice.len() >= 2);
Self::new(slice[0], slice[1])
}

Expand All @@ -125,8 +126,7 @@ impl I16Vec2 {
/// Panics if `slice` is less than 2 elements long.
#[inline]
pub fn write_to_slice(self, slice: &mut [i16]) {
slice[0] = self.x;
slice[1] = self.y;
slice.copy_from_slice(&self.to_array());
}

/// Creates a 3D vector from `self` and the given `z` value.
Expand Down
5 changes: 2 additions & 3 deletions src/i16/i16vec3.rs
Original file line number Diff line number Diff line change
Expand Up @@ -122,6 +122,7 @@ impl I16Vec3 {
#[inline]
#[must_use]
pub const fn from_slice(slice: &[i16]) -> Self {
assert!(slice.len() >= 3);
Self::new(slice[0], slice[1], slice[2])
}

Expand All @@ -132,9 +133,7 @@ impl I16Vec3 {
/// Panics if `slice` is less than 3 elements long.
#[inline]
pub fn write_to_slice(self, slice: &mut [i16]) {
slice[0] = self.x;
slice[1] = self.y;
slice[2] = self.z;
slice.copy_from_slice(&self.to_array());
}

/// Internal method for creating a 3D vector from a 4D vector, discarding `w`.
Expand Down
6 changes: 2 additions & 4 deletions src/i16/i16vec4.rs
Original file line number Diff line number Diff line change
Expand Up @@ -141,6 +141,7 @@ impl I16Vec4 {
#[inline]
#[must_use]
pub const fn from_slice(slice: &[i16]) -> Self {
assert!(slice.len() >= 4);
Self::new(slice[0], slice[1], slice[2], slice[3])
}

Expand All @@ -151,10 +152,7 @@ impl I16Vec4 {
/// Panics if `slice` is less than 4 elements long.
#[inline]
pub fn write_to_slice(self, slice: &mut [i16]) {
slice[0] = self.x;
slice[1] = self.y;
slice[2] = self.z;
slice[3] = self.w;
slice.copy_from_slice(&self.to_array());
}

/// Creates a 3D vector from the `x`, `y` and `z` elements of `self`, discarding `w`.
Expand Down
4 changes: 2 additions & 2 deletions src/i32/ivec2.rs
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,7 @@ impl IVec2 {
#[inline]
#[must_use]
pub const fn from_slice(slice: &[i32]) -> Self {
assert!(slice.len() >= 2);
Self::new(slice[0], slice[1])
}

Expand All @@ -125,8 +126,7 @@ impl IVec2 {
/// Panics if `slice` is less than 2 elements long.
#[inline]
pub fn write_to_slice(self, slice: &mut [i32]) {
slice[0] = self.x;
slice[1] = self.y;
slice.copy_from_slice(&self.to_array());
}

/// Creates a 3D vector from `self` and the given `z` value.
Expand Down
5 changes: 2 additions & 3 deletions src/i32/ivec3.rs
Original file line number Diff line number Diff line change
Expand Up @@ -122,6 +122,7 @@ impl IVec3 {
#[inline]
#[must_use]
pub const fn from_slice(slice: &[i32]) -> Self {
assert!(slice.len() >= 3);
Self::new(slice[0], slice[1], slice[2])
}

Expand All @@ -132,9 +133,7 @@ impl IVec3 {
/// Panics if `slice` is less than 3 elements long.
#[inline]
pub fn write_to_slice(self, slice: &mut [i32]) {
slice[0] = self.x;
slice[1] = self.y;
slice[2] = self.z;
slice.copy_from_slice(&self.to_array());
}

/// Internal method for creating a 3D vector from a 4D vector, discarding `w`.
Expand Down
6 changes: 2 additions & 4 deletions src/i32/ivec4.rs
Original file line number Diff line number Diff line change
Expand Up @@ -141,6 +141,7 @@ impl IVec4 {
#[inline]
#[must_use]
pub const fn from_slice(slice: &[i32]) -> Self {
assert!(slice.len() >= 4);
Self::new(slice[0], slice[1], slice[2], slice[3])
}

Expand All @@ -151,10 +152,7 @@ impl IVec4 {
/// Panics if `slice` is less than 4 elements long.
#[inline]
pub fn write_to_slice(self, slice: &mut [i32]) {
slice[0] = self.x;
slice[1] = self.y;
slice[2] = self.z;
slice[3] = self.w;
slice.copy_from_slice(&self.to_array());
}

/// Creates a 3D vector from the `x`, `y` and `z` elements of `self`, discarding `w`.
Expand Down
Loading