Skip to content

Commit

Permalink
Add support for _mm256_min_epu16
Browse files Browse the repository at this point in the history
  • Loading branch information
Guillaume Piolat committed Dec 17, 2023
1 parent 1fbdca3 commit 27316ca
Show file tree
Hide file tree
Showing 2 changed files with 47 additions and 15 deletions.
60 changes: 46 additions & 14 deletions source/inteli/avx2intrin.d
Original file line number Diff line number Diff line change
Expand Up @@ -1675,19 +1675,7 @@ __m256i _mm256_max_epu16 (__m256i a, __m256i b) pure @trusted
return cast(__m256i)( (greater & sa) | (~greater & sb) );
}
else
{
// good with GDC, but not LDC
short16 sa = cast(short16)a;
short16 sb = cast(short16)b;
short16 r;
for (int n = 0; n < 16; ++n)
{
ushort ua = sa.array[n];
ushort ub = sb.array[n];
r.ptr[n] = ua > ub ? ua : ub;
}
return cast(__m256i)r;
}
static assert(false);
}
unittest
{
Expand Down Expand Up @@ -1836,7 +1824,51 @@ unittest


// TODO __m256i _mm256_min_epi8 (__m256i a, __m256i b) pure @safe
// TODO __m256i _mm256_min_epu16 (__m256i a, __m256i b) pure @safe

/// Compare packed unsigned 16-bit integers in `a` and `b`, and return packed minimum values.
__m256i _mm256_min_epu16 (__m256i a, __m256i b) pure @trusted
{
// PERF D_SIMD
version(GNU)
enum bool split = true;
else static if (SIMD_COMPARISON_MASKS_32B)
enum bool split = false;
else
enum bool split = true;

static if (GDC_with_AVX2)
{
return cast(__m256i) __builtin_ia32_pminuw256(cast(short16)a, cast(short16)b);
}
else static if (split)
{
// split
__m128i a_lo = _mm256_extractf128_si256!0(a);
__m128i a_hi = _mm256_extractf128_si256!1(a);
__m128i b_lo = _mm256_extractf128_si256!0(b);
__m128i b_hi = _mm256_extractf128_si256!1(b);
__m128i r_lo = _mm_min_epu16(a_lo, b_lo);
__m128i r_hi = _mm_min_epu16(a_hi, b_hi);
return _mm256_set_m128i(r_hi, r_lo);
}
else static if (SIMD_COMPARISON_MASKS_32B)
{
// ??? catastrophic with GDC x86_64, good with LDC
short16 sa = cast(short16)a;
short16 sb = cast(short16)b;
short16 greater = cast(short16)(cast(ushort16)sa > cast(ushort16)sb);
return cast(__m256i)( (~greater & sa) | (greater & sb) );
}
else
static assert(false);
}
unittest
{
short16 R = cast(short16) _mm256_min_epu16(_mm256_setr_epi16(32767, 1, -4, -8, 9, 7, 0,-57, 1, 0, 0, 0, 1, 0, 0, -6),
_mm256_setr_epi16( -4, -8, 9, 7, 0,-32768, 0, 0, 0, 2, 0, 4, 2, 1, 2, -4));
short[16] correct = [32767, 1, 9, 7, 0, 7, 0, 0, 0, 0, 0, 0, 1, 0, 0, -6];
assert(R.array == correct);
}

/// Compare packed unsigned 32-bit integers in `a` and `b`, and return packed minimum values.
__m256i _mm256_min_epu32 (__m256i a, __m256i b) pure @safe
Expand Down
2 changes: 1 addition & 1 deletion source/inteli/smmintrin.d
Original file line number Diff line number Diff line change
Expand Up @@ -1427,7 +1427,7 @@ unittest
}

/// Compare packed unsigned 16-bit integers in a and b, and store packed minimum values in dst.
__m128i _mm_min_epu16 (__m128i a, __m128i b) @trusted
__m128i _mm_min_epu16 (__m128i a, __m128i b) pure @trusted
{
// PERF DMD
static if (GDC_with_SSE41)
Expand Down

0 comments on commit 27316ca

Please sign in to comment.