Skip to content

Commit

Permalink
Updated comments for FastBuffer Memcpy256
Browse files Browse the repository at this point in the history
  • Loading branch information
CptMoore committed Jan 16, 2025
1 parent dc9097e commit de80b4a
Showing 1 changed file with 7 additions and 9 deletions.
16 changes: 7 additions & 9 deletions ModTek/Features/Logging/FastBuffer.cs
Original file line number Diff line number Diff line change
Expand Up @@ -318,10 +318,10 @@ private void EnlargeCapacity(int targetLength)
}

// adapted from Buffer.memcpy* and optimized to use wider types such as 128-bit and 256-bit
// the JIT can emit xmm (128-bit) ops, and the CPU seems able to optimize 2x xmm (2x 128-bit) ops further
// most of the gain comes from 128-bit, some from 256-bit, and almost none from 512-bit (therefore left out)
internal static void Memcpy256(byte* dest, byte* src, int size)
{
{ // 25% faster than if using 2x128 on AMD Zen4 hardware
{
const int BatchSize = My256Bit.Size;
for (; size >= BatchSize; size -= BatchSize)
{
Expand All @@ -330,7 +330,7 @@ internal static void Memcpy256(byte* dest, byte* src, int size)
src += BatchSize;
}
}
{ // 100% faster than if using 2x64 on xmm hardware
{
const int BatchSize = My128Bit.Size;
for (; size >= BatchSize; size -= BatchSize)
{
Expand Down Expand Up @@ -363,17 +363,15 @@ internal static void Memcpy256(byte* dest, byte* src, int size)
}
}

// the JIT can optimize this to 2x xmm (128-bit) ops,
// and 2x 128-bit ops together are 25% faster than looping over single 128-bit ops
private struct My128Bit
{
internal const int Size = 128/8;
internal long _00;
internal long _01;
internal const int Size = 2 * sizeof(ulong);
internal ulong _00;
internal ulong _01;
}
private struct My256Bit
{
internal const int Size = 256/8;
internal const int Size = 2 * My128Bit.Size;
internal My128Bit _00;
internal My128Bit _01;
}
Expand Down

0 comments on commit de80b4a

Please sign in to comment.