Skip to content

Commit

Permalink
Add back in the custom fast memcpy.
Browse files Browse the repository at this point in the history
  • Loading branch information
CptMoore committed Jan 19, 2025
1 parent 2070bf6 commit fe03011
Show file tree
Hide file tree
Showing 3 changed files with 164 additions and 2 deletions.
2 changes: 1 addition & 1 deletion ModTek/Features/Logging/AppenderFile.cs
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ internal AppenderFile(string path, AppenderSettings settings)
$"""
ModTek v{GitVersionInformation.InformationalVersion} ({GitVersionInformation.CommitDate}) ; HarmonyX {typeof(Harmony).Assembly.GetName().Version}
{Environment.OSVersion} ; BattleTech {Application.version} ; Unity {Application.unityVersion} ; CLR {Environment.Version} ; {System.Runtime.InteropServices.RuntimeInformation.FrameworkDescription}"
{dateTime.ToLocalTime().ToString("o", CultureInfo.InvariantCulture)} ; Startup {unityStartupTime.ToString(null, CultureInfo.InvariantCulture)} ; Ticks {stopwatchTimestamp} ; Timestamp Overhead {MTStopwatch.OverheadPerTimestampInNanoseconds}ns
{dateTime.ToLocalTime().ToString("o", CultureInfo.InvariantCulture)} ; Startup {unityStartupTime.ToString(null, CultureInfo.InvariantCulture)} ; Ticks {stopwatchTimestamp} ; Timestamp Overhead {MTStopwatch.OverheadPerTimestampInNanoseconds}ns ; FastMemCpy Threshold {FastMemCpy.Threshold}
{new string('-', 80)}
"""
Expand Down
2 changes: 1 addition & 1 deletion ModTek/Features/Logging/FastBuffer.cs
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ internal void Append(byte[] value)
{
var length = value.Length;
var position = GetPositionAndIncrementLength(length);
Buffer.BlockCopy(value, 0, _buffer, position, length);
FastMemCpy.BlockCopy(value, 0, _buffer, position, length);
}

internal unsafe void Append(int value)
Expand Down
162 changes: 162 additions & 0 deletions ModTek/Features/Logging/FastMemCpy.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,162 @@
using System;
using System.Runtime.CompilerServices;
using ModTek.Util.Stopwatch;

namespace ModTek.Features.Logging;

// Copies byte-array ranges with a hand-rolled wide-word loop when the range is small,
// and hands everything else to Buffer.BlockCopy. The switch-over size (Threshold) is
// measured once, when the type is initialized.
// effectively used only for byte arrays sized <= 14 (112bit)
internal static class FastMemCpy
{
    // Copies `length` bytes from src[srcOffset..] to dst[dstOffset..].
    //
    // The custom loop is used only when the request is small (length <= Threshold) AND the
    // arguments are provably safe for raw pointer access. Anything else - null arrays,
    // out-of-range offsets, or src and dst being the same array (possible overlap) - is
    // forwarded to Buffer.BlockCopy, which validates its arguments, throws the usual
    // exceptions and handles overlapping ranges. A non-positive length copies nothing.
    internal static unsafe void BlockCopy(byte[] src, int srcOffset, byte[] dst, int dstOffset, int length)
    {
        // Nothing to copy. Returning here keeps non-positive lengths a silent no-op.
        if (length <= 0)
        {
            return;
        }

        // original note on the threshold: 700-1300 bytes are typical
        if (length > Threshold || !CanCopyUnchecked(src, srcOffset, dst, dstOffset, length))
        {
            Buffer.BlockCopy(src, srcOffset, dst, dstOffset, length);
            return;
        }

        fixed (byte* dstPtr = dst)
        {
            fixed (byte* srcPtr = src)
            {
                Memcpy512(dstPtr + dstOffset, srcPtr + srcOffset, length);
            }
        }
    }

    // True when both ranges lie fully inside two distinct, non-null arrays, i.e. when the
    // unchecked pointer copy cannot read or write outside the arrays and cannot overlap.
    // Expects length > 0; with offsets >= 0, "offset <= Length - length" cannot overflow.
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    private static bool CanCopyUnchecked(byte[] src, int srcOffset, byte[] dst, int dstOffset, int length)
    {
        return src != null
            && dst != null
            && !ReferenceEquals(src, dst)
            && srcOffset >= 0
            && dstOffset >= 0
            && srcOffset <= src.Length - length
            && dstOffset <= dst.Length - length;
    }

    // from Buffer.memcpy* and optimized to use wider types like 128 and 256 bit
    // most gains from 128, some for 256, and almost none for 512. 1024 is negative.
    // faster than Buffer.BlockCopy but only until call overhead to extern method is overcome
    //
    // Copies `size` bytes front to back, using the widest chunk that still fits and then
    // stepping down: 64, 32, 16, 8 and 2 bytes, plus one trailing byte for odd sizes.
    // No bounds checks and no overlap handling; the caller must guarantee both ranges are
    // valid. A size <= 0 copies nothing.
    // NOTE(review): the wide loads/stores are not aligned - presumably fine on the
    // targets this runs on; confirm before using on other architectures.
    private static unsafe void Memcpy512(byte* dest, byte* src, int size)
    {
        {
            const int BatchSize = My512Bit.Size;
            for (; size >= BatchSize; size -= BatchSize)
            {
                *(My512Bit*)dest = *(My512Bit*)src;
                dest += BatchSize;
                src += BatchSize;
            }
        }
        {
            const int BatchSize = My256Bit.Size;
            for (; size >= BatchSize; size -= BatchSize)
            {
                *(My256Bit*)dest = *(My256Bit*)src;
                dest += BatchSize;
                src += BatchSize;
            }
        }
        {
            const int BatchSize = My128Bit.Size;
            for (; size >= BatchSize; size -= BatchSize)
            {
                *(My128Bit*)dest = *(My128Bit*)src;
                dest += BatchSize;
                src += BatchSize;
            }
        }
        {
            const int BatchSize = sizeof(ulong);
            for (; size >= BatchSize; size -= BatchSize)
            {
                *(ulong*)dest = *(ulong*)src;
                dest += BatchSize;
                src += BatchSize;
            }
        }
        {
            // At most 7 bytes remain here, so this loop runs at most three times.
            const int BatchSize = sizeof(ushort);
            for (; size >= BatchSize; size -= BatchSize)
            {
                *(ushort*)dest = *(ushort*)src;
                dest += BatchSize;
                src += BatchSize;
            }
        }
        // Odd sizes leave exactly one byte.
        if (size > 0)
        {
            *dest = *src;
        }
    }

    // 16-byte value used to move 128 bits with a single struct assignment.
    private struct My128Bit
    {
        internal const int Size = 2 * sizeof(ulong);
        internal ulong _00;
        internal ulong _01;
    }

    // 32-byte value built from two 128-bit halves.
    private struct My256Bit
    {
        internal const int Size = 2 * My128Bit.Size;
        internal My128Bit _00;
        internal My128Bit _01;
    }

    // 64-byte value built from two 256-bit halves.
    private struct My512Bit
    {
        internal const int Size = 2 * My256Bit.Size;
        internal My256Bit _00;
        internal My256Bit _01;
    }

    // Largest length (in bytes) for which BlockCopy uses the custom loop.
    // Determined by a short benchmark the first time this type is used.
    internal static readonly int Threshold = FindThreshold();

    // Times Buffer.BlockCopy against the custom copy path for sizes MinSize, MinSize + StepSize,
    // ... (below MaxSize) and returns the last size before the custom path first becomes
    // slower, never less than MinSize. Returns MaxSize when the custom path never loses.
    private static unsafe int FindThreshold()
    {
        const int MaxSize = 4 * 1024;
        const int StepSize = 32;
        const int MinSize = 16;
        const int Steps = (MaxSize - MinSize) / StepSize;
        const int TestRunsPerSize = 100;

        var byteBufferTicks = new long[Steps];
        var memCpyTicks = new long[Steps];
        var srcA = new byte[MaxSize];
        var srcB = new byte[MaxSize];
        for (var i = 0; i < MaxSize; i++)
        {
            srcA[i] = (byte)i;
            srcB[i] = (byte)i;
        }
        var dstA = new byte[MaxSize];
        var dstB = new byte[MaxSize];

        // Warm-up: run both paths once so one-time costs (JIT compilation, first touch of
        // the buffers) are not charged to the first timed sample. MaxSize - 1 is odd and
        // passes through every stage of Memcpy512.
        Buffer.BlockCopy(srcA, 0, dstA, 0, MaxSize - 1);
        if (CanCopyUnchecked(srcB, 0, dstB, 0, MaxSize - 1))
        {
            fixed (byte* dst = dstB)
            {
                fixed (byte* src = srcB)
                {
                    Memcpy512(dst, src, MaxSize - 1);
                }
            }
        }

        var benchStart = MTStopwatch.GetTimestamp();
        do
        {
            for (var step = 0; step < Steps; step++)
            {
                var size = step * StepSize + MinSize;
                {
                    var start = MTStopwatch.GetTimestamp();
                    for (var run = 0; run < TestRunsPerSize; run++)
                    {
                        Buffer.BlockCopy(srcA, 0, dstA, 0, size);
                    }
                    // Accumulate over all passes so a single noisy pass does not decide the result.
                    byteBufferTicks[step] += MTStopwatch.GetTimestamp() - start;
                }
                {
                    var start = MTStopwatch.GetTimestamp();
                    for (var run = 0; run < TestRunsPerSize; run++)
                    {
                        // Same per-call work as the fast path in BlockCopy: validate, pin, copy.
                        if (CanCopyUnchecked(srcB, 0, dstB, 0, size))
                        {
                            fixed (byte* dst = dstB)
                            {
                                fixed (byte* src = srcB)
                                {
                                    Memcpy512(dst, src, size);
                                }
                            }
                        }
                    }
                    memCpyTicks[step] += MTStopwatch.GetTimestamp() - start;
                }
            }
            // Repeat full passes until at least one millisecond has been spent measuring.
        } while (MTStopwatch.TimeSpanFromTicks(MTStopwatch.GetTimestamp() - benchStart).TotalMilliseconds < 1);

        for (var step = 0; step < Steps; step++)
        {
            if (memCpyTicks[step] > byteBufferTicks[step])
            {
                // First size where the custom path lost: use the previous size, clamped to MinSize.
                return Math.Max((step - 1) * StepSize + MinSize, MinSize);
            }
        }
        return MaxSize;
    }
}

0 comments on commit fe03011

Please sign in to comment.