diff --git a/ModTek/Features/Logging/FastBuffer.cs b/ModTek/Features/Logging/FastBuffer.cs
index b0c03902..dd09f210 100644
--- a/ModTek/Features/Logging/FastBuffer.cs
+++ b/ModTek/Features/Logging/FastBuffer.cs
@@ -157,90 +157,99 @@ internal void Append(string value)
             var dstPtr = _bufferPtr + _length;
             var srcPtr = (byte*)chars + s_charLowBitsPosition;
 
-            // parallelism isn't what makes it particular fast, it's the batching that is helpful (fewer ops overall)
-            // 8 is a sweat spot, since we can do the ASCII bit mask check with an ulong
+            if (FastConvert(dstPtr, srcPtr, ref processingCount))
             {
-                const int IterSize = 8;
-                for (; processingCount >= IterSize; processingCount -= IterSize)
-                {
-                    *(dstPtr + 0) = *(srcPtr + 0 * 2);
-                    *(dstPtr + 1) = *(srcPtr + 1 * 2);
-                    *(dstPtr + 2) = *(srcPtr + 2 * 2);
-                    *(dstPtr + 3) = *(srcPtr + 3 * 2);
-                    *(dstPtr + 4) = *(srcPtr + 4 * 2);
-                    *(dstPtr + 5) = *(srcPtr + 5 * 2);
-                    *(dstPtr + 6) = *(srcPtr + 6 * 2);
-                    *(dstPtr + 7) = *(srcPtr + 7 * 2);
-
-                    const ulong NonAsciiBitmask =
-                            (1ul << (7 + 8 * 7)) +
-                            (1ul << (7 + 8 * 6)) +
-                            (1ul << (7 + 8 * 5)) +
-                            (1ul << (7 + 8 * 4)) +
-                            (1ul << (7 + 8 * 3)) +
-                            (1ul << (7 + 8 * 2)) +
-                            (1ul << (7 + 8 * 1)) +
-                            (1ul << (7 + 8 * 0));
-                    if ((*(ulong*)dstPtr & NonAsciiBitmask) != 0)
-                    {
-                        goto Utf8Fallback;
-                    }
-                    dstPtr += IterSize;
-                    srcPtr += 2*IterSize;
-                    _length += IterSize;
-                }
+                _length += value.Length;
             }
-
+            else
+            {
+                // this is 10x slower or more (GetBytes has no fast ASCII path and no SIMD in this old .NET)
+                var measurement = MTStopwatch.GetTimestamp();
+                var charIndex = value.Length - processingCount;
+                _length += charIndex;
+                const int Utf8MaxBytesPerChar = 4;
+                EnsureCapacity(_length + processingCount * Utf8MaxBytesPerChar);
+                _length += Encoding.UTF8.GetBytes(value, charIndex, processingCount, _buffer, _length);
+                UTF8FallbackStopwatch.EndMeasurement(measurement);
+            }
+        }
+    }
+    internal static readonly MTStopwatch UTF8FallbackStopwatch = new();
+    private static readonly int s_charLowBitsPosition = GetLowerBytePosition();
+    private static int GetLowerBytePosition()
+    {
+        var chars = stackalloc char[] { '1' };
+        return *(byte*)chars == 0 ? 1 : 0;
+    }
+    // if the UTF-16 input is pure 7-bit ASCII, we can copy just the low byte of each char into 1 output byte
+    // there is some parallelism achieved due to unrolling of the loop
+    // batching also has an effect due to fewer ops overall
+    // 8 is a sweet spot for unrolling and the ulong bit mask check
+    [MethodImpl(MethodImplOptions.AggressiveInlining)]
+    private static bool FastConvert(byte* dstPtr, byte* srcPtr,  ref int processingCount)
+    {
+        {
+            const int IterSize = 8;
+            for (; processingCount >= IterSize; processingCount -= IterSize)
             {
-                const int IterSize = 2;
-                for (; processingCount >= IterSize; processingCount -= IterSize)
+                *(dstPtr + 0) = *(srcPtr + 0 * 2);
+                *(dstPtr + 1) = *(srcPtr + 1 * 2);
+                *(dstPtr + 2) = *(srcPtr + 2 * 2);
+                *(dstPtr + 3) = *(srcPtr + 3 * 2);
+                *(dstPtr + 4) = *(srcPtr + 4 * 2);
+                *(dstPtr + 5) = *(srcPtr + 5 * 2);
+                *(dstPtr + 6) = *(srcPtr + 6 * 2);
+                *(dstPtr + 7) = *(srcPtr + 7 * 2);
+
+                const ulong NonAsciiBitmask =
+                        (1ul << (7 + 8 * 7)) +
+                        (1ul << (7 + 8 * 6)) +
+                        (1ul << (7 + 8 * 5)) +
+                        (1ul << (7 + 8 * 4)) +
+                        (1ul << (7 + 8 * 3)) +
+                        (1ul << (7 + 8 * 2)) +
+                        (1ul << (7 + 8 * 1)) +
+                        (1ul << (7 + 8 * 0));
+                if ((*(ulong*)dstPtr & NonAsciiBitmask) != 0)
                 {
-                    *(dstPtr + 0) = *(srcPtr + 0 * 2);
-                    *(dstPtr + 1) = *(srcPtr + 1 * 2);
-
-                    const ushort NonAsciiBitmask =
-                        (1 << (7 + 8 * 1)) +
-                        (1 << (7 + 8 * 0));
-                    if ((*(ushort*)dstPtr & NonAsciiBitmask) != 0)
-                    {
-                        goto Utf8Fallback;
-                    }
-                    dstPtr += IterSize;
-                    srcPtr += 2*IterSize;
-                    _length += IterSize;
+                    return false;
                 }
+                dstPtr += IterSize;
+                srcPtr += 2*IterSize;
             }
+        }
 
-            if (processingCount > 0)
+        {
+            const int IterSize = 2;
+            for (; processingCount >= IterSize; processingCount -= IterSize)
             {
-                const int IterSize = 1;
                 *(dstPtr + 0) = *(srcPtr + 0 * 2);
+                *(dstPtr + 1) = *(srcPtr + 1 * 2);
 
-                const byte NonAsciiBitmask = 1 << 7;
-                if ((*dstPtr & NonAsciiBitmask) != 0)
+                const ushort NonAsciiBitmask =
+                    (1 << (7 + 8 * 1)) +
+                    (1 << (7 + 8 * 0));
+                if ((*(ushort*)dstPtr & NonAsciiBitmask) != 0)
                 {
-                    goto Utf8Fallback;
+                    return false;
                 }
-                _length += IterSize;
+                dstPtr += IterSize;
+                srcPtr += 2*IterSize;
             }
+        }
 
-            return;
+        if (processingCount > 0)
+        {
+            *(dstPtr + 0) = *(srcPtr + 0 * 2);
 
-            Utf8Fallback: // this is 10x slower or more (GetBytes has no fast ASCII path and no SIMD in this old .NET)
-            var measurement = MTStopwatch.GetTimestamp();
-            var charIndex = value.Length - processingCount;
-            const int Utf8MaxBytesPerChar = 4;
-            EnsureCapacity(_length + processingCount * Utf8MaxBytesPerChar);
-            _length += Encoding.UTF8.GetBytes(value, charIndex, processingCount, _buffer, _length);
-            UTF8FallbackStopwatch.EndMeasurement(measurement);
+            const byte NonAsciiBitmask = 1 << 7;
+            if ((*dstPtr & NonAsciiBitmask) != 0)
+            {
+                return false;
+            }
         }
-    }
-    internal static readonly MTStopwatch UTF8FallbackStopwatch = new();
-    private static readonly int s_charLowBitsPosition = GetLowerBytePosition();
-    private static int GetLowerBytePosition()
-    {
-        var chars = stackalloc char[] { '1' };
-        return *(byte*)chars == 0 ? 1 : 0;
+
+        return true;
     }
 
     internal void Append(DateTime value)