From 7151d95817516d96509fdfeb3faaa92f273df47f Mon Sep 17 00:00:00 2001
From: stackotter
Date: Fri, 13 Sep 2024 14:45:15 +1000
Subject: [PATCH] Manually specialize parts of CRC32 implementation to speed them up in debug mode

---
 Sources/CRC/CRC32.swift | 76 ++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 75 insertions(+), 1 deletion(-)

diff --git a/Sources/CRC/CRC32.swift b/Sources/CRC/CRC32.swift
index b207fb5..8fa7c2c 100644
--- a/Sources/CRC/CRC32.swift
+++ b/Sources/CRC/CRC32.swift
@@ -40,6 +40,18 @@ struct CRC32:Hashable, Sendable
         return checksum
     }
 
+    /// Hashes `message` into a copy of this checksum and returns the copy.
+    ///
+    /// Overload specialized by hand for `[UInt8]`: much faster than the generic version
+    /// in debug builds, and exactly the same in release builds.
+    @inlinable public
+    func updated(with message:borrowing [UInt8]) -> Self
+    {
+        var copy:Self = self
+        copy.update(with: message)
+        return copy
+    }
+
     /// Updates the checksum by hashing the provided message into the existing checksum.
@inlinable public mutating func update(with message:borrowing some Sequence) @@ -47,9 +59,71 @@ struct CRC32:Hashable, Sendable self.checksum = ~message.reduce(~self.checksum) { (state:UInt32, byte:UInt8) in - Self.table[Int.init(UInt8.init(truncatingIfNeeded: state) ^ byte)] ^ state >> 8 + let indexByte:UInt8 = UInt8.init(truncatingIfNeeded: state) ^ byte + let index:Int + #if DEBUG + // in debug mode these hacky integer conversions make this function + // around 35% faster + if MemoryLayout.stride == 8 { + let tuple:(UInt8, UInt8, UInt8, UInt8, UInt8, UInt8, UInt8, UInt8) = + ( + indexByte, 0, 0, 0, 0, 0, 0, 0 + ) + index = unsafeBitCast(tuple, to: Int.self) + } else { + let tuple:(UInt8, UInt8, UInt8, UInt8) = + ( + indexByte, 0, 0, 0 + ) + index = unsafeBitCast(tuple, to: Int.self) + } + #else + index = Int.init(indexByte) + #endif + return Self.table[index] ^ state >> 8 } } + + /// Updates the checksum by hashing the provided message into the existing checksum. + /// + /// This manually specialized implementation is much faster in debug mode than the + /// generic implementation, but exactly the same in release mode. 
+    @inlinable public mutating
+    func update(with message:borrowing [UInt8])
+    {
+        #if DEBUG
+        // in debug mode this manually specialized version of `reduce` is about 2.8x faster
+        self.checksum = ~self.checksum
+        var i:Int = 0
+        while i < message.count
+        {
+            let state:UInt32 = self.checksum
+            let byte:UInt8 = message[i]
+            let indexByte:UInt8 = UInt8.init(truncatingIfNeeded: state) ^ byte
+            let index:Int
+            // in debug mode this bit cast is around 35% faster than `Int.init(_:)`; the tuple
+            // is laid out little-endian, so decode it as such to stay correct on big-endian
+            if MemoryLayout<Int>.stride == 8 {
+                let tuple:(UInt8, UInt8, UInt8, UInt8, UInt8, UInt8, UInt8, UInt8) =
+                (
+                    indexByte, 0, 0, 0, 0, 0, 0, 0
+                )
+                index = Int.init(littleEndian: unsafeBitCast(tuple, to: Int.self))
+            } else {
+                let tuple:(UInt8, UInt8, UInt8, UInt8) =
+                (
+                    indexByte, 0, 0, 0
+                )
+                index = Int.init(littleEndian: unsafeBitCast(tuple, to: Int.self))
+            }
+            self.checksum = Self.table[index] ^ state >> 8
+            i += 1
+        }
+        self.checksum = ~self.checksum
+        #else
+        self.update(with: message[...])
+        #endif
+    }
 }
 extension CRC32:ExpressibleByIntegerLiteral
 {