From aca65b394e5f1f3ad8ccb0ae9c7201f754887ece Mon Sep 17 00:00:00 2001
From: Matthew Craven <clyring@gmail.com>
Date: Wed, 1 Jan 2025 07:38:53 -0500
Subject: [PATCH] Use only fixed-width uints in the C itoa functions (#702)

* Use only fixed-width uints in the C itoa functions

The existing logic for decimal encoding of signed ints was a bit
more complicated than necessary in its handling for negative numbers,
mostly because of negation overflowing for INT_MIN.

But the absolute value of the smallest signed Int16 does fit into
an unsigned Word16 without overflowing, allowing some simplification.

Additionally, on hardware with slow integer division instructions,
fast division-by-known-divisor is typically faster for unsigned types,
so this change may lead to a slight speed-up on such platforms.

(We could almost certainly produce slightly better code still for
these platforms by hand, for example by exploiting the fact that
after the first division the numbers are small enough that a quotient
by ten can be extracted with a single mulhi and no shift.)

* Remove a dead branch in `integerDec`

If the absolute value of the input is small enough to enter this
branch, then it fits in an Int and takes the very first branch instead.
---
 Data/ByteString/Builder/ASCII.hs      | 12 ++--
 Data/ByteString/Builder/Prim/ASCII.hs | 66 ++++++++++++-------
 Data/ByteString/Internal/Pure.hs      | 18 ------
 Data/ByteString/Internal/Type.hs      | 74 +++++++++------------
 cbits/itoa.c                          | 93 ++++-----------------------
 5 files changed, 90 insertions(+), 173 deletions(-)

diff --git a/Data/ByteString/Builder/ASCII.hs b/Data/ByteString/Builder/ASCII.hs
index 437339e09..a78098511 100644
--- a/Data/ByteString/Builder/ASCII.hs
+++ b/Data/ByteString/Builder/ASCII.hs
@@ -79,7 +79,7 @@ import           Data.ByteString.Builder.Internal (Builder)
 import qualified Data.ByteString.Builder.Prim                   as P
 import qualified Data.ByteString.Builder.Prim.Internal          as P
 import           Data.ByteString.Builder.RealFloat (floatDec, doubleDec)
-import           Data.ByteString.Internal.Type (c_int_dec_padded9, c_long_long_int_dec_padded18)
+import           Data.ByteString.Internal.Type (c_uint32_dec_padded9, c_uint64_dec_padded18)
 
 import           Foreign
 import           Data.List.NonEmpty (NonEmpty(..))
@@ -275,10 +275,8 @@ integerDec i
     | otherwise =                                   go i
   where
     go :: Integer -> Builder
-    go n | n < maxPow10 = intDec (fromInteger n)
-         | otherwise    =
-             case putH (splitf (maxPow10 * maxPow10) n) of
-               x:|xs -> intDec x `mappend` P.primMapListBounded intDecPadded xs
+    go n = case putH (splitf (maxPow10 * maxPow10) n) of
+             x:|xs -> intDec x `mappend` P.primMapListBounded intDecPadded xs
 
     splitf :: Integer -> Integer -> NonEmpty Integer
     splitf pow10 n0
@@ -311,5 +309,5 @@ integerDec i
 {-# INLINE intDecPadded #-}
 intDecPadded :: P.BoundedPrim Int
 intDecPadded = P.liftFixedToBounded $ P.caseWordSize_32_64
-    (P.fixedPrim  9 $ c_int_dec_padded9            . fromIntegral)
-    (P.fixedPrim 18 $ c_long_long_int_dec_padded18 . fromIntegral)
+    (P.fixedPrim  9 $ c_uint32_dec_padded9  . fromIntegral)
+    (P.fixedPrim 18 $ c_uint64_dec_padded18 . fromIntegral)
diff --git a/Data/ByteString/Builder/Prim/ASCII.hs b/Data/ByteString/Builder/Prim/ASCII.hs
index f73da790e..7700cbd0a 100644
--- a/Data/ByteString/Builder/Prim/ASCII.hs
+++ b/Data/ByteString/Builder/Prim/ASCII.hs
@@ -1,3 +1,5 @@
+{-# LANGUAGE TypeFamilies #-}
+
 -- | Copyright   : (c) 2010 Jasper Van der Jeugt
 --                 (c) 2010 - 2011 Simon Meier
 -- License       : BSD3-style (see LICENSE)
@@ -99,30 +101,50 @@ char7 = (\c -> fromIntegral $ ord c .&. 0x7f) >$< word8
 -- Signed integers
 ------------------
 
-{-# INLINE encodeIntDecimal #-}
-encodeIntDecimal :: Integral a => Int -> BoundedPrim a
-encodeIntDecimal bound = boundedPrim bound $ c_int_dec . fromIntegral
+type family CorrespondingUnsigned s where
+  CorrespondingUnsigned Int8  = Word8
+  CorrespondingUnsigned Int16 = Word16
+  CorrespondingUnsigned Int32 = Word32
+  CorrespondingUnsigned Int   = Word
+  CorrespondingUnsigned Int64 = Word64
+
+{-# INLINE encodeSignedViaUnsigned #-}
+encodeSignedViaUnsigned ::
+  forall s.
+  (Integral s, Num (CorrespondingUnsigned s)) =>
+  Int -> (BoundedPrim (CorrespondingUnsigned s)) -> BoundedPrim s
+encodeSignedViaUnsigned bound writeUnsigned = boundedPrim bound $ \sval ptr ->
+  if sval < 0 then do
+    poke ptr (c2w '-')
+    runB writeUnsigned (makeUnsigned (negate sval)) (ptr `plusPtr` 1)
+    -- This call to 'negate' may overflow if `sval == minBound`.
+    -- But since we insist that the unsigned type has the same width,
+    -- this causes no trouble.
+  else do
+    runB writeUnsigned (makeUnsigned sval) ptr
+  where
+    makeUnsigned = fromIntegral @s @(CorrespondingUnsigned s)
 
 -- | Decimal encoding of an 'Int8'.
 {-# INLINE int8Dec #-}
 int8Dec :: BoundedPrim Int8
-int8Dec = encodeIntDecimal 4
+int8Dec = encodeSignedViaUnsigned 4 word8Dec
 
 -- | Decimal encoding of an 'Int16'.
 {-# INLINE int16Dec #-}
 int16Dec :: BoundedPrim Int16
-int16Dec = encodeIntDecimal 6
+int16Dec = encodeSignedViaUnsigned 6 word16Dec
 
 
 -- | Decimal encoding of an 'Int32'.
 {-# INLINE int32Dec #-}
 int32Dec :: BoundedPrim Int32
-int32Dec = encodeIntDecimal 11
+int32Dec = encodeSignedViaUnsigned 11 word32Dec
 
 -- | Decimal encoding of an 'Int64'.
 {-# INLINE int64Dec #-}
 int64Dec :: BoundedPrim Int64
-int64Dec = boundedPrim 20 $ c_long_long_int_dec . fromIntegral
+int64Dec = encodeSignedViaUnsigned 20 word64Dec
 
 -- | Decimal encoding of an 'Int'.
 {-# INLINE intDec #-}
@@ -135,29 +157,29 @@ intDec = caseWordSize_32_64
 -- Unsigned integers
 --------------------
 
-{-# INLINE encodeWordDecimal #-}
-encodeWordDecimal :: Integral a => Int -> BoundedPrim a
-encodeWordDecimal bound = boundedPrim bound $ c_uint_dec . fromIntegral
+{-# INLINE encodeWord32Decimal #-}
+encodeWord32Decimal :: Integral a => Int -> BoundedPrim a
+encodeWord32Decimal bound = boundedPrim bound $ c_uint32_dec . fromIntegral
 
 -- | Decimal encoding of a 'Word8'.
 {-# INLINE word8Dec #-}
 word8Dec :: BoundedPrim Word8
-word8Dec = encodeWordDecimal 3
+word8Dec = encodeWord32Decimal 3
 
 -- | Decimal encoding of a 'Word16'.
 {-# INLINE word16Dec #-}
 word16Dec :: BoundedPrim Word16
-word16Dec = encodeWordDecimal 5
+word16Dec = encodeWord32Decimal 5
 
 -- | Decimal encoding of a 'Word32'.
 {-# INLINE word32Dec #-}
 word32Dec :: BoundedPrim Word32
-word32Dec = encodeWordDecimal 10
+word32Dec = encodeWord32Decimal 10
 
 -- | Decimal encoding of a 'Word64'.
 {-# INLINE word64Dec #-}
 word64Dec :: BoundedPrim Word64
-word64Dec = boundedPrim 20 $ c_long_long_uint_dec . fromIntegral
+word64Dec = boundedPrim 20 c_uint64_dec
 
 -- | Decimal encoding of a 'Word'.
 {-# INLINE wordDec #-}
@@ -173,30 +195,30 @@ wordDec = caseWordSize_32_64
 -- without lead
 ---------------
 
-{-# INLINE encodeWordHex #-}
-encodeWordHex :: forall a. (Storable a, Integral a) => BoundedPrim a
-encodeWordHex =
-    boundedPrim (2 * sizeOf (undefined :: a)) $ c_uint_hex  . fromIntegral
+{-# INLINE encodeWord32Hex #-}
+encodeWord32Hex :: forall a. (Storable a, Integral a) => BoundedPrim a
+encodeWord32Hex =
+    boundedPrim (2 * sizeOf @a undefined) $ c_uint32_hex . fromIntegral
 
 -- | Hexadecimal encoding of a 'Word8'.
 {-# INLINE word8Hex #-}
 word8Hex :: BoundedPrim Word8
-word8Hex = encodeWordHex
+word8Hex = encodeWord32Hex
 
 -- | Hexadecimal encoding of a 'Word16'.
 {-# INLINE word16Hex #-}
 word16Hex :: BoundedPrim Word16
-word16Hex = encodeWordHex
+word16Hex = encodeWord32Hex
 
 -- | Hexadecimal encoding of a 'Word32'.
 {-# INLINE word32Hex #-}
 word32Hex :: BoundedPrim Word32
-word32Hex = encodeWordHex
+word32Hex = encodeWord32Hex
 
 -- | Hexadecimal encoding of a 'Word64'.
 {-# INLINE word64Hex #-}
 word64Hex :: BoundedPrim Word64
-word64Hex = boundedPrim 16 $ c_long_long_uint_hex . fromIntegral
+word64Hex = boundedPrim 16 c_uint64_hex
 
 -- | Hexadecimal encoding of a 'Word'.
 {-# INLINE wordHex #-}
diff --git a/Data/ByteString/Internal/Pure.hs b/Data/ByteString/Internal/Pure.hs
index 5f7cb4703..d78e74a8d 100644
--- a/Data/ByteString/Internal/Pure.hs
+++ b/Data/ByteString/Internal/Pure.hs
@@ -19,7 +19,6 @@ module Data.ByteString.Internal.Pure
   , isValidUtf8
   , isValidUtf8BA
   -- * itoa.c
-  , encodeSignedDec
   , encodeUnsignedDec
   , encodeUnsignedDecPadded
   , encodeUnsignedHex
@@ -307,22 +306,6 @@ reverseBytesInplace !p1 !p2
     reverseBytesInplace (plusPtr p1 1) (plusPtr p2 (-1))
   | otherwise = pure ()
 
--- | Encode signed number as decimal
-encodeSignedDec :: (Eq a, Num a, Integral a) => a -> Ptr Word8 -> IO (Ptr Word8)
-{-# INLINABLE encodeSignedDec #-} -- for specialization
-encodeSignedDec !x !buf
-  | x >= 0    = encodeUnsignedDec x buf
-  | otherwise = do
-    -- we cannot negate directly as  0 - (minBound :: Int) = minBound
-    -- So we write the sign and the first digit.
-    pokeByteOff buf 0 '-'
-    let !(q,r) = quotRem x (-10)
-    putDigit buf 1 (fromIntegral (abs r))
-    case q of
-      0 -> pure (plusPtr buf 2)
-      _ -> encodeUnsignedDec' q (plusPtr buf 1) (plusPtr buf 2)
-
-
 -- | Encode positive number as decimal
 encodeUnsignedDec :: (Eq a, Num a, Integral a) => a -> Ptr Word8 -> IO (Ptr Word8)
 {-# INLINABLE encodeUnsignedDec #-} -- for specialization
@@ -331,7 +314,6 @@ encodeUnsignedDec !v !next_ptr = encodeUnsignedDec' v next_ptr next_ptr
 -- | Encode positive number as little-endian decimal, then reverse it.
 --
 -- Take two pointers (orig_ptr, next_ptr) to support already encoded digits
--- (e.g. used by encodeSignedDec to avoid overflows)
 --
 encodeUnsignedDec' :: (Eq a, Num a, Integral a) => a -> Ptr Word8 -> Ptr Word8 -> IO (Ptr Word8)
 {-# INLINABLE encodeUnsignedDec' #-} -- for specialization
diff --git a/Data/ByteString/Internal/Type.hs b/Data/ByteString/Internal/Type.hs
index 8802f3064..31e449b7d 100644
--- a/Data/ByteString/Internal/Type.hs
+++ b/Data/ByteString/Internal/Type.hs
@@ -103,14 +103,12 @@ module Data.ByteString.Internal.Type (
         c_count_ba,
         c_elem_index,
         c_sort,
-        c_int_dec,
-        c_int_dec_padded9,
-        c_uint_dec,
-        c_uint_hex,
-        c_long_long_int_dec,
-        c_long_long_int_dec_padded18,
-        c_long_long_uint_dec,
-        c_long_long_uint_hex,
+        c_uint32_dec,
+        c_uint64_dec,
+        c_uint32_dec_padded9,
+        c_uint64_dec_padded18,
+        c_uint32_hex,
+        c_uint64_hex,
         cIsValidUtf8BA,
         cIsValidUtf8BASafe,
         cIsValidUtf8,
@@ -1164,29 +1162,23 @@ foreign import ccall unsafe "static sbs_elem_index"
 
 
 
-foreign import ccall unsafe "static _hs_bytestring_uint_dec" c_uint_dec
-    :: CUInt -> Ptr Word8 -> IO (Ptr Word8)
+foreign import ccall unsafe "static _hs_bytestring_uint32_dec" c_uint32_dec
+    :: Word32 -> Ptr Word8 -> IO (Ptr Word8)
 
-foreign import ccall unsafe "static _hs_bytestring_long_long_uint_dec" c_long_long_uint_dec
-    :: CULLong -> Ptr Word8 -> IO (Ptr Word8)
+foreign import ccall unsafe "static _hs_bytestring_uint64_dec" c_uint64_dec
+    :: Word64 -> Ptr Word8 -> IO (Ptr Word8)
 
-foreign import ccall unsafe "static _hs_bytestring_int_dec" c_int_dec
-    :: CInt -> Ptr Word8 -> IO (Ptr Word8)
+foreign import ccall unsafe "static _hs_bytestring_uint32_hex" c_uint32_hex
+    :: Word32 -> Ptr Word8 -> IO (Ptr Word8)
 
-foreign import ccall unsafe "static _hs_bytestring_long_long_int_dec" c_long_long_int_dec
-    :: CLLong -> Ptr Word8 -> IO (Ptr Word8)
+foreign import ccall unsafe "static _hs_bytestring_uint64_hex" c_uint64_hex
+    :: Word64 -> Ptr Word8 -> IO (Ptr Word8)
 
-foreign import ccall unsafe "static _hs_bytestring_uint_hex" c_uint_hex
-    :: CUInt -> Ptr Word8 -> IO (Ptr Word8)
+foreign import ccall unsafe "static _hs_bytestring_uint32_dec_padded9"
+    c_uint32_dec_padded9  :: Word32 -> Ptr Word8 -> IO ()
 
-foreign import ccall unsafe "static _hs_bytestring_long_long_uint_hex" c_long_long_uint_hex
-    :: CULLong -> Ptr Word8 -> IO (Ptr Word8)
-
-foreign import ccall unsafe "static _hs_bytestring_int_dec_padded9"
-    c_int_dec_padded9 :: CInt -> Ptr Word8 -> IO ()
-
-foreign import ccall unsafe "static _hs_bytestring_long_long_int_dec_padded18"
-    c_long_long_int_dec_padded18 :: CLLong -> Ptr Word8 -> IO ()
+foreign import ccall unsafe "static _hs_bytestring_uint64_dec_padded18"
+    c_uint64_dec_padded18 :: Word64 -> Ptr Word8 -> IO ()
 
 -- We import bytestring_is_valid_utf8 both unsafe and safe. For small inputs
 -- we can use the unsafe version to get a bit more performance, but for large
@@ -1272,28 +1264,22 @@ checkedCast x =
 -- Haskell version of functions in itoa.c
 ----------------------------------------------------------------
 
-c_int_dec :: CInt -> Ptr Word8 -> IO (Ptr Word8)
-c_int_dec = Pure.encodeSignedDec
-
-c_long_long_int_dec :: CLLong -> Ptr Word8 -> IO (Ptr Word8)
-c_long_long_int_dec = Pure.encodeSignedDec
-
-c_uint_dec :: CUInt -> Ptr Word8 -> IO (Ptr Word8)
-c_uint_dec = Pure.encodeUnsignedDec
+c_uint32_dec :: Word32 -> Ptr Word8 -> IO (Ptr Word8)
+c_uint32_dec = Pure.encodeUnsignedDec
 
-c_long_long_uint_dec :: CULLong -> Ptr Word8 -> IO (Ptr Word8)
-c_long_long_uint_dec = Pure.encodeUnsignedDec
+c_uint64_dec :: Word64 -> Ptr Word8 -> IO (Ptr Word8)
+c_uint64_dec = Pure.encodeUnsignedDec
 
-c_uint_hex :: CUInt -> Ptr Word8 -> IO (Ptr Word8)
-c_uint_hex = Pure.encodeUnsignedHex
+c_uint32_hex :: Word32 -> Ptr Word8 -> IO (Ptr Word8)
+c_uint32_hex = Pure.encodeUnsignedHex
 
-c_long_long_uint_hex :: CULLong -> Ptr Word8 -> IO (Ptr Word8)
-c_long_long_uint_hex = Pure.encodeUnsignedHex
+c_uint64_hex :: Word64 -> Ptr Word8 -> IO (Ptr Word8)
+c_uint64_hex = Pure.encodeUnsignedHex
 
-c_int_dec_padded9 :: CInt -> Ptr Word8 -> IO ()
-c_int_dec_padded9 = Pure.encodeUnsignedDecPadded 9
+c_uint32_dec_padded9 :: Word32 -> Ptr Word8 -> IO ()
+c_uint32_dec_padded9 = Pure.encodeUnsignedDecPadded 9
 
-c_long_long_int_dec_padded18 :: CLLong -> Ptr Word8 -> IO ()
-c_long_long_int_dec_padded18 = Pure.encodeUnsignedDecPadded 18
+c_uint64_dec_padded18 :: Word64 -> Ptr Word8 -> IO ()
+c_uint64_dec_padded18 = Pure.encodeUnsignedDecPadded 18
 
 #endif
diff --git a/cbits/itoa.c b/cbits/itoa.c
index f69fa6b66..f3bc980a3 100644
--- a/cbits/itoa.c
+++ b/cbits/itoa.c
@@ -5,89 +5,18 @@
 ///////////////////////////////////////////////////////////////
 
 #include <stdio.h>
+#include <stdint.h>
 
 // Decimal Encoding
 ///////////////////
 
 static const char* digits = "0123456789abcdef";
 
-// signed integers
-char* _hs_bytestring_int_dec (int x, char* buf)
-{
-    char c, *ptr = buf, *next_free;
-    int x_tmp;
-
-    // we cannot negate directly as  0 - (minBound :: Int) = minBound
-    if (x < 0) {
-        *ptr++ = '-';
-        buf++;
-        x_tmp = x;
-        x /= 10;
-        *ptr++ = digits[x * 10 - x_tmp];
-        if (x == 0)
-          return ptr;
-        else
-          x = -x;
-    }
-
-    // encode positive number as little-endian decimal
-    do {
-        x_tmp = x;
-        x /= 10;
-        *ptr++ = digits[x_tmp - x * 10];
-    } while ( x );
-
-    // reverse written digits
-    next_free = ptr--;
-    while (buf < ptr) {
-        c       = *ptr;
-        *ptr--  = *buf;
-        *buf++  = c;
-    }
-    return next_free;
-}
-
-// signed long long ints (64 bit integers)
-char* _hs_bytestring_long_long_int_dec (long long int x, char* buf)
-{
-    char c, *ptr = buf, *next_free;
-    long long int x_tmp;
-
-    // we cannot negate directly as  0 - (minBound :: Int) = minBound
-    if (x < 0) {
-        *ptr++ = '-';
-        buf++;
-        x_tmp = x;
-        x /= 10;
-        *ptr++ = digits[x * 10 - x_tmp];
-        if (x == 0)
-          return ptr;
-        else
-          x = -x;
-    }
-
-    // encode positive number as little-endian decimal
-    do {
-        x_tmp = x;
-        x /= 10;
-        *ptr++ = digits[x_tmp - x * 10];
-    } while ( x );
-
-    // reverse written digits
-    next_free = ptr--;
-    while (buf < ptr) {
-        c       = *ptr;
-        *ptr--  = *buf;
-        *buf++  = c;
-    }
-    return next_free;
-}
-
 // unsigned integers
-char* _hs_bytestring_uint_dec (unsigned int x, char* buf)
+char* _hs_bytestring_uint32_dec (uint32_t x, char* buf)
 {
     char c, *ptr = buf, *next_free;
-    unsigned int x_tmp;
+    uint32_t x_tmp;
 
     // encode positive number as little-endian decimal
     do {
@@ -107,10 +36,10 @@ char* _hs_bytestring_uint_dec (unsigned int x, char* buf)
 }
 
 // unsigned long ints
-char* _hs_bytestring_long_long_uint_dec (long long unsigned int x, char* buf)
+char* _hs_bytestring_uint64_dec (uint64_t x, char* buf)
 {
     char c, *ptr = buf, *next_free;
-    long long unsigned int x_tmp;
+    uint64_t x_tmp;
 
     // encode positive number as little-endian decimal
     do {
@@ -136,11 +65,11 @@ char* _hs_bytestring_long_long_uint_dec (long long unsigned int x, char* buf)
 // Padded (9 digits), decimal, positive int:
 // We will use it with numbers that fit in 31 bits; i.e., numbers smaller than
 // 10^9, as "31 * log 2 / log 10 = 9.33"
-void _hs_bytestring_int_dec_padded9 (int x, char* buf)
+void _hs_bytestring_uint32_dec_padded9 (uint32_t x, char* buf)
 {
     const int max_width_int32_dec = 9;
     char* ptr = buf + max_width_int32_dec;
-    int x_tmp;
+    uint32_t x_tmp;
 
     // encode positive number as little-endian decimal
     do {
@@ -156,11 +85,11 @@ void _hs_bytestring_int_dec_padded9 (int x, char* buf)
 // Padded (19 digits), decimal, positive long long int:
 // We will use it with numbers that fit in 63 bits; i.e., numbers smaller than
 // 10^18, as "63 * log 2 / log 10 = 18.96"
-void _hs_bytestring_long_long_int_dec_padded18 (long long int x, char* buf)
+void _hs_bytestring_uint64_dec_padded18 (uint64_t x, char* buf)
 {
     const int max_width_int64_dec = 18;
     char* ptr = buf + max_width_int64_dec;
-    long long int x_tmp;
+    uint64_t x_tmp;
 
     // encode positive number as little-endian decimal
     do {
@@ -179,7 +108,7 @@ void _hs_bytestring_long_long_int_dec_padded18 (long long int x, char* buf)
 ///////////////////////
 
 // unsigned ints (32 bit words)
-char* _hs_bytestring_uint_hex (unsigned int x, char* buf) {
+char* _hs_bytestring_uint32_hex (uint32_t x, char* buf) {
     // write hex representation in reverse order
     char c, *ptr = buf, *next_free;
     do {
@@ -197,7 +126,7 @@ char* _hs_bytestring_uint_hex (unsigned int x, char* buf) {
 };
 
 // unsigned long ints (64 bit words)
-char* _hs_bytestring_long_long_uint_hex (long long unsigned int x, char* buf) {
+char* _hs_bytestring_uint64_hex (uint64_t x, char* buf) {
     // write hex representation in reverse order
     char c, *ptr = buf, *next_free;
     do {