From 2524ada63fa1098b19b86c84d1eebd74de0df424 Mon Sep 17 00:00:00 2001
From: Sergey B Kirpichev
Date: Mon, 23 Dec 2024 05:58:27 +0300
Subject: [PATCH] Optimize to_binary() function and to_bytes() method

It seems that using mpn_get_str() is more efficient than the generic
mpz_export().  Some benchmarks are here:
https://github.com/aleaxit/gmpy/issues/404#issuecomment-2526603947

Not sure what else we can do for #404.  In python-gmp I've also added
the `__reduce__` dunder method.  This seems slightly better than
relying on copyreg to support pickling:

| Benchmark      | ref     | patch                 | gmp                   |
|----------------|:-------:|:---------------------:|:---------------------:|
| dumps(1<<7)    | 23.9 us | 23.8 us: 1.01x faster | 22.6 us: 1.06x faster |
| dumps(1<<38)   | 24.0 us | 23.9 us: 1.01x faster | 22.7 us: 1.06x faster |
| dumps(1<<300)  | 24.1 us | 23.8 us: 1.01x faster | 22.9 us: 1.05x faster |
| dumps(1<<3000) | 26.8 us | 25.2 us: 1.07x faster | 23.8 us: 1.13x faster |
| Geometric mean | (ref)   | 1.02x faster          | 1.07x faster          |

Can we add pickling to gmpy2 with even less overhead?  I don't know.
But if we avoid the pickle machinery, you can see a noticeable
performance boost for small numbers too:

| Benchmark      | to_binary-ref | to_binary-patch       |
|----------------|:-------------:|:---------------------:|
| dumps(1<<7)    | 323 ns        | 300 ns: 1.08x faster  |
| dumps(1<<38)   | 352 ns        | 315 ns: 1.12x faster  |
| dumps(1<<300)  | 603 ns        | 436 ns: 1.39x faster  |
| dumps(1<<3000) | 3.17 us       | 1.57 us: 2.02x faster |
| Geometric mean | (ref)         | 1.35x faster          |

The new code seems faster than int.to_bytes() starting from roughly
500-bit numbers on my system.
---
 src/gmpy2_binary.c   |  6 ++++--
 src/gmpy2_macros.h   | 26 ++++++++++++++++++++++++++
 src/gmpy2_mpz_misc.c | 12 ++++++------
 3 files changed, 36 insertions(+), 8 deletions(-)

diff --git a/src/gmpy2_binary.c b/src/gmpy2_binary.c
index d46e2d22..ce50501d 100644
--- a/src/gmpy2_binary.c
+++ b/src/gmpy2_binary.c
@@ -260,7 +260,7 @@ GMPy_MPZ_To_Binary(MPZ_Object *self)
         goto done;
     }
 
-    size = ((mpz_sizeinbase(self->z, 2) + 7) / 8) + 2;
+    size = mpz_sizeinbase(self->z, 256) + 2;
 
     TEMP_ALLOC(buffer, size);
     buffer[0] = 0x01;
@@ -268,7 +268,9 @@ GMPy_MPZ_To_Binary(MPZ_Object *self)
         buffer[1] = 0x01;
     else
         buffer[1] = 0x02;
-    mpz_export(buffer+2, NULL, -1, sizeof(char), 0, 0, self->z);
+    mpn_get_str((unsigned char *)(buffer + 2), 256,
+                self->z->_mp_d, Py_ABS(self->z->_mp_size));
+    revstr(buffer, 2, size - 1);
 
   done:
     result = PyBytes_FromStringAndSize(buffer, size);
diff --git a/src/gmpy2_macros.h b/src/gmpy2_macros.h
index e2af9149..aaf87b3e 100644
--- a/src/gmpy2_macros.h
+++ b/src/gmpy2_macros.h
@@ -726,3 +726,29 @@ GMPy_Context_##NAME(PyObject *self, PyObject *args) \
     } \
     return GMPy_Number_##NAME(PyTuple_GET_ITEM(args, 0), PyTuple_GET_ITEM(args, 1), context); \
 }
+
+/* Exchange the values of two lvalues a and b of type T.
+ *
+ * Both arguments are evaluated more than once, so they must not have
+ * side effects.  The macro deliberately has no trailing semicolon: the
+ * caller supplies it, which keeps "if (c) SWAP(T, x, y); else ..." valid.
+ */
+#define SWAP(T, a, b)        \
+    do {                     \
+        T swap_tmp_ = (a);   \
+        (a) = (b);           \
+        (b) = swap_tmp_;     \
+    } while (0)
+
+/* Reverse, in place, the bytes of s between indices l and r (both
+ * inclusive).  Does nothing when l >= r.
+ */
+static inline void
+revstr(char *s, size_t l, size_t r)
+{
+    while (l < r) {
+        SWAP(char, s[l], s[r]);
+        l++;
+        r--;
+    }
+}
diff --git a/src/gmpy2_mpz_misc.c b/src/gmpy2_mpz_misc.c
index c3597807..87393dcd 100644
--- a/src/gmpy2_mpz_misc.c
+++ b/src/gmpy2_mpz_misc.c
@@ -1986,15 +1986,15 @@ GMPy_MPZ_Method_To_Bytes(PyObject *self, PyObject *const *args,
         return NULL;
     }
     buffer = PyBytes_AS_STRING(bytes);
-    memset(buffer, 0, length);
+    memset(buffer, is_negative ? 0xFF : 0, gap);
 
-    if (is_big) {
-        mpz_export(buffer + gap, NULL, 1, sizeof(char), 0, 0, *px);
+    if ((*px)->_mp_size) {
+        mpn_get_str((unsigned char *)(buffer + gap), 256,
+                    (*px)->_mp_d, (*px)->_mp_size);
     }
-    else {
-        mpz_export(buffer, NULL, -1, sizeof(char), 0, 0, *px);
+    if (!is_big && length) {
+        revstr(buffer, 0, length - 1);
     }
-
     if (is_negative) {
         mpz_clear(tmp);
     }