From c4153456552c413531cf8c1b3e323450b87738b8 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Mon, 7 Oct 2024 23:22:27 +0200 Subject: [PATCH] gh-124502: Optimize unicode_eq() * Replace unicode_compare_eq() with unicode_eq(). * Replace _PyUnicode_EQ() calls with _PyUnicode_Equal(). * Remove _PyUnicode_EQ(). --- Include/internal/pycore_unicodeobject.h | 6 +---- Objects/setobject.c | 4 +-- Objects/stringlib/eq.h | 20 +++++++++----- Objects/unicodeobject.c | 36 +++---------------------- Python/getargs.c | 2 +- 5 files changed, 21 insertions(+), 47 deletions(-) diff --git a/Include/internal/pycore_unicodeobject.h b/Include/internal/pycore_unicodeobject.h index 20497ee93016d0..a60372f58295a9 100644 --- a/Include/internal/pycore_unicodeobject.h +++ b/Include/internal/pycore_unicodeobject.h @@ -252,11 +252,7 @@ extern Py_ssize_t _PyUnicode_InsertThousandsGrouping( extern PyObject* _PyUnicode_FormatLong(PyObject *, int, int, int); -/* Fast equality check when the inputs are known to be exact unicode types - and where the hash values are equal (i.e. a very probable match) */ -extern int _PyUnicode_EQ(PyObject *, PyObject *); - -// Equality check. +// Fast equality check when the inputs are known to be exact unicode types. // Export for '_pickle' shared extension. PyAPI_FUNC(int) _PyUnicode_Equal(PyObject *, PyObject *); diff --git a/Objects/setobject.c b/Objects/setobject.c index 8bff4d99f81b81..5ebac5f69ece33 100644 --- a/Objects/setobject.c +++ b/Objects/setobject.c @@ -96,7 +96,7 @@ set_lookkey(PySetObject *so, PyObject *key, Py_hash_t hash) return entry; if (PyUnicode_CheckExact(startkey) && PyUnicode_CheckExact(key) - && _PyUnicode_EQ(startkey, key)) + && _PyUnicode_Equal(startkey, key)) return entry; table = so->table; Py_INCREF(startkey); @@ -157,7 +157,7 @@ set_add_entry(PySetObject *so, PyObject *key, Py_hash_t hash) goto found_active; if (PyUnicode_CheckExact(startkey) && PyUnicode_CheckExact(key) - && _PyUnicode_EQ(startkey, key)) + && _PyUnicode_Equal(startkey, key)) goto found_active; table = so->table; Py_INCREF(startkey); diff --git a/Objects/stringlib/eq.h b/Objects/stringlib/eq.h index 2eac4baf5ca9ce..55e0ebf171be5b 100644 --- a/Objects/stringlib/eq.h +++ b/Objects/stringlib/eq.h @@ -4,14 +4,20 @@ * unicode_eq() is called when the hash of two unicode objects is equal. */ Py_LOCAL_INLINE(int) -unicode_eq(PyObject *a, PyObject *b) +unicode_eq(PyObject *str1, PyObject *str2) { - if (PyUnicode_GET_LENGTH(a) != PyUnicode_GET_LENGTH(b)) + Py_ssize_t len = PyUnicode_GET_LENGTH(str1); + if (PyUnicode_GET_LENGTH(str2) != len) { return 0; - if (PyUnicode_GET_LENGTH(a) == 0) - return 1; - if (PyUnicode_KIND(a) != PyUnicode_KIND(b)) + } + + int kind = PyUnicode_KIND(str1); + if (PyUnicode_KIND(str2) != kind) { return 0; - return memcmp(PyUnicode_1BYTE_DATA(a), PyUnicode_1BYTE_DATA(b), - PyUnicode_GET_LENGTH(a) * PyUnicode_KIND(a)) == 0; + } + + const void *data1 = PyUnicode_DATA(str1); + const void *data2 = PyUnicode_DATA(str2); + int cmp = memcmp(data1, data2, len * kind); + return (cmp == 0); } diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 60d4875d3b393e..a9040a07e1263e 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -261,7 +261,6 @@ _PyUnicode_InternedSize_Immortal(void) } static Py_hash_t unicode_hash(PyObject *); -static int unicode_compare_eq(PyObject *, PyObject *); static Py_uhash_t hashtable_unicode_hash(const void *key) @@ -275,7 +274,7 @@ hashtable_unicode_compare(const void *key1, const void *key2) PyObject *obj1 = (PyObject *)key1; PyObject *obj2 = (PyObject *)key2; if (obj1 != NULL && obj2 != NULL) { - return unicode_compare_eq(obj1, obj2); + return unicode_eq(obj1, obj2); } else { return obj1 == obj2; @@ -10968,27 +10967,6 @@ unicode_compare(PyObject *str1, PyObject *str2) #undef COMPARE } -static int -unicode_compare_eq(PyObject *str1, PyObject *str2) -{ - int kind; - const void *data1, *data2; - Py_ssize_t len; - int cmp; - - len = PyUnicode_GET_LENGTH(str1); - if (PyUnicode_GET_LENGTH(str2) != len) - return 0; - kind = PyUnicode_KIND(str1); - if (PyUnicode_KIND(str2) != kind) - return 0; - data1 = PyUnicode_DATA(str1); - data2 = PyUnicode_DATA(str2); - - cmp = memcmp(data1, data2, len * kind); - return (cmp == 0); -} - int _PyUnicode_Equal(PyObject *str1, PyObject *str2) { @@ -10997,7 +10975,7 @@ _PyUnicode_Equal(PyObject *str1, PyObject *str2) if (str1 == str2) { return 1; } - return unicode_compare_eq(str1, str2); + return unicode_eq(str1, str2); } @@ -11213,7 +11191,7 @@ _PyUnicode_EqualToASCIIId(PyObject *left, _Py_Identifier *right) return 0; } - return unicode_compare_eq(left, right_uni); + return unicode_eq(left, right_uni); } PyObject * @@ -11241,7 +11219,7 @@ PyUnicode_RichCompare(PyObject *left, PyObject *right, int op) } } else if (op == Py_EQ || op == Py_NE) { - result = unicode_compare_eq(left, right); + result = unicode_eq(left, right); result ^= (op == Py_NE); return PyBool_FromLong(result); } @@ -11251,12 +11229,6 @@ PyUnicode_RichCompare(PyObject *left, PyObject *right, int op) } } -int -_PyUnicode_EQ(PyObject *aa, PyObject *bb) -{ - return unicode_eq(aa, bb); -} - int PyUnicode_Contains(PyObject *str, PyObject *substr) { diff --git a/Python/getargs.c b/Python/getargs.c index 1b9b39939ae94e..a764343ea9ee3f 100644 --- a/Python/getargs.c +++ b/Python/getargs.c @@ -2064,7 +2064,7 @@ find_keyword(PyObject *kwnames, PyObject *const *kwstack, PyObject *key) for (i = 0; i < nkwargs; i++) { PyObject *kwname = PyTuple_GET_ITEM(kwnames, i); assert(PyUnicode_Check(kwname)); - if (_PyUnicode_EQ(kwname, key)) { + if (_PyUnicode_Equal(kwname, key)) { return Py_NewRef(kwstack[i]); } }