Skip to content

Commit

Permalink
gh-124502: Optimize unicode_eq()
Browse files Browse the repository at this point in the history
* Replace unicode_compare_eq() with unicode_eq().
* Replace _PyUnicode_EQ() calls with _PyUnicode_Equal().
* Remove _PyUnicode_EQ().
  • Loading branch information
vstinner committed Oct 7, 2024
1 parent a7f0727 commit c415345
Show file tree
Hide file tree
Showing 5 changed files with 21 additions and 47 deletions.
6 changes: 1 addition & 5 deletions Include/internal/pycore_unicodeobject.h
Original file line number Diff line number Diff line change
Expand Up @@ -252,11 +252,7 @@ extern Py_ssize_t _PyUnicode_InsertThousandsGrouping(

extern PyObject* _PyUnicode_FormatLong(PyObject *, int, int, int);

/* Fast equality check when the inputs are known to be exact unicode types
and where the hash values are equal (i.e. a very probable match) */
extern int _PyUnicode_EQ(PyObject *, PyObject *);

// Equality check.
// Fast equality check when the inputs are known to be exact unicode types.
// Export for '_pickle' shared extension.
PyAPI_FUNC(int) _PyUnicode_Equal(PyObject *, PyObject *);

Expand Down
4 changes: 2 additions & 2 deletions Objects/setobject.c
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,7 @@ set_lookkey(PySetObject *so, PyObject *key, Py_hash_t hash)
return entry;
if (PyUnicode_CheckExact(startkey)
&& PyUnicode_CheckExact(key)
&& _PyUnicode_EQ(startkey, key))
&& _PyUnicode_Equal(startkey, key))
return entry;
table = so->table;
Py_INCREF(startkey);
Expand Down Expand Up @@ -157,7 +157,7 @@ set_add_entry(PySetObject *so, PyObject *key, Py_hash_t hash)
goto found_active;
if (PyUnicode_CheckExact(startkey)
&& PyUnicode_CheckExact(key)
&& _PyUnicode_EQ(startkey, key))
&& _PyUnicode_Equal(startkey, key))
goto found_active;
table = so->table;
Py_INCREF(startkey);
Expand Down
20 changes: 13 additions & 7 deletions Objects/stringlib/eq.h
Original file line number Diff line number Diff line change
Expand Up @@ -4,14 +4,20 @@
* unicode_eq() is called when the hash of two unicode objects is equal.
*/
Py_LOCAL_INLINE(int)
unicode_eq(PyObject *a, PyObject *b)
unicode_eq(PyObject *str1, PyObject *str2)
{
if (PyUnicode_GET_LENGTH(a) != PyUnicode_GET_LENGTH(b))
Py_ssize_t len = PyUnicode_GET_LENGTH(str1);
if (PyUnicode_GET_LENGTH(str2) != len) {
return 0;
if (PyUnicode_GET_LENGTH(a) == 0)
return 1;
if (PyUnicode_KIND(a) != PyUnicode_KIND(b))
}

int kind = PyUnicode_KIND(str1);
if (PyUnicode_KIND(str2) != kind) {
return 0;
return memcmp(PyUnicode_1BYTE_DATA(a), PyUnicode_1BYTE_DATA(b),
PyUnicode_GET_LENGTH(a) * PyUnicode_KIND(a)) == 0;
}

const void *data1 = PyUnicode_DATA(str1);
const void *data2 = PyUnicode_DATA(str2);
int cmp = memcmp(data1, data2, len * kind);
return (cmp == 0);
}
36 changes: 4 additions & 32 deletions Objects/unicodeobject.c
Original file line number Diff line number Diff line change
Expand Up @@ -261,7 +261,6 @@ _PyUnicode_InternedSize_Immortal(void)
}

static Py_hash_t unicode_hash(PyObject *);
static int unicode_compare_eq(PyObject *, PyObject *);

static Py_uhash_t
hashtable_unicode_hash(const void *key)
Expand All @@ -275,7 +274,7 @@ hashtable_unicode_compare(const void *key1, const void *key2)
PyObject *obj1 = (PyObject *)key1;
PyObject *obj2 = (PyObject *)key2;
if (obj1 != NULL && obj2 != NULL) {
return unicode_compare_eq(obj1, obj2);
return unicode_eq(obj1, obj2);
}
else {
return obj1 == obj2;
Expand Down Expand Up @@ -10968,27 +10967,6 @@ unicode_compare(PyObject *str1, PyObject *str2)
#undef COMPARE
}

/* Compare two unicode objects for equality.

   Returns 1 when str1 and str2 have the same length, the same kind and
   byte-identical character data; returns 0 otherwise.

   Neither argument is type-checked or NULL-checked in this function, and
   there is no identity (str1 == str2) shortcut here. */
static int
unicode_compare_eq(PyObject *str1, PyObject *str2)
{
int kind;
const void *data1, *data2;
Py_ssize_t len;
int cmp;

/* Cheapest rejection first: different lengths can never be equal. */
len = PyUnicode_GET_LENGTH(str1);
if (PyUnicode_GET_LENGTH(str2) != len)
return 0;
/* The raw buffers are only compared when both strings use the same kind;
   a kind mismatch is reported as "not equal". */
kind = PyUnicode_KIND(str1);
if (PyUnicode_KIND(str2) != kind)
return 0;
data1 = PyUnicode_DATA(str1);
data2 = PyUnicode_DATA(str2);

/* len * kind is used as the number of bytes handed to memcmp(), i.e. the
   kind value is treated as the per-character size in bytes. */
cmp = memcmp(data1, data2, len * kind);
return (cmp == 0);
}

int
_PyUnicode_Equal(PyObject *str1, PyObject *str2)
{
Expand All @@ -10997,7 +10975,7 @@ _PyUnicode_Equal(PyObject *str1, PyObject *str2)
if (str1 == str2) {
return 1;
}
return unicode_compare_eq(str1, str2);
return unicode_eq(str1, str2);
}


Expand Down Expand Up @@ -11213,7 +11191,7 @@ _PyUnicode_EqualToASCIIId(PyObject *left, _Py_Identifier *right)
return 0;
}

return unicode_compare_eq(left, right_uni);
return unicode_eq(left, right_uni);
}

PyObject *
Expand Down Expand Up @@ -11241,7 +11219,7 @@ PyUnicode_RichCompare(PyObject *left, PyObject *right, int op)
}
}
else if (op == Py_EQ || op == Py_NE) {
result = unicode_compare_eq(left, right);
result = unicode_eq(left, right);
result ^= (op == Py_NE);
return PyBool_FromLong(result);
}
Expand All @@ -11251,12 +11229,6 @@ PyUnicode_RichCompare(PyObject *left, PyObject *right, int op)
}
}

/* Thin non-inline wrapper: forwards both arguments to unicode_eq() and
   returns its result unchanged. No checks are performed here.
   NOTE(review): the header comment for this function says the inputs are
   expected to be exact unicode objects with equal hashes -- confirm that
   callers honour this. */
int
_PyUnicode_EQ(PyObject *aa, PyObject *bb)
{
return unicode_eq(aa, bb);
}

int
PyUnicode_Contains(PyObject *str, PyObject *substr)
{
Expand Down
2 changes: 1 addition & 1 deletion Python/getargs.c
Original file line number Diff line number Diff line change
Expand Up @@ -2064,7 +2064,7 @@ find_keyword(PyObject *kwnames, PyObject *const *kwstack, PyObject *key)
for (i = 0; i < nkwargs; i++) {
PyObject *kwname = PyTuple_GET_ITEM(kwnames, i);
assert(PyUnicode_Check(kwname));
if (_PyUnicode_EQ(kwname, key)) {
if (_PyUnicode_Equal(kwname, key)) {
return Py_NewRef(kwstack[i]);
}
}
Expand Down

0 comments on commit c415345

Please sign in to comment.