
Fix race condition involving wrapper lookup #865

Merged: 3 commits, Jan 20, 2025
66 changes: 62 additions & 4 deletions src/nb_type.cpp
@@ -40,6 +40,60 @@ static PyObject **nb_weaklist_ptr(PyObject *self) {
    return weaklistoffset ? (PyObject **) ((uint8_t *) self + weaklistoffset) : nullptr;
}

static void nb_enable_try_inc_ref(PyObject *obj) noexcept {
#if 0 && defined(Py_GIL_DISABLED) && PY_VERSION_HEX >= 0x030E00A5
> wjakob (Owner): Is the 0 intentional here?

> Contributor Author: The APIs haven't been added to CPython yet, but I wanted
> to get this PR ready while the issue was still fresh in my mind. We can either:
>
>   1. Delete this code path for now
>   2. Wait until the APIs are added
>   3. Or anything else you prefer

> wjakob (Owner), Jan 18, 2025: I think it's OK to commit the code as is; we can
> remove the #if 0 part once the change to CPython goes in.
>
> Do I understand correctly that the version below now has an optimization
> (specific to object construction) that won't be in the official API?

> Contributor Author: I intend to add a similar optimization in the
> implementation of the CPython API.

    PyUnstable_EnableTryIncRef(obj);
#elif defined(Py_GIL_DISABLED)
    // Since this is called during object construction, we know that we have
    // the only reference to the object and can use a non-atomic write.
    assert(obj->ob_ref_shared == 0);
    obj->ob_ref_shared = _Py_REF_MAYBE_WEAKREF;
#endif
}

static bool nb_try_inc_ref(PyObject *obj) noexcept {
#if 0 && defined(Py_GIL_DISABLED) && PY_VERSION_HEX >= 0x030E00A5
    return PyUnstable_TryIncRef(obj);
#elif defined(Py_GIL_DISABLED)
    // See https://github.com/python/cpython/blob/d05140f9f77d7dfc753dd1e5ac3a5962aaa03eff/Include/internal/pycore_object.h#L761
    uint32_t local = _Py_atomic_load_uint32_relaxed(&obj->ob_ref_local);
    local += 1;
    if (local == 0) {
        // immortal
        return true;
    }
    if (_Py_IsOwnedByCurrentThread(obj)) {
        _Py_atomic_store_uint32_relaxed(&obj->ob_ref_local, local);
#ifdef Py_REF_DEBUG
        _Py_INCREF_IncRefTotal();
#endif
        return true;
    }
    Py_ssize_t shared = _Py_atomic_load_ssize_relaxed(&obj->ob_ref_shared);
    for (;;) {
        // If the shared refcount is zero and the object is either merged
        // or may not have weak references, then we cannot incref it.
        if (shared == 0 || shared == _Py_REF_MERGED) {
            return false;
        }

        if (_Py_atomic_compare_exchange_ssize(
                &obj->ob_ref_shared, &shared, shared + (1 << _Py_REF_SHARED_SHIFT))) {
#ifdef Py_REF_DEBUG
            _Py_INCREF_IncRefTotal();
#endif
            return true;
        }
    }
#else
    if (Py_REFCNT(obj) > 0) {
        Py_INCREF(obj);
        return true;
    }
    return false;
#endif
}
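
The essence of nb_try_inc_ref is a "try-incref" on a possibly-dying object: the
reference count may only move from a nonzero value to value + 1, never from zero
back up. A minimal sketch of the same pattern, using a plain std::atomic counter
instead of CPython's biased (local/shared) refcounting; RefCounted and
try_inc_ref are invented names for illustration:

    #include <atomic>

    struct RefCounted {
        std::atomic<long> refcount{1};

        // Returns true and increments only while the count is still nonzero;
        // a count of zero means destruction has begun and must not be undone.
        bool try_inc_ref() noexcept {
            long count = refcount.load(std::memory_order_relaxed);
            while (count > 0) {
                // On failure, compare_exchange_weak refreshes 'count', and the
                // loop re-checks the > 0 condition before retrying.
                if (refcount.compare_exchange_weak(count, count + 1,
                                                   std::memory_order_acquire,
                                                   std::memory_order_relaxed))
                    return true;
            }
            return false; // being destroyed; the caller makes a new wrapper
        }
    };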

static PyGetSetDef inst_getset[] = {
    { "__dict__", PyObject_GenericGetDict, PyObject_GenericSetDict, nullptr, nullptr },
    { nullptr, nullptr, nullptr, nullptr, nullptr }

@@ -98,6 +152,7 @@ PyObject *inst_new_int(PyTypeObject *tp, PyObject * /* args */,
    self->clear_keep_alive = 0;
    self->intrusive = intrusive;
    self->unused = 0;
    nb_enable_try_inc_ref((PyObject *)self);

    // Update hash table that maps from C++ to Python instance
    nb_shard &shard = internals->shard((void *) payload);

@@ -163,6 +218,7 @@ PyObject *inst_new_ext(PyTypeObject *tp, void *value) {
    self->clear_keep_alive = 0;
    self->intrusive = intrusive;
    self->unused = 0;
    nb_enable_try_inc_ref((PyObject *)self);

    nb_shard &shard = internals->shard(value);
    lock_shard guard(shard);

@@ -1766,16 +1822,18 @@ PyObject *nb_type_put(const std::type_info *cpp_type,
         PyTypeObject *tp = Py_TYPE(seq.inst);

         if (nb_type_data(tp)->type == cpp_type) {
-            Py_INCREF(seq.inst);
-            return seq.inst;
+            if (nb_try_inc_ref(seq.inst)) {
> wjakob (Owner): I think that if nb_try_inc_ref fails, what we actually want to
> do is to break out of the loop that considers the different Python objects
> associated with that pointer address. We tried the only possible match, and it
> cannot be returned.
>
> Ditto below.
>
> There are also two other places where this nb_try_inc_ref will be needed, in
> nb_type_put_p below (which mirrors the function here, but for polymorphic C++
> objects).

> Contributor Author: I think breaking out here would create more wrappers than
> necessary. The case I'm thinking about is where there are multiple lookups
> before the to-be-deallocated wrapper is removed from inst_c2p:
>
> T1: starts to deallocate the wrapper object, but hasn't removed it from
> inst_c2p yet.
> T2: looks up the wrapper and sees a zero refcount. The entry is converted to a
> linked list, and the new wrapper is inserted at the end.
> ...
> T3: looks up the wrapper. The first entry is still the zero-refcount wrapper;
> the second entry is the wrapper we want. If we break out early, we'll create a
> third wrapper, which doesn't seem ideal.
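
To make the control flow in that scenario concrete, here is a sketch of the
chain walk (ChainEntry and find_live are invented names; the real inst_c2p
structure differs):

    #include <Python.h>

    // Assumed shape of one inst_c2p chain entry for a given pointer address.
    struct ChainEntry {
        PyObject   *inst;
        ChainEntry *next;
    };

    // Entry 1 may be the zero-refcount wrapper T1 is still deallocating, and
    // entry 2 the live replacement T2 inserted, so the lookup keeps walking on
    // failure instead of breaking out and creating a third wrapper.
    static PyObject *find_live(ChainEntry *head) noexcept {
        for (ChainEntry *e = head; e; e = e->next) {
            if (nb_try_inc_ref(e->inst)) // defined earlier in this file
                return e->inst;
        }
        return nullptr; // no live match: the caller creates a fresh wrapper
    }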

> wjakob (Owner): In nanobind (and pybind11, for that matter), a single pointer
> can be associated with multiple Python instances.
>
> For example, you could have
>
>     struct A {
>         B b;
>         /* ... */
>     };
>
> with both an A* pointer and a B* pointer being simultaneously alive through
> Python wrappers. These can be created and GCed independently from each other.
>
> The nb_type_put() function converts a C++ pointer into an associated Python
> object (if one exists), or it creates a new one. It walks through a chain of
> objects that are associated with a given pointer address until it finds one of
> the right type. There can only be one of the right type.
>
> Your commit adds a failure mechanism, where it turns out that one wrapper is
> in the process of being deleted, and we need to create a new one. In this
> case, it (AFAIK) doesn't make sense to try the other types associated with the
> current pointer, since we already found the one. Hence the suggestion to break
> and immediately create a new wrapper. Arguably this is pretty minor and
> unlikely to have a big performance impact.
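
A hedged sketch of bindings that produce this situation (module and property
names are invented for illustration): when B is the first member of A, the
wrapper for a.b is registered under the same pointer address as the wrapper for
a itself, just with a different type.

    #include <nanobind/nanobind.h>
    namespace nb = nanobind;

    struct B { int x = 0; };
    struct A { B b; };  // &a.b aliases &a, since B is the first member

    NB_MODULE(example_ext, m) {
        nb::class_<B>(m, "B");
        nb::class_<A>(m, "A")
            .def(nb::init<>())
            // Returning a B& registers a second wrapper under the same
            // address as the enclosing A, with a different type.
            .def_prop_ro("b", [](A &a) -> B & { return a.b; },
                         nb::rv_policy::reference_internal);
    }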

> wjakob (Owner): Hmm, thinking more about this, I think it is currently
> necessary to continue iterating: there could be multiple objects that are in
> the process of being cleaned up.

> Contributor: You can also get multiple nanobind instances wrapping the same
> pointer, with the same type, by using nb::inst_wrap from the low-level API.

+                return seq.inst;
+            }
         }

         if (!lookup_type())
             return nullptr;

         if (PyType_IsSubtype(tp, td->type_py)) {
-            Py_INCREF(seq.inst);
-            return seq.inst;
+            if (nb_try_inc_ref(seq.inst)) {
+                return seq.inst;
+            }
         }

         if (seq.next == nullptr)
5 changes: 5 additions & 0 deletions tests/test_thread.cpp
@@ -12,6 +12,8 @@ struct Counter {
    }
};

struct GlobalData {} global_data;

nb::ft_mutex mutex;

NB_MODULE(test_thread_ext, m) {
@@ -34,4 +36,7 @@ NB_MODULE(test_thread_ext, m) {
        nb::ft_lock_guard guard(mutex);
        c.inc();
    }, "counter");

    nb::class_<GlobalData>(m, "GlobalData")
        .def_static("get", [] { return &global_data; }, nb::rv_policy::reference);
}
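
As a quick sanity check of the new binding outside the stress test (a
hypothetical interactive session; the module name is the one used by the test
suite):

    from test_thread_ext import GlobalData

    a = GlobalData.get()
    b = GlobalData.get()
    # nb::rv_policy::reference makes both calls look up &global_data in
    # nanobind's instance map, so they normally resolve to the same wrapper.
    assert a is b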
15 changes: 14 additions & 1 deletion tests/test_thread.py
@@ -1,5 +1,5 @@
 import test_thread_ext as t
-from test_thread_ext import Counter
+from test_thread_ext import Counter, GlobalData
 from common import parallelize

def test01_object_creation(n_threads=8):
@@ -75,3 +75,16 @@ def f():

    parallelize(f, n_threads=n_threads)
    assert c.value == n * n_threads


def test_06_global_wrapper(n_threads=8):
    # Check wrapper lookup racing with wrapper deallocation
    n = 10000
    def f():
        for i in range(n):
            GlobalData.get()
            GlobalData.get()
            GlobalData.get()
            GlobalData.get()

    parallelize(f, n_threads=n_threads)