From 28c30c0f7686d56fcef385c236594ab5f1e56a77 Mon Sep 17 00:00:00 2001
From: Victor Stinner <vstinner@python.org>
Date: Mon, 27 May 2024 16:21:18 +0200
Subject: [PATCH 01/11] gh-119609: Add PyUnicode_AsNativeFormat() function

Add PyUnicode_AsNativeFormat() and PyUnicode_FromNativeFormat()
functions to the C API.
---
 Doc/c-api/unicode.rst                         | 47 +++++++++++
 Doc/data/stable_abi.dat                       |  2 +
 Doc/whatsnew/3.14.rst                         |  6 ++
 Include/unicodeobject.h                       | 22 +++++
 Lib/test/test_capi/test_unicode.py            | 81 +++++++++++++++++-
 Lib/test/test_stable_abi_ctypes.py            |  2 +
 ...-05-27-17-46-17.gh-issue-119609.kPIx6S.rst |  3 +
 Misc/stable_abi.toml                          |  4 +
 Modules/_testlimitedcapi/unicode.c            | 31 +++++++
 Objects/unicodeobject.c                       | 83 +++++++++++++++++++
 PC/python3dll.c                               |  2 +
 11 files changed, 281 insertions(+), 2 deletions(-)
 create mode 100644 Misc/NEWS.d/next/C API/2024-05-27-17-46-17.gh-issue-119609.kPIx6S.rst

diff --git a/Doc/c-api/unicode.rst b/Doc/c-api/unicode.rst
index 7320d035bab513..0f3b6c29200f34 100644
--- a/Doc/c-api/unicode.rst
+++ b/Doc/c-api/unicode.rst
@@ -341,6 +341,53 @@ APIs:
    .. versionadded:: 3.3
 
 
+.. c:function:: const void* PyUnicode_AsNativeFormat(PyObject *unicode, Py_ssize_t *size, int *native_format)
+
+   Get the contents of a string in its native format.
+
+   * Return the contents, set *\*size* and *\*native_format* on success.
+   * Set an exception and return ``NULL`` on error.
+
+   The contents are valid as long as *unicode* is valid.
+
+   *unicode*, *size* and *native_format* must not be ``NULL``.
+
+   *\*native_format* is set to one of these native formats:
+
+   .. c:namespace:: NULL
+
+   ========================================  =====  ============================
+   Constant Identifier                       Value  Description
+   ========================================  =====  ============================
+   .. c:macro:: PyUnicode_NATIVE_ASCII       ``1``  ASCII string (``Py_UCS1*``)
+   .. c:macro:: PyUnicode_NATIVE_UCS1        ``2``  UCS-1 string (``Py_UCS1*``)
+   .. c:macro:: PyUnicode_NATIVE_UCS2        ``3``  UCS-2 string (``Py_UCS2*``)
+   .. c:macro:: PyUnicode_NATIVE_UCS4        ``4``  UCS-4 string (``Py_UCS4*``)
+   .. c:macro:: PyUnicode_NATIVE_UTF8        ``5``  UTF-8 string (``char*``)
+   ========================================  =====  ============================
+
+   .. impl-detail::
+      In CPython, the :c:macro:`PyUnicode_NATIVE_UTF8` format is not used by
+      :c:func:`PyUnicode_AsNativeFormat`, but it's accepted by
+      :c:func:`PyUnicode_FromNativeFormat`.
+
+   .. versionadded:: 3.14
+
+
+.. c:function:: PyObject* PyUnicode_FromNativeFormat(const void *data, Py_ssize_t size, int native_format)
+
+   Create a string object from a native format string.
+
+   * Return a reference to a new string object on success.
+   * Set an exception and return ``NULL`` on error.
+
+   *data* must not be ``NULL``. *size* must be positive or zero.
+
+   See :c:func:`PyUnicode_AsNativeFormat` for the available native formats.
+
+   .. versionadded:: 3.14
+
+
 .. c:function:: PyObject* PyUnicode_FromKindAndData(int kind, const void *buffer, \
                                                     Py_ssize_t size)
 
diff --git a/Doc/data/stable_abi.dat b/Doc/data/stable_abi.dat
index 76a035f194d911..e4aef2ea0385e3 100644
--- a/Doc/data/stable_abi.dat
+++ b/Doc/data/stable_abi.dat
@@ -734,6 +734,7 @@ function,PyUnicode_AsEncodedString,3.2,,
 function,PyUnicode_AsEncodedUnicode,3.2,,
 function,PyUnicode_AsLatin1String,3.2,,
 function,PyUnicode_AsMBCSString,3.7,on Windows,
+function,PyUnicode_AsNativeFormat,3.14,,
 function,PyUnicode_AsRawUnicodeEscapeString,3.2,,
 function,PyUnicode_AsUCS4,3.7,,
 function,PyUnicode_AsUCS4Copy,3.7,,
@@ -784,6 +785,7 @@ function,PyUnicode_Format,3.2,,
 function,PyUnicode_FromEncodedObject,3.2,,
 function,PyUnicode_FromFormat,3.2,,
 function,PyUnicode_FromFormatV,3.2,,
+function,PyUnicode_FromNativeFormat,3.14,,
 function,PyUnicode_FromObject,3.2,,
 function,PyUnicode_FromOrdinal,3.2,,
 function,PyUnicode_FromString,3.2,,
diff --git a/Doc/whatsnew/3.14.rst b/Doc/whatsnew/3.14.rst
index b77ff30a8fbbee..785e8431c0be34 100644
--- a/Doc/whatsnew/3.14.rst
+++ b/Doc/whatsnew/3.14.rst
@@ -265,6 +265,12 @@ New Features
 * Add :c:func:`PyLong_GetSign` function to get the sign of :class:`int` objects.
   (Contributed by Sergey B Kirpichev in :gh:`116560`.)
 
+* Add :c:func:`PyUnicode_AsNativeFormat` and
+  :c:func:`PyUnicode_FromNativeFormat` functions to import and export strings
+  in their native format.
+  (Contributed by Victor Stinner in :gh:`119609`.)
+
+
 Porting to Python 3.14
 ----------------------
 
diff --git a/Include/unicodeobject.h b/Include/unicodeobject.h
index dee00715b3c51d..a106b0aaf03ba8 100644
--- a/Include/unicodeobject.h
+++ b/Include/unicodeobject.h
@@ -248,6 +248,28 @@ PyAPI_FUNC(PyObject *) PyUnicode_InternFromString(
     const char *u              /* UTF-8 encoded string */
     );
 
+#define PyUnicode_NATIVE_ASCII 1
+#define PyUnicode_NATIVE_UCS1 2
+#define PyUnicode_NATIVE_UCS2 3
+#define PyUnicode_NATIVE_UCS4 4
+#define PyUnicode_NATIVE_UTF8 5
+
+// Get the content of a string in its native format.
+// - Return the content, set '*size' and '*native_format' on success.
+// - Set an exception and return NULL on error.
+PyAPI_FUNC(const void*) PyUnicode_AsNativeFormat(
+    PyObject *unicode,
+    Py_ssize_t *size,
+    int *native_format);
+
+// Create a string object from a native format string.
+// - Return a reference to a new string object on success.
+// - Set an exception and return NULL on error.
+PyAPI_FUNC(PyObject*) PyUnicode_FromNativeFormat(
+    const void *data,
+    Py_ssize_t size,
+    int native_format);
+
 /* --- wchar_t support for platforms which support it --------------------- */
 
 #ifdef HAVE_WCHAR_H
diff --git a/Lib/test/test_capi/test_unicode.py b/Lib/test/test_capi/test_unicode.py
index a69f817c515ba7..dda1dd116f0c04 100644
--- a/Lib/test/test_capi/test_unicode.py
+++ b/Lib/test/test_capi/test_unicode.py
@@ -24,6 +24,14 @@ class Str(str):
     pass
 
 
+PyUnicode_NATIVE_ASCII = 1
+PyUnicode_NATIVE_UCS1 = 2
+PyUnicode_NATIVE_UCS2 = 3
+PyUnicode_NATIVE_UCS4 = 4
+PyUnicode_NATIVE_UTF8 = 5
+# Invalid native format
+PyUnicode_NATIVE_INVALID = 0
+
 class CAPITest(unittest.TestCase):
 
     @support.cpython_only
@@ -1675,6 +1683,75 @@ def test_pep393_utf8_caching_bug(self):
                 # Check that the second call returns the same result
                 self.assertEqual(getargs_s_hash(s), chr(k).encode() * (i + 1))
 
-
-if __name__ == "__main__":
+    def test_unicode_asnativeformat(self):
+        # Test PyUnicode_AsNativeFormat()
+        asnativeformat = _testlimitedcapi.unicode_asnativeformat
+        self.assertEqual(asnativeformat("abc"),
+                         (b'abc', PyUnicode_NATIVE_ASCII))
+        self.assertEqual(asnativeformat("latin1:\xe9"),
+                         (b'latin1:\xe9', PyUnicode_NATIVE_UCS1))
+
+        ucs2_enc = 'utf-16le' if sys.byteorder == 'little' else 'utf-16be'
+        self.assertEqual(asnativeformat('ucs2:\u20ac'),
+                         ('ucs2:\u20ac'.encode(ucs2_enc),
+                          PyUnicode_NATIVE_UCS2))
+
+        ucs4_enc = 'utf-32le' if sys.byteorder == 'little' else 'utf-32be'
+        self.assertEqual(asnativeformat('ucs4:\U0010ffff'),
+                         ('ucs4:\U0010ffff'.encode(ucs4_enc),
+                          PyUnicode_NATIVE_UCS4))
+
+    def test_unicode_fromnativeformat(self):
+        # Test PyUnicode_FromNativeFormat()
+        fromnativeformat = _testlimitedcapi.unicode_fromnativeformat
+        self.assertEqual(fromnativeformat(b'abc', PyUnicode_NATIVE_ASCII),
+                         "abc")
+        self.assertEqual(fromnativeformat(b'latin1:\xe9', PyUnicode_NATIVE_UCS1),
+                         "latin1:\xe9")
+
+        ucs2_enc = 'utf-16le' if sys.byteorder == 'little' else 'utf-16be'
+        self.assertEqual(fromnativeformat('ucs2:\u20ac'.encode(ucs2_enc),
+                                          PyUnicode_NATIVE_UCS2),
+                         'ucs2:\u20ac')
+
+        ucs4_enc = 'utf-32le' if sys.byteorder == 'little' else 'utf-32be'
+        self.assertEqual(fromnativeformat('ucs4:\U0010ffff'.encode(ucs4_enc),
+                                          PyUnicode_NATIVE_UCS4),
+                         'ucs4:\U0010ffff')
+
+        text = "abc\xe9\U0010ffff"
+        self.assertEqual(fromnativeformat(text.encode('utf8'),
+                                          PyUnicode_NATIVE_UTF8),
+                         text)
+
+        # Empty string
+        for native_format in (
+            PyUnicode_NATIVE_ASCII,
+            PyUnicode_NATIVE_UCS1,
+            PyUnicode_NATIVE_UCS2,
+            PyUnicode_NATIVE_UCS4,
+            PyUnicode_NATIVE_UTF8,
+        ):
+            with self.subTest(native_format=native_format):
+                self.assertEqual(fromnativeformat(b'', native_format),
+                                 '')
+
+        # Invalid format
+        with self.assertRaises(ValueError):
+            fromnativeformat(b'', PyUnicode_NATIVE_INVALID)
+
+        # Invalid size
+        ucs2 = 'ucs2:\u20ac'.encode(ucs2_enc)
+        with self.assertRaises(ValueError):
+            fromnativeformat(ucs2[:-1], PyUnicode_NATIVE_UCS2)
+        ucs4 = 'ucs4:\U0010ffff'.encode(ucs4_enc)
+        with self.assertRaises(ValueError):
+            fromnativeformat(ucs4[:-1], PyUnicode_NATIVE_UCS4)
+        with self.assertRaises(ValueError):
+            fromnativeformat(ucs4[:-2], PyUnicode_NATIVE_UCS4)
+        with self.assertRaises(ValueError):
+            fromnativeformat(ucs4[:-3], PyUnicode_NATIVE_UCS4)
+
+
+if __name__ == '__main__':
     unittest.main()
diff --git a/Lib/test/test_stable_abi_ctypes.py b/Lib/test/test_stable_abi_ctypes.py
index c06c285c5013a6..99bc693448f122 100644
--- a/Lib/test/test_stable_abi_ctypes.py
+++ b/Lib/test/test_stable_abi_ctypes.py
@@ -760,6 +760,7 @@ def test_windows_feature_macros(self):
     "PyUnicode_AsEncodedString",
     "PyUnicode_AsEncodedUnicode",
     "PyUnicode_AsLatin1String",
+    "PyUnicode_AsNativeFormat",
     "PyUnicode_AsRawUnicodeEscapeString",
     "PyUnicode_AsUCS4",
     "PyUnicode_AsUCS4Copy",
@@ -806,6 +807,7 @@ def test_windows_feature_macros(self):
     "PyUnicode_FromEncodedObject",
     "PyUnicode_FromFormat",
     "PyUnicode_FromFormatV",
+    "PyUnicode_FromNativeFormat",
     "PyUnicode_FromObject",
     "PyUnicode_FromOrdinal",
     "PyUnicode_FromString",
diff --git a/Misc/NEWS.d/next/C API/2024-05-27-17-46-17.gh-issue-119609.kPIx6S.rst b/Misc/NEWS.d/next/C API/2024-05-27-17-46-17.gh-issue-119609.kPIx6S.rst
new file mode 100644
index 00000000000000..06f9a061ec8ac0
--- /dev/null
+++ b/Misc/NEWS.d/next/C API/2024-05-27-17-46-17.gh-issue-119609.kPIx6S.rst	
@@ -0,0 +1,3 @@
+Add :c:func:`PyUnicode_AsNativeFormat` and
+:c:func:`PyUnicode_FromNativeFormat` functions to import and export strings
+in their native format. Patch by Victor Stinner.
diff --git a/Misc/stable_abi.toml b/Misc/stable_abi.toml
index 77473662aaa76c..5fe199be27f79d 100644
--- a/Misc/stable_abi.toml
+++ b/Misc/stable_abi.toml
@@ -2507,3 +2507,7 @@
     added = '3.13'
 [function.PyEval_GetFrameLocals]
     added = '3.13'
+[function.PyUnicode_AsNativeFormat]
+    added = '3.14'
+[function.PyUnicode_FromNativeFormat]
+    added = '3.14'
diff --git a/Modules/_testlimitedcapi/unicode.c b/Modules/_testlimitedcapi/unicode.c
index 2b70d09108a333..66da5b1d1846b4 100644
--- a/Modules/_testlimitedcapi/unicode.c
+++ b/Modules/_testlimitedcapi/unicode.c
@@ -1837,6 +1837,35 @@ test_string_from_format(PyObject *self, PyObject *Py_UNUSED(ignored))
 #undef CHECK_FORMAT_0
 }
 
+
+// Test PyUnicode_AsNativeFormat()
+static PyObject*
+unicode_asnativeformat(PyObject *self, PyObject *obj)
+{
+    Py_ssize_t size;
+    int native_format;
+    const void *data = PyUnicode_AsNativeFormat(obj, &size, &native_format);
+    if (data == NULL) {
+        return NULL;
+    }
+    return Py_BuildValue("y#i", data, size, native_format);
+}
+
+
+// Test PyUnicode_FromNativeFormat()
+static PyObject*
+unicode_fromnativeformat(PyObject *self, PyObject *args)
+{
+    const void *data;
+    Py_ssize_t size;
+    int native_format;
+    if (!PyArg_ParseTuple(args, "y#i", &data, &size, &native_format)) {
+        return NULL;
+    }
+    return PyUnicode_FromNativeFormat(data, size, native_format);
+}
+
+
 static PyMethodDef TestMethods[] = {
     {"codec_incrementalencoder", codec_incrementalencoder,       METH_VARARGS},
     {"codec_incrementaldecoder", codec_incrementaldecoder,       METH_VARARGS},
@@ -1924,6 +1953,8 @@ static PyMethodDef TestMethods[] = {
     {"unicode_format",           unicode_format,                 METH_VARARGS},
     {"unicode_contains",         unicode_contains,               METH_VARARGS},
     {"unicode_isidentifier",     unicode_isidentifier,           METH_O},
+    {"unicode_asnativeformat",   unicode_asnativeformat,         METH_O},
+    {"unicode_fromnativeformat", unicode_fromnativeformat,       METH_VARARGS},
     {NULL},
 };
 
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index 3b0b4173408724..068315fb13aa72 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -2097,6 +2097,89 @@ _PyUnicode_FromUCS4(const Py_UCS4 *u, Py_ssize_t size)
     return res;
 }
 
+const void*
+PyUnicode_AsNativeFormat(PyObject *unicode,
+                         Py_ssize_t *size, int *native_format)
+{
+    if (!PyUnicode_Check(unicode)) {
+        *size = 0;
+        *native_format = 0;
+        PyErr_Format(PyExc_TypeError, "must be str, not %T", unicode);
+        return NULL;
+    }
+
+    Py_ssize_t len = PyUnicode_GET_LENGTH(unicode);
+
+    if (PyUnicode_IS_ASCII(unicode)) {
+        *native_format = PyUnicode_NATIVE_ASCII;
+        *size = len;
+        return PyUnicode_1BYTE_DATA(unicode);
+    }
+    int kind = PyUnicode_KIND(unicode);
+
+    switch (kind)
+    {
+    case PyUnicode_1BYTE_KIND:
+        *native_format = PyUnicode_NATIVE_UCS1;
+        *size = len;
+        return PyUnicode_1BYTE_DATA(unicode);
+
+    case PyUnicode_2BYTE_KIND:
+        *native_format = PyUnicode_NATIVE_UCS2;
+        *size = len * 2;
+        return PyUnicode_2BYTE_DATA(unicode);
+
+    default:
+        assert(kind == PyUnicode_4BYTE_KIND);
+        *native_format = PyUnicode_NATIVE_UCS4;
+        *size = len * 4;
+        return PyUnicode_4BYTE_DATA(unicode);
+    }
+}
+
+PyObject*
+PyUnicode_FromNativeFormat(const void *data, Py_ssize_t size,
+                           int native_format)
+{
+    if (size < 0) {
+        PyErr_SetString(PyExc_ValueError, "Negative size");
+        return NULL;
+    }
+
+    switch (native_format)
+    {
+    case PyUnicode_NATIVE_ASCII:
+        return PyUnicode_DecodeASCII((const char*)data, size, NULL);
+
+    case PyUnicode_NATIVE_UCS1:
+        return _PyUnicode_FromUCS1(data, size);
+
+    case PyUnicode_NATIVE_UCS2:
+        if (size % 2) {
+            PyErr_Format(PyExc_ValueError, "size must be a multiple of 2: %zd",
+                         size);
+            return NULL;
+        }
+        return _PyUnicode_FromUCS2(data, size / 2);
+
+    case PyUnicode_NATIVE_UCS4:
+        if (size % 4) {
+            PyErr_Format(PyExc_ValueError, "size must be a multiple of 4: %zd",
+                         size);
+            return NULL;
+        }
+        return _PyUnicode_FromUCS4(data, size / 4);
+
+    case PyUnicode_NATIVE_UTF8:
+        return PyUnicode_DecodeUTF8((const char*)data, size, NULL);
+
+    default:
+        PyErr_Format(PyExc_ValueError, "unknown native format %i",
+                     native_format);
+        return NULL;
+    }
+}
+
 PyObject*
 PyUnicode_FromKindAndData(int kind, const void *buffer, Py_ssize_t size)
 {
diff --git a/PC/python3dll.c b/PC/python3dll.c
index 86c888430891c9..ca558c6fcf56fe 100755
--- a/PC/python3dll.c
+++ b/PC/python3dll.c
@@ -665,6 +665,7 @@ EXPORT_FUNC(PyUnicode_AsEncodedString)
 EXPORT_FUNC(PyUnicode_AsEncodedUnicode)
 EXPORT_FUNC(PyUnicode_AsLatin1String)
 EXPORT_FUNC(PyUnicode_AsMBCSString)
+EXPORT_FUNC(PyUnicode_AsNativeFormat)
 EXPORT_FUNC(PyUnicode_AsRawUnicodeEscapeString)
 EXPORT_FUNC(PyUnicode_AsUCS4)
 EXPORT_FUNC(PyUnicode_AsUCS4Copy)
@@ -713,6 +714,7 @@ EXPORT_FUNC(PyUnicode_Format)
 EXPORT_FUNC(PyUnicode_FromEncodedObject)
 EXPORT_FUNC(PyUnicode_FromFormat)
 EXPORT_FUNC(PyUnicode_FromFormatV)
+EXPORT_FUNC(PyUnicode_FromNativeFormat)
 EXPORT_FUNC(PyUnicode_FromObject)
 EXPORT_FUNC(PyUnicode_FromOrdinal)
 EXPORT_FUNC(PyUnicode_FromString)

From 4d771924595075fc2e8d4a2c5a1cbb6662d9dd36 Mon Sep 17 00:00:00 2001
From: Victor Stinner <vstinner@python.org>
Date: Sun, 9 Jun 2024 11:39:17 +0200
Subject: [PATCH 02/11] Change the API to PyUnicode_Export()

---
 Include/unicodeobject.h            |  24 +++--
 Lib/test/test_capi/test_unicode.py | 137 +++++++++++++++++++----------
 Modules/_testlimitedcapi/unicode.c |  29 +++---
 Objects/unicodeobject.c            | 108 +++++++++++++++++------
 4 files changed, 207 insertions(+), 91 deletions(-)

diff --git a/Include/unicodeobject.h b/Include/unicodeobject.h
index a106b0aaf03ba8..c23849a0365982 100644
--- a/Include/unicodeobject.h
+++ b/Include/unicodeobject.h
@@ -248,27 +248,33 @@ PyAPI_FUNC(PyObject *) PyUnicode_InternFromString(
     const char *u              /* UTF-8 encoded string */
     );
 
-#define PyUnicode_NATIVE_ASCII 1
-#define PyUnicode_NATIVE_UCS1 2
-#define PyUnicode_NATIVE_UCS2 3
-#define PyUnicode_NATIVE_UCS4 4
-#define PyUnicode_NATIVE_UTF8 5
+#define PyUnicode_FORMAT_ASCII 0x01
+#define PyUnicode_FORMAT_UCS1 0x02
+#define PyUnicode_FORMAT_UCS2 0x04
+#define PyUnicode_FORMAT_UCS4 0x08
+#define PyUnicode_FORMAT_UTF8 0x10
 
 // Get the content of a string in its native format.
 // - Return the content, set '*size' and '*native_format' on success.
 // - Set an exception and return NULL on error.
-PyAPI_FUNC(const void*) PyUnicode_AsNativeFormat(
+PyAPI_FUNC(const void*) PyUnicode_Export(
     PyObject *unicode,
+    unsigned int supported_formats,
     Py_ssize_t *size,
-    int *native_format);
+    unsigned int *format);
+
+PyAPI_FUNC(void) PyUnicode_FreeExport(
+    PyObject *unicode,
+    const void* data,
+    unsigned int format);
 
 // Create a string object from a native format string.
 // - Return a reference to a new string object on success.
 // - Set an exception and return NULL on error.
-PyAPI_FUNC(PyObject*) PyUnicode_FromNativeFormat(
+PyAPI_FUNC(PyObject*) PyUnicode_Import(
     const void *data,
     Py_ssize_t size,
-    int native_format);
+    unsigned int format);
 
 /* --- wchar_t support for platforms which support it --------------------- */
 
diff --git a/Lib/test/test_capi/test_unicode.py b/Lib/test/test_capi/test_unicode.py
index dda1dd116f0c04..a7eccb1c973616 100644
--- a/Lib/test/test_capi/test_unicode.py
+++ b/Lib/test/test_capi/test_unicode.py
@@ -24,13 +24,13 @@ class Str(str):
     pass
 
 
-PyUnicode_NATIVE_ASCII = 1
-PyUnicode_NATIVE_UCS1 = 2
-PyUnicode_NATIVE_UCS2 = 3
-PyUnicode_NATIVE_UCS4 = 4
-PyUnicode_NATIVE_UTF8 = 5
+PyUnicode_FORMAT_ASCII = 0x01
+PyUnicode_FORMAT_UCS1 = 0x02
+PyUnicode_FORMAT_UCS2 = 0x04
+PyUnicode_FORMAT_UCS4 = 0x08
+PyUnicode_FORMAT_UTF8 = 0x10
 # Invalid native format
-PyUnicode_NATIVE_INVALID = 0
+PyUnicode_FORMAT_INVALID = 0x20
 
 class CAPITest(unittest.TestCase):
 
@@ -1683,74 +1683,119 @@ def test_pep393_utf8_caching_bug(self):
                 # Check that the second call returns the same result
                 self.assertEqual(getargs_s_hash(s), chr(k).encode() * (i + 1))
 
-    def test_unicode_asnativeformat(self):
-        # Test PyUnicode_AsNativeFormat()
-        asnativeformat = _testlimitedcapi.unicode_asnativeformat
-        self.assertEqual(asnativeformat("abc"),
-                         (b'abc', PyUnicode_NATIVE_ASCII))
-        self.assertEqual(asnativeformat("latin1:\xe9"),
-                         (b'latin1:\xe9', PyUnicode_NATIVE_UCS1))
-
-        ucs2_enc = 'utf-16le' if sys.byteorder == 'little' else 'utf-16be'
-        self.assertEqual(asnativeformat('ucs2:\u20ac'),
+    def test_unicode_export(self):
+        # Test PyUnicode_Export() and PyUnicode_FreeExport()
+        unicode_export = _testlimitedcapi.unicode_export
+        if sys.byteorder == 'little':
+            ucs2_enc = 'utf-16le'
+            ucs4_enc = 'utf-32le'
+        else:
+            ucs2_enc = 'utf-16be'
+            ucs4_enc = 'utf-32be'
+
+        # export to the native format
+        formats = (PyUnicode_FORMAT_ASCII
+                   | PyUnicode_FORMAT_UCS1
+                   | PyUnicode_FORMAT_UCS2
+                   | PyUnicode_FORMAT_UCS4)
+        self.assertEqual(unicode_export("abc", formats),
+                         (b'abc', PyUnicode_FORMAT_ASCII))
+        self.assertEqual(unicode_export("latin1:\xe9", formats),
+                         (b'latin1:\xe9', PyUnicode_FORMAT_UCS1))
+        self.assertEqual(unicode_export('ucs2:\u20ac', formats),
                          ('ucs2:\u20ac'.encode(ucs2_enc),
-                          PyUnicode_NATIVE_UCS2))
-
-        ucs4_enc = 'utf-32le' if sys.byteorder == 'little' else 'utf-32be'
-        self.assertEqual(asnativeformat('ucs4:\U0010ffff'),
+                          PyUnicode_FORMAT_UCS2))
+        self.assertEqual(unicode_export('ucs4:\U0010ffff', formats),
                          ('ucs4:\U0010ffff'.encode(ucs4_enc),
-                          PyUnicode_NATIVE_UCS4))
-
-    def test_unicode_fromnativeformat(self):
-        # Test PyUnicode_FromNativeFormat()
-        fromnativeformat = _testlimitedcapi.unicode_fromnativeformat
-        self.assertEqual(fromnativeformat(b'abc', PyUnicode_NATIVE_ASCII),
+                          PyUnicode_FORMAT_UCS4))
+
+        # always export to UCS4
+        self.assertEqual(unicode_export("abc", PyUnicode_FORMAT_UCS4),
+                         ('abc'.encode(ucs4_enc), PyUnicode_FORMAT_UCS4))
+        self.assertEqual(unicode_export("latin1:\xe9", PyUnicode_FORMAT_UCS4),
+                         ('latin1:\xe9'.encode(ucs4_enc), PyUnicode_FORMAT_UCS4))
+        self.assertEqual(unicode_export('ucs2:\u20ac', PyUnicode_FORMAT_UCS4),
+                         ('ucs2:\u20ac'.encode(ucs4_enc),
+                          PyUnicode_FORMAT_UCS4))
+        self.assertEqual(unicode_export('ucs4:\U0010ffff', PyUnicode_FORMAT_UCS4),
+                         ('ucs4:\U0010ffff'.encode(ucs4_enc),
+                          PyUnicode_FORMAT_UCS4))
+
+        # always export to UTF8
+        self.assertEqual(unicode_export("abc", PyUnicode_FORMAT_UTF8),
+                         ('abc'.encode('utf8'), PyUnicode_FORMAT_UTF8))
+        self.assertEqual(unicode_export("latin1:\xe9", PyUnicode_FORMAT_UTF8),
+                         ('latin1:\xe9'.encode('utf8'), PyUnicode_FORMAT_UTF8))
+        self.assertEqual(unicode_export('ucs2:\u20ac', PyUnicode_FORMAT_UTF8),
+                         ('ucs2:\u20ac'.encode('utf8'),
+                          PyUnicode_FORMAT_UTF8))
+        self.assertEqual(unicode_export('ucs4:\U0010ffff', PyUnicode_FORMAT_UTF8),
+                         ('ucs4:\U0010ffff'.encode('utf8'),
+                          PyUnicode_FORMAT_UTF8))
+
+        # No supported format or invalid format
+        with self.assertRaisesRegex(ValueError,
+                                    "unable to find a matching export format"):
+            unicode_export('abc', 0)
+        with self.assertRaisesRegex(ValueError,
+                                    "unable to find a matching export format"):
+            unicode_export('abc', PyUnicode_FORMAT_INVALID)
+
+    def test_unicode_import(self):
+        # Test PyUnicode_Import()
+        unicode_import = _testlimitedcapi.unicode_import
+        if sys.byteorder == 'little':
+            ucs2_enc = 'utf-16le'
+            ucs4_enc = 'utf-32le'
+        else:
+            ucs2_enc = 'utf-16be'
+            ucs4_enc = 'utf-32be'
+
+        self.assertEqual(unicode_import(b'abc', PyUnicode_FORMAT_ASCII),
                          "abc")
-        self.assertEqual(fromnativeformat(b'latin1:\xe9', PyUnicode_NATIVE_UCS1),
+        self.assertEqual(unicode_import(b'latin1:\xe9', PyUnicode_FORMAT_UCS1),
                          "latin1:\xe9")
 
-        ucs2_enc = 'utf-16le' if sys.byteorder == 'little' else 'utf-16be'
-        self.assertEqual(fromnativeformat('ucs2:\u20ac'.encode(ucs2_enc),
-                                          PyUnicode_NATIVE_UCS2),
+        self.assertEqual(unicode_import('ucs2:\u20ac'.encode(ucs2_enc),
+                                          PyUnicode_FORMAT_UCS2),
                          'ucs2:\u20ac')
 
-        ucs4_enc = 'utf-32le' if sys.byteorder == 'little' else 'utf-32be'
-        self.assertEqual(fromnativeformat('ucs4:\U0010ffff'.encode(ucs4_enc),
-                                          PyUnicode_NATIVE_UCS4),
+        self.assertEqual(unicode_import('ucs4:\U0010ffff'.encode(ucs4_enc),
+                                          PyUnicode_FORMAT_UCS4),
                          'ucs4:\U0010ffff')
 
         text = "abc\xe9\U0010ffff"
-        self.assertEqual(fromnativeformat(text.encode('utf8'),
-                                          PyUnicode_NATIVE_UTF8),
+        self.assertEqual(unicode_import(text.encode('utf8'),
+                                          PyUnicode_FORMAT_UTF8),
                          text)
 
         # Empty string
         for native_format in (
-            PyUnicode_NATIVE_ASCII,
-            PyUnicode_NATIVE_UCS1,
-            PyUnicode_NATIVE_UCS2,
-            PyUnicode_NATIVE_UCS4,
-            PyUnicode_NATIVE_UTF8,
+            PyUnicode_FORMAT_ASCII,
+            PyUnicode_FORMAT_UCS1,
+            PyUnicode_FORMAT_UCS2,
+            PyUnicode_FORMAT_UCS4,
+            PyUnicode_FORMAT_UTF8,
         ):
             with self.subTest(native_format=native_format):
-                self.assertEqual(fromnativeformat(b'', native_format),
+                self.assertEqual(unicode_import(b'', native_format),
                                  '')
 
         # Invalid format
         with self.assertRaises(ValueError):
-            fromnativeformat(b'', PyUnicode_NATIVE_INVALID)
+            unicode_import(b'', PyUnicode_FORMAT_INVALID)
 
         # Invalid size
         ucs2 = 'ucs2:\u20ac'.encode(ucs2_enc)
         with self.assertRaises(ValueError):
-            fromnativeformat(ucs2[:-1], PyUnicode_NATIVE_UCS2)
+            unicode_import(ucs2[:-1], PyUnicode_FORMAT_UCS2)
         ucs4 = 'ucs4:\U0010ffff'.encode(ucs4_enc)
         with self.assertRaises(ValueError):
-            fromnativeformat(ucs4[:-1], PyUnicode_NATIVE_UCS4)
+            unicode_import(ucs4[:-1], PyUnicode_FORMAT_UCS4)
         with self.assertRaises(ValueError):
-            fromnativeformat(ucs4[:-2], PyUnicode_NATIVE_UCS4)
+            unicode_import(ucs4[:-2], PyUnicode_FORMAT_UCS4)
         with self.assertRaises(ValueError):
-            fromnativeformat(ucs4[:-3], PyUnicode_NATIVE_UCS4)
+            unicode_import(ucs4[:-3], PyUnicode_FORMAT_UCS4)
 
 
 if __name__ == '__main__':
diff --git a/Modules/_testlimitedcapi/unicode.c b/Modules/_testlimitedcapi/unicode.c
index 66da5b1d1846b4..360f432fd51a57 100644
--- a/Modules/_testlimitedcapi/unicode.c
+++ b/Modules/_testlimitedcapi/unicode.c
@@ -1840,29 +1840,38 @@ test_string_from_format(PyObject *self, PyObject *Py_UNUSED(ignored))
 
 // Test PyUnicode_AsNativeFormat()
 static PyObject*
-unicode_asnativeformat(PyObject *self, PyObject *obj)
+unicode_export(PyObject *self, PyObject *args)
 {
+    PyObject *obj;
+    unsigned int supported_formats;
+    if (!PyArg_ParseTuple(args, "OI", &obj, &supported_formats)) {
+        return NULL;
+    }
+
     Py_ssize_t size;
-    int native_format;
-    const void *data = PyUnicode_AsNativeFormat(obj, &size, &native_format);
+    unsigned int format;
+    const void *data = PyUnicode_Export(obj, supported_formats, &size, &format);
     if (data == NULL) {
         return NULL;
     }
-    return Py_BuildValue("y#i", data, size, native_format);
+
+    PyObject *res = Py_BuildValue("y#I", data, size, format);
+    PyUnicode_FreeExport(obj, data, format);
+    return res;
 }
 
 
 // Test PyUnicode_FromNativeFormat()
 static PyObject*
-unicode_fromnativeformat(PyObject *self, PyObject *args)
+unicode_import(PyObject *self, PyObject *args)
 {
     const void *data;
     Py_ssize_t size;
-    int native_format;
-    if (!PyArg_ParseTuple(args, "y#i", &data, &size, &native_format)) {
+    unsigned int format;
+    if (!PyArg_ParseTuple(args, "y#I", &data, &size, &format)) {
         return NULL;
     }
-    return PyUnicode_FromNativeFormat(data, size, native_format);
+    return PyUnicode_Import(data, size, format);
 }
 
 
@@ -1953,8 +1962,8 @@ static PyMethodDef TestMethods[] = {
     {"unicode_format",           unicode_format,                 METH_VARARGS},
     {"unicode_contains",         unicode_contains,               METH_VARARGS},
     {"unicode_isidentifier",     unicode_isidentifier,           METH_O},
-    {"unicode_asnativeformat",   unicode_asnativeformat,         METH_O},
-    {"unicode_fromnativeformat", unicode_fromnativeformat,       METH_VARARGS},
+    {"unicode_export",           unicode_export,                 METH_VARARGS},
+    {"unicode_import",           unicode_import,                 METH_VARARGS},
     {NULL},
 };
 
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index 068315fb13aa72..020b0b3bacefd9 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -2098,63 +2098,119 @@ _PyUnicode_FromUCS4(const Py_UCS4 *u, Py_ssize_t size)
 }
 
 const void*
-PyUnicode_AsNativeFormat(PyObject *unicode,
-                         Py_ssize_t *size, int *native_format)
+PyUnicode_Export(PyObject *unicode, unsigned int supported_formats,
+                 Py_ssize_t *size, unsigned int *format)
 {
     if (!PyUnicode_Check(unicode)) {
-        *size = 0;
-        *native_format = 0;
         PyErr_Format(PyExc_TypeError, "must be str, not %T", unicode);
-        return NULL;
+        goto error;
     }
 
     Py_ssize_t len = PyUnicode_GET_LENGTH(unicode);
 
-    if (PyUnicode_IS_ASCII(unicode)) {
-        *native_format = PyUnicode_NATIVE_ASCII;
+    if (PyUnicode_IS_ASCII(unicode)
+        && (supported_formats & PyUnicode_FORMAT_ASCII))
+    {
+        *format = PyUnicode_FORMAT_ASCII;
         *size = len;
         return PyUnicode_1BYTE_DATA(unicode);
     }
-    int kind = PyUnicode_KIND(unicode);
 
-    switch (kind)
+    int kind = PyUnicode_KIND(unicode);
+    if (kind == PyUnicode_1BYTE_KIND
+        && (supported_formats & PyUnicode_FORMAT_UCS1))
     {
-    case PyUnicode_1BYTE_KIND:
-        *native_format = PyUnicode_NATIVE_UCS1;
+        *format = PyUnicode_FORMAT_UCS1;
         *size = len;
         return PyUnicode_1BYTE_DATA(unicode);
+    }
 
-    case PyUnicode_2BYTE_KIND:
-        *native_format = PyUnicode_NATIVE_UCS2;
+    if (kind == PyUnicode_2BYTE_KIND
+        && (supported_formats & PyUnicode_FORMAT_UCS2))
+    {
+        *format = PyUnicode_FORMAT_UCS2;
         *size = len * 2;
         return PyUnicode_2BYTE_DATA(unicode);
+    }
 
-    default:
-        assert(kind == PyUnicode_4BYTE_KIND);
-        *native_format = PyUnicode_NATIVE_UCS4;
+    if (kind == PyUnicode_4BYTE_KIND
+        && (supported_formats & PyUnicode_FORMAT_UCS4))
+    {
+        *format = PyUnicode_FORMAT_UCS4;
         *size = len * 4;
         return PyUnicode_4BYTE_DATA(unicode);
     }
+
+    if (supported_formats & PyUnicode_FORMAT_UCS4) {
+        Py_UCS4 *ucs4 = PyUnicode_AsUCS4Copy(unicode);
+        if (ucs4 == NULL) {
+            goto error;
+        }
+        *format = PyUnicode_FORMAT_UCS4;
+        *size = len * 4;
+        return ucs4;
+    }
+
+    if (supported_formats & PyUnicode_FORMAT_UTF8) {
+        const char *utf8 = PyUnicode_AsUTF8AndSize(unicode, size);
+        if (utf8 == NULL) {
+            goto error;
+        }
+        *format = PyUnicode_FORMAT_UTF8;
+        return utf8;
+    }
+
+    PyErr_Format(PyExc_ValueError, "unable to find a matching export format");
+
+
+error:
+    *size = 0;
+    *format = 0;
+    return NULL;
+}
+
+void
+PyUnicode_FreeExport(PyObject *unicode, const void* data, unsigned int format)
+{
+    switch (format)
+    {
+    case PyUnicode_FORMAT_ASCII:
+        break;
+    case PyUnicode_FORMAT_UCS1:
+        break;
+    case PyUnicode_FORMAT_UCS2:
+        break;
+    case PyUnicode_FORMAT_UCS4:
+        if (PyUnicode_KIND(unicode) != PyUnicode_4BYTE_KIND) {
+            PyMem_Free((void*)data);
+        }
+        break;
+    case PyUnicode_FORMAT_UTF8:
+        break;
+    default:
+        // ignore silently an unknown format
+        break;
+    }
 }
 
 PyObject*
-PyUnicode_FromNativeFormat(const void *data, Py_ssize_t size,
-                           int native_format)
+PyUnicode_Import(const void *data, Py_ssize_t size,
+                 unsigned int format)
 {
     if (size < 0) {
         PyErr_SetString(PyExc_ValueError, "Negative size");
         return NULL;
     }
 
-    switch (native_format)
+    switch (format)
     {
-    case PyUnicode_NATIVE_ASCII:
+    case PyUnicode_FORMAT_ASCII:
         return PyUnicode_DecodeASCII((const char*)data, size, NULL);
 
-    case PyUnicode_NATIVE_UCS1:
+    case PyUnicode_FORMAT_UCS1:
         return _PyUnicode_FromUCS1(data, size);
 
-    case PyUnicode_NATIVE_UCS2:
+    case PyUnicode_FORMAT_UCS2:
         if (size % 2) {
             PyErr_Format(PyExc_ValueError, "size must be a multiple of 2: %zd",
                          size);
@@ -2162,7 +2218,7 @@ PyUnicode_FromNativeFormat(const void *data, Py_ssize_t size,
         }
         return _PyUnicode_FromUCS2(data, size / 2);
 
-    case PyUnicode_NATIVE_UCS4:
+    case PyUnicode_FORMAT_UCS4:
         if (size % 4) {
             PyErr_Format(PyExc_ValueError, "size must be a multiple of 4: %zd",
                          size);
@@ -2170,12 +2226,12 @@ PyUnicode_FromNativeFormat(const void *data, Py_ssize_t size,
         }
         return _PyUnicode_FromUCS4(data, size / 4);
 
-    case PyUnicode_NATIVE_UTF8:
+    case PyUnicode_FORMAT_UTF8:
         return PyUnicode_DecodeUTF8((const char*)data, size, NULL);
 
     default:
-        PyErr_Format(PyExc_ValueError, "unknown native format %i",
-                     native_format);
+        PyErr_Format(PyExc_ValueError, "unknown format: %i",
+                     format);
         return NULL;
     }
 }

From 076985d06d87a1a71e7b334dc3bcd22b423ee685 Mon Sep 17 00:00:00 2001
From: Victor Stinner <vstinner@python.org>
Date: Tue, 11 Jun 2024 14:41:13 +0200
Subject: [PATCH 03/11] Rename PyUnicode_FreeExport() to
 PyUnicode_ReleaseExport()

---
 Include/unicodeobject.h            | 11 +++++++----
 Modules/_testlimitedcapi/unicode.c |  2 +-
 Objects/unicodeobject.c            |  3 ++-
 3 files changed, 10 insertions(+), 6 deletions(-)

diff --git a/Include/unicodeobject.h b/Include/unicodeobject.h
index c23849a0365982..e9ccb480ded2c7 100644
--- a/Include/unicodeobject.h
+++ b/Include/unicodeobject.h
@@ -254,21 +254,24 @@ PyAPI_FUNC(PyObject *) PyUnicode_InternFromString(
 #define PyUnicode_FORMAT_UCS4 0x08
 #define PyUnicode_FORMAT_UTF8 0x10
 
-// Get the content of a string in its native format.
-// - Return the content, set '*size' and '*native_format' on success.
+// Get the content of a string in the requested format:
+// - Return the content, set '*size' and '*format' on success.
 // - Set an exception and return NULL on error.
+//
+// The export must be released by PyUnicode_ReleaseExport().
 PyAPI_FUNC(const void*) PyUnicode_Export(
     PyObject *unicode,
     unsigned int supported_formats,
     Py_ssize_t *size,
     unsigned int *format);
 
-PyAPI_FUNC(void) PyUnicode_FreeExport(
+// Release an export created by PyUnicode_Export().
+PyAPI_FUNC(void) PyUnicode_ReleaseExport(
     PyObject *unicode,
     const void* data,
     unsigned int format);
 
-// Create a string object from a native format string.
+// Create a string object from a string in the format 'format'.
 // - Return a reference to a new string object on success.
 // - Set an exception and return NULL on error.
 PyAPI_FUNC(PyObject*) PyUnicode_Import(
diff --git a/Modules/_testlimitedcapi/unicode.c b/Modules/_testlimitedcapi/unicode.c
index 360f432fd51a57..306612e726aaab 100644
--- a/Modules/_testlimitedcapi/unicode.c
+++ b/Modules/_testlimitedcapi/unicode.c
@@ -1856,7 +1856,7 @@ unicode_export(PyObject *self, PyObject *args)
     }
 
     PyObject *res = Py_BuildValue("y#i", data, size, format);
-    PyUnicode_FreeExport(obj, data, format);
+    PyUnicode_ReleaseExport(obj, data, format);
     return res;
 }
 
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index 020b0b3bacefd9..19ce47b0b8bfa8 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -2170,7 +2170,8 @@ PyUnicode_Export(PyObject *unicode, unsigned int supported_formats,
 }
 
 void
-PyUnicode_FreeExport(PyObject *unicode, const void* data, unsigned int format)
+PyUnicode_ReleaseExport(PyObject *unicode, const void* data,
+                        unsigned int format)
 {
     switch (format)
     {

From fa0ff6da57e1bbe99cf91cc167e9fcb18b122947 Mon Sep 17 00:00:00 2001
From: Victor Stinner <vstinner@python.org>
Date: Tue, 11 Jun 2024 14:49:13 +0200
Subject: [PATCH 04/11] Add test and comments

---
 Lib/test/test_capi/test_unicode.py | 4 ++++
 Objects/unicodeobject.c            | 2 ++
 2 files changed, 6 insertions(+)

diff --git a/Lib/test/test_capi/test_unicode.py b/Lib/test/test_capi/test_unicode.py
index a7eccb1c973616..a8bc1a2117687c 100644
--- a/Lib/test/test_capi/test_unicode.py
+++ b/Lib/test/test_capi/test_unicode.py
@@ -1709,6 +1709,10 @@ def test_unicode_export(self):
                          ('ucs4:\U0010ffff'.encode(ucs4_enc),
                           PyUnicode_FORMAT_UCS4))
 
+        # export ASCII as UCS1
+        self.assertEqual(unicode_export("abc", PyUnicode_FORMAT_UCS1),
+                         (b'abc', PyUnicode_FORMAT_UCS1))
+
         # always export to UCS4
         self.assertEqual(unicode_export("abc", PyUnicode_FORMAT_UCS4),
                          ('abc'.encode(ucs4_enc), PyUnicode_FORMAT_UCS4))
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index 19ce47b0b8bfa8..ef7f882d3248c6 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -2142,6 +2142,7 @@ PyUnicode_Export(PyObject *unicode, unsigned int supported_formats,
     }
 
     if (supported_formats & PyUnicode_FORMAT_UCS4) {
+        // Convert UCS1 or UCS2 to UCS4
         Py_UCS4 *ucs4 = PyUnicode_AsUCS4Copy(unicode);
         if (ucs4 == NULL) {
             goto error;
@@ -2152,6 +2153,7 @@ PyUnicode_Export(PyObject *unicode, unsigned int supported_formats,
     }
 
     if (supported_formats & PyUnicode_FORMAT_UTF8) {
+        // Encode UCS1, UCS2 or UCS4 to UTF-8
         const char *utf8 = PyUnicode_AsUTF8AndSize(unicode, size);
         if (utf8 == NULL) {
             goto error;

From 11b9f43a17dfafb0447adb3b447556d1acae2cc4 Mon Sep 17 00:00:00 2001
From: Victor Stinner <vstinner@python.org>
Date: Fri, 14 Jun 2024 14:34:08 +0200
Subject: [PATCH 05/11] Use uint32_t for the format

---
 Include/unicodeobject.h            | 8 ++++----
 Modules/_testlimitedcapi/unicode.c | 6 +++---
 Objects/unicodeobject.c            | 8 ++++----
 3 files changed, 11 insertions(+), 11 deletions(-)

diff --git a/Include/unicodeobject.h b/Include/unicodeobject.h
index e9ccb480ded2c7..01e76034a54fbc 100644
--- a/Include/unicodeobject.h
+++ b/Include/unicodeobject.h
@@ -261,15 +261,15 @@ PyAPI_FUNC(PyObject *) PyUnicode_InternFromString(
 // The export must be released by PyUnicode_ReleaseExport().
 PyAPI_FUNC(const void*) PyUnicode_Export(
     PyObject *unicode,
-    unsigned int supported_formats,
+    uint32_t supported_formats,
     Py_ssize_t *size,
-    unsigned int *format);
+    uint32_t *format);
 
 // Release an export created by PyUnicode_Export().
 PyAPI_FUNC(void) PyUnicode_ReleaseExport(
     PyObject *unicode,
     const void* data,
-    unsigned int format);
+    uint32_t format);
 
 // Create a string object from a string in the format 'format'.
 // - Return a reference to a new string object on success.
@@ -277,7 +277,7 @@ PyAPI_FUNC(void) PyUnicode_ReleaseExport(
 PyAPI_FUNC(PyObject*) PyUnicode_Import(
     const void *data,
     Py_ssize_t size,
-    unsigned int format);
+    uint32_t format);
 
 /* --- wchar_t support for platforms which support it --------------------- */
 
diff --git a/Modules/_testlimitedcapi/unicode.c b/Modules/_testlimitedcapi/unicode.c
index 306612e726aaab..252714fc82c62e 100644
--- a/Modules/_testlimitedcapi/unicode.c
+++ b/Modules/_testlimitedcapi/unicode.c
@@ -1849,13 +1849,13 @@ unicode_export(PyObject *self, PyObject *args)
     }
 
     Py_ssize_t size;
-    unsigned int format;
+    uint32_t format;
     const void *data = PyUnicode_Export(obj, supported_formats, &size, &format);
     if (data == NULL) {
         return NULL;
     }
 
-    PyObject *res = Py_BuildValue("y#i", data, size, format);
+    PyObject *res = Py_BuildValue("y#I", data, size, (unsigned int)format);
     PyUnicode_ReleaseExport(obj, data, format);
     return res;
 }
@@ -1868,7 +1868,7 @@ unicode_import(PyObject *self, PyObject *args)
     const void *data;
     Py_ssize_t size;
     unsigned int format;
-    if (!PyArg_ParseTuple(args, "y#i", &data, &size, &format)) {
+    if (!PyArg_ParseTuple(args, "y#I", &data, &size, &format)) {
         return NULL;
     }
     return PyUnicode_Import(data, size, format);
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index ef7f882d3248c6..522ca31f405964 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -2098,8 +2098,8 @@ _PyUnicode_FromUCS4(const Py_UCS4 *u, Py_ssize_t size)
 }
 
 const void*
-PyUnicode_Export(PyObject *unicode, unsigned int supported_formats,
-                 Py_ssize_t *size, unsigned int *format)
+PyUnicode_Export(PyObject *unicode, uint32_t supported_formats,
+                 Py_ssize_t *size, uint32_t *format)
 {
     if (!PyUnicode_Check(unicode)) {
         PyErr_Format(PyExc_TypeError, "must be str, not %T", unicode);
@@ -2173,7 +2173,7 @@ PyUnicode_Export(PyObject *unicode, unsigned int supported_formats,
 
 void
 PyUnicode_ReleaseExport(PyObject *unicode, const void* data,
-                        unsigned int format)
+                        uint32_t format)
 {
     switch (format)
     {
@@ -2198,7 +2198,7 @@ PyUnicode_ReleaseExport(PyObject *unicode, const void* data,
 
 PyObject*
 PyUnicode_Import(const void *data, Py_ssize_t size,
-                 unsigned int format)
+                 uint32_t format)
 {
     if (size < 0) {
         PyErr_SetString(PyExc_ValueError, "Negative size");

From 72ad7ec5546ae805126cf418b6bf653928c64474 Mon Sep 17 00:00:00 2001
From: Petr Viktorin <encukou@gmail.com>
Date: Thu, 20 Jun 2024 16:21:11 +0200
Subject: [PATCH 06/11] Update stable ABI manifest

---
 Doc/data/stable_abi.dat            |  5 +++--
 Lib/test/test_stable_abi_ctypes.py |  5 +++--
 Misc/stable_abi.toml               | 16 ++++++++++++++--
 PC/python3dll.c                    |  5 +++--
 4 files changed, 23 insertions(+), 8 deletions(-)

diff --git a/Doc/data/stable_abi.dat b/Doc/data/stable_abi.dat
index 0e02420db0951c..80222096f3a0b6 100644
--- a/Doc/data/stable_abi.dat
+++ b/Doc/data/stable_abi.dat
@@ -734,7 +734,6 @@ function,PyUnicode_AsEncodedString,3.2,,
 function,PyUnicode_AsEncodedUnicode,3.2,,
 function,PyUnicode_AsLatin1String,3.2,,
 function,PyUnicode_AsMBCSString,3.7,on Windows,
-function,PyUnicode_AsNativeFormat,3.14,,
 function,PyUnicode_AsRawUnicodeEscapeString,3.2,,
 function,PyUnicode_AsUCS4,3.7,,
 function,PyUnicode_AsUCS4Copy,3.7,,
@@ -777,6 +776,7 @@ function,PyUnicode_EncodeFSDefault,3.2,,
 function,PyUnicode_EncodeLocale,3.7,,
 function,PyUnicode_EqualToUTF8,3.13,,
 function,PyUnicode_EqualToUTF8AndSize,3.13,,
+function,PyUnicode_Export,3.14,,
 function,PyUnicode_FSConverter,3.2,,
 function,PyUnicode_FSDecoder,3.2,,
 function,PyUnicode_Find,3.2,,
@@ -785,7 +785,6 @@ function,PyUnicode_Format,3.2,,
 function,PyUnicode_FromEncodedObject,3.2,,
 function,PyUnicode_FromFormat,3.2,,
 function,PyUnicode_FromFormatV,3.2,,
-function,PyUnicode_FromNativeFormat,3.14,,
 function,PyUnicode_FromObject,3.2,,
 function,PyUnicode_FromOrdinal,3.2,,
 function,PyUnicode_FromString,3.2,,
@@ -793,6 +792,7 @@ function,PyUnicode_FromStringAndSize,3.2,,
 function,PyUnicode_FromWideChar,3.2,,
 function,PyUnicode_GetDefaultEncoding,3.2,,
 function,PyUnicode_GetLength,3.7,,
+function,PyUnicode_Import,3.14,,
 function,PyUnicode_InternFromString,3.2,,
 function,PyUnicode_InternInPlace,3.2,,
 function,PyUnicode_IsIdentifier,3.2,,
@@ -801,6 +801,7 @@ function,PyUnicode_Partition,3.2,,
 function,PyUnicode_RPartition,3.2,,
 function,PyUnicode_RSplit,3.2,,
 function,PyUnicode_ReadChar,3.7,,
+function,PyUnicode_ReleaseExport,3.14,,
 function,PyUnicode_Replace,3.2,,
 function,PyUnicode_Resize,3.2,,
 function,PyUnicode_RichCompare,3.2,,
diff --git a/Lib/test/test_stable_abi_ctypes.py b/Lib/test/test_stable_abi_ctypes.py
index 16cc37dea3a040..b4e977f4e972e2 100644
--- a/Lib/test/test_stable_abi_ctypes.py
+++ b/Lib/test/test_stable_abi_ctypes.py
@@ -760,7 +760,6 @@ def test_windows_feature_macros(self):
     "PyUnicode_AsEncodedString",
     "PyUnicode_AsEncodedUnicode",
     "PyUnicode_AsLatin1String",
-    "PyUnicode_AsNativeFormat",
     "PyUnicode_AsRawUnicodeEscapeString",
     "PyUnicode_AsUCS4",
     "PyUnicode_AsUCS4Copy",
@@ -799,6 +798,7 @@ def test_windows_feature_macros(self):
     "PyUnicode_EncodeLocale",
     "PyUnicode_EqualToUTF8",
     "PyUnicode_EqualToUTF8AndSize",
+    "PyUnicode_Export",
     "PyUnicode_FSConverter",
     "PyUnicode_FSDecoder",
     "PyUnicode_Find",
@@ -807,7 +807,6 @@ def test_windows_feature_macros(self):
     "PyUnicode_FromEncodedObject",
     "PyUnicode_FromFormat",
     "PyUnicode_FromFormatV",
-    "PyUnicode_FromNativeFormat",
     "PyUnicode_FromObject",
     "PyUnicode_FromOrdinal",
     "PyUnicode_FromString",
@@ -816,6 +815,7 @@ def test_windows_feature_macros(self):
     "PyUnicode_GetDefaultEncoding",
     "PyUnicode_GetLength",
     "PyUnicode_GetSize",
+    "PyUnicode_Import",
     "PyUnicode_InternFromString",
     "PyUnicode_InternImmortal",
     "PyUnicode_InternInPlace",
@@ -825,6 +825,7 @@ def test_windows_feature_macros(self):
     "PyUnicode_RPartition",
     "PyUnicode_RSplit",
     "PyUnicode_ReadChar",
+    "PyUnicode_ReleaseExport",
     "PyUnicode_Replace",
     "PyUnicode_Resize",
     "PyUnicode_RichCompare",
diff --git a/Misc/stable_abi.toml b/Misc/stable_abi.toml
index a9d554a6667a3d..c77dd429549509 100644
--- a/Misc/stable_abi.toml
+++ b/Misc/stable_abi.toml
@@ -2510,7 +2510,19 @@
 
 [function.Py_TYPE]
     added = '3.14'
-[function.PyUnicode_AsNativeFormat]
+[function.PyUnicode_Import]
     added = '3.14'
-[function.PyUnicode_FromNativeFormat]
+[function.PyUnicode_Export]
+    added = '3.14'
+[function.PyUnicode_ReleaseExport]
+    added = '3.14'
+[const.PyUnicode_FORMAT_ASCII]
+    added = '3.14'
+[const.PyUnicode_FORMAT_UCS1]
+    added = '3.14'
+[const.PyUnicode_FORMAT_UCS2]
+    added = '3.14'
+[const.PyUnicode_FORMAT_UCS4]
+    added = '3.14'
+[const.PyUnicode_FORMAT_UTF8]
     added = '3.14'
diff --git a/PC/python3dll.c b/PC/python3dll.c
index c69a584a5b2891..3086a08c0b70f5 100755
--- a/PC/python3dll.c
+++ b/PC/python3dll.c
@@ -666,7 +666,6 @@ EXPORT_FUNC(PyUnicode_AsEncodedString)
 EXPORT_FUNC(PyUnicode_AsEncodedUnicode)
 EXPORT_FUNC(PyUnicode_AsLatin1String)
 EXPORT_FUNC(PyUnicode_AsMBCSString)
-EXPORT_FUNC(PyUnicode_AsNativeFormat)
 EXPORT_FUNC(PyUnicode_AsRawUnicodeEscapeString)
 EXPORT_FUNC(PyUnicode_AsUCS4)
 EXPORT_FUNC(PyUnicode_AsUCS4Copy)
@@ -709,13 +708,13 @@ EXPORT_FUNC(PyUnicode_EncodeFSDefault)
 EXPORT_FUNC(PyUnicode_EncodeLocale)
 EXPORT_FUNC(PyUnicode_EqualToUTF8)
 EXPORT_FUNC(PyUnicode_EqualToUTF8AndSize)
+EXPORT_FUNC(PyUnicode_Export)
 EXPORT_FUNC(PyUnicode_Find)
 EXPORT_FUNC(PyUnicode_FindChar)
 EXPORT_FUNC(PyUnicode_Format)
 EXPORT_FUNC(PyUnicode_FromEncodedObject)
 EXPORT_FUNC(PyUnicode_FromFormat)
 EXPORT_FUNC(PyUnicode_FromFormatV)
-EXPORT_FUNC(PyUnicode_FromNativeFormat)
 EXPORT_FUNC(PyUnicode_FromObject)
 EXPORT_FUNC(PyUnicode_FromOrdinal)
 EXPORT_FUNC(PyUnicode_FromString)
@@ -726,6 +725,7 @@ EXPORT_FUNC(PyUnicode_FSDecoder)
 EXPORT_FUNC(PyUnicode_GetDefaultEncoding)
 EXPORT_FUNC(PyUnicode_GetLength)
 EXPORT_FUNC(PyUnicode_GetSize)
+EXPORT_FUNC(PyUnicode_Import)
 EXPORT_FUNC(PyUnicode_InternFromString)
 EXPORT_FUNC(PyUnicode_InternImmortal)
 EXPORT_FUNC(PyUnicode_InternInPlace)
@@ -733,6 +733,7 @@ EXPORT_FUNC(PyUnicode_IsIdentifier)
 EXPORT_FUNC(PyUnicode_Join)
 EXPORT_FUNC(PyUnicode_Partition)
 EXPORT_FUNC(PyUnicode_ReadChar)
+EXPORT_FUNC(PyUnicode_ReleaseExport)
 EXPORT_FUNC(PyUnicode_Replace)
 EXPORT_FUNC(PyUnicode_Resize)
 EXPORT_FUNC(PyUnicode_RichCompare)

From a2433ee87a460c6f9dc507624eff7fe11193ce4b Mon Sep 17 00:00:00 2001
From: Petr Viktorin <encukou@gmail.com>
Date: Thu, 20 Jun 2024 16:45:58 +0200
Subject: [PATCH 07/11] In debug mode, break assumptions that the buffer is
 zero-terminated

---
 Modules/_testlimitedcapi/unicode.c | 15 +++++++++++++++
 Objects/unicodeobject.c            |  8 ++++++++
 2 files changed, 23 insertions(+)

diff --git a/Modules/_testlimitedcapi/unicode.c b/Modules/_testlimitedcapi/unicode.c
index 252714fc82c62e..b367920f7106b4 100644
--- a/Modules/_testlimitedcapi/unicode.c
+++ b/Modules/_testlimitedcapi/unicode.c
@@ -1855,6 +1855,21 @@ unicode_export(PyObject *self, PyObject *args)
         return NULL;
     }
 
+#ifdef Py_DEBUG
+#define CHECK_END_BYTE(X) assert((X) == 0 || (X) == 0xAA)
+#else
+#define CHECK_END_BYTE(X) assert((X) == 0)
+#endif
+    CHECK_END_BYTE(((unsigned char*)data)[size]);
+    if (format == PyUnicode_FORMAT_UCS2) {
+        CHECK_END_BYTE(((unsigned char*)data)[size + 1]);
+    }
+    if (format == PyUnicode_FORMAT_UCS4) {
+        CHECK_END_BYTE(((unsigned char*)data)[size + 1]);
+        CHECK_END_BYTE(((unsigned char*)data)[size + 2]);
+        CHECK_END_BYTE(((unsigned char*)data)[size + 3]);
+    }
+
     PyObject *res = Py_BuildValue("y#I", data, size, (unsigned int)format);
     PyUnicode_ReleaseExport(obj, data, format);
     return res;
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index c3faa174d1c88d..94801e2adc0d94 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -2147,6 +2147,14 @@ PyUnicode_Export(PyObject *unicode, uint32_t supported_formats,
         if (ucs4 == NULL) {
             goto error;
         }
+
+        // The buffer is not necessarily zero-terminated.
+        // In debug mode, explicitly set a non-zero byte.
+        // For production, keep the safe zero.
+        assert(ucs4[len] == 0);
+#ifdef Py_DEBUG
+        ucs4[len] = 0xAAAAAAAA;
+#endif
         *format = PyUnicode_FORMAT_UCS4;
         *size = len * 4;
         return ucs4;

From ea4c7f6d68538041d5d563843fae04fb8188e798 Mon Sep 17 00:00:00 2001
From: Petr Viktorin <encukou@gmail.com>
Date: Thu, 20 Jun 2024 16:46:51 +0200
Subject: [PATCH 08/11] Export compatible strings to UCS2

---
 Lib/test/test_capi/test_unicode.py |  9 +++++++++
 Objects/unicodeobject.c            | 27 +++++++++++++++++++++++++++
 2 files changed, 36 insertions(+)

diff --git a/Lib/test/test_capi/test_unicode.py b/Lib/test/test_capi/test_unicode.py
index a8bc1a2117687c..a811bce6b3330c 100644
--- a/Lib/test/test_capi/test_unicode.py
+++ b/Lib/test/test_capi/test_unicode.py
@@ -1725,6 +1725,15 @@ def test_unicode_export(self):
                          ('ucs4:\U0010ffff'.encode(ucs4_enc),
                           PyUnicode_FORMAT_UCS4))
 
+        # export to UCS2 unless it's UCS4
+        self.assertEqual(unicode_export("abc", PyUnicode_FORMAT_UCS2),
+                         ('abc'.encode(ucs2_enc), PyUnicode_FORMAT_UCS2))
+        self.assertEqual(unicode_export("latin1:\xe9", PyUnicode_FORMAT_UCS2),
+                         ('latin1:\xe9'.encode(ucs2_enc), PyUnicode_FORMAT_UCS2))
+        self.assertEqual(unicode_export('ucs2:\u20ac', PyUnicode_FORMAT_UCS2),
+                         ('ucs2:\u20ac'.encode(ucs2_enc),
+                          PyUnicode_FORMAT_UCS2))
+
         # always export to UTF8
         self.assertEqual(unicode_export("abc", PyUnicode_FORMAT_UTF8),
                          ('abc'.encode('utf8'), PyUnicode_FORMAT_UTF8))
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index 94801e2adc0d94..60228c16d5c80f 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -2160,6 +2160,30 @@ PyUnicode_Export(PyObject *unicode, uint32_t supported_formats,
         return ucs4;
     }
 
+    if (supported_formats & PyUnicode_FORMAT_UCS2
+        && kind == PyUnicode_1BYTE_KIND)
+    {
+        // Convert UCS1 to UCS2
+        Py_UCS2 *ucs2 = PyMem_Malloc(sizeof(Py_UCS2) * (len + 1));
+        if (!ucs2) {
+            PyErr_NoMemory();
+            goto error;
+        }
+        _PyUnicode_CONVERT_BYTES(Py_UCS1, Py_UCS2,
+                                 PyUnicode_1BYTE_DATA(unicode),
+                                 PyUnicode_1BYTE_DATA(unicode) + len,
+                                 ucs2);
+#ifdef Py_DEBUG
+        // See AAAAAAAA in PyUnicode_FORMAT_UCS4
+        ucs2[len] = 0xAAAA;
+#else
+        ucs2[len] = 0;
+#endif
+        *format = PyUnicode_FORMAT_UCS2;
+        *size = len * 2;
+        return ucs2;
+    }
+
     if (supported_formats & PyUnicode_FORMAT_UTF8) {
         // Encode UCS1, UCS2 or UCS4 to UTF-8
         const char *utf8 = PyUnicode_AsUTF8AndSize(unicode, size);
@@ -2190,6 +2214,9 @@ PyUnicode_ReleaseExport(PyObject *unicode, const void* data,
     case PyUnicode_FORMAT_UCS1:
         break;
     case PyUnicode_FORMAT_UCS2:
+        if (PyUnicode_KIND(unicode) != PyUnicode_2BYTE_KIND) {
+            PyMem_Free((void*)data);
+        }
         break;
     case PyUnicode_FORMAT_UCS4:
         if (PyUnicode_KIND(unicode) != PyUnicode_4BYTE_KIND) {

From bece7e206768177574e414579687f576dda31059 Mon Sep 17 00:00:00 2001
From: Petr Viktorin <encukou@gmail.com>
Date: Thu, 20 Jun 2024 16:47:33 +0200
Subject: [PATCH 09/11] Add a more compact roundtrip test

This makes it easier to see the tested combinations
---
 Lib/test/test_capi/test_unicode.py | 30 ++++++++++++++++++++++++++++++
 1 file changed, 30 insertions(+)

diff --git a/Lib/test/test_capi/test_unicode.py b/Lib/test/test_capi/test_unicode.py
index a811bce6b3330c..20cdedfdde1945 100644
--- a/Lib/test/test_capi/test_unicode.py
+++ b/Lib/test/test_capi/test_unicode.py
@@ -1810,6 +1810,36 @@ def test_unicode_import(self):
         with self.assertRaises(ValueError):
             unicode_import(ucs4[:-3], PyUnicode_FORMAT_UCS4)
 
+    def test_unicode_import_export_roundtrip(self):
+        unicode_export = _testlimitedcapi.unicode_export
+        unicode_import = _testlimitedcapi.unicode_import
+        A = PyUnicode_FORMAT_ASCII
+        CS1 = PyUnicode_FORMAT_UCS1
+        CS2 = PyUnicode_FORMAT_UCS2
+        CS4 = PyUnicode_FORMAT_UCS4
+        TF8 = PyUnicode_FORMAT_UTF8
+        for string, alowed_encodings in (
+            ('', {A, CS1, CS2, CS4, TF8}),
+            ('ascii', {A, CS1, CS2, CS4, TF8}),
+            ('latin1:\xe9', {CS1, CS2, CS4, TF8}),
+            ('ucs2:\u20ac', {CS2, CS4, TF8}),
+            ('ucs4:\U0001f638', {CS4, TF8}),
+        ):
+            for encoding in A, CS1, CS2, CS4, TF8:
+                with self.subTest(string=string, encoding=encoding):
+                    if encoding not in alowed_encodings:
+                        with self.assertRaises(ValueError):
+                            unicode_export(string, encoding)
+                    else:
+                        buf, buf_enc = unicode_export(string, encoding)
+                        restored = unicode_import(buf, buf_enc)
+                        self.assertEqual(restored, string)
+
+                with self.subTest(string=string, encoding=-1):
+                    buf, buf_enc = unicode_export(string, -1)
+                    restored = unicode_import(buf, buf_enc)
+                    self.assertEqual(restored, string)
+
 
 if __name__ == '__main__':
     unittest.main()

From 20592018beff14239608a6f7d586e685a4aa81eb Mon Sep 17 00:00:00 2001
From: Petr Viktorin <encukou@gmail.com>
Date: Thu, 20 Jun 2024 16:47:58 +0200
Subject: [PATCH 10/11] Rename the argument to *nbytes* to make the unit
 clearer

---
 Include/unicodeobject.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/Include/unicodeobject.h b/Include/unicodeobject.h
index 01e76034a54fbc..0f983d08f740a7 100644
--- a/Include/unicodeobject.h
+++ b/Include/unicodeobject.h
@@ -255,14 +255,14 @@ PyAPI_FUNC(PyObject *) PyUnicode_InternFromString(
 #define PyUnicode_FORMAT_UTF8 0x10
 
 // Get the content of a string in the requested format:
-// - Return the content, set '*size' and '*format' on success.
+// - Return the content, set '*nbytes' and '*format' on success.
 // - Set an exception and return NULL on error.
 //
 // The export must be released by PyUnicode_ReleaseExport().
 PyAPI_FUNC(const void*) PyUnicode_Export(
     PyObject *unicode,
     uint32_t supported_formats,
-    Py_ssize_t *size,
+    Py_ssize_t *nbytes,
     uint32_t *format);
 
 // Release an export created by PyUnicode_Export().
@@ -276,7 +276,7 @@ PyAPI_FUNC(void) PyUnicode_ReleaseExport(
 // - Set an exception and return NULL on error.
 PyAPI_FUNC(PyObject*) PyUnicode_Import(
     const void *data,
-    Py_ssize_t size,
+    Py_ssize_t nbytes,
     uint32_t format);
 
 /* --- wchar_t support for platforms which support it --------------------- */

From dee3755a095836411696d6cf5c6d9274b64bf898 Mon Sep 17 00:00:00 2001
From: Petr Viktorin <encukou@gmail.com>
Date: Thu, 20 Jun 2024 16:48:13 +0200
Subject: [PATCH 11/11] Adjust docs

---
 Doc/c-api/unicode.rst                         | 74 ++++++++++++-------
 Doc/whatsnew/3.14.rst                         |  5 +-
 ...-05-27-17-46-17.gh-issue-119609.kPIx6S.rst |  5 +-
 3 files changed, 52 insertions(+), 32 deletions(-)

diff --git a/Doc/c-api/unicode.rst b/Doc/c-api/unicode.rst
index cc48254255ecf9..1ab4924b763635 100644
--- a/Doc/c-api/unicode.rst
+++ b/Doc/c-api/unicode.rst
@@ -341,49 +341,71 @@ APIs:
    .. versionadded:: 3.3
 
 
-.. c:function:: const void* PyUnicode_AsNativeFormat(PyObject *unicode, Py_ssize_t *size, int *native_format)
+.. c:function:: const void* PyUnicode_Export(PyObject *unicode, uint32_t supported_formats, \
+                 Py_ssize_t *nbytes, uint32_t *format)
 
-   Get the contents of a string in its native format.
+   Get the contents of a string in an “export format”.
 
-   * Return the contents, set *\*size* and *\*native_format* on success.
-   * Set an exception and return ``NULL`` on error.
+   Set *supported_formats* to formats from the following list, OR-ed together:
+
+   .. c:namespace:: NULL
 
-   The contents is valid as long as *unicode* is valid.
+   ========================================  ========  ============================
+   Constant Identifier                       Value     Description
+   ========================================  ========  ============================
+   .. c:macro:: PyUnicode_FORMAT_ASCII       ``0x01``  ASCII string (``Py_UCS1*``)
+   .. c:macro:: PyUnicode_FORMAT_UCS1        ``0x02``  UCS-1 string (``Py_UCS1*``)
+   .. c:macro:: PyUnicode_FORMAT_UCS2        ``0x04``  UCS-2 string (``Py_UCS2*``)
+   .. c:macro:: PyUnicode_FORMAT_UCS4        ``0x08``  UCS-4 string (``Py_UCS4*``)
+   .. c:macro:: PyUnicode_FORMAT_UTF8        ``0x10``  UTF-8 string (``char*``)
+   ========================================  ========  ============================
 
-   *unicode*, *size* and *native_format* must not be NULL.
+   Note that future versions of Python may introduce additional formats.
 
-   *\*native_format* is set to one of these native formats:
+   On success:
 
-   .. c:namespace:: NULL
+   * Return a buffer containing the string data. Note that the buffer is not
+     necessarily zero-terminated.
+   * Set *\*format* to the buffer's format -- this will be one of the flags
+     set in *supported_formats*.
+   * Set *\*nbytes* to the size of the buffer, in bytes.
+
+   On error, set an exception, set *\*format* and *\*nbytes* to zero, and
+   return ``NULL``.
+
+   The returned buffer must be later released using
+   :c:func:`PyUnicode_ReleaseExport`.
+
+   The returned buffer must not be modified.
+
+   If possible, the export is a zero-copy operation -- for example,
+   the string's underlying storage is returned.
+
+   *unicode*, *nbytes* and *format* must not be NULL.
+
+   .. versionadded:: 3.14
+
+
+.. c:function:: void PyUnicode_ReleaseExport(PyObject *unicode, const void* data, uint32_t format)
+
+   Release a string's export buffer. The buffer is invalid after this call.
 
-   ========================================  =====  ============================
-   Constant Identifier                       Value  Description
-   ========================================  =====  ============================
-   .. c:macro:: PyUnicode_NATIVE_ASCII       ``1``  ASCII string (``Py_UCS1*``)
-   .. c:macro:: PyUnicode_NATIVE_UCS1        ``2``  UCS-1 string (``Py_UCS1*``)
-   .. c:macro:: PyUnicode_NATIVE_UCS2        ``3``  UCS-2 string (``Py_UCS2*``)
-   .. c:macro:: PyUnicode_NATIVE_UCS4        ``4``  UCS-4 string (``Py_UCS4*``)
-   .. c:macro:: PyUnicode_NATIVE_UTF8        ``5``  UTF-8 string (``char*``)
-   ========================================  =====  ============================
-
-   .. impl-detail::
-      In CPython, the :c:macro:`PyUnicode_NATIVE_UTF8` format is not used by
-      :c:func:`PyUnicode_AsNativeFormat`, but it's accepted by
-      :c:func:`PyUnicode_FromNativeFormat`.
+   Each argument must match the corresponding argument or result of
+   a single earlier call to :c:func:`PyUnicode_Export`.
 
    .. versionadded:: 3.14
 
 
-.. c:function:: PyObject* PyUnicode_FromNativeFormat(const void *data, Py_ssize_t size, int native_format)
+.. c:function:: PyObject* PyUnicode_Import(const void *data, Py_ssize_t nbytes, uint32_t format)
 
-   Create a string object from a native format string.
+   Create a string object from a buffer in an “export format”.
 
    * Return a reference to a new string object on success.
    * Set an exception and return ``NULL`` on error.
 
-   *data* must not be NULL. *size* must be positive or zero.
+   *data* must not be NULL. *nbytes* must be positive or zero.
 
-   See :c:func:`PyUnicode_AsNativeFormat` for the available native formats.
+   See :c:func:`PyUnicode_Export` for the available export formats.
 
    .. versionadded:: 3.14
 
diff --git a/Doc/whatsnew/3.14.rst b/Doc/whatsnew/3.14.rst
index 0fa79805809f7b..1914ad3114cfd1 100644
--- a/Doc/whatsnew/3.14.rst
+++ b/Doc/whatsnew/3.14.rst
@@ -298,9 +298,8 @@ New Features
 
   (Contributed by Victor Stinner in :gh:`119182`.)
 
-* Add :c:func:`PyUnicode_AsNativeFormat` and
-  :c:func:`PyUnicode_FromNativeFormat` functions to import and export strings
-  in their native format.
+* Add :c:func:`PyUnicode_Import` and :c:func:`PyUnicode_Export` functions to
+  import and export strings from/to buffers in a given format.
   (Contributed by Victor Stinner in :gh:`119609`.)
 
 
diff --git a/Misc/NEWS.d/next/C API/2024-05-27-17-46-17.gh-issue-119609.kPIx6S.rst b/Misc/NEWS.d/next/C API/2024-05-27-17-46-17.gh-issue-119609.kPIx6S.rst
index 06f9a061ec8ac0..266fac739e338b 100644
--- a/Misc/NEWS.d/next/C API/2024-05-27-17-46-17.gh-issue-119609.kPIx6S.rst	
+++ b/Misc/NEWS.d/next/C API/2024-05-27-17-46-17.gh-issue-119609.kPIx6S.rst	
@@ -1,3 +1,2 @@
-Add :c:func:`PyUnicode_AsNativeFormat` and
-:c:func:`PyUnicode_FromNativeFormat` functions to import and export strings
-in their native format. Patch by Victor Stinner.
+Add :c:func:`PyUnicode_Export` and :c:func:`PyUnicode_Import` functions to
+import and export strings from native buffers. Patch by Victor Stinner.