From fa0ff6da57e1bbe99cf91cc167e9fcb18b122947 Mon Sep 17 00:00:00 2001
From: Victor Stinner
Date: Tue, 11 Jun 2024 14:49:13 +0200
Subject: [PATCH] Add test and comments

---
 Lib/test/test_capi/test_unicode.py | 4 ++++
 Objects/unicodeobject.c            | 2 ++
 2 files changed, 6 insertions(+)

diff --git a/Lib/test/test_capi/test_unicode.py b/Lib/test/test_capi/test_unicode.py
index a7eccb1c973616..a8bc1a2117687c 100644
--- a/Lib/test/test_capi/test_unicode.py
+++ b/Lib/test/test_capi/test_unicode.py
@@ -1709,6 +1709,10 @@ def test_unicode_export(self):
                          ('ucs4:\U0010ffff'.encode(ucs4_enc),
                           PyUnicode_FORMAT_UCS4))
 
+        # export ASCII as UCS1
+        self.assertEqual(unicode_export("abc", PyUnicode_FORMAT_UCS1),
+                         (b'abc', PyUnicode_FORMAT_UCS1))
+
         # always export to UCS4
         self.assertEqual(unicode_export("abc", PyUnicode_FORMAT_UCS4),
                          ('abc'.encode(ucs4_enc), PyUnicode_FORMAT_UCS4))
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index 19ce47b0b8bfa8..ef7f882d3248c6 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -2142,6 +2142,7 @@ PyUnicode_Export(PyObject *unicode, unsigned int supported_formats,
     }
 
     if (supported_formats & PyUnicode_FORMAT_UCS4) {
+        // Convert UCS1 or UCS2 to UCS4
         Py_UCS4 *ucs4 = PyUnicode_AsUCS4Copy(unicode);
         if (ucs4 == NULL) {
             goto error;
@@ -2152,6 +2153,7 @@ PyUnicode_Export(PyObject *unicode, unsigned int supported_formats,
     }
 
     if (supported_formats & PyUnicode_FORMAT_UTF8) {
+        // Encode UCS1, UCS2 or UCS4 to UTF-8
         const char *utf8 = PyUnicode_AsUTF8AndSize(unicode, size);
         if (utf8 == NULL) {
             goto error;