use internal str Py_UNICODE array, avoid extra encoding and copy
anthrotype committed Dec 11, 2018
1 parent d381921 commit ebdf61e
Showing 1 changed file with 6 additions and 11 deletions.
17 changes: 6 additions & 11 deletions src/uharfbuzz/_harfbuzz.pyx
@@ -1,10 +1,10 @@
#cython: language_level=3
from charfbuzz cimport *
from cpython.unicode cimport PyUnicode_AS_UNICODE, PyUnicode_GET_SIZE
from libc.stdint cimport uint16_t, uint32_t
from libc.stdlib cimport free, malloc
from libc.string cimport const_char
from typing import Callable, Dict, List, Tuple
import array
from cpython cimport array


cdef bint PY_NARROW_UNICODE = sizeof(Py_UNICODE) != 4
@@ -181,29 +181,24 @@ cdef class Buffer:

def add_str(self, text: str,
item_offset: int = None, item_length: int = None) -> None:
cdef array.array packed
# handle both "wide" and "narrow" python builds; strip the BOM
if PY_NARROW_UNICODE:
packed = array.array("H", text.encode("UTF-16")[2:])
else:
packed = array.array("I", text.encode("UTF-32")[4:])
cdef unsigned int size = len(packed)
cdef Py_UNICODE* array = PyUnicode_AS_UNICODE(text)

khaledhosny (Collaborator) commented on Dec 11, 2018:
PyUnicode_AS_UNICODE() is actually deprecated and can also return NULL, see https://docs.python.org/3/c-api/unicode.html#c.PyUnicode_AS_UNICODE.

anthrotype (Author, Member) commented on Dec 11, 2018:

Right. After PEP 393 (Python >= 3.3), the internal Unicode representation no longer uses Py_UNICODE* (a 2- or 4-byte wchar_t) but a more compact layout of 1, 2 or 4 bytes per character, depending on the highest code point in the string.
Calling the deprecated API (which is guaranteed to stay until Python 4.0) on Python 3 no longer reads the internal buffer directly but has to create a separate wchar_t copy (hence it may fail and return NULL if there's no memory left).

I would prefer to just check for NULL right now. The alternative is to go back to using array.array and do the explicit encoding (btw, we haven't thought about endianness..). That's probably fine.
We can't just take the inner void* with PyUnicode_DATA because here we need to explicitly get either an array of uint16_t (for narrow py2 builds) or uint32_t, whereas the inner data can be an array of Py_UCS1, Py_UCS2 or Py_UCS4 depending on the PyUnicode_KIND.
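(For reference, a minimal pure-Python sketch of that array.array alternative with the endianness handled explicitly: selecting UTF-32-LE or UTF-32-BE from sys.byteorder means no BOM is emitted and no slicing is needed. The helper name is illustrative, not part of the diff.)

```python
import sys
from array import array

def utf32_units(text: str) -> array:
    # Encode with an explicit byte order matching the host: the plain
    # "UTF-32" codec prepends a BOM, the byte-order-suffixed codecs do not.
    endian = "le" if sys.byteorder == "little" else "be"
    return array("I", text.encode(f"UTF-32-{endian}"))

units = utf32_units("a\u00e9\U0001F600")
# each array item is now one code point, ready for hb_buffer_add_utf32
```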

There's PyUnicode_READ to get a Py_UCS4 codepoint (one at a time) from the inner data obtained by PyUnicode_DATA(). We could call that in a loop to initialize our temporary array.array of unsigned integers and then call hb_buffer_add_codepoints.
However, this new API for Unicode strings from PEP 393 only works on Python 3.3 and above.
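(A pure-Python sketch of that loop: in the C API it would call PyUnicode_READ over the buffer returned by PyUnicode_DATA(); here ord() stands in for PyUnicode_READ, and the harfbuzz call is only indicated in a comment.)

```python
from array import array

def codepoints(text: str) -> array:
    # One Py_UCS4 code point per character, regardless of the
    # internal PyUnicode_KIND (1, 2 or 4 bytes per character).
    return array("I", (ord(ch) for ch in text))

cp = codepoints("a\u00e9\U0001F600")
# hypothetical Cython-side follow-up:
# hb_buffer_add_codepoints(self._hb_buffer, cp.data.as_uints,
#                          len(cp), item_offset, item_length)
```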

I may just do that, and thus kill Python 2.7 support (which never really existed).

khaledhosny (Collaborator) commented on Dec 11, 2018:

I think you can check the PyUnicode_KIND and then call add_utf8, add_utf16 or add_utf32 as appropriate; the cluster indices would still match the Python string indices, and you avoid recreating the buffer.
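(A rough pure-Python sketch of that dispatch: unicode_kind mimics what PyUnicode_KIND reports for a given string, and the returned names only indicate which harfbuzz entry point would be called; both helpers are hypothetical.)

```python
def unicode_kind(text: str) -> int:
    # Width in bytes of the compact PEP 393 representation,
    # i.e. what PyUnicode_KIND reports: 1, 2 or 4.
    highest = max(map(ord, text), default=0)
    if highest < 0x100:
        return 1
    if highest < 0x10000:
        return 2
    return 4

def pick_add_func(text: str) -> str:
    # Hypothetical dispatch table, per the suggestion above.
    return {1: "hb_buffer_add_utf8",
            2: "hb_buffer_add_utf16",
            4: "hb_buffer_add_utf32"}[unicode_kind(text)]
```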

cdef Py_ssize_t size = PyUnicode_GET_SIZE(text)
if item_offset is None:
item_offset = 0
if item_length is None:
item_length = size
if PY_NARROW_UNICODE:
hb_buffer_add_utf16(
self._hb_buffer,
packed.data.as_ushorts,
<uint16_t*>array,
size,
item_offset,
item_length,
)
else:
hb_buffer_add_utf32(
self._hb_buffer,
packed.data.as_uints,
<uint32_t*>array,
size,
item_offset,
item_length,

0 comments on commit ebdf61e
