From fcb620c8f3b062c63786777339442f4510c38abd Mon Sep 17 00:00:00 2001 From: chaokunyang Date: Mon, 13 Jan 2025 00:33:16 +0800 Subject: [PATCH 01/23] add get uint16_t array max value util --- cpp/fury/util/array_util.h | 83 ++++++++++++++++++++++++++++++++ cpp/fury/util/array_util_test.cc | 33 +++++++++++++ 2 files changed, 116 insertions(+) create mode 100644 cpp/fury/util/array_util.h create mode 100644 cpp/fury/util/array_util_test.cc diff --git a/cpp/fury/util/array_util.h b/cpp/fury/util/array_util.h new file mode 100644 index 0000000000..2dee73e50a --- /dev/null +++ b/cpp/fury/util/array_util.h @@ -0,0 +1,83 @@ +#include "fury/util/platform.h" + +namespace fury { +#if defined(FURY_HAS_NEON) +inline uint16_t getMaxValue(uint16_t* arr, size_t length) { + if (length == 0) { + return 0; // Return 0 for empty arrays + } + uint16x8_t max_val = vdupq_n_u16(0); // Initialize max vector to zero + + size_t i = 0; + for (; i + 8 <= length; i += 8) { + uint16x8_t current_val = vld1q_u16(&arr[i]); + max_val = vmaxq_u16(max_val, current_val); // Max operation + } + + // Find the max value in the resulting vector + uint16_t temp[8]; + vst1q_u16(temp, max_val); + uint16_t max_neon = temp[0]; + for (int j = 1; j < 8; j++) { + if (temp[j] > max_neon) { + max_neon = temp[j]; + } + } + + // Handle remaining elements + for (; i < length; i++) { + if (arr[i] > max_neon) { + max_neon = arr[i]; + } + } + return max_neon; +} + +#elif defined(FURY_HAS_SSE2) + +inline uint16_t getMaxValue(uint16_t* arr, size_t length) { + if (length == 0) { + return 0; // Return 0 for empty arrays + } + + __m128i max_val = _mm_setzero_si128(); // Initialize max vector with zeros + + size_t i = 0; + for (; i + 8 <= length; i += 8) { + __m128i current_val = _mm_loadu_si128((__m128i*)&arr[i]); + max_val = _mm_max_epu16(max_val, current_val); // Max operation + } + + // Find the max value in the resulting vector + uint16_t temp[8]; + _mm_storeu_si128((__m128i*)temp, max_val); + uint16_t max_sse = temp[0]; 
+ for (int j = 1; j < 8; j++) { + if (temp[j] > max_sse) { + max_sse = temp[j]; + } + } + + // Handle remaining elements + for (; i < length; i++) { + if (arr[i] > max_sse) { + max_sse = arr[i]; + } + } + return max_sse; +} +#else +inline uint16_t getMaxValue(uint16_t* arr, size_t length) { + if (length == 0) { + return 0; // Return 0 for empty arrays + } + uint16_t max_val = arr[0]; + for (size_t i = 1; i < length; i++) { + if (arr[i] > max_val) { + max_val = arr[i]; + } + } + return max_val; +} +#endif +} // namespace fury diff --git a/cpp/fury/util/array_util_test.cc b/cpp/fury/util/array_util_test.cc new file mode 100644 index 0000000000..399018730a --- /dev/null +++ b/cpp/fury/util/array_util_test.cc @@ -0,0 +1,33 @@ +#include "fury/util/array_util.h" +#include "gtest/gtest.h" + +namespace fury { +TEST(GetMaxValueTest, HandlesEmptyArray) { + uint16_t arr[] = {}; + EXPECT_EQ(getMaxValue(arr, 0), 0); +} + +TEST(GetMaxValueTest, HandlesSingleElementArray) { + uint16_t arr[] = {42}; + EXPECT_EQ(getMaxValue(arr, 1), 42); +} + +TEST(GetMaxValueTest, HandlesSmallArray) { + uint16_t arr[] = {10, 20, 30, 40, 5}; + EXPECT_EQ(getMaxValue(arr, 5), 40); +} + +TEST(GetMaxValueTest, HandlesLargeArray) { + const size_t length = 1024; + uint16_t arr[length]; + for (size_t i = 0; i < length; ++i) { + arr[i] = static_cast(i); + } + EXPECT_EQ(getMaxValue(arr, length), 1023); +} +} // namespace fury + +int main(int argc, char** argv) { + ::testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} From f68dce4ac8f0ba4d32293897108bfdbeee96b85e Mon Sep 17 00:00:00 2001 From: chaokunyang Date: Tue, 14 Jan 2025 13:44:10 +0800 Subject: [PATCH 02/23] add SIMD copy uint16 array to uint8 array --- cpp/fury/util/array_util.h | 115 +++++++++++++++++++++++++++++++++++-- 1 file changed, 110 insertions(+), 5 deletions(-) diff --git a/cpp/fury/util/array_util.h b/cpp/fury/util/array_util.h index 2dee73e50a..5dcdd31c2f 100644 --- a/cpp/fury/util/array_util.h +++ 
b/cpp/fury/util/array_util.h @@ -1,8 +1,78 @@ #include "fury/util/platform.h" namespace fury { -#if defined(FURY_HAS_NEON) -inline uint16_t getMaxValue(uint16_t* arr, size_t length) { +#if defined(FURY_HAS_IMMINTRIN) +inline uint16_t getMaxValue(const uint16_t* arr, size_t length) { + if (length == 0) { + return 0; // Return 0 for empty arrays + } + + __m256i max_val = _mm256_setzero_si256(); // Initialize max vector with zeros + + size_t i = 0; + for (; i + 16 <= length; i += 16) { + __m256i current_val = _mm256_loadu_si256((__m256i*)&arr[i]); + max_val = _mm256_max_epu16(max_val, current_val); // Max operation + } + + // Find the max value in the resulting vector + uint16_t temp[16]; + _mm256_storeu_si256((__m256i*)temp, max_val); + uint16_t max_avx = temp[0]; + for (int j = 1; j < 16; j++) { + if (temp[j] > max_avx) { + max_avx = temp[j]; + } + } + + // Handle remaining elements + for (; i < length; i++) { + if (arr[i] > max_avx) { + max_avx = arr[i]; + } + } + return max_avx; +} + +inline void copyValue(const uint16_t* from, uint8_t* to, size_t length) { + size_t i = 0; + // Process chunks of 32 bytes (16 uint16_t elements at a time) + for (; i + 31 < length; i += 32) { + // Load two 256-bit blocks (32 uint16_t elements total) + __m256i src1 = _mm256_loadu_si256(reinterpret_cast(&from[i])); + __m256i src2 = _mm256_loadu_si256(reinterpret_cast(&from[i + 16])); + + // Narrow the 16-bit integers to 8-bit integers + __m256i packed = _mm256_packus_epi16(src1, src2); + + // Shuffle the packed result to interleave lower and upper parts + packed = _mm256_permute4x64_epi64(packed, _MM_SHUFFLE(3, 1, 2, 0)); + + // Store the result + _mm256_storeu_si256(reinterpret_cast<__m256i*>(&to[i]), packed); + } + // Check if at least 16 elements are left to process + if (i + 15 < length) { + // Process the next 16 elements + __m256i src1 = _mm256_loadu_si256(reinterpret_cast(&from[i])); + // Narrow the 16-bit integers to 8-bit integers by zeroing the upper halves + __m128i packed1 
= _mm256_castsi256_si128(src1); // Lower 128 bits + __m128i packed2 = _mm256_extracti128_si256(src1, 1); // Upper 128 bits + // Pack two 128-wide vectors into 8-bit integers, ignore saturating with itself. + __m128i packed = _mm_packus_epi16(packed1, packed2); + + // Store the result; using only the first 128 bits + _mm_storeu_si128(reinterpret_cast<__m128i*>(&to[i]), packed); + + i += 16; + } + // Process remaining elements one at a time + for (; i < length; ++i) { + to[i] = static_cast(from[i]); + } +} +#elif defined(FURY_HAS_NEON) +inline uint16_t getMaxValue(const uint16_t* arr, size_t length) { if (length == 0) { return 0; // Return 0 for empty arrays } @@ -33,9 +103,21 @@ inline uint16_t getMaxValue(uint16_t* arr, size_t length) { return max_neon; } -#elif defined(FURY_HAS_SSE2) +inline void copyValue(const uint16_t* from, uint8_t* to, size_t length) { + size_t i = 0; + for (; i + 7 < length; i += 8) { + uint16x8_t src = vld1q_u16(&from[i]); + uint8x8_t result = vmovn_u16(src); + vst1_u8(&to[i], result); + } -inline uint16_t getMaxValue(uint16_t* arr, size_t length) { + // Fallback for the remainder + for (; i < length; ++i) { + to[i] = static_cast(from[i]); + } +} +#elif defined(FURY_HAS_SSE2) +inline uint16_t getMaxValue(const uint16_t* arr, size_t length) { if (length == 0) { return 0; // Return 0 for empty arrays } @@ -66,8 +148,24 @@ inline uint16_t getMaxValue(uint16_t* arr, size_t length) { } return max_sse; } + +inline void copyValue(const uint16_t* from, uint8_t* to, size_t length) { + size_t i = 0; + __m128i mask = _mm_set1_epi16(0xFF); // Mask to zero out the high byte + for (; i + 7 < length; i += 8) { + __m128i src = _mm_loadu_si128(reinterpret_cast(&from[i])); + __m128i result = _mm_and_si128(src, mask); + _mm_storel_epi64(reinterpret_cast<__m128i*>(&to[i]), + _mm_packus_epi16(result, result)); + } + + // Fallback for the remainder + for (; i < length; ++i) { + to[i] = static_cast(from[i]); + } +} #else -inline uint16_t getMaxValue(uint16_t* 
arr, size_t length) { +inline uint16_t getMaxValue(const uint16_t* arr, size_t length) { if (length == 0) { return 0; // Return 0 for empty arrays } @@ -79,5 +177,12 @@ inline uint16_t getMaxValue(uint16_t* arr, size_t length) { } return max_val; } + +inline void copyValue(const uint16_t* from, const uint8_t* to, size_t length) { + // Fallback for systems without SSE2/NEON + for (size_t i = 0; i < length; ++i) { + to[i] = static_cast(from[i]); + } +} #endif } // namespace fury From eb7f7b8cf6475aa062ec4fef88802875b8aed423 Mon Sep 17 00:00:00 2001 From: chaokunyang Date: Wed, 15 Jan 2025 01:01:23 +0800 Subject: [PATCH 03/23] skip avx for python wheel --- cpp/fury/util/BUILD | 10 ++++++++++ cpp/fury/util/array_util.h | 12 +++++++----- cpp/fury/util/platform.h | 3 --- 3 files changed, 17 insertions(+), 8 deletions(-) diff --git a/cpp/fury/util/BUILD b/cpp/fury/util/BUILD index 8f605dc75e..2a2a6e5396 100644 --- a/cpp/fury/util/BUILD +++ b/cpp/fury/util/BUILD @@ -62,4 +62,14 @@ cc_test( ":fury_util", "@com_google_googletest//:gtest", ], +) + + +cc_test( + name = "array_util_test", + srcs = ["array_util_test.cc"], + deps = [ + ":fury_util", + "@com_google_googletest//:gtest", + ], ) \ No newline at end of file diff --git a/cpp/fury/util/array_util.h b/cpp/fury/util/array_util.h index 5dcdd31c2f..5decaa3524 100644 --- a/cpp/fury/util/array_util.h +++ b/cpp/fury/util/array_util.h @@ -2,13 +2,14 @@ namespace fury { #if defined(FURY_HAS_IMMINTRIN) -inline uint16_t getMaxValue(const uint16_t* arr, size_t length) { +// TODO: runtime dispatch for avx +// We can not distribute a seperate wheel for avx, so we need to +// check instcuctions set at runtime +inline uint16_t getMaxValueAVX(const uint16_t* arr, size_t length) { if (length == 0) { return 0; // Return 0 for empty arrays } - __m256i max_val = _mm256_setzero_si256(); // Initialize max vector with zeros - size_t i = 0; for (; i + 16 <= length; i += 16) { __m256i current_val = _mm256_loadu_si256((__m256i*)&arr[i]); @@ -34,7 
+35,7 @@ inline uint16_t getMaxValue(const uint16_t* arr, size_t length) { return max_avx; } -inline void copyValue(const uint16_t* from, uint8_t* to, size_t length) { +inline void copyValueAVX(const uint16_t* from, uint8_t* to, size_t length) { size_t i = 0; // Process chunks of 32 bytes (16 uint16_t elements at a time) for (; i + 31 < length; i += 32) { @@ -71,7 +72,8 @@ inline void copyValue(const uint16_t* from, uint8_t* to, size_t length) { to[i] = static_cast(from[i]); } } -#elif defined(FURY_HAS_NEON) +#endif +#if defined(FURY_HAS_NEON) inline uint16_t getMaxValue(const uint16_t* arr, size_t length) { if (length == 0) { return 0; // Return 0 for empty arrays diff --git a/cpp/fury/util/platform.h b/cpp/fury/util/platform.h index 70a699bc20..bc5b13bbb9 100644 --- a/cpp/fury/util/platform.h +++ b/cpp/fury/util/platform.h @@ -23,9 +23,6 @@ #elif defined(__ARM_NEON) || defined(__ARM_NEON__) #include #define FURY_HAS_NEON -#elif defined(__SSE2__) -#include -#define FURY_HAS_SSE2 #elif defined(__riscv) && __riscv_vector #include #define FURY_HAS_RISCV_VECTOR From 84e0b0b25d845bcd51ec77e75b5cd24f682451cf Mon Sep 17 00:00:00 2001 From: chaokunyang Date: Wed, 15 Jan 2025 01:03:47 +0800 Subject: [PATCH 04/23] enable avx for cpp test --- cpp/fury/util/BUILD | 4 +++- cpp/fury/util/array_util.h | 2 +- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/cpp/fury/util/BUILD b/cpp/fury/util/BUILD index 2a2a6e5396..124825b9b3 100644 --- a/cpp/fury/util/BUILD +++ b/cpp/fury/util/BUILD @@ -72,4 +72,6 @@ cc_test( ":fury_util", "@com_google_googletest//:gtest", ], -) \ No newline at end of file + copts = ["-mavx2"], # Enable AVX2 support + linkopts = ["-mavx2"], # Ensure linker also knows about AVX2 +) diff --git a/cpp/fury/util/array_util.h b/cpp/fury/util/array_util.h index 5decaa3524..0058c675b6 100644 --- a/cpp/fury/util/array_util.h +++ b/cpp/fury/util/array_util.h @@ -180,7 +180,7 @@ inline uint16_t getMaxValue(const uint16_t* arr, size_t length) { return max_val; 
} -inline void copyValue(const uint16_t* from, const uint8_t* to, size_t length) { +inline void copyValue(const uint16_t* from, uint8_t* to, size_t length) { // Fallback for systems without SSE2/NEON for (size_t i = 0; i < length; ++i) { to[i] = static_cast(from[i]); From 9fd56f06691a8bb000d5a2e7b4fc4fbcd891b0a7 Mon Sep 17 00:00:00 2001 From: chaokunyang Date: Wed, 15 Jan 2025 13:21:46 +0800 Subject: [PATCH 05/23] implement pyunicode library --- BUILD | 3 + cpp/fury/python/BUILD | 33 ++++++++++ cpp/fury/python/pyunicode.cc | 24 ++++++++ cpp/fury/python/pyunicode.h | 97 ++++++++++++++++++++++++++++++ python/pyfury/includes/libutil.pxd | 6 ++ 5 files changed, 163 insertions(+) create mode 100644 cpp/fury/python/BUILD create mode 100644 cpp/fury/python/pyunicode.cc create mode 100644 cpp/fury/python/pyunicode.h diff --git a/BUILD b/BUILD index d5f1063377..c3c58b4eef 100644 --- a/BUILD +++ b/BUILD @@ -31,6 +31,7 @@ pyx_library( ), deps = [ "//cpp/fury/util:fury_util", + "//cpp/fury/python:pyunicode", ], ) @@ -63,6 +64,7 @@ pyx_library( deps = [ "//cpp/fury/util:fury_util", "//cpp/fury/type:fury_type", + "//cpp/fury/python:pyunicode", "@com_google_absl//absl/container:flat_hash_map", ], ) @@ -83,6 +85,7 @@ pyx_library( ), deps = [ "//cpp/fury:fury", + "//cpp/fury/python:pyunicode", "@local_config_pyarrow//:python_numpy_headers", "@local_config_pyarrow//:arrow_python_shared_library" ], diff --git a/cpp/fury/python/BUILD b/cpp/fury/python/BUILD new file mode 100644 index 0000000000..6b218d0a60 --- /dev/null +++ b/cpp/fury/python/BUILD @@ -0,0 +1,33 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +load("@rules_cc//cc:defs.bzl", "cc_library", "cc_test") +load("@com_github_grpc_grpc//bazel:cython_library.bzl", "pyx_library") + +cc_library( + name = "pyunicode", + srcs = ["pyunicode.cc"], + hdrs = ["pyunicode.h"], + alwayslink=True, + linkstatic=True, + strip_include_prefix = "/cpp", + deps = [ + "//cpp/fury/util:fury_util", + "@local_config_python//:python_headers", + ], + visibility = ["//visibility:public"], +) diff --git a/cpp/fury/python/pyunicode.cc b/cpp/fury/python/pyunicode.cc new file mode 100644 index 0000000000..c25ce80945 --- /dev/null +++ b/cpp/fury/python/pyunicode.cc @@ -0,0 +1,24 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +#include "pyunicode.h" + +namespace fury { + +} // namespace fury diff --git a/cpp/fury/python/pyunicode.h b/cpp/fury/python/pyunicode.h new file mode 100644 index 0000000000..1c0ea57861 --- /dev/null +++ b/cpp/fury/python/pyunicode.h @@ -0,0 +1,97 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#include +#include +#include "fury/util/array_util.h" +#include "fury/util/buffer.h" +#include "fury/util/logging.h" +#include "fury/util/string_util.h" +#include "pyport.h" +#include "object.h" +#include "unicodeobject.h" + +namespace fury { + +static PyObject* unicode_latin1[256] = {nullptr}; + +static PyObject* get_latin1_char(unsigned char ch) { + PyObject* unicode = unicode_latin1[ch]; + if (!unicode) { + unicode = PyUnicode_New(1, ch); + if (!unicode) return NULL; + PyUnicode_1BYTE_DATA(unicode)[0] = ch; + // assert(_PyUnicode_CheckConsistency(unicode, 1)); + unicode_latin1[ch] = unicode; + } + Py_INCREF(unicode); + return unicode; +} + +// unicodeobject.c +inline PyObject* Fury_PyUnicode_FromUCS1(const uint8_t* u, Py_ssize_t size) { + PyObject* res; + unsigned char max_char; + FURY_CHECK(size > 0); + if (size == 1) return get_latin1_char(u[0]); + max_char = isAscii(reinterpret_cast(u), size) ? 
127 : 255; + res = PyUnicode_New(size, max_char); + if (!res) return NULL; + std::memcpy(PyUnicode_1BYTE_DATA(res), u, size); + // assert(_PyUnicode_CheckConsistency(res, 1)); + return res; +} + +inline PyObject* Fury_PyUnicode_FromUCS2(const uint16_t* u, Py_ssize_t size) { + PyObject* res; + Py_UCS2 max_char; + FURY_CHECK(size > 0); + if (size == 1) { + max_char = u[0]; + if (max_char < 256) { + return get_latin1_char(max_char); + } else { + res = PyUnicode_New(1, max_char); + if (res == NULL) { + return NULL; + } + if (PyUnicode_KIND(res) == PyUnicode_2BYTE_KIND) { + PyUnicode_2BYTE_DATA(res)[0] = (Py_UCS2)max_char; + } else { + FURY_CHECK(PyUnicode_KIND(res) == PyUnicode_4BYTE_KIND); + PyUnicode_4BYTE_DATA(res)[0] = max_char; + } + return res; + } + } + max_char = getMaxValue(u, size); + res = PyUnicode_New(size, max_char); + if (!res) { + return NULL; + } + if (max_char >= 256) { + std::memcpy(PyUnicode_2BYTE_DATA(res), u, sizeof(Py_UCS2) * size); + } else { + copyValue(u, PyUnicode_1BYTE_DATA(res), size); + } + // assert(_PyUnicode_CheckConsistency(res, 1)); + return res; +} + +} // namespace fury diff --git a/python/pyfury/includes/libutil.pxd b/python/pyfury/includes/libutil.pxd index 72a640033d..5618417c23 100644 --- a/python/pyfury/includes/libutil.pxd +++ b/python/pyfury/includes/libutil.pxd @@ -19,6 +19,7 @@ from libc.stdint cimport * from libcpp cimport bool as c_bool from libcpp.memory cimport shared_ptr from libcpp.string cimport string as c_string +from cpython cimport PyObject cdef extern from "fury/util/buffer.h" namespace "fury" nogil: cdef cppclass CStatus" fury::Status": @@ -111,3 +112,8 @@ cdef extern from "fury/util/bit_util.h" namespace "fury::util" nogil: cdef extern from "fury/util/string_util.h" namespace "fury" nogil: c_bool utf16HasSurrogatePairs(uint16_t* data, size_t size) + + +cdef extern from "fury/python/pyunicode.h" namespace "fury" nogil: + PyObject* Fury_PyUnicode_FromUCS1(const uint8_t* u, Py_ssize_t size) + PyObject* 
Fury_PyUnicode_FromUCS2(const uint16_t* u, Py_ssize_t size) From 77fbec9351bc45c45be1e175082c89a5e78cdfc9 Mon Sep 17 00:00:00 2001 From: chaokunyang Date: Wed, 15 Jan 2025 22:10:36 +0800 Subject: [PATCH 06/23] use pyunicode for python ucs1/2 string decoding --- cpp/fury/python/pyunicode.cc | 63 +++++++++++++++++++++++++++++ cpp/fury/python/pyunicode.h | 64 +----------------------------- cpp/fury/util/array_util_test.cc | 4 +- cpp/fury/util/platform.h | 4 ++ cpp/fury/util/string_util_test.cc | 21 ++++++++-- python/pyfury/_util.pyx | 11 +++-- python/pyfury/includes/libutil.pxd | 2 +- 7 files changed, 97 insertions(+), 72 deletions(-) diff --git a/cpp/fury/python/pyunicode.cc b/cpp/fury/python/pyunicode.cc index c25ce80945..e875c357e3 100644 --- a/cpp/fury/python/pyunicode.cc +++ b/cpp/fury/python/pyunicode.cc @@ -21,4 +21,67 @@ namespace fury { +static PyObject* unicode_latin1[256] = {nullptr}; + +static PyObject* get_latin1_char(unsigned char ch) { + PyObject* unicode = unicode_latin1[ch]; + if (!unicode) { + unicode = PyUnicode_New(1, ch); + if (!unicode) return NULL; + PyUnicode_1BYTE_DATA(unicode)[0] = ch; + // assert(_PyUnicode_CheckConsistency(unicode, 1)); + unicode_latin1[ch] = unicode; + } + Py_INCREF(unicode); + return unicode; +} + +PyObject* Fury_PyUnicode_FromUCS1(const char* u, Py_ssize_t size) { + PyObject* res; + unsigned char max_char; + FURY_CHECK(size > 0); + if (size == 1) return get_latin1_char(u[0]); + max_char = isAscii(reinterpret_cast(u), size) ? 
127 : 255; + res = PyUnicode_New(size, max_char); + if (!res) return NULL; + memcpy(PyUnicode_1BYTE_DATA(res), u, size); + // assert(_PyUnicode_CheckConsistency(res, 1)); + return res; +} + +PyObject* Fury_PyUnicode_FromUCS2(const uint16_t* u, Py_ssize_t size) { + PyObject* res; + Py_UCS2 max_char; + FURY_CHECK(size > 0); + if (size == 1) { + max_char = u[0]; + if (max_char < 256) { + return get_latin1_char(max_char); + } else { + res = PyUnicode_New(1, max_char); + if (res == NULL) { + return NULL; + } + if (PyUnicode_KIND(res) == PyUnicode_2BYTE_KIND) { + PyUnicode_2BYTE_DATA(res)[0] = (Py_UCS2)max_char; + } else { + FURY_CHECK(PyUnicode_KIND(res) == PyUnicode_4BYTE_KIND); + PyUnicode_4BYTE_DATA(res)[0] = max_char; + } + return res; + } + } + max_char = getMaxValue(u, size); + res = PyUnicode_New(size, max_char); + if (!res) { + return NULL; + } + if (max_char >= 256) { + memcpy(PyUnicode_2BYTE_DATA(res), u, sizeof(Py_UCS2) * size); + } else { + copyValue(u, PyUnicode_1BYTE_DATA(res), size); + } + // assert(_PyUnicode_CheckConsistency(res, 1)); + return res; +} } // namespace fury diff --git a/cpp/fury/python/pyunicode.h b/cpp/fury/python/pyunicode.h index 1c0ea57861..2512fbb45b 100644 --- a/cpp/fury/python/pyunicode.h +++ b/cpp/fury/python/pyunicode.h @@ -29,69 +29,9 @@ namespace fury { -static PyObject* unicode_latin1[256] = {nullptr}; - -static PyObject* get_latin1_char(unsigned char ch) { - PyObject* unicode = unicode_latin1[ch]; - if (!unicode) { - unicode = PyUnicode_New(1, ch); - if (!unicode) return NULL; - PyUnicode_1BYTE_DATA(unicode)[0] = ch; - // assert(_PyUnicode_CheckConsistency(unicode, 1)); - unicode_latin1[ch] = unicode; - } - Py_INCREF(unicode); - return unicode; -} - // unicodeobject.c -inline PyObject* Fury_PyUnicode_FromUCS1(const uint8_t* u, Py_ssize_t size) { - PyObject* res; - unsigned char max_char; - FURY_CHECK(size > 0); - if (size == 1) return get_latin1_char(u[0]); - max_char = isAscii(reinterpret_cast(u), size) ? 
127 : 255; - res = PyUnicode_New(size, max_char); - if (!res) return NULL; - std::memcpy(PyUnicode_1BYTE_DATA(res), u, size); - // assert(_PyUnicode_CheckConsistency(res, 1)); - return res; -} +PyObject* Fury_PyUnicode_FromUCS1(const char* u, Py_ssize_t size); -inline PyObject* Fury_PyUnicode_FromUCS2(const uint16_t* u, Py_ssize_t size) { - PyObject* res; - Py_UCS2 max_char; - FURY_CHECK(size > 0); - if (size == 1) { - max_char = u[0]; - if (max_char < 256) { - return get_latin1_char(max_char); - } else { - res = PyUnicode_New(1, max_char); - if (res == NULL) { - return NULL; - } - if (PyUnicode_KIND(res) == PyUnicode_2BYTE_KIND) { - PyUnicode_2BYTE_DATA(res)[0] = (Py_UCS2)max_char; - } else { - FURY_CHECK(PyUnicode_KIND(res) == PyUnicode_4BYTE_KIND); - PyUnicode_4BYTE_DATA(res)[0] = max_char; - } - return res; - } - } - max_char = getMaxValue(u, size); - res = PyUnicode_New(size, max_char); - if (!res) { - return NULL; - } - if (max_char >= 256) { - std::memcpy(PyUnicode_2BYTE_DATA(res), u, sizeof(Py_UCS2) * size); - } else { - copyValue(u, PyUnicode_1BYTE_DATA(res), size); - } - // assert(_PyUnicode_CheckConsistency(res, 1)); - return res; -} +PyObject* Fury_PyUnicode_FromUCS2(const uint16_t* u, Py_ssize_t size); } // namespace fury diff --git a/cpp/fury/util/array_util_test.cc b/cpp/fury/util/array_util_test.cc index 399018730a..50bfc3c92d 100644 --- a/cpp/fury/util/array_util_test.cc +++ b/cpp/fury/util/array_util_test.cc @@ -25,9 +25,9 @@ TEST(GetMaxValueTest, HandlesLargeArray) { } EXPECT_EQ(getMaxValue(arr, length), 1023); } -} // namespace fury +} // namespace fury -int main(int argc, char** argv) { +int main(int argc, char **argv) { ::testing::InitGoogleTest(&argc, argv); return RUN_ALL_TESTS(); } diff --git a/cpp/fury/util/platform.h b/cpp/fury/util/platform.h index bc5b13bbb9..0c725478d7 100644 --- a/cpp/fury/util/platform.h +++ b/cpp/fury/util/platform.h @@ -27,3 +27,7 @@ #include #define FURY_HAS_RISCV_VECTOR #endif +#if defined(__SSE2__) +#include 
+#define FURY_HAS_SSE2 +#endif diff --git a/cpp/fury/util/string_util_test.cc b/cpp/fury/util/string_util_test.cc index f57f75186f..080fd603ed 100644 --- a/cpp/fury/util/string_util_test.cc +++ b/cpp/fury/util/string_util_test.cc @@ -58,21 +58,34 @@ bool isAscii_BaseLine(const std::string &str) { TEST(StringUtilTest, TestisAsciiFunctions) { std::string testStr = generateRandomString(100000); auto start_time = std::chrono::high_resolution_clock::now(); - bool result = isAscii_BaseLine(testStr); + bool result; + int c = 0; + for (size_t i = 0; i < 10000; i++) { + result = isAscii_BaseLine(testStr); + if (result) { + c++; + } + } + auto end_time = std::chrono::high_resolution_clock::now(); auto duration = std::chrono::duration_cast( end_time - start_time) .count(); FURY_LOG(INFO) << "BaseLine Running Time: " << duration << " ns."; - + FURY_LOG(DEBUG) << "Avoid compiler optimized loop " << c; start_time = std::chrono::high_resolution_clock::now(); - result = isAscii(testStr); + for (size_t i = 0; i < 10000; i++) { + result = isAscii(testStr); + if (result) { + c++; + } + } end_time = std::chrono::high_resolution_clock::now(); duration = std::chrono::duration_cast(end_time - start_time) .count(); FURY_LOG(INFO) << "Optimized Running Time: " << duration << " ns."; - + FURY_LOG(DEBUG) << "Avoid compiler optimized loop " << c; EXPECT_TRUE(result); } diff --git a/python/pyfury/_util.pyx b/python/pyfury/_util.pyx index ca87d81e0c..0db77a81ad 100644 --- a/python/pyfury/_util.pyx +++ b/python/pyfury/_util.pyx @@ -27,7 +27,8 @@ from libcpp.memory cimport shared_ptr, make_shared from libc.stdint cimport * from libcpp cimport bool as c_bool from pyfury.includes.libutil cimport( - CBuffer, AllocateBuffer, GetBit, SetBit, ClearBit, SetBitTo, CStatus, StatusCode, utf16HasSurrogatePairs + CBuffer, AllocateBuffer, GetBit, SetBit, ClearBit, SetBitTo, CStatus, StatusCode, utf16HasSurrogatePairs, + Fury_PyUnicode_FromUCS1, Fury_PyUnicode_FromUCS2 ) cdef int32_t max_buffer_size = 2 ** 31 
- 1 @@ -572,12 +573,15 @@ cdef class Buffer: cdef uint64_t header = self.read_varuint64() cdef uint32_t size = header >> 2 self.check_bound(self.reader_index, size) + if size == 0: + return "" cdef const char * buf = (self.c_buffer.get().data() + self.reader_index) self.reader_index += size cdef uint32_t encoding = header & 0b11 if encoding == 0: # PyUnicode_FromASCII - return PyUnicode_DecodeLatin1(buf, size, "strict") + return Fury_PyUnicode_FromUCS1(buf, size) + # return PyUnicode_DecodeLatin1(buf, size, "strict") elif encoding == 1: if utf16HasSurrogatePairs(buf, size >> 1): return PyUnicode_DecodeUTF16( @@ -587,7 +591,8 @@ cdef class Buffer: &UTF16_LE, # fury use little-endian ) else: - return PyUnicode_FromKindAndData(PyUnicode_2BYTE_KIND, buf, size >> 1) + # return PyUnicode_FromKindAndData(PyUnicode_2BYTE_KIND, buf, size >> 1) + return Fury_PyUnicode_FromUCS2(buf, size >> 1) else: return PyUnicode_DecodeUTF8(buf, size, "strict") diff --git a/python/pyfury/includes/libutil.pxd b/python/pyfury/includes/libutil.pxd index 5618417c23..b79287a659 100644 --- a/python/pyfury/includes/libutil.pxd +++ b/python/pyfury/includes/libutil.pxd @@ -115,5 +115,5 @@ cdef extern from "fury/util/string_util.h" namespace "fury" nogil: cdef extern from "fury/python/pyunicode.h" namespace "fury" nogil: - PyObject* Fury_PyUnicode_FromUCS1(const uint8_t* u, Py_ssize_t size) + PyObject* Fury_PyUnicode_FromUCS1(const char* u, Py_ssize_t size) PyObject* Fury_PyUnicode_FromUCS2(const uint16_t* u, Py_ssize_t size) From ec2c4d4474fd6b84b440e1332ece505b2b056c6f Mon Sep 17 00:00:00 2001 From: chaokunyang Date: Wed, 15 Jan 2025 22:13:48 +0800 Subject: [PATCH 07/23] remove avx getMaxValue and copyValue --- cpp/fury/util/array_util.h | 72 -------------------------------------- 1 file changed, 72 deletions(-) diff --git a/cpp/fury/util/array_util.h b/cpp/fury/util/array_util.h index 0058c675b6..b4c88866a0 100644 --- a/cpp/fury/util/array_util.h +++ b/cpp/fury/util/array_util.h @@ -1,78 +1,6 @@ 
#include "fury/util/platform.h" namespace fury { -#if defined(FURY_HAS_IMMINTRIN) -// TODO: runtime dispatch for avx -// We can not distribute a seperate wheel for avx, so we need to -// check instcuctions set at runtime -inline uint16_t getMaxValueAVX(const uint16_t* arr, size_t length) { - if (length == 0) { - return 0; // Return 0 for empty arrays - } - __m256i max_val = _mm256_setzero_si256(); // Initialize max vector with zeros - size_t i = 0; - for (; i + 16 <= length; i += 16) { - __m256i current_val = _mm256_loadu_si256((__m256i*)&arr[i]); - max_val = _mm256_max_epu16(max_val, current_val); // Max operation - } - - // Find the max value in the resulting vector - uint16_t temp[16]; - _mm256_storeu_si256((__m256i*)temp, max_val); - uint16_t max_avx = temp[0]; - for (int j = 1; j < 16; j++) { - if (temp[j] > max_avx) { - max_avx = temp[j]; - } - } - - // Handle remaining elements - for (; i < length; i++) { - if (arr[i] > max_avx) { - max_avx = arr[i]; - } - } - return max_avx; -} - -inline void copyValueAVX(const uint16_t* from, uint8_t* to, size_t length) { - size_t i = 0; - // Process chunks of 32 bytes (16 uint16_t elements at a time) - for (; i + 31 < length; i += 32) { - // Load two 256-bit blocks (32 uint16_t elements total) - __m256i src1 = _mm256_loadu_si256(reinterpret_cast(&from[i])); - __m256i src2 = _mm256_loadu_si256(reinterpret_cast(&from[i + 16])); - - // Narrow the 16-bit integers to 8-bit integers - __m256i packed = _mm256_packus_epi16(src1, src2); - - // Shuffle the packed result to interleave lower and upper parts - packed = _mm256_permute4x64_epi64(packed, _MM_SHUFFLE(3, 1, 2, 0)); - - // Store the result - _mm256_storeu_si256(reinterpret_cast<__m256i*>(&to[i]), packed); - } - // Check if at least 16 elements are left to process - if (i + 15 < length) { - // Process the next 16 elements - __m256i src1 = _mm256_loadu_si256(reinterpret_cast(&from[i])); - // Narrow the 16-bit integers to 8-bit integers by zeroing the upper halves - __m128i 
packed1 = _mm256_castsi256_si128(src1); // Lower 128 bits - __m128i packed2 = _mm256_extracti128_si256(src1, 1); // Upper 128 bits - // Pack two 128-wide vectors into 8-bit integers, ignore saturating with itself. - __m128i packed = _mm_packus_epi16(packed1, packed2); - - // Store the result; using only the first 128 bits - _mm_storeu_si128(reinterpret_cast<__m128i*>(&to[i]), packed); - - i += 16; - } - // Process remaining elements one at a time - for (; i < length; ++i) { - to[i] = static_cast(from[i]); - } -} -#endif #if defined(FURY_HAS_NEON) inline uint16_t getMaxValue(const uint16_t* arr, size_t length) { if (length == 0) { From a0d74f14220b146b9562238651fce26c60db90a9 Mon Sep 17 00:00:00 2001 From: chaokunyang Date: Wed, 15 Jan 2025 22:14:18 +0800 Subject: [PATCH 08/23] rename copyValue to copyArray --- cpp/fury/python/pyunicode.cc | 2 +- cpp/fury/util/array_util.h | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/cpp/fury/python/pyunicode.cc b/cpp/fury/python/pyunicode.cc index e875c357e3..57654ffead 100644 --- a/cpp/fury/python/pyunicode.cc +++ b/cpp/fury/python/pyunicode.cc @@ -79,7 +79,7 @@ PyObject* Fury_PyUnicode_FromUCS2(const uint16_t* u, Py_ssize_t size) { if (max_char >= 256) { memcpy(PyUnicode_2BYTE_DATA(res), u, sizeof(Py_UCS2) * size); } else { - copyValue(u, PyUnicode_1BYTE_DATA(res), size); + copyArray(u, PyUnicode_1BYTE_DATA(res), size); } // assert(_PyUnicode_CheckConsistency(res, 1)); return res; diff --git a/cpp/fury/util/array_util.h b/cpp/fury/util/array_util.h index b4c88866a0..b826b08572 100644 --- a/cpp/fury/util/array_util.h +++ b/cpp/fury/util/array_util.h @@ -33,7 +33,7 @@ inline uint16_t getMaxValue(const uint16_t* arr, size_t length) { return max_neon; } -inline void copyValue(const uint16_t* from, uint8_t* to, size_t length) { +inline void copyArray(const uint16_t* from, uint8_t* to, size_t length) { size_t i = 0; for (; i + 7 < length; i += 8) { uint16x8_t src = vld1q_u16(&from[i]); @@ -79,7 +79,7 @@ 
inline uint16_t getMaxValue(const uint16_t* arr, size_t length) { return max_sse; } -inline void copyValue(const uint16_t* from, uint8_t* to, size_t length) { +inline void copyArray(const uint16_t* from, uint8_t* to, size_t length) { size_t i = 0; __m128i mask = _mm_set1_epi16(0xFF); // Mask to zero out the high byte for (; i + 7 < length; i += 8) { @@ -108,7 +108,7 @@ inline uint16_t getMaxValue(const uint16_t* arr, size_t length) { return max_val; } -inline void copyValue(const uint16_t* from, uint8_t* to, size_t length) { +inline void copyArray(const uint16_t* from, uint8_t* to, size_t length) { // Fallback for systems without SSE2/NEON for (size_t i = 0; i < length; ++i) { to[i] = static_cast(from[i]); From 8e2a4b261f9264ee764a3ce47d7be2eda2919828 Mon Sep 17 00:00:00 2001 From: chaokunyang Date: Wed, 15 Jan 2025 22:20:58 +0800 Subject: [PATCH 09/23] add header and #pragma once --- cpp/fury/util/array_util.h | 20 ++++++++++++++++++++ cpp/fury/util/array_util_test.cc | 19 +++++++++++++++++++ cpp/fury/util/platform.h | 2 ++ 3 files changed, 41 insertions(+) diff --git a/cpp/fury/util/array_util.h b/cpp/fury/util/array_util.h index b826b08572..e4df6fd148 100644 --- a/cpp/fury/util/array_util.h +++ b/cpp/fury/util/array_util.h @@ -1,3 +1,23 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. 
See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#pragma once #include "fury/util/platform.h" namespace fury { diff --git a/cpp/fury/util/array_util_test.cc b/cpp/fury/util/array_util_test.cc index 50bfc3c92d..669bb96223 100644 --- a/cpp/fury/util/array_util_test.cc +++ b/cpp/fury/util/array_util_test.cc @@ -1,3 +1,22 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + #include "fury/util/array_util.h" #include "gtest/gtest.h" diff --git a/cpp/fury/util/platform.h b/cpp/fury/util/platform.h index 0c725478d7..9aa562a3db 100644 --- a/cpp/fury/util/platform.h +++ b/cpp/fury/util/platform.h @@ -17,6 +17,8 @@ * under the License. 
*/ +#pragma once + #if defined(__x86_64__) || defined(_M_X64) #include #define FURY_HAS_IMMINTRIN From d1d02e71ef2e6a4a3065fef473ffae247c0659f0 Mon Sep 17 00:00:00 2001 From: chaokunyang Date: Wed, 15 Jan 2025 22:24:46 +0800 Subject: [PATCH 10/23] add cstdint include --- cpp/fury/util/array_util.h | 1 + 1 file changed, 1 insertion(+) diff --git a/cpp/fury/util/array_util.h b/cpp/fury/util/array_util.h index e4df6fd148..ef7328ab70 100644 --- a/cpp/fury/util/array_util.h +++ b/cpp/fury/util/array_util.h @@ -18,6 +18,7 @@ */ #pragma once +#include #include "fury/util/platform.h" namespace fury { From 221a6f10962ecc855d39c989dafc85892f07239a Mon Sep 17 00:00:00 2001 From: chaokunyang Date: Wed, 15 Jan 2025 22:25:46 +0800 Subject: [PATCH 11/23] lint code --- cpp/fury/python/pyunicode.cc | 29 ++++++++++++++------------ cpp/fury/python/pyunicode.h | 12 +++++------ cpp/fury/util/array_util.h | 40 ++++++++++++++++++------------------ 3 files changed, 42 insertions(+), 39 deletions(-) diff --git a/cpp/fury/python/pyunicode.cc b/cpp/fury/python/pyunicode.cc index 57654ffead..e8cfee7cae 100644 --- a/cpp/fury/python/pyunicode.cc +++ b/cpp/fury/python/pyunicode.cc @@ -21,13 +21,14 @@ namespace fury { -static PyObject* unicode_latin1[256] = {nullptr}; +static PyObject *unicode_latin1[256] = {nullptr}; -static PyObject* get_latin1_char(unsigned char ch) { - PyObject* unicode = unicode_latin1[ch]; +static PyObject *get_latin1_char(unsigned char ch) { + PyObject *unicode = unicode_latin1[ch]; if (!unicode) { unicode = PyUnicode_New(1, ch); - if (!unicode) return NULL; + if (!unicode) + return NULL; PyUnicode_1BYTE_DATA(unicode)[0] = ch; // assert(_PyUnicode_CheckConsistency(unicode, 1)); unicode_latin1[ch] = unicode; @@ -36,21 +37,23 @@ static PyObject* get_latin1_char(unsigned char ch) { return unicode; } -PyObject* Fury_PyUnicode_FromUCS1(const char* u, Py_ssize_t size) { - PyObject* res; +PyObject *Fury_PyUnicode_FromUCS1(const char *u, Py_ssize_t size) { + PyObject *res; 
unsigned char max_char; FURY_CHECK(size > 0); - if (size == 1) return get_latin1_char(u[0]); - max_char = isAscii(reinterpret_cast(u), size) ? 127 : 255; + if (size == 1) + return get_latin1_char(u[0]); + max_char = isAscii(reinterpret_cast(u), size) ? 127 : 255; res = PyUnicode_New(size, max_char); - if (!res) return NULL; + if (!res) + return NULL; memcpy(PyUnicode_1BYTE_DATA(res), u, size); // assert(_PyUnicode_CheckConsistency(res, 1)); return res; } -PyObject* Fury_PyUnicode_FromUCS2(const uint16_t* u, Py_ssize_t size) { - PyObject* res; +PyObject *Fury_PyUnicode_FromUCS2(const uint16_t *u, Py_ssize_t size) { + PyObject *res; Py_UCS2 max_char; FURY_CHECK(size > 0); if (size == 1) { @@ -79,9 +82,9 @@ PyObject* Fury_PyUnicode_FromUCS2(const uint16_t* u, Py_ssize_t size) { if (max_char >= 256) { memcpy(PyUnicode_2BYTE_DATA(res), u, sizeof(Py_UCS2) * size); } else { - copyArray(u, PyUnicode_1BYTE_DATA(res), size); + copyArray(u, PyUnicode_1BYTE_DATA(res), size); } // assert(_PyUnicode_CheckConsistency(res, 1)); return res; } -} // namespace fury +} // namespace fury diff --git a/cpp/fury/python/pyunicode.h b/cpp/fury/python/pyunicode.h index 2512fbb45b..76b96751ba 100644 --- a/cpp/fury/python/pyunicode.h +++ b/cpp/fury/python/pyunicode.h @@ -17,21 +17,21 @@ * under the License. 
*/ -#include -#include #include "fury/util/array_util.h" #include "fury/util/buffer.h" #include "fury/util/logging.h" #include "fury/util/string_util.h" -#include "pyport.h" #include "object.h" +#include "pyport.h" #include "unicodeobject.h" +#include +#include namespace fury { // unicodeobject.c -PyObject* Fury_PyUnicode_FromUCS1(const char* u, Py_ssize_t size); +PyObject *Fury_PyUnicode_FromUCS1(const char *u, Py_ssize_t size); -PyObject* Fury_PyUnicode_FromUCS2(const uint16_t* u, Py_ssize_t size); +PyObject *Fury_PyUnicode_FromUCS2(const uint16_t *u, Py_ssize_t size); -} // namespace fury +} // namespace fury diff --git a/cpp/fury/util/array_util.h b/cpp/fury/util/array_util.h index ef7328ab70..9bc7053dc9 100644 --- a/cpp/fury/util/array_util.h +++ b/cpp/fury/util/array_util.h @@ -18,21 +18,21 @@ */ #pragma once -#include #include "fury/util/platform.h" +#include namespace fury { #if defined(FURY_HAS_NEON) -inline uint16_t getMaxValue(const uint16_t* arr, size_t length) { +inline uint16_t getMaxValue(const uint16_t *arr, size_t length) { if (length == 0) { - return 0; // Return 0 for empty arrays + return 0; // Return 0 for empty arrays } - uint16x8_t max_val = vdupq_n_u16(0); // Initialize max vector to zero + uint16x8_t max_val = vdupq_n_u16(0); // Initialize max vector to zero size_t i = 0; for (; i + 8 <= length; i += 8) { uint16x8_t current_val = vld1q_u16(&arr[i]); - max_val = vmaxq_u16(max_val, current_val); // Max operation + max_val = vmaxq_u16(max_val, current_val); // Max operation } // Find the max value in the resulting vector @@ -54,7 +54,7 @@ inline uint16_t getMaxValue(const uint16_t* arr, size_t length) { return max_neon; } -inline void copyArray(const uint16_t* from, uint8_t* to, size_t length) { +inline void copyArray(const uint16_t *from, uint8_t *to, size_t length) { size_t i = 0; for (; i + 7 < length; i += 8) { uint16x8_t src = vld1q_u16(&from[i]); @@ -68,22 +68,22 @@ inline void copyArray(const uint16_t* from, uint8_t* to, size_t length) 
{ } } #elif defined(FURY_HAS_SSE2) -inline uint16_t getMaxValue(const uint16_t* arr, size_t length) { +inline uint16_t getMaxValue(const uint16_t *arr, size_t length) { if (length == 0) { - return 0; // Return 0 for empty arrays + return 0; // Return 0 for empty arrays } - __m128i max_val = _mm_setzero_si128(); // Initialize max vector with zeros + __m128i max_val = _mm_setzero_si128(); // Initialize max vector with zeros size_t i = 0; for (; i + 8 <= length; i += 8) { - __m128i current_val = _mm_loadu_si128((__m128i*)&arr[i]); - max_val = _mm_max_epu16(max_val, current_val); // Max operation + __m128i current_val = _mm_loadu_si128((__m128i *)&arr[i]); + max_val = _mm_max_epu16(max_val, current_val); // Max operation } // Find the max value in the resulting vector uint16_t temp[8]; - _mm_storeu_si128((__m128i*)temp, max_val); + _mm_storeu_si128((__m128i *)temp, max_val); uint16_t max_sse = temp[0]; for (int j = 1; j < 8; j++) { if (temp[j] > max_sse) { @@ -100,13 +100,13 @@ inline uint16_t getMaxValue(const uint16_t* arr, size_t length) { return max_sse; } -inline void copyArray(const uint16_t* from, uint8_t* to, size_t length) { +inline void copyArray(const uint16_t *from, uint8_t *to, size_t length) { size_t i = 0; - __m128i mask = _mm_set1_epi16(0xFF); // Mask to zero out the high byte + __m128i mask = _mm_set1_epi16(0xFF); // Mask to zero out the high byte for (; i + 7 < length; i += 8) { - __m128i src = _mm_loadu_si128(reinterpret_cast(&from[i])); + __m128i src = _mm_loadu_si128(reinterpret_cast(&from[i])); __m128i result = _mm_and_si128(src, mask); - _mm_storel_epi64(reinterpret_cast<__m128i*>(&to[i]), + _mm_storel_epi64(reinterpret_cast<__m128i *>(&to[i]), _mm_packus_epi16(result, result)); } @@ -116,9 +116,9 @@ inline void copyArray(const uint16_t* from, uint8_t* to, size_t length) { } } #else -inline uint16_t getMaxValue(const uint16_t* arr, size_t length) { +inline uint16_t getMaxValue(const uint16_t *arr, size_t length) { if (length == 0) { - return 0; 
// Return 0 for empty arrays + return 0; // Return 0 for empty arrays } uint16_t max_val = arr[0]; for (size_t i = 1; i < length; i++) { @@ -129,11 +129,11 @@ inline uint16_t getMaxValue(const uint16_t* arr, size_t length) { return max_val; } -inline void copyArray(const uint16_t* from, uint8_t* to, size_t length) { +inline void copyArray(const uint16_t *from, uint8_t *to, size_t length) { // Fallback for systems without SSE2/NEON for (size_t i = 0; i < length; ++i) { to[i] = static_cast(from[i]); } } #endif -} // namespace fury +} // namespace fury From 4793946c468753ad0e3639c7305acc8655a95a6e Mon Sep 17 00:00:00 2001 From: chaokunyang Date: Wed, 15 Jan 2025 22:28:12 +0800 Subject: [PATCH 12/23] add #include --- cpp/fury/python/pyunicode.cc | 1 + 1 file changed, 1 insertion(+) diff --git a/cpp/fury/python/pyunicode.cc b/cpp/fury/python/pyunicode.cc index e8cfee7cae..a3454f02a9 100644 --- a/cpp/fury/python/pyunicode.cc +++ b/cpp/fury/python/pyunicode.cc @@ -18,6 +18,7 @@ */ #include "pyunicode.h" +#include namespace fury { From 6f0a64b0eb5ff5286081f81bbe10dcba62d9cb7e Mon Sep 17 00:00:00 2001 From: chaokunyang Date: Wed, 15 Jan 2025 22:32:29 +0800 Subject: [PATCH 13/23] remove array util inline --- cpp/fury/util/array_util.cc | 139 ++++++++++++++++++++++++++++++++++++ cpp/fury/util/array_util.h | 115 +---------------------------- 2 files changed, 141 insertions(+), 113 deletions(-) create mode 100644 cpp/fury/util/array_util.cc diff --git a/cpp/fury/util/array_util.cc b/cpp/fury/util/array_util.cc new file mode 100644 index 0000000000..65a4cd862f --- /dev/null +++ b/cpp/fury/util/array_util.cc @@ -0,0 +1,139 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#pragma once +#include "fury/util/platform.h" +#include + +namespace fury { +#if defined(FURY_HAS_NEON) +uint16_t getMaxValue(const uint16_t *arr, size_t length) { + if (length == 0) { + return 0; // Return 0 for empty arrays + } + uint16x8_t max_val = vdupq_n_u16(0); // Initialize max vector to zero + + size_t i = 0; + for (; i + 8 <= length; i += 8) { + uint16x8_t current_val = vld1q_u16(&arr[i]); + max_val = vmaxq_u16(max_val, current_val); // Max operation + } + + // Find the max value in the resulting vector + uint16_t temp[8]; + vst1q_u16(temp, max_val); + uint16_t max_neon = temp[0]; + for (int j = 1; j < 8; j++) { + if (temp[j] > max_neon) { + max_neon = temp[j]; + } + } + + // Handle remaining elements + for (; i < length; i++) { + if (arr[i] > max_neon) { + max_neon = arr[i]; + } + } + return max_neon; +} + +void copyArray(const uint16_t *from, uint8_t *to, size_t length) { + size_t i = 0; + for (; i + 7 < length; i += 8) { + uint16x8_t src = vld1q_u16(&from[i]); + uint8x8_t result = vmovn_u16(src); + vst1_u8(&to[i], result); + } + + // Fallback for the remainder + for (; i < length; ++i) { + to[i] = static_cast(from[i]); + } +} +#elif defined(FURY_HAS_SSE2) +uint16_t getMaxValue(const uint16_t *arr, size_t length) { + if (length == 0) { + return 0; // Return 0 for empty arrays + } + + __m128i max_val = _mm_setzero_si128(); // Initialize max vector with zeros + + 
size_t i = 0; + for (; i + 8 <= length; i += 8) { + __m128i current_val = _mm_loadu_si128((__m128i *)&arr[i]); + max_val = _mm_max_epu16(max_val, current_val); // Max operation + } + + // Find the max value in the resulting vector + uint16_t temp[8]; + _mm_storeu_si128((__m128i *)temp, max_val); + uint16_t max_sse = temp[0]; + for (int j = 1; j < 8; j++) { + if (temp[j] > max_sse) { + max_sse = temp[j]; + } + } + + // Handle remaining elements + for (; i < length; i++) { + if (arr[i] > max_sse) { + max_sse = arr[i]; + } + } + return max_sse; +} + +void copyArray(const uint16_t *from, uint8_t *to, size_t length) { + size_t i = 0; + __m128i mask = _mm_set1_epi16(0xFF); // Mask to zero out the high byte + for (; i + 7 < length; i += 8) { + __m128i src = _mm_loadu_si128(reinterpret_cast(&from[i])); + __m128i result = _mm_and_si128(src, mask); + _mm_storel_epi64(reinterpret_cast<__m128i *>(&to[i]), + _mm_packus_epi16(result, result)); + } + + // Fallback for the remainder + for (; i < length; ++i) { + to[i] = static_cast(from[i]); + } +} +#else +uint16_t getMaxValue(const uint16_t *arr, size_t length) { + if (length == 0) { + return 0; // Return 0 for empty arrays + } + uint16_t max_val = arr[0]; + for (size_t i = 1; i < length; i++) { + if (arr[i] > max_val) { + max_val = arr[i]; + } + } + return max_val; +} + +void copyArray(const uint16_t *from, uint8_t *to, size_t length) { + // Fallback for systems without SSE2/NEON + for (size_t i = 0; i < length; ++i) { + to[i] = static_cast(from[i]); + } +} +#endif +} // namespace fury diff --git a/cpp/fury/util/array_util.h b/cpp/fury/util/array_util.h index 9bc7053dc9..e7d89f275d 100644 --- a/cpp/fury/util/array_util.h +++ b/cpp/fury/util/array_util.h @@ -22,118 +22,7 @@ #include namespace fury { -#if defined(FURY_HAS_NEON) -inline uint16_t getMaxValue(const uint16_t *arr, size_t length) { - if (length == 0) { - return 0; // Return 0 for empty arrays - } - uint16x8_t max_val = vdupq_n_u16(0); // Initialize max vector to zero 
+uint16_t getMaxValue(const uint16_t *arr, size_t length); - size_t i = 0; - for (; i + 8 <= length; i += 8) { - uint16x8_t current_val = vld1q_u16(&arr[i]); - max_val = vmaxq_u16(max_val, current_val); // Max operation - } - - // Find the max value in the resulting vector - uint16_t temp[8]; - vst1q_u16(temp, max_val); - uint16_t max_neon = temp[0]; - for (int j = 1; j < 8; j++) { - if (temp[j] > max_neon) { - max_neon = temp[j]; - } - } - - // Handle remaining elements - for (; i < length; i++) { - if (arr[i] > max_neon) { - max_neon = arr[i]; - } - } - return max_neon; -} - -inline void copyArray(const uint16_t *from, uint8_t *to, size_t length) { - size_t i = 0; - for (; i + 7 < length; i += 8) { - uint16x8_t src = vld1q_u16(&from[i]); - uint8x8_t result = vmovn_u16(src); - vst1_u8(&to[i], result); - } - - // Fallback for the remainder - for (; i < length; ++i) { - to[i] = static_cast(from[i]); - } -} -#elif defined(FURY_HAS_SSE2) -inline uint16_t getMaxValue(const uint16_t *arr, size_t length) { - if (length == 0) { - return 0; // Return 0 for empty arrays - } - - __m128i max_val = _mm_setzero_si128(); // Initialize max vector with zeros - - size_t i = 0; - for (; i + 8 <= length; i += 8) { - __m128i current_val = _mm_loadu_si128((__m128i *)&arr[i]); - max_val = _mm_max_epu16(max_val, current_val); // Max operation - } - - // Find the max value in the resulting vector - uint16_t temp[8]; - _mm_storeu_si128((__m128i *)temp, max_val); - uint16_t max_sse = temp[0]; - for (int j = 1; j < 8; j++) { - if (temp[j] > max_sse) { - max_sse = temp[j]; - } - } - - // Handle remaining elements - for (; i < length; i++) { - if (arr[i] > max_sse) { - max_sse = arr[i]; - } - } - return max_sse; -} - -inline void copyArray(const uint16_t *from, uint8_t *to, size_t length) { - size_t i = 0; - __m128i mask = _mm_set1_epi16(0xFF); // Mask to zero out the high byte - for (; i + 7 < length; i += 8) { - __m128i src = _mm_loadu_si128(reinterpret_cast(&from[i])); - __m128i result = 
_mm_and_si128(src, mask); - _mm_storel_epi64(reinterpret_cast<__m128i *>(&to[i]), - _mm_packus_epi16(result, result)); - } - - // Fallback for the remainder - for (; i < length; ++i) { - to[i] = static_cast(from[i]); - } -} -#else -inline uint16_t getMaxValue(const uint16_t *arr, size_t length) { - if (length == 0) { - return 0; // Return 0 for empty arrays - } - uint16_t max_val = arr[0]; - for (size_t i = 1; i < length; i++) { - if (arr[i] > max_val) { - max_val = arr[i]; - } - } - return max_val; -} - -inline void copyArray(const uint16_t *from, uint8_t *to, size_t length) { - // Fallback for systems without SSE2/NEON - for (size_t i = 0; i < length; ++i) { - to[i] = static_cast(from[i]); - } -} -#endif +void copyArray(const uint16_t *from, uint8_t *to, size_t length); } // namespace fury From 2ebfbc88c4d8d9b76c2a88528670d9232dcf934d Mon Sep 17 00:00:00 2001 From: chaokunyang Date: Wed, 15 Jan 2025 22:36:30 +0800 Subject: [PATCH 14/23] include --- cpp/fury/util/array_util.h | 1 + 1 file changed, 1 insertion(+) diff --git a/cpp/fury/util/array_util.h b/cpp/fury/util/array_util.h index e7d89f275d..45eb0d33c7 100644 --- a/cpp/fury/util/array_util.h +++ b/cpp/fury/util/array_util.h @@ -20,6 +20,7 @@ #pragma once #include "fury/util/platform.h" #include +#include namespace fury { uint16_t getMaxValue(const uint16_t *arr, size_t length); From ad2f28acc0d633e1c44f7a6277f0db9dad2601cc Mon Sep 17 00:00:00 2001 From: chaokunyang Date: Wed, 15 Jan 2025 22:39:28 +0800 Subject: [PATCH 15/23] fix include --- cpp/fury/util/array_util.cc | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/cpp/fury/util/array_util.cc b/cpp/fury/util/array_util.cc index 65a4cd862f..1182e5c19a 100644 --- a/cpp/fury/util/array_util.cc +++ b/cpp/fury/util/array_util.cc @@ -17,9 +17,7 @@ * under the License. 
*/ -#pragma once -#include "fury/util/platform.h" -#include +#include "fury/util/array_util.h" namespace fury { #if defined(FURY_HAS_NEON) From ea206d99876335ffa629601afd51401d1d995cfd Mon Sep 17 00:00:00 2001 From: chaokunyang Date: Wed, 15 Jan 2025 22:44:49 +0800 Subject: [PATCH 16/23] add #pragma once --- cpp/fury/python/pyunicode.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/cpp/fury/python/pyunicode.h b/cpp/fury/python/pyunicode.h index 76b96751ba..84880bad24 100644 --- a/cpp/fury/python/pyunicode.h +++ b/cpp/fury/python/pyunicode.h @@ -17,6 +17,8 @@ * under the License. */ +#pragma once + #include "fury/util/array_util.h" #include "fury/util/buffer.h" #include "fury/util/logging.h" From d2627fb5f1f2b01a2e0d09549f5fd61e8491b995 Mon Sep 17 00:00:00 2001 From: chaokunyang Date: Wed, 15 Jan 2025 22:46:22 +0800 Subject: [PATCH 17/23] fix include --- cpp/fury/python/pyunicode.cc | 5 +++++ cpp/fury/python/pyunicode.h | 6 ------ 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/cpp/fury/python/pyunicode.cc b/cpp/fury/python/pyunicode.cc index a3454f02a9..2d9c2bceda 100644 --- a/cpp/fury/python/pyunicode.cc +++ b/cpp/fury/python/pyunicode.cc @@ -18,6 +18,11 @@ */ #include "pyunicode.h" +#include "fury/util/array_util.h" +#include "fury/util/buffer.h" +#include "fury/util/logging.h" +#include "fury/util/string_util.h" +#include "unicodeobject.h" #include namespace fury { diff --git a/cpp/fury/python/pyunicode.h b/cpp/fury/python/pyunicode.h index 84880bad24..d96239a942 100644 --- a/cpp/fury/python/pyunicode.h +++ b/cpp/fury/python/pyunicode.h @@ -19,19 +19,13 @@ #pragma once -#include "fury/util/array_util.h" -#include "fury/util/buffer.h" -#include "fury/util/logging.h" -#include "fury/util/string_util.h" #include "object.h" #include "pyport.h" -#include "unicodeobject.h" #include #include namespace fury { -// unicodeobject.c PyObject *Fury_PyUnicode_FromUCS1(const char *u, Py_ssize_t size); PyObject *Fury_PyUnicode_FromUCS2(const uint16_t *u, 
Py_ssize_t size); From 28aaf2cecd4ff786a2912cb6732d1fe16c545278 Mon Sep 17 00:00:00 2001 From: chaokunyang Date: Wed, 15 Jan 2025 22:48:20 +0800 Subject: [PATCH 18/23] fix include --- cpp/fury/python/pyunicode.cc | 2 ++ cpp/fury/python/pyunicode.h | 3 +-- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/cpp/fury/python/pyunicode.cc b/cpp/fury/python/pyunicode.cc index 2d9c2bceda..df5ef73198 100644 --- a/cpp/fury/python/pyunicode.cc +++ b/cpp/fury/python/pyunicode.cc @@ -24,6 +24,8 @@ #include "fury/util/string_util.h" #include "unicodeobject.h" #include +#include +#include namespace fury { diff --git a/cpp/fury/python/pyunicode.h b/cpp/fury/python/pyunicode.h index d96239a942..25504b7f5a 100644 --- a/cpp/fury/python/pyunicode.h +++ b/cpp/fury/python/pyunicode.h @@ -21,8 +21,7 @@ #include "object.h" #include "pyport.h" -#include -#include +#include namespace fury { From e326271dc0a0add5e1197c4047ffa38dff416dff Mon Sep 17 00:00:00 2001 From: chaokunyang Date: Wed, 15 Jan 2025 22:59:43 +0800 Subject: [PATCH 19/23] fix include --- cpp/fury/python/pyunicode.cc | 4 ---- cpp/fury/python/pyunicode.h | 8 +++++--- 2 files changed, 5 insertions(+), 7 deletions(-) diff --git a/cpp/fury/python/pyunicode.cc b/cpp/fury/python/pyunicode.cc index df5ef73198..5de937d1bb 100644 --- a/cpp/fury/python/pyunicode.cc +++ b/cpp/fury/python/pyunicode.cc @@ -19,13 +19,9 @@ #include "pyunicode.h" #include "fury/util/array_util.h" -#include "fury/util/buffer.h" #include "fury/util/logging.h" #include "fury/util/string_util.h" -#include "unicodeobject.h" #include -#include -#include namespace fury { diff --git a/cpp/fury/python/pyunicode.h b/cpp/fury/python/pyunicode.h index 25504b7f5a..d474915d81 100644 --- a/cpp/fury/python/pyunicode.h +++ b/cpp/fury/python/pyunicode.h @@ -18,10 +18,12 @@ */ #pragma once - -#include "object.h" -#include "pyport.h" +#include +#include #include +#include "pyport.h" +#include "object.h" +#include "unicodeobject.h" namespace fury { From 
1ef388cfc68048a8b338e3f00e23620f447ee885 Mon Sep 17 00:00:00 2001 From: chaokunyang Date: Wed, 15 Jan 2025 23:14:04 +0800 Subject: [PATCH 20/23] add Python.h include --- cpp/fury/python/pyunicode.h | 1 + cpp/fury/util/array_util_test.cc | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/cpp/fury/python/pyunicode.h b/cpp/fury/python/pyunicode.h index d474915d81..2237ee0b36 100644 --- a/cpp/fury/python/pyunicode.h +++ b/cpp/fury/python/pyunicode.h @@ -21,6 +21,7 @@ #include #include #include +#include "Python.h" #include "pyport.h" #include "object.h" #include "unicodeobject.h" diff --git a/cpp/fury/util/array_util_test.cc b/cpp/fury/util/array_util_test.cc index 669bb96223..cc27770362 100644 --- a/cpp/fury/util/array_util_test.cc +++ b/cpp/fury/util/array_util_test.cc @@ -22,7 +22,7 @@ namespace fury { TEST(GetMaxValueTest, HandlesEmptyArray) { - uint16_t arr[] = {}; + uint16_t* arr = nullptr; EXPECT_EQ(getMaxValue(arr, 0), 0); } From d4837ffbd03d9b0f55be51be4cf51dc2e8d0ed13 Mon Sep 17 00:00:00 2001 From: chaokunyang Date: Wed, 15 Jan 2025 23:15:00 +0800 Subject: [PATCH 21/23] lint code --- cpp/fury/python/pyunicode.h | 8 ++++---- cpp/fury/util/array_util_test.cc | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/cpp/fury/python/pyunicode.h b/cpp/fury/python/pyunicode.h index 2237ee0b36..3308dce00b 100644 --- a/cpp/fury/python/pyunicode.h +++ b/cpp/fury/python/pyunicode.h @@ -18,13 +18,13 @@ */ #pragma once -#include -#include -#include #include "Python.h" -#include "pyport.h" #include "object.h" +#include "pyport.h" #include "unicodeobject.h" +#include +#include +#include namespace fury { diff --git a/cpp/fury/util/array_util_test.cc b/cpp/fury/util/array_util_test.cc index cc27770362..eb9eebd7f5 100644 --- a/cpp/fury/util/array_util_test.cc +++ b/cpp/fury/util/array_util_test.cc @@ -22,7 +22,7 @@ namespace fury { TEST(GetMaxValueTest, HandlesEmptyArray) { - uint16_t* arr = nullptr; + uint16_t *arr = nullptr; 
EXPECT_EQ(getMaxValue(arr, 0), 0); } From 8fe4de7895873bb2712173669579db4d0635f61e Mon Sep 17 00:00:00 2001 From: chaokunyang Date: Wed, 15 Jan 2025 23:19:01 +0800 Subject: [PATCH 22/23] optimize include --- cpp/fury/python/pyunicode.cc | 1 + cpp/fury/python/pyunicode.h | 1 - 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/fury/python/pyunicode.cc b/cpp/fury/python/pyunicode.cc index 5de937d1bb..bbb5b35dc1 100644 --- a/cpp/fury/python/pyunicode.cc +++ b/cpp/fury/python/pyunicode.cc @@ -21,6 +21,7 @@ #include "fury/util/array_util.h" #include "fury/util/logging.h" #include "fury/util/string_util.h" +#include "unicodeobject.h" #include namespace fury { diff --git a/cpp/fury/python/pyunicode.h b/cpp/fury/python/pyunicode.h index 3308dce00b..0f4ddeb793 100644 --- a/cpp/fury/python/pyunicode.h +++ b/cpp/fury/python/pyunicode.h @@ -21,7 +21,6 @@ #include "Python.h" #include "object.h" #include "pyport.h" -#include "unicodeobject.h" #include #include #include From a940ba3574c866a493d719f9926b08c989a91596 Mon Sep 17 00:00:00 2001 From: chaokunyang Date: Wed, 15 Jan 2025 23:22:48 +0800 Subject: [PATCH 23/23] remove comments --- cpp/fury/python/pyunicode.cc | 3 --- 1 file changed, 3 deletions(-) diff --git a/cpp/fury/python/pyunicode.cc b/cpp/fury/python/pyunicode.cc index bbb5b35dc1..3c50e00b98 100644 --- a/cpp/fury/python/pyunicode.cc +++ b/cpp/fury/python/pyunicode.cc @@ -35,7 +35,6 @@ static PyObject *get_latin1_char(unsigned char ch) { if (!unicode) return NULL; PyUnicode_1BYTE_DATA(unicode)[0] = ch; - // assert(_PyUnicode_CheckConsistency(unicode, 1)); unicode_latin1[ch] = unicode; } Py_INCREF(unicode); @@ -53,7 +52,6 @@ PyObject *Fury_PyUnicode_FromUCS1(const char *u, Py_ssize_t size) { if (!res) return NULL; memcpy(PyUnicode_1BYTE_DATA(res), u, size); - // assert(_PyUnicode_CheckConsistency(res, 1)); return res; } @@ -89,7 +87,6 @@ PyObject *Fury_PyUnicode_FromUCS2(const uint16_t *u, Py_ssize_t size) { } else { copyArray(u, PyUnicode_1BYTE_DATA(res), 
size); } - // assert(_PyUnicode_CheckConsistency(res, 1)); return res; } } // namespace fury