diff --git a/BUILD b/BUILD index d5f1063377..c3c58b4eef 100644 --- a/BUILD +++ b/BUILD @@ -31,6 +31,7 @@ pyx_library( ), deps = [ "//cpp/fury/util:fury_util", + "//cpp/fury/python:pyunicode", ], ) @@ -63,6 +64,7 @@ pyx_library( deps = [ "//cpp/fury/util:fury_util", "//cpp/fury/type:fury_type", + "//cpp/fury/python:pyunicode", "@com_google_absl//absl/container:flat_hash_map", ], ) @@ -83,6 +85,7 @@ pyx_library( ), deps = [ "//cpp/fury:fury", + "//cpp/fury/python:pyunicode", "@local_config_pyarrow//:python_numpy_headers", "@local_config_pyarrow//:arrow_python_shared_library" ], diff --git a/cpp/fury/python/BUILD b/cpp/fury/python/BUILD new file mode 100644 index 0000000000..6b218d0a60 --- /dev/null +++ b/cpp/fury/python/BUILD @@ -0,0 +1,33 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+
+load("@rules_cc//cc:defs.bzl", "cc_library", "cc_test")
+load("@com_github_grpc_grpc//bazel:cython_library.bzl", "pyx_library")
+
+cc_library(
+    name = "pyunicode",
+    srcs = ["pyunicode.cc"],
+    hdrs = ["pyunicode.h"],
+    alwayslink=True,
+    linkstatic=True,
+    strip_include_prefix = "/cpp",
+    deps = [
+        "//cpp/fury/util:fury_util",
+        "@local_config_python//:python_headers",
+    ],
+    visibility = ["//visibility:public"],
+)
diff --git a/cpp/fury/python/pyunicode.cc b/cpp/fury/python/pyunicode.cc
new file mode 100644
index 0000000000..3c50e00b98
--- /dev/null
+++ b/cpp/fury/python/pyunicode.cc
@@ -0,0 +1,93 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#include "pyunicode.h"
+#include "fury/util/array_util.h"
+#include "fury/util/logging.h"
+#include "fury/util/string_util.h"
+#include "unicodeobject.h"
+#include <cstring>
+
+namespace fury {
+
+// Lazily filled cache of the one-character strings for code points 0..255.
+// Each filled slot holds one reference that this file never releases.
+static PyObject *unicode_latin1[256] = {nullptr};
+
+// Returns a new reference to the cached one-character string for `ch`,
+// creating and caching it on first use. Returns NULL if allocation fails.
+static PyObject *get_latin1_char(unsigned char ch) {
+  PyObject *unicode = unicode_latin1[ch];
+  if (!unicode) {
+    unicode = PyUnicode_New(1, ch);
+    if (!unicode)
+      return NULL;
+    PyUnicode_1BYTE_DATA(unicode)[0] = ch;
+    unicode_latin1[ch] = unicode;
+  }
+  // The cache keeps its own reference; the caller gets a separate one.
+  Py_INCREF(unicode);
+  return unicode;
+}
+
+// Builds a str from `size` one-byte (Latin-1) code units starting at `u`.
+// Returns a new reference, or NULL if the allocation fails. An empty input
+// yields an empty str; a negative size trips FURY_CHECK.
+PyObject *Fury_PyUnicode_FromUCS1(const char *u, Py_ssize_t size) {
+  FURY_CHECK(size >= 0);
+  if (size == 0)
+    return PyUnicode_FromStringAndSize("", 0);
+  if (size == 1)
+    return get_latin1_char(static_cast<unsigned char>(u[0]));
+  // NOTE(review): the template argument of this cast is not visible in the
+  // reviewed patch; confirm it matches the isAscii() declaration.
+  const unsigned char max_char =
+      isAscii(reinterpret_cast(u), size) ? 127 : 255;
+  PyObject *res = PyUnicode_New(size, max_char);
+  if (!res)
+    return NULL;
+  memcpy(PyUnicode_1BYTE_DATA(res), u, size);
+  return res;
+}
+
+// Builds a str from `size` 16-bit code units starting at `u`, using the
+// narrowest storage that fits the largest code unit. Returns a new
+// reference, or NULL if the allocation fails. An empty input yields an
+// empty str; a negative size trips FURY_CHECK.
+PyObject *Fury_PyUnicode_FromUCS2(const uint16_t *u, Py_ssize_t size) {
+  FURY_CHECK(size >= 0);
+  if (size == 0)
+    return PyUnicode_FromStringAndSize("", 0);
+  // A single Latin-1 code unit is served from the shared cache.
+  if (size == 1 && u[0] < 256)
+    return get_latin1_char(static_cast<unsigned char>(u[0]));
+  const Py_UCS2 max_char = getMaxValue(u, size);
+  PyObject *res = PyUnicode_New(size, max_char);
+  if (!res)
+    return NULL;
+  if (max_char >= 256) {
+    // PyUnicode_New chose 2-byte storage, so the units are copied verbatim.
+    memcpy(PyUnicode_2BYTE_DATA(res), u, sizeof(Py_UCS2) * size);
+  } else {
+    // Every unit fits in one byte: narrow into the 1-byte storage.
+    copyArray(u, PyUnicode_1BYTE_DATA(res), size);
+  }
+  return res;
+}
+} // namespace fury
diff --git a/cpp/fury/python/pyunicode.h b/cpp/fury/python/pyunicode.h
new file mode 100644
index 0000000000..0f4ddeb793
--- /dev/null
+++ b/cpp/fury/python/pyunicode.h
@@ -0,0 +1,34 @@
+/*
+ * Licensed to the
Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#pragma once
+#include "Python.h"
+#include "object.h"
+#include "pyport.h"
+#include
+#include
+#include
+
+namespace fury {
+// Builds a str from `size` one-byte code units at `u`; NULL on failure.
+PyObject *Fury_PyUnicode_FromUCS1(const char *u, Py_ssize_t size);
+// Builds a str from `size` 16-bit code units at `u`; NULL on failure.
+PyObject *Fury_PyUnicode_FromUCS2(const uint16_t *u, Py_ssize_t size);
+
+} // namespace fury
diff --git a/cpp/fury/util/BUILD b/cpp/fury/util/BUILD
index 8f605dc75e..124825b9b3 100644
--- a/cpp/fury/util/BUILD
+++ b/cpp/fury/util/BUILD
@@ -62,4 +62,16 @@ cc_test(
         ":fury_util",
         "@com_google_googletest//:gtest",
     ],
-)
\ No newline at end of file
+)
+
+
+cc_test(
+    name = "array_util_test",
+    srcs = ["array_util_test.cc"],
+    deps = [
+        ":fury_util",
+        "@com_google_googletest//:gtest",
+    ],
+    copts = ["-mavx2"],  # Enable AVX2 support
+    linkopts = ["-mavx2"],  # Ensure linker also knows about AVX2
+)
diff --git a/cpp/fury/util/array_util.cc b/cpp/fury/util/array_util.cc
new file mode 100644
index 0000000000..1182e5c19a
--- /dev/null
+++ b/cpp/fury/util/array_util.cc
@@ -0,0 +1,89 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#include "fury/util/array_util.h"
+
+namespace fury {
+
+// Returns the largest element of arr[0, length), or 0 when length is 0.
+// Full groups of 8 elements are reduced with NEON or SSE2 intrinsics when
+// platform.h enables them; the remaining elements use a scalar loop.
+uint16_t getMaxValue(const uint16_t *arr, size_t length) {
+  uint16_t max_val = 0;
+  size_t i = 0;
+#if defined(FURY_HAS_NEON) || defined(FURY_HAS_SSE2)
+  uint16_t lanes[8];
+#if defined(FURY_HAS_NEON)
+  uint16x8_t acc = vdupq_n_u16(0);
+  for (; i + 8 <= length; i += 8) {
+    acc = vmaxq_u16(acc, vld1q_u16(&arr[i]));
+  }
+  vst1q_u16(lanes, acc);
+#else
+  __m128i acc = _mm_setzero_si128();
+  for (; i + 8 <= length; i += 8) {
+    __m128i cur = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&arr[i]));
+    // Unsigned 16-bit max with SSE2 only (_mm_max_epu16 needs SSE4.1):
+    // the saturating subtract gives max(acc - cur, 0), and adding cur back
+    // yields max(acc, cur).
+    acc = _mm_adds_epu16(_mm_subs_epu16(acc, cur), cur);
+  }
+  _mm_storeu_si128(reinterpret_cast<__m128i *>(lanes), acc);
+#endif
+  // Fold the 8 per-lane maxima into a single value.
+  for (int j = 0; j < 8; j++) {
+    if (lanes[j] > max_val) {
+      max_val = lanes[j];
+    }
+  }
+#endif
+  // Scalar pass over the elements the vector loop did not cover.
+  for (; i < length; i++) {
+    if (arr[i] > max_val) {
+      max_val = arr[i];
+    }
+  }
+  return max_val;
+}
+
+// Stores the low 8 bits of each element of from[0, length) into
+// to[0, length). Callers that need a lossless copy must make sure every
+// element is below 256.
+void copyArray(const uint16_t *from, uint8_t *to, size_t length) {
+  size_t i = 0;
+#if defined(FURY_HAS_NEON)
+  for (; i + 8 <= length; i += 8) {
+    vst1_u8(&to[i], vmovn_u16(vld1q_u16(&from[i])));
+  }
+#elif defined(FURY_HAS_SSE2)
+  const __m128i low_byte = _mm_set1_epi16(0xFF);
+  for (; i + 8 <= length; i += 8) {
+    __m128i src = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&from[i]));
+    // Clear the high bytes first so the saturating pack cannot clamp.
+    __m128i low = _mm_and_si128(src, low_byte);
+    _mm_storel_epi64(reinterpret_cast<__m128i *>(&to[i]),
+                     _mm_packus_epi16(low, low));
+  }
+#endif
+  // Scalar pass over the elements the vector loop did not cover.
+  for (; i < length; ++i) {
+    to[i] = static_cast<uint8_t>(from[i]);
+  }
+}
+} // namespace fury
diff --git a/cpp/fury/util/array_util.h b/cpp/fury/util/array_util.h
new file mode 100644
index 0000000000..45eb0d33c7
--- /dev/null
+++ b/cpp/fury/util/array_util.h
@@ -0,0 +1,32 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#pragma once
+#include "fury/util/platform.h"
+#include <cstddef>
+#include <cstdint>
+
+namespace fury {
+// Returns the largest element of arr[0, length), or 0 when length is 0.
+uint16_t getMaxValue(const uint16_t *arr, size_t length);
+
+// Stores the low 8 bits of each element of from[0, length) into
+// to[0, length).
+void copyArray(const uint16_t *from, uint8_t *to, size_t length);
+} // namespace fury
diff --git a/cpp/fury/util/array_util_test.cc b/cpp/fury/util/array_util_test.cc
new file mode 100644
index 0000000000..eb9eebd7f5
--- /dev/null
+++ b/cpp/fury/util/array_util_test.cc
@@ -0,0 +1,52 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#include "fury/util/array_util.h"
+#include "gtest/gtest.h"
+
+namespace fury {
+TEST(GetMaxValueTest, HandlesEmptyArray) {
+  uint16_t *arr = nullptr;
+  EXPECT_EQ(getMaxValue(arr, 0), 0);
+}
+
+TEST(GetMaxValueTest, HandlesSingleElementArray) {
+  uint16_t arr[] = {42};
+  EXPECT_EQ(getMaxValue(arr, 1), 42);
+}
+
+TEST(GetMaxValueTest, HandlesSmallArray) {
+  uint16_t arr[] = {10, 20, 30, 40, 5};
+  EXPECT_EQ(getMaxValue(arr, 5), 40);
+}
+
+TEST(GetMaxValueTest, HandlesLargeArray) {
+  const size_t length = 1024;
+  uint16_t arr[length];
+  for (size_t i = 0; i < length; ++i) {
+    arr[i] = static_cast<uint16_t>(i);
+  }
+  EXPECT_EQ(getMaxValue(arr, length), 1023);
+}
+} // namespace fury
+
+int main(int argc, char **argv) {
+  ::testing::InitGoogleTest(&argc, argv);
+  return RUN_ALL_TESTS();
+}
diff --git a/cpp/fury/util/platform.h b/cpp/fury/util/platform.h
index 70a699bc20..9aa562a3db 100644
--- a/cpp/fury/util/platform.h
+++ b/cpp/fury/util/platform.h
@@ -17,16 +17,21 @@
  * under the License.
  */
 
+#pragma once
+
 #if defined(__x86_64__) || defined(_M_X64)
 #include <immintrin.h>
 #define FURY_HAS_IMMINTRIN
 #elif defined(__ARM_NEON) || defined(__ARM_NEON__)
 #include <arm_neon.h>
 #define FURY_HAS_NEON
-#elif defined(__SSE2__)
-#include <emmintrin.h>
-#define FURY_HAS_SSE2
 #elif defined(__riscv) && __riscv_vector
 #include <riscv_vector.h>
 #define FURY_HAS_RISCV_VECTOR
 #endif
+// SSE2 is detected separately from the chain above so that it is also
+// reported on x86-64 targets, where the first branch already matched.
+#if defined(__SSE2__)
+#include <emmintrin.h>
+#define FURY_HAS_SSE2
+#endif
diff --git a/cpp/fury/util/string_util_test.cc b/cpp/fury/util/string_util_test.cc
index f57f75186f..080fd603ed 100644
--- a/cpp/fury/util/string_util_test.cc
+++ b/cpp/fury/util/string_util_test.cc
@@ -58,21 +58,34 @@ bool isAscii_BaseLine(const std::string &str) {
 TEST(StringUtilTest, TestisAsciiFunctions) {
   std::string testStr = generateRandomString(100000);
   auto start_time = std::chrono::high_resolution_clock::now();
-  bool result = isAscii_BaseLine(testStr);
+  bool result;
+  int c = 0;
+  for (size_t i = 0; i < 10000; i++) {
+    result = isAscii_BaseLine(testStr);
+    if (result) {
+      c++;
+    }
+  }
+
   auto
end_time = std::chrono::high_resolution_clock::now(); auto duration = std::chrono::duration_cast( end_time - start_time) .count(); FURY_LOG(INFO) << "BaseLine Running Time: " << duration << " ns."; - + FURY_LOG(DEBUG) << "Avoid compiler optimized loop " << c; start_time = std::chrono::high_resolution_clock::now(); - result = isAscii(testStr); + for (size_t i = 0; i < 10000; i++) { + result = isAscii(testStr); + if (result) { + c++; + } + } end_time = std::chrono::high_resolution_clock::now(); duration = std::chrono::duration_cast(end_time - start_time) .count(); FURY_LOG(INFO) << "Optimized Running Time: " << duration << " ns."; - + FURY_LOG(DEBUG) << "Avoid compiler optimized loop " << c; EXPECT_TRUE(result); } diff --git a/python/pyfury/_util.pyx b/python/pyfury/_util.pyx index 3d0ac05fd9..d439ba8059 100644 --- a/python/pyfury/_util.pyx +++ b/python/pyfury/_util.pyx @@ -27,7 +27,8 @@ from libcpp.memory cimport shared_ptr, make_shared from libc.stdint cimport * from libcpp cimport bool as c_bool from pyfury.includes.libutil cimport( - CBuffer, AllocateBuffer, GetBit, SetBit, ClearBit, SetBitTo, CStatus, StatusCode, utf16HasSurrogatePairs + CBuffer, AllocateBuffer, GetBit, SetBit, ClearBit, SetBitTo, CStatus, StatusCode, utf16HasSurrogatePairs, + Fury_PyUnicode_FromUCS1, Fury_PyUnicode_FromUCS2 ) cdef int32_t max_buffer_size = 2 ** 31 - 1 @@ -573,12 +574,15 @@ cdef class Buffer: cdef uint64_t header = self.read_varuint64() cdef uint32_t size = header >> 2 self.check_bound(self.reader_index, size) + if size == 0: + return "" cdef const char * buf = (self.c_buffer.get().data() + self.reader_index) self.reader_index += size cdef uint32_t encoding = header & 0b11 if encoding == 0: # PyUnicode_FromASCII - return PyUnicode_DecodeLatin1(buf, size, "strict") + return Fury_PyUnicode_FromUCS1(buf, size) + # return PyUnicode_DecodeLatin1(buf, size, "strict") elif encoding == 1: if utf16HasSurrogatePairs(buf, size >> 1): return PyUnicode_DecodeUTF16( @@ -588,7 +592,8 @@ cdef 
class Buffer:
                     &UTF16_LE,  # fury use little-endian
                 )
             else:
-                return PyUnicode_FromKindAndData(PyUnicode_2BYTE_KIND, buf, size >> 1)
+                # return PyUnicode_FromKindAndData(PyUnicode_2BYTE_KIND, buf, size >> 1)
+                return Fury_PyUnicode_FromUCS2(buf, size >> 1)
         else:
             return PyUnicode_DecodeUTF8(buf, size, "strict")
 
diff --git a/python/pyfury/includes/libutil.pxd b/python/pyfury/includes/libutil.pxd
index 72a640033d..b79287a659 100644
--- a/python/pyfury/includes/libutil.pxd
+++ b/python/pyfury/includes/libutil.pxd
@@ -19,6 +19,7 @@ from libc.stdint cimport *
 from libcpp cimport bool as c_bool
 from libcpp.memory cimport shared_ptr
 from libcpp.string cimport string as c_string
+from cpython cimport PyObject
 
 cdef extern from "fury/util/buffer.h" namespace "fury" nogil:
     cdef cppclass CStatus" fury::Status":
@@ -111,3 +112,11 @@ cdef extern from "fury/util/bit_util.h" namespace "fury::util" nogil:
 
 cdef extern from "fury/util/string_util.h" namespace "fury" nogil:
     c_bool utf16HasSurrogatePairs(uint16_t* data, size_t size)
+
+
+# These helpers call the CPython API, so they are declared outside any
+# `nogil` block, and they return new references, which the `object` return
+# type lets Cython own.
+cdef extern from "fury/python/pyunicode.h" namespace "fury":
+    object Fury_PyUnicode_FromUCS1(const char* u, Py_ssize_t size)
+    object Fury_PyUnicode_FromUCS2(const uint16_t* u, Py_ssize_t size)