Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

perf(python): optimize pystr deserialize perf #2007

Open
wants to merge 24 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 23 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ pyx_library(
),
deps = [
"//cpp/fury/util:fury_util",
"//cpp/fury/python:pyunicode",
],
)

Expand Down Expand Up @@ -63,6 +64,7 @@ pyx_library(
deps = [
"//cpp/fury/util:fury_util",
"//cpp/fury/type:fury_type",
"//cpp/fury/python:pyunicode",
"@com_google_absl//absl/container:flat_hash_map",
],
)
Expand All @@ -83,6 +85,7 @@ pyx_library(
),
deps = [
"//cpp/fury:fury",
"//cpp/fury/python:pyunicode",
"@local_config_pyarrow//:python_numpy_headers",
"@local_config_pyarrow//:arrow_python_shared_library"
],
Expand Down
33 changes: 33 additions & 0 deletions cpp/fury/python/BUILD
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

load("@rules_cc//cc:defs.bzl", "cc_library", "cc_test")
load("@com_github_grpc_grpc//bazel:cython_library.bzl", "pyx_library")

# C++ helpers (pyunicode.h / pyunicode.cc) that build Python unicode objects.
# Depends on the Fury util library and the Python C headers.
cc_library(
    name = "pyunicode",
    srcs = ["pyunicode.cc"],
    hdrs = ["pyunicode.h"],
    linkstatic = True,
    # Lets dependents include the header as "fury/python/pyunicode.h".
    strip_include_prefix = "/cpp",
    visibility = ["//visibility:public"],
    deps = [
        "//cpp/fury/util:fury_util",
        "@local_config_python//:python_headers",
    ],
    alwayslink = True,
)
92 changes: 92 additions & 0 deletions cpp/fury/python/pyunicode.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

#include "pyunicode.h"
#include "fury/util/array_util.h"
#include "fury/util/logging.h"
#include "fury/util/string_util.h"
#include "unicodeobject.h"
#include <cassert>

namespace fury {

// Lazily filled cache of one-character strings, indexed by Latin-1 code point.
// Each populated slot keeps the reference obtained from PyUnicode_New.
// NOTE(review): there is no locking around this table; presumably every caller
// holds the GIL -- confirm.
static PyObject *unicode_latin1[256] = {nullptr};

/// Returns a one-character Python string for the Latin-1 code point `ch`.
///
/// The string is created on first use and stored in `unicode_latin1`; later
/// calls hand back the same object. The reference count is incremented before
/// returning, so the caller receives its own reference.
///
/// @param ch Code point in the range [0, 255].
/// @return The cached string, or NULL if PyUnicode_New fails.
static PyObject *get_latin1_char(unsigned char ch) {
  PyObject *&slot = unicode_latin1[ch];
  if (slot == nullptr) {
    PyObject *created = PyUnicode_New(1, ch);
    if (created == nullptr) {
      return NULL;
    }
    PyUnicode_1BYTE_DATA(created)[0] = ch;
    // Publish the entry only once it is fully initialised.
    slot = created;
  }
  Py_INCREF(slot);
  return slot;
}

/// Builds a Python string from `size` one-byte (Latin-1) code units.
///
/// A single character is served from the Latin-1 character cache. Longer
/// inputs are scanned with isAscii so the new object is created with a
/// maximum character of 127 (pure ASCII) or 255, then the bytes are copied
/// in directly.
///
/// @param u    Pointer to the first code unit; must reference `size` bytes.
/// @param size Number of code units; checked with FURY_CHECK to be > 0.
/// @return The new string, or NULL if PyUnicode_New fails.
PyObject *Fury_PyUnicode_FromUCS1(const char *u, Py_ssize_t size) {
  FURY_CHECK(size > 0);
  if (size == 1) {
    // Spell out the char -> unsigned char conversion the call performs.
    return get_latin1_char(static_cast<unsigned char>(u[0]));
  }
  const unsigned char max_char = isAscii(u, size) ? 127 : 255;
  PyObject *res = PyUnicode_New(size, max_char);
  if (res != nullptr) {
    memcpy(PyUnicode_1BYTE_DATA(res), u, size);
  }
  return res;
}

/// Builds a Python string from `size` two-byte (UCS-2) code units.
///
/// A single code unit below 256 is served from the Latin-1 character cache.
/// Every other input is scanned for its largest code unit, which decides the
/// storage width of the new object:
///   - largest >= 256: the units are copied as-is into 2-byte storage;
///   - largest <  256: the units are narrowed into 1-byte storage.
///
/// NOTE(review): code units are copied verbatim, so surrogate pairs are not
/// combined into single code points -- confirm that callers only pass BMP text.
///
/// @param u    Pointer to the first code unit; must reference `size` units.
/// @param size Number of code units; checked with FURY_CHECK to be > 0.
/// @return The new string, or NULL if PyUnicode_New fails.
PyObject *Fury_PyUnicode_FromUCS2(const uint16_t *u, Py_ssize_t size) {
  FURY_CHECK(size > 0);
  if (size == 1 && u[0] < 256) {
    return get_latin1_char(static_cast<unsigned char>(u[0]));
  }
  // A lone non-Latin-1 unit needs no dedicated path: its maximum is the unit
  // itself, and PyUnicode_New with a maximum in [256, 65535] always selects
  // 2-byte storage, so a 4-byte result can never occur for UCS-2 input.
  const Py_UCS2 max_char = getMaxValue(u, size);
  PyObject *res = PyUnicode_New(size, max_char);
  if (res == nullptr) {
    return nullptr;
  }
  if (max_char >= 256) {
    memcpy(PyUnicode_2BYTE_DATA(res), u, sizeof(Py_UCS2) * size);
  } else {
    copyArray(u, PyUnicode_1BYTE_DATA(res), size);
  }
  return res;
}
} // namespace fury
34 changes: 34 additions & 0 deletions cpp/fury/python/pyunicode.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

#pragma once
#include "Python.h"
#include "object.h"
#include "pyport.h"
#include <cstdint>
#include <cstring>
#include <string>

namespace fury {

/// Creates a Python string from `size` one-byte (Latin-1) code units at `u`.
/// `size` must be greater than zero. Returns NULL if the string object cannot
/// be allocated.
PyObject *Fury_PyUnicode_FromUCS1(const char *u, Py_ssize_t size);

/// Creates a Python string from `size` two-byte (UCS-2) code units at `u`.
/// `size` must be greater than zero. Returns NULL if the string object cannot
/// be allocated.
PyObject *Fury_PyUnicode_FromUCS2(const uint16_t *u, Py_ssize_t size);

} // namespace fury
14 changes: 13 additions & 1 deletion cpp/fury/util/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -62,4 +62,16 @@ cc_test(
":fury_util",
"@com_google_googletest//:gtest",
],
)
)


# Unit test for the array helpers declared in array_util.h.
# NOTE(review): "-mavx2" is passed unconditionally; presumably this target is
# only built on x86 hosts -- confirm before building it on other architectures.
cc_test(
    name = "array_util_test",
    srcs = ["array_util_test.cc"],
    copts = ["-mavx2"],  # Enable AVX2 support
    linkopts = ["-mavx2"],  # Ensure linker also knows about AVX2
    deps = [
        ":fury_util",
        "@com_google_googletest//:gtest",
    ],
)
137 changes: 137 additions & 0 deletions cpp/fury/util/array_util.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,137 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

#include "fury/util/array_util.h"

namespace fury {
#if defined(FURY_HAS_NEON)
uint16_t getMaxValue(const uint16_t *arr, size_t length) {
if (length == 0) {
return 0; // Return 0 for empty arrays
}
uint16x8_t max_val = vdupq_n_u16(0); // Initialize max vector to zero

size_t i = 0;
for (; i + 8 <= length; i += 8) {
uint16x8_t current_val = vld1q_u16(&arr[i]);
max_val = vmaxq_u16(max_val, current_val); // Max operation
}

// Find the max value in the resulting vector
uint16_t temp[8];
vst1q_u16(temp, max_val);
uint16_t max_neon = temp[0];
for (int j = 1; j < 8; j++) {
if (temp[j] > max_neon) {
max_neon = temp[j];
}
}

// Handle remaining elements
for (; i < length; i++) {
if (arr[i] > max_neon) {
max_neon = arr[i];
}
}
return max_neon;
}

void copyArray(const uint16_t *from, uint8_t *to, size_t length) {
size_t i = 0;
for (; i + 7 < length; i += 8) {
uint16x8_t src = vld1q_u16(&from[i]);
uint8x8_t result = vmovn_u16(src);
vst1_u8(&to[i], result);
}

// Fallback for the remainder
for (; i < length; ++i) {
to[i] = static_cast<uint8_t>(from[i]);
}
}
#elif defined(FURY_HAS_SSE2)
/// SSE2 implementation: returns the largest element of `arr`, or 0 when
/// `length` is 0.
///
/// Full groups of eight elements are folded into a vector of per-lane maxima;
/// the lanes are then reduced, and the elements that did not fill a complete
/// group are compared one by one.
///
/// Only SSE2 instructions are used. The packed unsigned 16-bit maximum
/// (_mm_max_epu16) is an SSE4.1 instruction and therefore must not appear in
/// the SSE2 code path.
uint16_t getMaxValue(const uint16_t *arr, size_t length) {
  if (length == 0) {
    return 0; // Return 0 for empty arrays
  }

  __m128i max_val = _mm_setzero_si128(); // Per-lane running maxima

  size_t i = 0;
  for (; i + 8 <= length; i += 8) {
    const __m128i current_val =
        _mm_loadu_si128(reinterpret_cast<const __m128i *>(&arr[i]));
    // Unsigned max built from SSE2 primitives: the saturating subtraction
    // yields max(a - b, 0), so adding b back gives max(a, b) and can never
    // overflow.
    max_val = _mm_add_epi16(_mm_subs_epu16(max_val, current_val), current_val);
  }

  // Reduce the eight lanes to a single value.
  uint16_t temp[8];
  _mm_storeu_si128(reinterpret_cast<__m128i *>(temp), max_val);
  uint16_t max_sse = temp[0];
  for (int j = 1; j < 8; j++) {
    if (temp[j] > max_sse) {
      max_sse = temp[j];
    }
  }

  // Handle the elements that did not fill a whole vector.
  for (; i < length; i++) {
    if (arr[i] > max_sse) {
      max_sse = arr[i];
    }
  }
  return max_sse;
}

/// SSE2 implementation: writes the low byte of each of the `length` 16-bit
/// elements in `from` to the matching position in `to`.
///
/// Eight elements are processed per iteration: the high byte of every lane is
/// cleared, the lanes are packed down to bytes, and the low 64 bits of the
/// packed vector are stored. Elements that do not fill a complete group are
/// converted one at a time.
void copyArray(const uint16_t *from, uint8_t *to, size_t length) {
  // Clearing the high bytes first keeps the saturating pack from clamping.
  const __m128i low_byte_mask = _mm_set1_epi16(0xFF);
  // Index one past the last complete group of eight elements.
  const size_t vector_end = length - (length % 8);

  size_t pos = 0;
  while (pos < vector_end) {
    const __m128i wide =
        _mm_loadu_si128(reinterpret_cast<const __m128i *>(from + pos));
    const __m128i low_bytes = _mm_and_si128(wide, low_byte_mask);
    const __m128i packed = _mm_packus_epi16(low_bytes, low_bytes);
    _mm_storel_epi64(reinterpret_cast<__m128i *>(to + pos), packed);
    pos += 8;
  }

  // Scalar tail.
  while (pos < length) {
    to[pos] = static_cast<uint8_t>(from[pos]);
    ++pos;
  }
}
#else
/// Portable implementation: returns the largest element of `arr`, or 0 when
/// `length` is 0.
uint16_t getMaxValue(const uint16_t *arr, size_t length) {
  if (length == 0) {
    return 0;
  }
  const uint16_t *cursor = arr;
  const uint16_t *const end = arr + length;
  uint16_t largest = *cursor;
  for (++cursor; cursor != end; ++cursor) {
    largest = *cursor > largest ? *cursor : largest;
  }
  return largest;
}

/// Portable implementation used when neither the SSE2 nor the NEON branch is
/// compiled: writes each of the `length` 16-bit elements in `from` to `to`,
/// converted to uint8_t.
void copyArray(const uint16_t *from, uint8_t *to, size_t length) {
  const uint16_t *const end = from + length;
  while (from != end) {
    *to++ = static_cast<uint8_t>(*from++);
  }
}
#endif
} // namespace fury
29 changes: 29 additions & 0 deletions cpp/fury/util/array_util.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

#pragma once
#include "fury/util/platform.h"
#include <cstdint>
#include <stdlib.h>

namespace fury {
/// Returns the largest value among the `length` elements starting at `arr`,
/// or 0 when `length` is 0.
uint16_t getMaxValue(const uint16_t *arr, size_t length);

/// Narrows `length` 16-bit elements from `from` into bytes written to `to`.
/// Each output byte is the low 8 bits of the corresponding input element.
void copyArray(const uint16_t *from, uint8_t *to, size_t length);
} // namespace fury
Loading
Loading