From 58b552388fd10787f1807558c0d4bf607bc95c88 Mon Sep 17 00:00:00 2001
From: Chester Liu <4710575+skyline75489@users.noreply.github.com>
Date: Thu, 20 Jun 2024 08:20:30 +0800
Subject: [PATCH 1/3] Fix several C5038 warnings (#748)

---
 include/ort_c_to_cpp.h        | 2 +-
 shared/api/image_processor.cc | 2 +-
 shared/api/runner.hpp         | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/include/ort_c_to_cpp.h b/include/ort_c_to_cpp.h
index 92c2fb01d..7969f080e 100644
--- a/include/ort_c_to_cpp.h
+++ b/include/ort_c_to_cpp.h
@@ -343,8 +343,8 @@ struct BaseKernel {
   OrtErrorCode GetErrorCodeAndRelease(OrtStatusPtr status) const noexcept;

   const OrtApi& api_;
-  OrtW::CustomOpApi ort_;
   const OrtKernelInfo& info_;
+  OrtW::CustomOpApi ort_;
 };

 // Deprecated: Use OrtW::CustomOpApi::KernelInfoGetAttribute instead
diff --git a/shared/api/image_processor.cc b/shared/api/image_processor.cc
index 028015972..9ecedf917 100644
--- a/shared/api/image_processor.cc
+++ b/shared/api/image_processor.cc
@@ -85,7 +85,7 @@ OrtxStatus ImageProcessor::Init(std::string_view processor_def) {
 }

 ImageProcessor::ImageProcessor()
-    : allocator_(&CppAllocator::Instance()), OrtxObjectImpl(kOrtxKindProcessor) {
+    : OrtxObjectImpl(kOrtxKindProcessor), allocator_(&CppAllocator::Instance()) {
 }

 template
diff --git a/shared/api/runner.hpp b/shared/api/runner.hpp
index b3170e0ae..ba5991400 100644
--- a/shared/api/runner.hpp
+++ b/shared/api/runner.hpp
@@ -278,8 +278,8 @@ class OrtxRunner {
   }

  private:
-  std::vector ops_;
   ortc::IAllocator* allocator_;
+  std::vector ops_;
 };

 } // namespace ort_extensions

From cbed8fd5758c277081fc0ff627906d5ee3bbce6d Mon Sep 17 00:00:00 2001
From: Wenbing Li <10278425+wenbingl@users.noreply.github.com>
Date: Thu, 20 Jun 2024 10:53:49 -0700
Subject: [PATCH 2/3] Add a generic image processor and its C API (#745)

* Add a generic image processor
* add more tests
* Fix the test failures
* Update runner.hpp
---
 .clang-format                                 |   5 +-
 cmake/ext_tests.cmake                         |   4 +-
 cmake/externals/json.cmake                    |   2 +
 include/custom_op/tensor_api.h                |  14 +
 include/ortx_c_helper.h                       |  97 +++++
 include/ortx_processor.h                      |  53 ++-
 include/ortx_utils.h                          |  44 ++-
 shared/api/c_api_processor.cc                 | 103 +++++
 shared/api/c_api_utils.cc                     |  64 ++-
 shared/api/c_api_utils.hpp                    |  79 ++--
 shared/api/image_processor.cc                 | 116 +++++-
 shared/api/image_processor.h                  |  29 +-
 shared/api/image_transforms.hpp               | 374 +++++++++---------
 shared/api/image_transforms_phi_3.hpp         | 209 ++++++++++
 shared/api/runner.hpp                         |  94 +++--
 test/data/processor/clip_image.json           |  59 +++
 ...{image_processor.json => phi_3_image.json} |   0
 test/pp_api_test/test_processor.cc            |  40 +-
 18 files changed, 1064 insertions(+), 322 deletions(-)
 create mode 100644 include/ortx_c_helper.h
 create mode 100644 shared/api/image_transforms_phi_3.hpp
 create mode 100644 test/data/processor/clip_image.json
 rename test/data/processor/{image_processor.json => phi_3_image.json} (100%)

diff --git a/.clang-format b/.clang-format
index 491a83575..747fcdd0a 100644
--- a/.clang-format
+++ b/.clang-format
@@ -1,10 +1,7 @@
 ---
 # Defaults for all languages.
 BasedOnStyle: Google
-
-# Setting ColumnLimit to 0 so developer choices about where to break lines are maintained.
-# Developers are responsible for adhering to the 120 character maximum.
-ColumnLimit: 0 +ColumnLimit: 120 SortIncludes: false DerivePointerAlignment: false diff --git a/cmake/ext_tests.cmake b/cmake/ext_tests.cmake index 4e39e7bab..c300bc9d1 100644 --- a/cmake/ext_tests.cmake +++ b/cmake/ext_tests.cmake @@ -145,8 +145,8 @@ if (OCOS_ENABLE_C_API) "$" "$") - if (ORTX_TEST_DATA2) - file(TO_NATIVE_PATH "${ORTX_TEST_DATA2}/tests/data2" _TEST_DATA2) + if (ORTX_DATA_PATH) + file(TO_NATIVE_PATH "${ORTX_DATA_PATH}/tests/data2" _TEST_DATA2) add_custom_command(TARGET pp_api_test POST_BUILD COMMAND ${CMAKE_COMMAND} -E create_symlink ${_TEST_DATA2} ${onnxruntime_extensions_BINARY_DIR}/data2) endif() diff --git a/cmake/externals/json.cmake b/cmake/externals/json.cmake index a41676c06..cdf63407a 100644 --- a/cmake/externals/json.cmake +++ b/cmake/externals/json.cmake @@ -8,3 +8,5 @@ FetchContent_GetProperties(nlohmann_json) if(NOT nlohmann_json_POPULATED) FetchContent_Populate(nlohmann_json) endif() + +add_compile_definitions(JSON_HAS_CPP_17=1) diff --git a/include/custom_op/tensor_api.h b/include/custom_op/tensor_api.h index 42f23d5dd..e8b6f9f54 100644 --- a/include/custom_op/tensor_api.h +++ b/include/custom_op/tensor_api.h @@ -174,6 +174,8 @@ class TensorBase : public Arg { virtual int64_t NumberOfElement() const = 0; virtual const void* DataRaw() const = 0; virtual size_t SizeInBytes() const = 0; + + virtual std::byte* AllocateRaw(const std::vector& shape) = 0; }; template @@ -283,6 +285,10 @@ class Tensor : public TensorBase { return static_cast(buffer); } + std::byte* AllocateRaw(const std::vector& shape) override { + return reinterpret_cast(Allocate(shape)); + } + const Span& AsSpan() { if (!storage_) ORTX_CXX_API_THROW("tensor not initialized.", ORT_RUNTIME_EXCEPTION); @@ -448,6 +454,10 @@ class Tensor : public TensorBase { return ss[0].size(); } + std::byte* AllocateRaw(const std::vector& shape) override { + ORTX_CXX_API_THROW("AllocateRaw() not supported for string tensor", ORT_RUNTIME_EXCEPTION); + } + void SetStringOutput(const strings& ss, const std::vector& dims) { storage_->SetStringOutput(ss, dims); } @@ -522,6 +532,10 @@ class Tensor : public TensorBase { return ss[0].size(); } + std::byte* AllocateRaw(const std::vector& shape) override { + ORTX_CXX_API_THROW("AllocateRaw() not supported for string tensor", ORT_RUNTIME_EXCEPTION); + } + void SetStringOutput(const strings& ss, const std::vector& dims) { storage_->SetStringOutput(ss, dims); } diff --git a/include/ortx_c_helper.h b/include/ortx_c_helper.h new file mode 100644 index 000000000..ce001ca31 --- /dev/null +++ b/include/ortx_c_helper.h @@ -0,0 +1,97 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +#pragma once + +#include "ortx_utils.h" + +namespace ort_extensions { + +template +class OrtxDeleter { + public: + void operator()(T* p) const { + if (p) { + OrtxDisposeOnly(p); + } + } +}; + +/** + * @brief A smart pointer class that manages the lifetime of an OrtxObject. + * + * This class is derived from std::unique_ptr and provides additional functionality + * specific to OrtxObject. It automatically calls the OrtxDeleter to release the + * owned object when it goes out of scope. + * + * @tparam T The type of the object being managed. + */ +template +class OrtxObjectPtr : public std::unique_ptr> { + public: + /** + * @brief Default constructor. + * + * Constructs an OrtxObjectPtr with a null pointer. + */ + OrtxObjectPtr() : std::unique_ptr>(nullptr) {} + + /** + * @brief Constructor that creates an OrtxObjectPtr from a function call. 
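   *
   * A minimal usage sketch (illustrative only; the processor definition path
   * below is hypothetical and error handling is abbreviated):
   *
   *   ort_extensions::OrtxObjectPtr<OrtxProcessor> proc(OrtxCreateProcessor, "processor.json");
   *   if (proc.Code() != kOrtxOK) {
   *     // consult OrtxGetLastErrorMessage() for details
   *   }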
+ * + * This constructor calls the specified function with the given arguments to + * create an OrtxObject. If the function call succeeds, the created object is + * owned by the OrtxObjectPtr. + * + * @tparam TFn The type of the function pointer or function object. + * @tparam Args The types of the arguments to be passed to the function. + * @param fn The function pointer or function object used to create the OrtxObject. + * @param args The arguments to be passed to the function. + */ + template + OrtxObjectPtr(TFn fn, Args&&... args) { + OrtxObject* proc = nullptr; + err_ = fn(&proc, std::forward(args)...); + if (err_ == kOrtxOK) { + this->reset(static_cast(proc)); + } + } + + /** + * @brief Get the error code associated with the creation of the OrtxObject. + * + * @return The error code. + */ + extError_t Code() const { return err_; } + + private: + extError_t err_ = kOrtxOK; /**< The error code associated with the creation of the OrtxObject. */ +}; + +template +struct PointerAssigner { + OrtxObject* obj_{}; + OrtxObjectPtr& ptr_; + PointerAssigner(OrtxObjectPtr& ptr) : ptr_(ptr){}; + + ~PointerAssigner() { ptr_.reset(static_cast(obj_)); }; + + operator T**() { return reinterpret_cast(&obj_); }; +}; + +/** + * @brief A wrapper function for OrtxObjectPtr that can be used as a function parameter on creation. + * + * This function creates a PointerAssigner object for the given OrtxObjectPtr. The PointerAssigner + * object can be used to assign a pointer value to the OrtxObjectPtr. + * + * @tparam T The type of the object pointed to by the OrtxObjectPtr. + * @param ptr The OrtxObjectPtr to create the PointerAssigner for. + * @return A PointerAssigner object for the given OrtxObjectPtr. + */ +template +PointerAssigner ptr(OrtxObjectPtr& ptr) { + return PointerAssigner{ptr}; +}; + +} // namespace ort_extensions diff --git a/include/ortx_processor.h b/include/ortx_processor.h index d89f16460..6dcc5a84e 100644 --- a/include/ortx_processor.h +++ b/include/ortx_processor.h @@ -9,6 +9,8 @@ // typedefs to create/dispose function flood, and to make the API more C++ friendly with less casting typedef OrtxObject OrtxProcessor; +typedef OrtxObject OrtxRawImages; +typedef OrtxObject OrtxImageProcessorResult; #ifdef __cplusplus extern "C" { @@ -17,11 +19,58 @@ extern "C" { /** \brief Create a processor object with the specified processor definition * * \param processor Pointer to store the created processor object - * \param processor_def The processor definition, either a path to the processor directory or a JSON string, and is utf-8 encoded. - * \return Error code indicating the success or failure of the operation + * \param processor_def The processor definition, either a path to the processor directory or a JSON string, and is + * utf-8 encoded. \return Error code indicating the success or failure of the operation */ extError_t ORTX_API_CALL OrtxCreateProcessor(OrtxProcessor** processor, const char* processor_def); +/** + * @brief Loads a set of images from the specified image paths. + * + * This function loads a set of images from the given image paths and returns a pointer to the loaded images. + * The number of images loaded is also returned through the `num_images_loaded` parameter. + * + * @param[out] images A pointer to a pointer that will be set to the loaded images. + * @param[in] image_paths An array of image paths. + * @param[in] num_images The number of images to load. + * @param[out] num_images_loaded A pointer to a variable that will be set to the number of images loaded. 
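 *
 * A short usage sketch in C (the file names here are hypothetical and error
 * handling is reduced to a single check):
 *
 *   const char* image_paths[] = {"a.jpg", "b.png"};
 *   OrtxRawImages* images = NULL;
 *   size_t loaded = 0;
 *   if (OrtxLoadImages(&images, image_paths, 2, &loaded) != kOrtxOK) {
 *     return;  // consult OrtxGetLastErrorMessage() for the reason
 *   }
 *   // `images` can now be handed to OrtxImagePreProcess().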
+ * + * @return An error code indicating the status of the operation. + */ +extError_t ORTX_API_CALL OrtxLoadImages(OrtxRawImages** images, const char** image_paths, size_t num_images, + size_t* num_images_loaded); + +/** + * @brief Preprocesses the given raw images using the specified processor. + * + * This function applies preprocessing operations on the raw images using the provided processor. + * The result of the preprocessing is stored in the `OrtxImageProcessorResult` object. + * + * @param processor A pointer to the `OrtxProcessor` object used for preprocessing. + * @param images A pointer to the `OrtxRawImages` object containing the raw images to be processed. + * @param result A pointer to the `OrtxImageProcessorResult` object to store the preprocessing result. + * @return An `extError_t` value indicating the success or failure of the preprocessing operation. + */ +extError_t ORTX_API_CALL OrtxImagePreProcess(OrtxProcessor* processor, OrtxRawImages* images, + OrtxImageProcessorResult** result); + +/** + * @brief Retrieves the image processor result at the specified index. + * + * @param result Pointer to the OrtxImageProcessorResult structure to store the result. + * @param index The index of the result to retrieve. + * @return extError_t The error code indicating the success or failure of the operation. + */ +extError_t ORTX_API_CALL OrtxImageGetTensorResult(OrtxImageProcessorResult* result, size_t index, OrtxTensor** tensor); + +/** \brief Clear the outputs of the processor + * + * \param processor The processor object + * \param result The result object to clear + * \return Error code indicating the success or failure of the operation + */ +extError_t ORTX_API_CALL OrtxClearOutputs(OrtxProcessor* processor, OrtxImageProcessorResult* result); + #ifdef __cplusplus } #endif diff --git a/include/ortx_utils.h b/include/ortx_utils.h index 8ee7bf217..e6c0af9aa 100644 --- a/include/ortx_utils.h +++ b/include/ortx_utils.h @@ -5,6 +5,8 @@ #include "ortx_types.h" +const int API_VERSION = 1; + typedef enum { kOrtxKindUnknown = 0, @@ -14,7 +16,10 @@ typedef enum { kOrtxKindTokenId2DArray = 0x778A, kOrtxKindDetokenizerCache = 0x778B, kOrtxKindProcessor = 0x778C, - kOrtxKindProcessorResult = 0x778D, + kOrtxKindRawImages = 0x778D, + kOrtxKindImageProcessorResult = 0x778E, + kOrtxKindProcessorResult = 0x778F, + kOrtxKindTensor = 0x7790, kOrtxKindEnd = 0x7999 } extObjectKind_t; @@ -24,7 +29,7 @@ typedef struct { int ext_kind_; } OrtxObject; -const int API_VERSION = 1; +typedef OrtxObject OrtxTensor; // C, instead of C++ doesn't cast automatically, // so we need to use a macro to cast the object to the correct type @@ -72,6 +77,41 @@ extError_t ORTX_API_CALL OrtxDispose(OrtxObject** object); */ extError_t ORTX_API_CALL OrtxDisposeOnly(OrtxObject* object); +/** \brief Get the data from the tensor + * + * \param tensor The tensor object + * \param data Pointer to store the data + * \param shape Pointer to store the shape + * \param num_dims Pointer to store the number of dimensions + * \return Error code indicating the success or failure of the operation + */ +extError_t ORTX_API_CALL OrtxGetTensorData(OrtxTensor* tensor, const void** data, const int64_t** shape, + size_t* num_dims); +/** + * \brief Get the data from the tensor as int64_t type + * + * \param tensor The tensor object + * \param data Pointer to store the data + * \param shape Pointer to store the shape + * \param num_dims Pointer to store the number of dimensions + * \return Error code indicating the success or failure of the 
operation + */ + +extError_t ORTX_API_CALL OrtxGetTensorDataInt64(OrtxTensor* tensor, const int64_t** data, const int64_t** shape, + size_t* num_dims); + +/** + * \brief Get the data from the tensor as float type + * + * \param tensor The tensor object + * \param data Pointer to store the data + * \param shape Pointer to store the shape + * \param num_dims Pointer to store the number of dimensions + * \return Error code indicating the success or failure of the operation + */ +extError_t ORTX_API_CALL OrtxGetTensorDataFloat(OrtxTensor* tensor, const float** data, const int64_t** shape, + size_t* num_dims); + #ifdef __cplusplus } #endif diff --git a/shared/api/c_api_processor.cc b/shared/api/c_api_processor.cc index 82a45f652..2beb90a13 100644 --- a/shared/api/c_api_processor.cc +++ b/shared/api/c_api_processor.cc @@ -1,6 +1,7 @@ // Copyright (c) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. +#include "ortx_processor.h" #include "image_processor.h" using namespace ort_extensions; @@ -20,3 +21,105 @@ extError_t OrtxCreateProcessor(OrtxProcessor** processor, const char* def) { return status.Code(); } + +struct RawImagesObject : public OrtxObjectImpl { + public: + RawImagesObject() : OrtxObjectImpl(kOrtxKindRawImages) {} + std::unique_ptr images; + size_t num_images; +}; + +extError_t ORTX_API_CALL OrtxLoadImages(OrtxRawImages** images, const char** image_paths, size_t num_images, + size_t* num_images_loaded) { + if (images == nullptr || image_paths == nullptr) { + ReturnableStatus::last_error_message_ = "Invalid argument"; + return kOrtxErrorInvalidArgument; + } + + auto images_obj = std::make_unique(); + auto [img, num] = LoadRawImages(image_paths, image_paths + num_images); + images_obj->images = std::move(img); + images_obj->num_images = num; + if (num_images_loaded != nullptr) { + *num_images_loaded = num; + } + + *images = static_cast(images_obj.release()); + return extError_t(); +} + +extError_t ORTX_API_CALL OrtxImagePreProcess(OrtxProcessor* processor, OrtxRawImages* images, + OrtxImageProcessorResult** result) { + if (processor == nullptr || images == nullptr || result == nullptr) { + ReturnableStatus::last_error_message_ = "Invalid argument"; + return kOrtxErrorInvalidArgument; + } + + auto processor_ptr = static_cast(processor); + ReturnableStatus status(processor_ptr->IsInstanceOf(extObjectKind_t::kOrtxKindProcessor)); + if (!status.IsOk()) { + return status.Code(); + } + + auto images_ptr = static_cast(images); + status = images_ptr->IsInstanceOf(extObjectKind_t::kOrtxKindRawImages); + if (!status.IsOk()) { + return status.Code(); + } + + auto result_ptr = std::make_unique(); + status = + processor_ptr->PreProcess(ort_extensions::span(images_ptr->images.get(), images_ptr->num_images), *result_ptr); + if (status.IsOk()) { + *result = static_cast(result_ptr.release()); + } else { + *result = nullptr; + } + + return {}; +} + +extError_t ORTX_API_CALL OrtxImageGetTensorResult(OrtxImageProcessorResult* result, size_t index, OrtxTensor** tensor) { + if (result == nullptr || tensor == nullptr) { + ReturnableStatus::last_error_message_ = "Invalid argument"; + return kOrtxErrorInvalidArgument; + } + + auto result_ptr = static_cast(result); + ReturnableStatus status(result_ptr->IsInstanceOf(extObjectKind_t::kOrtxKindImageProcessorResult)); + if (!status.IsOk()) { + return status.Code(); + } + + if (index >= result_ptr->results.size()) { + ReturnableStatus::last_error_message_ = "Index out of range"; + return kOrtxErrorInvalidArgument; + } + + auto tensor_ptr 
= std::make_unique>(); + tensor_ptr->SetObject(result_ptr->results[index].get()); + *tensor = static_cast(tensor_ptr.release()); + return extError_t(); +} + +extError_t ORTX_API_CALL OrtxClearOutputs(OrtxProcessor* processor, OrtxImageProcessorResult* result) { + if (processor == nullptr || result == nullptr) { + ReturnableStatus::last_error_message_ = "Invalid argument"; + return kOrtxErrorInvalidArgument; + } + + const auto processor_ptr = static_cast(processor); + ReturnableStatus status(processor_ptr->IsInstanceOf(extObjectKind_t::kOrtxKindProcessor)); + if (!status.IsOk()) { + return status.Code(); + } + + auto result_ptr = static_cast(result); + status = result_ptr->IsInstanceOf(extObjectKind_t::kOrtxKindImageProcessorResult); + if (!status.IsOk()) { + return status.Code(); + } + + ImageProcessor::ClearOutputs(result_ptr); + return extError_t(); +} diff --git a/shared/api/c_api_utils.cc b/shared/api/c_api_utils.cc index c7963a20a..0345fdb23 100644 --- a/shared/api/c_api_utils.cc +++ b/shared/api/c_api_utils.cc @@ -6,6 +6,7 @@ #include "file_sys.h" #include "image_processor.h" #include "tokenizer_impl.h" +#include "ortx_utils.h" using namespace ort_extensions; @@ -13,19 +14,14 @@ thread_local std::string ReturnableStatus::last_error_message_; OrtxStatus OrtxObjectImpl::IsInstanceOf(extObjectKind_t kind) const { if (ext_kind_ != static_cast(kind)) { - return {extError_t::kOrtxErrorInvalidArgument, - "Object is not an instance of the requested type"}; + return {extError_t::kOrtxErrorInvalidArgument, "Object is not an instance of the requested type"}; } return {}; } -int ORTX_API_CALL OrtxGetAPIVersion() { - return API_VERSION; -} +int ORTX_API_CALL OrtxGetAPIVersion() { return API_VERSION; } -const char* OrtxGetLastErrorMessage() { - return ReturnableStatus::last_error_message_.c_str(); -} +const char* OrtxGetLastErrorMessage() { return ReturnableStatus::last_error_message_.c_str(); } extError_t ORTX_API_CALL OrtxCreate(extObjectKind_t kind, OrtxObject** object, ...) { if (object == nullptr) { @@ -50,8 +46,7 @@ extError_t ORTX_API_CALL OrtxCreate(extObjectKind_t kind, OrtxObject** object, . 
return extError_t(); } -extError_t ORTX_API_CALL OrtxCreateTokenizer(OrtxTokenizer** tokenizer, - const char* tokenizer_path) { +extError_t ORTX_API_CALL OrtxCreateTokenizer(OrtxTokenizer** tokenizer, const char* tokenizer_path) { // test if the tokenizer_path is a valid directory if (tokenizer_path == nullptr) { ReturnableStatus::last_error_message_ = "The tokenizer data directory is null"; @@ -86,17 +81,19 @@ extError_t ORTX_API_CALL OrtxDisposeOnly(OrtxObject* object) { } if (Ortx_object->ortx_kind() == extObjectKind_t::kOrtxKindStringArray) { - OrtxObjectFactory::Dispose(object); + OrtxObjectFactory::Dispose(object); } else if (Ortx_object->ortx_kind() == extObjectKind_t::kOrtxKindTokenId2DArray) { - OrtxObjectFactory::Dispose(object); + OrtxObjectFactory::Dispose(object); } else if (Ortx_object->ortx_kind() == extObjectKind_t::kOrtxKindDetokenizerCache) { - OrtxObjectFactory::DisposeForward(object); + OrtxObjectFactory::DisposeForward(object); } else if (Ortx_object->ortx_kind() == extObjectKind_t::kOrtxKindTokenizer) { - OrtxObjectFactory::Dispose(object); + OrtxObjectFactory::Dispose(object); } else if (Ortx_object->ortx_kind() == extObjectKind_t::kOrtxKindProcessorResult) { - OrtxObjectFactory::Dispose(object); + OrtxObjectFactory::Dispose(object); + } else if (Ortx_object->ortx_kind() == extObjectKind_t::kOrtxKindImageProcessorResult) { + OrtxObjectFactory::Dispose(object); } else if (Ortx_object->ortx_kind() == extObjectKind_t::kOrtxKindProcessor) { - OrtxObjectFactory::Dispose(object); + OrtxObjectFactory::Dispose(object); } return extError_t(); @@ -115,3 +112,38 @@ extError_t ORTX_API_CALL OrtxDispose(OrtxObject** object) { *object = nullptr; return err; } + +extError_t ORTX_API_CALL OrtxGetTensorData(OrtxTensor* tensor, const void** data, const int64_t** shape, + size_t* num_dims) { + if (tensor == nullptr) { + ReturnableStatus::last_error_message_ = "Invalid argument"; + return kOrtxErrorInvalidArgument; + } + + auto tensor_impl = static_cast*>(tensor); + if (tensor_impl->ortx_kind() != extObjectKind_t::kOrtxKindTensor) { + ReturnableStatus::last_error_message_ = "Invalid argument"; + return kOrtxErrorInvalidArgument; + } + + *data = tensor_impl->GetObject()->DataRaw(); + *shape = tensor_impl->GetObject()->Shape().data(); + *num_dims = tensor_impl->GetObject()->Shape().size(); + return extError_t(); +} + +extError_t ORTX_API_CALL OrtxGetTensorDataInt64(OrtxTensor* tensor, const int64_t** data, const int64_t** shape, + size_t* num_dims) { + const void* data_ptr; + auto err = OrtxGetTensorData(tensor, &data_ptr, shape, num_dims); + *data = reinterpret_cast(data_ptr); // NOLINT(cppcoreguidelines-pro-type-reinterpret-cast) + return err; +} + +extError_t ORTX_API_CALL OrtxGetTensorDataFloat(OrtxTensor* tensor, const float** data, const int64_t** shape, + size_t* num_dims) { + const void* data_ptr; + auto err = OrtxGetTensorData(tensor, &data_ptr, shape, num_dims); + *data = reinterpret_cast(data_ptr); // NOLINT(cppcoreguidelines-pro-type-reinterpret-cast) + return err; +} diff --git a/shared/api/c_api_utils.hpp b/shared/api/c_api_utils.hpp index 99a2b7ba4..d7794b610 100644 --- a/shared/api/c_api_utils.hpp +++ b/shared/api/c_api_utils.hpp @@ -24,6 +24,30 @@ class OrtxObjectImpl : public OrtxObject { } return static_cast(ext_kind_); } + + template + struct Type2Kind { + static const extObjectKind_t value = kOrtxKindUnknown; + }; +}; + +template <> +struct OrtxObjectImpl::Type2Kind { + static const extObjectKind_t value = kOrtxKindTensor; +}; + +template +class OrtxObjectWrapper : 
public OrtxObjectImpl { + public: + OrtxObjectWrapper() : OrtxObjectImpl(OrtxObjectImpl::Type2Kind::value) {} + ~OrtxObjectWrapper() override = default; + + void SetObject(T* t) { stored_object_ = t; } + + [[nodiscard]] T* GetObject() const { return stored_object_; } + + private: + T* stored_object_{}; }; template @@ -39,7 +63,7 @@ class span { const T& operator[](size_t i) const { return data_[i]; } T& operator[](size_t i) { return data_[i]; } - + T* data() const { return data_; } [[nodiscard]] size_t size() const { return size_; } T* begin() const { return data_; } @@ -55,13 +79,9 @@ class TokenId2DArray : public OrtxObjectImpl { TokenId2DArray() : OrtxObjectImpl(extObjectKind_t::kOrtxKindTokenId2DArray) {} ~TokenId2DArray() override = default; - void SetTokenIds(std::vector>&& token_ids) { - token_ids_ = token_ids; - } + void SetTokenIds(std::vector>&& token_ids) { token_ids_ = token_ids; } - [[nodiscard]] const std::vector>& token_ids() const { - return token_ids_; - } + [[nodiscard]] const std::vector>& token_ids() const { return token_ids_; } private: std::vector> token_ids_; @@ -72,13 +92,9 @@ class StringArray : public OrtxObjectImpl { StringArray() : OrtxObjectImpl(extObjectKind_t::kOrtxKindStringArray) {} ~StringArray() override = default; - void SetStrings(std::vector&& strings) { - strings_ = strings; - } + void SetStrings(std::vector&& strings) { strings_ = strings; } - [[nodiscard]] const std::vector& strings() const { - return strings_; - } + [[nodiscard]] const std::vector& strings() const { return strings_; } private: std::vector strings_; @@ -109,10 +125,8 @@ struct ReturnableStatus { template class OrtxObjectFactory { - public: - static std::unique_ptr Create() { - return std::make_unique(); - } + public: + static std::unique_ptr Create() { return std::make_unique(); } static OrtxObject* CreateForward(); static void DisposeForward(OrtxObject* object); @@ -122,42 +136,15 @@ class OrtxObjectFactory { std::unique_ptr ptr(obj_ptr); ptr.reset(); } - }; class DetokenizerCache; // forward definition in tokenizer_impl.cc -class ProcessorResult; // forward definition in image_processor.h - -template -class OrtxDeleter { - public: - void operator()(T* p) const { - if (p) { - OrtxDisposeOnly(p); - } - } -}; - -template -class OrtxObjectPtr : public std::unique_ptr> { - public: - template - OrtxObjectPtr(TFn fn, const char* def) { - OrtxObject* proc = nullptr; - err_ = fn(&proc, def); - if (err_ == kOrtxOK) { - this->reset(static_cast(proc)); - } - } +class ProcessorResult; // forward definition in image_processor.h - int err_ = kOrtxOK; -}; class CppAllocator : public ortc::IAllocator { public: - void* Alloc(size_t size) override { - return std::make_unique(size).release(); - } + void* Alloc(size_t size) override { return std::make_unique(size).release(); } void Free(void* p) override { std::unique_ptr ptr(static_cast(p)); diff --git a/shared/api/image_processor.cc b/shared/api/image_processor.cc index 9ecedf917..1cbab6e10 100644 --- a/shared/api/image_processor.cc +++ b/shared/api/image_processor.cc @@ -9,17 +9,18 @@ #include "image_processor.h" #include "cv2/imgcodecs/imdecode.hpp" #include "image_transforms.hpp" +#include "image_transforms_phi_3.hpp" using namespace ort_extensions; using json = nlohmann::json; namespace ort_extensions { -std::tuple, size_t> -LoadRawImages(const std::initializer_list& image_paths) { - auto raw_images = std::make_unique(image_paths.size()); +template +std::tuple, size_t> LoadRawImages(It begin, It end) { + auto raw_images = std::make_unique(end - 
begin); size_t n = 0; - for (const auto& image_path : image_paths) { - std::ifstream ifs = path(image_path).open(std::ios::binary); + for (auto it = begin; it != end; ++it) { + std::ifstream ifs = path(*it).open(std::ios::binary); if (!ifs.is_open()) { break; } @@ -35,11 +36,23 @@ LoadRawImages(const std::initializer_list& image_paths) { return std::make_tuple(std::move(raw_images), n); } + +std::tuple, size_t> LoadRawImages( + const std::initializer_list& image_paths) { + return LoadRawImages(image_paths.begin(), image_paths.end()); +} + +template std::tuple, size_t> LoadRawImages(char const**, char const**); + } // namespace ort_extensions Operation::KernelRegistry ImageProcessor::kernel_registry_ = { {"DecodeImage", []() { return CreateKernelInstance(image_decoder); }}, - {"ConvertRGB", []() { return CreateKernelInstance(&ConvertToRGB::Compute); }}, + {"Resize", []() { return CreateKernelInstance(&Resize::Compute); }}, + {"Rescale", []() { return CreateKernelInstance(&Rescale::Compute); }}, + {"Normalize", []() { return CreateKernelInstance(&Normalize::Compute); }}, + {"CenterCrop", []() { return CreateKernelInstance(&CenterCrop::Compute); }}, + {"ConvertRGB", []() { return CreateKernelInstance(convert_to_rgb); }}, {"Phi3ImageTransform", []() { return CreateKernelInstance(phi3_hd_transform); }}, }; @@ -89,8 +102,7 @@ ImageProcessor::ImageProcessor() } template -static ortc::Tensor* -StackTensor(const std::vector& arg_lists, int axis, ortc::IAllocator* allocator) { +static ortc::Tensor* StackTensor(const std::vector& arg_lists, int axis, ortc::IAllocator* allocator) { using TT = ortc::Tensor; auto output = std::make_unique(allocator); @@ -124,12 +136,43 @@ StackTensor(const std::vector& arg_lists, int axis, ortc::IAllocator return output.release(); } -std::tuple -ImageProcessor::PreProcess( - ort_extensions::span image_data, - ortc::Tensor** pixel_values, - ortc::Tensor** image_sizes, - ortc::Tensor** num_img_takens) { +static OrtxStatus StackTensors(const std::vector& arg_lists, std::vector& outputs, + ortc::IAllocator* allocator) { + if (arg_lists.empty()) { + return {}; + } + + size_t batch_size = arg_lists.size(); + size_t num_outputs = arg_lists[0].size(); + for (size_t axis = 0; axis < num_outputs; ++axis) { + std::vector ts_ptrs; + ts_ptrs.reserve(arg_lists.size()); + std::vector shape = arg_lists[0][axis]->Shape(); + for (auto& ts : arg_lists) { + if (shape != ts[axis]->Shape()) { + return {kOrtxErrorInvalidArgument, "[StackTensors]: shapes of tensors to stack are not the same."}; + } + ts_ptrs.push_back(ts[axis]); + } + + std::vector output_shape = shape; + output_shape.insert(output_shape.begin(), batch_size); + std::byte* tensor_buf = outputs[axis]->AllocateRaw(output_shape); + for (size_t i = 0; i < batch_size; ++i) { + auto ts = ts_ptrs[i]; + const std::byte* ts_buff = reinterpret_cast(ts->DataRaw()); + auto ts_size = ts->SizeInBytes(); + std::memcpy(tensor_buf + i * ts_size, ts_buff, ts_size); + } + } + + return {}; +} + +std::tuple ImageProcessor::PreProcess(ort_extensions::span image_data, + ortc::Tensor** pixel_values, + ortc::Tensor** image_sizes, + ortc::Tensor** num_img_takens) const { ProcessorResult r; std::vector inputs; inputs.resize(image_data.size()); @@ -163,7 +206,39 @@ ImageProcessor::PreProcess( *image_sizes = r.image_sizes = StackTensor(outputs, 1, allocator_); *num_img_takens = r.num_img_takens = StackTensor(outputs, 2, allocator_); - return {status, r}; + return {status, std::move(r)}; +} + +OrtxStatus ImageProcessor::PreProcess(ort_extensions::span 
image_data, ImageProcessorResult& r) const { + std::vector inputs; + inputs.resize(image_data.size()); + for (size_t i = 0; i < image_data.size(); ++i) { + auto& ts_input = inputs[i]; + ImageRawData& image = image_data[i]; + std::vector shape = {static_cast(image.size())}; + ts_input.push_back(std::make_unique>(shape, image.data()).release()); + } + + std::vector outputs; + std::vector ops(operations_.size()); + std::transform(operations_.begin(), operations_.end(), ops.begin(), [](auto& op) { return op.get(); }); + OrtxRunner runner(allocator_, ops.data(), ops.size()); + auto status = runner.Run(inputs, outputs); + if (!status.IsOk()) { + return status; + } + + // clear the input tensors + for (auto& input : inputs) { + for (auto& ts : input) { + std::unique_ptr(ts).reset(); + } + } + + r.results = operations_.back()->AllocateOutputs(allocator_); + status = StackTensors(outputs, r.results, allocator_); + operations_.back()->ResetTensors(allocator_); + return status; } void ImageProcessor::ClearOutputs(ProcessorResult* r) { @@ -182,3 +257,14 @@ void ImageProcessor::ClearOutputs(ProcessorResult* r) { r->num_img_takens = nullptr; } } + +void ort_extensions::ImageProcessor::ClearOutputs(ImageProcessorResult* r) { + if (r == nullptr) { + return; + } + + for (auto& ts : r->results) { + ts.reset(); + } + r->results.clear(); // clear the vector +} diff --git a/shared/api/image_processor.h b/shared/api/image_processor.h index 5ff208d19..534e811d6 100644 --- a/shared/api/image_processor.h +++ b/shared/api/image_processor.h @@ -15,8 +15,12 @@ namespace ort_extensions { using ImageRawData = std::vector; -std::tuple, size_t> -LoadRawImages(const std::initializer_list& image_paths); + +template +std::tuple, size_t> LoadRawImages(It begin, It end); + +std::tuple, size_t> LoadRawImages( + const std::initializer_list& image_paths); class ProcessorResult : public OrtxObjectImpl { public: @@ -26,6 +30,12 @@ class ProcessorResult : public OrtxObjectImpl { ortc::Tensor* num_img_takens{}; }; +class ImageProcessorResult : public OrtxObjectImpl { + public: + ImageProcessorResult() : OrtxObjectImpl(kOrtxKindImageProcessorResult) {} + std::vector results; +}; + class ImageProcessor : public OrtxObjectImpl { public: ImageProcessor(); @@ -33,14 +43,15 @@ class ImageProcessor : public OrtxObjectImpl { OrtxStatus Init(std::string_view processor_def); - std::tuple - PreProcess( - ort_extensions::span image_data, - ortc::Tensor** pixel_values, - ortc::Tensor** image_sizes, - ortc::Tensor** num_img_takens); + std::tuple PreProcess(ort_extensions::span image_data, + ortc::Tensor** pixel_values, + ortc::Tensor** image_sizes, + ortc::Tensor** num_img_takens) const; + + OrtxStatus PreProcess(ort_extensions::span image_data, ImageProcessorResult& r) const; - void ClearOutputs(ProcessorResult* r); + static void ClearOutputs(ProcessorResult* r); + static void ClearOutputs(ImageProcessorResult* r); static Operation::KernelRegistry kernel_registry_; diff --git a/shared/api/image_transforms.hpp b/shared/api/image_transforms.hpp index 773d70cce..93f9ab120 100644 --- a/shared/api/image_transforms.hpp +++ b/shared/api/image_transforms.hpp @@ -5,232 +5,228 @@ #include "ocos.h" -constexpr int max_crops = 16; -constexpr int num_img_tokens = 144; -constexpr int image_resized_width = 336; -constexpr int image_resized_height = 336; +inline OrtxStatus convert_to_rgb(const ortc::Tensor& input, ortc::Tensor& output) { + auto& dimensions = input.Shape(); + if (dimensions.size() != 3ULL || dimensions[2] != 3) { + return 
{kOrtxErrorInvalidArgument, "[ConvertToRGB]: input is not (H, W, C)"}; + } + + std::uint8_t* p_output_image = output.Allocate(dimensions); + auto* input_data = input.Data(); + auto h = dimensions[0]; + auto w = dimensions[1]; + auto c = dimensions[2]; + + // convert BGR channel layouts to RGB + for (int64_t j = 0; j < h; ++j) { + for (int64_t k = 0; k < w; ++k) { + auto c0_index = j * w * c + k * c; + std::tie(p_output_image[c0_index], p_output_image[c0_index + 1], p_output_image[c0_index + 2]) = + std::make_tuple(input_data[c0_index + 2], input_data[c0_index + 1], input_data[c0_index]); + } + } + + return {}; +} + +struct Resize { + template + OrtxStatus Init(const DictT& attrs) { + for (const auto& [key, value] : attrs) { + if (key == "height") { + height_ = std::get(value); + } else if (key == "width") { + width_ = std::get(value); + } else if (key == "interpolation") { + interpolation_ = std::get(value); + if (interpolation_ != "NEAREST" && interpolation_ != "LINEAR" && interpolation_ != "CUBIC") { + return {kOrtxErrorInvalidArgument, "[Resize]: Invalid interpolation method"}; + } + } else { + return {kOrtxErrorInvalidArgument, "[Resize]: Invalid argument"}; + } + } + return {}; + } + + OrtxStatus Compute(const ortc::Tensor& input, ortc::Tensor& output) { + auto& dimensions = input.Shape(); + if (dimensions.size() != 3ULL) { + return {kOrtxErrorInvalidArgument, "[Resize]: Only raw image formats"}; + } + + auto* input_data = input.Data(); + int h = static_cast(dimensions[0]); + int w = static_cast(dimensions[1]); + int c = static_cast(dimensions[2]); + + cv::Mat image(h, w, CV_8UC3, const_cast(input_data)); + cv::Mat output_image; + cv::InterpolationFlags interp{}; + if (interpolation_ == "NEAREST") { + interp = cv::INTER_NEAREST; + } else if (interpolation_ == "LINEAR") { + interp = cv::INTER_LINEAR; + } else if (interpolation_ == "CUBIC") { + interp = cv::INTER_CUBIC; + } else { + return {kOrtxErrorInvalidArgument, "[Resize]: Invalid interpolation method"}; + } + + cv::resize(image, output_image, {static_cast(width_), static_cast(height_)}, 0.0, 0.0, interp); + + auto* p_output_image = output.Allocate({height_, width_, c}); + std::memcpy(p_output_image, output_image.data, height_ * width_ * c); + + return {}; + } + + private: + int64_t height_{256}; + int64_t width_{256}; + std::string interpolation_{"CUBIC"}; // LINEAR, NEAREST, CUBIC +}; + +struct Rescale { + template + OrtxStatus Init(const DictT& attrs) { + for (const auto& [key, value] : attrs) { + if (key == "scale") { + scale_ = static_cast(std::get(value)); + } else { + return {kOrtxErrorInvalidArgument, "[Rescale]: Invalid argument"}; + } + } -constexpr float OPENAI_CLIP_MEAN[] = {0.48145466f, 0.4578275f, 0.40821073f}; -constexpr float OPENAI_CLIP_STD[] = {0.26862954f, 0.26130258f, 0.27577711f}; + return {}; + } -struct ConvertToRGB { - OrtxStatus Compute(const ortc::Tensor& input, - ortc::Tensor& output) { + OrtxStatus Compute(const ortc::Tensor& input, ortc::Tensor& output) { auto& dimensions = input.Shape(); - if (dimensions.size() != 3ULL || dimensions[2] != 3) { - return {kOrtxErrorInvalidArgument, "[ConvertToRGB]: input is not (H, W, C)"}; + if (dimensions.size() != 3ULL) { // Only raw image formats + return {kOrtxErrorInvalidArgument, "[Rescale]: Only raw image formats"}; } - std::uint8_t* p_output_image = output.Allocate(dimensions); auto* input_data = input.Data(); auto h = dimensions[0]; auto w = dimensions[1]; auto c = dimensions[2]; + auto* p_output_image = output.Allocate({h, w, c}); - // convert BGR channel 
layouts to RGB for (int64_t j = 0; j < h; ++j) { for (int64_t k = 0; k < w; ++k) { auto c0_index = j * w * c + k * c; - std::tie(p_output_image[c0_index], p_output_image[c0_index + 1], p_output_image[c0_index + 2]) = - std::make_tuple(input_data[c0_index + 2], input_data[c0_index + 1], input_data[c0_index]); + for (int64_t l = 0; l < c; ++l) { + p_output_image[c0_index + l] = input_data[c0_index + l] * scale_; + } } } return {}; } -}; -inline cv::Mat padding_336(const cv::Mat& image) { - // def padding_336(b): - // width, height = b.size - // tar = int(np.ceil(height / 336) * 336) - // top_padding = int((tar - height)/2) - // bottom_padding = tar - height - top_padding - // left_padding = 0 - // right_padding = 0 - // b = torchvision.transforms.functional.pad(b, [left_padding, top_padding, right_padding, bottom_padding], fill=[255,255,255]) - - // return b - float height = static_cast(image.rows); - int32_t tar = static_cast(std::ceil(height / image_resized_height) * image_resized_height); - int32_t top_padding = static_cast((tar - height) / 2); - int32_t bottom_padding = tar - image.rows - top_padding; - - cv::Mat output; - cv::copyMakeBorder(image, output, top_padding, bottom_padding, 0, 0, cv::BORDER_CONSTANT, {255, 255, 255}); - return output; -} - -inline cv::Mat hd_transform(const cv::Mat& image, int hd_num) { - // width, height = img.size - auto [width, height] = std::make_tuple(image.cols, image.rows); + private: + float scale_{1.0f / 255.0f}; +}; - // ratio = width / height if width >= height else height / width - float ratio = 1.0f * width; - if (width >= height) { - ratio /= height; - } else { - ratio = 1.0f * height / width; - } +struct Normalize { + template + OrtxStatus Init(const DictT& attrs) { + for (const auto& [key, value] : attrs) { + if (key == "mean") { + auto mean = std::get>(value); + mean_ = {static_cast(mean[0]), static_cast(mean[1]), static_cast(mean[2])}; + } else if (key == "std") { + auto std = std::get>(value); + std_ = {static_cast(std[0]), static_cast(std[1]), static_cast(std[2])}; + } else { + return {kOrtxErrorInvalidArgument, "[Normalize]: Invalid argument"}; + } + } - // scale = 1 - // while scale * np.ceil(scale / ratio) <= hd_num: - // scale += 1 - // scale -= 1 - int scale = 1; - while (scale * std::ceil(scale / ratio) <= hd_num) { - scale += 1; + return {}; } - scale -= 1; - // new_w = int(scale * 336) - // new_h = int(new_w / ratio) - int64_t new_w = scale * image_resized_width; - int64_t new_h = static_cast(new_w / ratio); + OrtxStatus Compute(const ortc::Tensor& input, ortc::Tensor& output) { + auto& dimensions = input.Shape(); + if (dimensions.size() != 3ULL) { + return {kOrtxErrorInvalidArgument, "[Normalize]: Only raw image formats"}; + } - // if width < height: - // new_w, new_h = new_h, new_w - if (width < height) { - std::swap(new_w, new_h); - } + auto* input_data = input.Data(); + auto h = dimensions[0]; + auto w = dimensions[1]; + auto c = dimensions[2]; + auto* p_output_image = output.Allocate({h, w, c}); - // img = torchvision.transforms.functional.resize(img, [new_h, new_w]) - std::vector height_x_width{static_cast(new_h), // H - static_cast(new_w)}; // W + for (int64_t j = 0; j < h; ++j) { + for (int64_t k = 0; k < w; ++k) { + auto c0_index = j * w * c + k * c; + for (int64_t l = 0; l < c; ++l) { + p_output_image[c0_index + l] = (input_data[c0_index + l] - mean_[l]) / std_[l]; + } + } + } - cv::Mat output_image; - cv::resize(image, output_image, - {static_cast(new_w), static_cast(new_h)}, 0.0, 0.0, - cv::INTER_LINEAR); - // img = 
padding_336(img) - return padding_336(output_image); -} + return {}; + } -// Function to calculate 1D index from 3D indices -inline size_t Index3D(size_t i, size_t j, size_t k, size_t dim1, size_t dim2, size_t dim3) { - return i * dim2 * dim3 + j * dim3 + k; -} + private: + std::vector mean_{0.48145466f, 0.4578275f, 0.40821073f}; + std::vector std_{0.26862954f, 0.26130258f, 0.27577711f}; +}; -// Function to permute 3D array stored in 1D array from (X, Y, Z) to (Z, X, Y) -inline void Permute3DArray(const float* array, float* permutedArray, size_t X, size_t Y, size_t Z) { - for (size_t x = 0; x < X; ++x) { - for (size_t y = 0; y < Y; ++y) { - for (size_t z = 0; z < Z; ++z) { - size_t oldIndex = Index3D(x, y, z, X, Y, Z); - size_t newIndex = Index3D(z, x, y, Z, X, Y); - permutedArray[newIndex] = array[oldIndex]; +struct CenterCrop { + template + OrtxStatus Init(const DictT& attrs) { + for (const auto& [key, value] : attrs) { + if (key == "height") { + target_h_ = std::get(value); + } else if (key == "width") { + target_w_ = std::get(value); + } else { + return {kOrtxErrorInvalidArgument, "[CenterCrop]: Invalid attribute " + key}; } } - } -} -inline OrtxStatus phi3_hd_transform(const ortc::Tensor& input, - ortc::Tensor& pixel_values, - ortc::Tensor& image_sizes, - ortc::Tensor& num_img_takens) { - auto& dimensions = input.Shape(); - if (dimensions.size() != 3ULL) { - return {kOrtxErrorInvalidArgument, "[hd_transform]: Only raw image formats"}; + return {}; } - // Normalize the pixel value with mean and var - auto input_data = input.Data(); - int32_t h = static_cast(dimensions[0]); - int32_t w = static_cast(dimensions[1]); - int32_t c = static_cast(dimensions[2]); - std::vector height_x_width{static_cast(h), // H - static_cast(w)}; // W - - cv::Mat rgb_image(height_x_width, CV_8UC3, const_cast(input_data)); - // elems = [HD_transform(im, hd_num = self.num_crops) for im in images] - auto elem = hd_transform(rgb_image, max_crops); - // # tensor transform and normalize - // hd_images = [img_processor(im) for im in elems] - std::tie(w, h) = std::make_tuple(elem.cols, elem.rows); - auto elem_image = elem.data; - auto rgb_image_ptr = std::make_unique(h * w * c); - auto p_pixel_values = rgb_image_ptr.get(); - for (int64_t j = 0; j < h; ++j) { - for (int64_t k = 0; k < w; ++k) { - auto c0_index = j * w * c + k * c; - p_pixel_values[c0_index] = (static_cast(elem_image[c0_index]) / 255.f - OPENAI_CLIP_MEAN[0]) / OPENAI_CLIP_STD[0]; - p_pixel_values[c0_index + 1] = (static_cast(elem_image[c0_index + 1]) / 255.f - OPENAI_CLIP_MEAN[1]) / OPENAI_CLIP_STD[1]; - p_pixel_values[c0_index + 2] = (static_cast(elem_image[c0_index + 2]) / 255.f - OPENAI_CLIP_MEAN[2]) / OPENAI_CLIP_STD[2]; + // # T.CenterCrop(224), + // width, height = self.target_size, self.target_size + // img_h, img_w = img.shape[-2:] + // s_h = torch.div((img_h - height), 2, rounding_mode='trunc') + // s_w = torch.div((img_w - width), 2, rounding_mode='trunc') + // x = img[:, :, s_h:s_h + height, s_w:s_w + width] + + OrtxStatus Compute(const ortc::Tensor& input, ortc::Tensor& output) { + auto& dimensions = input.Shape(); + if (dimensions.size() != 3ULL) { + return {kOrtxErrorInvalidArgument, "[CenterCrop]: Only raw image formats"}; } - } - // Debug code to check the image parity - // auto rgb_image_ptr_debug = std::make_unique(h * w * c); - // Permute3DArray(p_pixel_values, rgb_image_ptr_debug.get(), h, w, c); - - cv::Mat hd_image(h, w, CV_32FC3, p_pixel_values); - // # create global image - // global_image = 
[torch.nn.functional.interpolate(im.unsqueeze(0).float(), size=(336, 336), mode='bicubic',).to(im.dtype) for im in hd_images] - cv::Mat global_image; - cv::resize(hd_image, global_image, {image_resized_height, image_resized_width}, 0.0, 0.0, cv::INTER_CUBIC); - - int64_t shape[2]; - // # [(3, h, w)], where h, w is multiple of 336 - // shapes = [[im.size(1), im.size(2)] for im in hd_images] - { - auto shapes = image_sizes.Allocate({2}); - shapes[0] = shape[0] = hd_image.rows; - shapes[1] = shape[1] = hd_image.cols; - } - // num_img_tokens = [int((h//336*w//336+1)*144 + 1 + (h//336+1)*12) for h, w in shapes] - { - auto n_tokens = num_img_takens.Allocate({1}); - auto [h_t, w_t] = std::make_tuple(image_sizes.Data()[0], image_sizes.Data()[1]); - auto num_t = (static_cast( - static_cast(h_t / image_resized_height) * w_t / image_resized_width) + - 1) * - 144 + - 1 + static_cast(h_t / image_resized_height + 1) * 12; - *n_tokens = static_cast(num_t); - } - // # reshape to channel dimension -> (num_images, num_crops, 3, 336, 336) - // # (1, 3, h//336, 336, w//336, 336) -> (1, h//336, w//336, 3, 336, 336) -> (h//336*w//336, 3, 336, 336) - // hd_images_reshape = [im.reshape(1, 3, h//336, 336, w//336, 336).permute(0,2,4,1,3,5).reshape(-1, 3, 336, 336).contiguous() for im, (h, w) in zip(hd_images, shapes)] - // # concat global image and local image - // hd_images_reshape = [torch.cat([_global_image] + [_im], dim=0) for _global_image, _im in zip(global_image, hd_images_reshape)] - // # pad to max_num_crops - // image_transformed = [pad_to_max_num_crops_tensor(im, self.num_crops+1) for im in hd_images_reshape] - // image_transformed = torch.stack(image_transformed, dim=0) - // padded_images = image_transformed - std::vector padded_image_shape = {max_crops + 1, 3, image_resized_height, image_resized_width}; - float* output_pixel = pixel_values.Allocate(padded_image_shape); - // Copy the image pixel value from the global image - const int image_c_size = image_resized_height * image_resized_width * 3; - Permute3DArray(reinterpret_cast(global_image.data), output_pixel, image_resized_height, image_resized_width, 3); - auto num_crops = static_cast((shape[0] / image_resized_height) * (shape[1] / image_resized_width)); - float* image_transformed = reinterpret_cast(hd_image.data); - // for (int i = 0; i < num_crops; ++i) { - // Permute3DArray(image_transformed + i * image_c_size, output_pixel + (i + 1) * image_c_size, image_resized_height, image_resized_width, 3); - // } - - float* output_pixel_n_1 = output_pixel + image_c_size; - int m = static_cast(shape[0] / image_resized_height); - int n = static_cast(shape[1] / image_resized_width); - h = image_resized_height; - w = image_resized_width; - assert(m * n == num_crops); - for (int i = 0; i < m; ++i) { - for (int j = 0; j < n; ++j) { - int sub_index = (i * n + j) * image_c_size; - for (int x = 0; x < image_resized_height; ++x) { - for (int y = 0; y < image_resized_width; ++y) { - for (int k = 0; k < 3; ++k) { // Loop over channels - output_pixel_n_1[sub_index + k * h * w + x * w + y] = image_transformed[((i * h + x) * shape[1] + (j * w + y)) * 3 + k]; - } + auto* input_data = input.Data(); + auto h = dimensions[0]; + auto w = dimensions[1]; + auto c = dimensions[2]; + + auto* p_output_image = output.Allocate({target_h_, target_w_, c}); + auto s_h = (h - target_h_) / 2; + auto s_w = (w - target_w_) / 2; + + for (int64_t j = 0; j < target_h_; ++j) { + for (int64_t k = 0; k < target_w_; ++k) { + auto c0_index = (j + s_h) * w * c + (k + s_w) * c; + for (int64_t l = 0; 
l < c; ++l) { + p_output_image[j * target_w_ * c + k * c + l] = input_data[c0_index + l]; } } } - } - // padding the rest of the crops - // pad = torch.zeros(max_crops - B, 3, H, W, dtype=images.dtype, device=images.device) - memset(output_pixel_n_1 + num_crops * image_c_size, 0, image_c_size * (max_crops - num_crops) * sizeof(float)); + return {}; + } - // image_sizes = shapes - return {}; -} + private: + int64_t target_h_{224}; + int64_t target_w_{224}; +}; diff --git a/shared/api/image_transforms_phi_3.hpp b/shared/api/image_transforms_phi_3.hpp new file mode 100644 index 000000000..172793ae4 --- /dev/null +++ b/shared/api/image_transforms_phi_3.hpp @@ -0,0 +1,209 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +#pragma once + +#include "ocos.h" + +constexpr int max_crops = 16; +constexpr int num_img_tokens = 144; +constexpr int image_resized_width = 336; +constexpr int image_resized_height = 336; + +constexpr float OPENAI_CLIP_MEAN[] = {0.48145466f, 0.4578275f, 0.40821073f}; +constexpr float OPENAI_CLIP_STD[] = {0.26862954f, 0.26130258f, 0.27577711f}; + +inline cv::Mat padding_336(const cv::Mat& image) { + // def padding_336(b): + // width, height = b.size + // tar = int(np.ceil(height / 336) * 336) + // top_padding = int((tar - height)/2) + // bottom_padding = tar - height - top_padding + // left_padding = 0 + // right_padding = 0 + // b = torchvision.transforms.functional.pad(b, [left_padding, top_padding, right_padding, bottom_padding], fill=[255,255,255]) + + // return b + float height = static_cast(image.rows); + int32_t tar = static_cast(std::ceil(height / image_resized_height) * image_resized_height); + int32_t top_padding = static_cast((tar - height) / 2); + int32_t bottom_padding = tar - image.rows - top_padding; + + cv::Mat output; + cv::copyMakeBorder(image, output, top_padding, bottom_padding, 0, 0, cv::BORDER_CONSTANT, {255, 255, 255}); + return output; +} + +inline cv::Mat hd_transform(const cv::Mat& image, int hd_num) { + // width, height = img.size + auto [width, height] = std::make_tuple(image.cols, image.rows); + + // ratio = width / height if width >= height else height / width + float ratio = 1.0f * width; + if (width >= height) { + ratio /= height; + } else { + ratio = 1.0f * height / width; + } + + // scale = 1 + // while scale * np.ceil(scale / ratio) <= hd_num: + // scale += 1 + // scale -= 1 + int scale = 1; + while (scale * std::ceil(scale / ratio) <= hd_num) { + scale += 1; + } + scale -= 1; + + // new_w = int(scale * 336) + // new_h = int(new_w / ratio) + int64_t new_w = scale * image_resized_width; + int64_t new_h = static_cast(new_w / ratio); + + // if width < height: + // new_w, new_h = new_h, new_w + if (width < height) { + std::swap(new_w, new_h); + } + + // img = torchvision.transforms.functional.resize(img, [new_h, new_w]) + std::vector height_x_width{static_cast(new_h), // H + static_cast(new_w)}; // W + + cv::Mat output_image; + cv::resize(image, output_image, + {static_cast(new_w), static_cast(new_h)}, 0.0, 0.0, + cv::INTER_LINEAR); + // img = padding_336(img) + return padding_336(output_image); +} + +// Function to calculate 1D index from 3D indices +inline size_t Index3D(size_t i, size_t j, size_t k, size_t dim1, size_t dim2, size_t dim3) { + return i * dim2 * dim3 + j * dim3 + k; +} + +// Function to permute 3D array stored in 1D array from (X, Y, Z) to (Z, X, Y) +inline void Permute3DArray(const float* array, float* permutedArray, size_t X, size_t Y, size_t Z) { + for (size_t x = 0; x < X; 
++x) { + for (size_t y = 0; y < Y; ++y) { + for (size_t z = 0; z < Z; ++z) { + size_t oldIndex = Index3D(x, y, z, X, Y, Z); + size_t newIndex = Index3D(z, x, y, Z, X, Y); + permutedArray[newIndex] = array[oldIndex]; + } + } + } +} + +inline OrtxStatus phi3_hd_transform(const ortc::Tensor& input, + ortc::Tensor& pixel_values, + ortc::Tensor& image_sizes, + ortc::Tensor& num_img_takens) { + auto& dimensions = input.Shape(); + if (dimensions.size() != 3ULL) { + return {kOrtxErrorInvalidArgument, "[hd_transform]: Only raw image formats"}; + } + + // Normalize the pixel value with mean and var + auto input_data = input.Data(); + int32_t h = static_cast(dimensions[0]); + int32_t w = static_cast(dimensions[1]); + int32_t c = static_cast(dimensions[2]); + std::vector height_x_width{static_cast(h), // H + static_cast(w)}; // W + + cv::Mat rgb_image(height_x_width, CV_8UC3, const_cast(input_data)); + // elems = [HD_transform(im, hd_num = self.num_crops) for im in images] + auto elem = hd_transform(rgb_image, max_crops); + // # tensor transform and normalize + // hd_images = [img_processor(im) for im in elems] + std::tie(w, h) = std::make_tuple(elem.cols, elem.rows); + auto elem_image = elem.data; + auto rgb_image_ptr = std::make_unique(h * w * c); + auto p_pixel_values = rgb_image_ptr.get(); + for (int64_t j = 0; j < h; ++j) { + for (int64_t k = 0; k < w; ++k) { + auto c0_index = j * w * c + k * c; + p_pixel_values[c0_index] = (static_cast(elem_image[c0_index]) / 255.f - OPENAI_CLIP_MEAN[0]) / OPENAI_CLIP_STD[0]; + p_pixel_values[c0_index + 1] = (static_cast(elem_image[c0_index + 1]) / 255.f - OPENAI_CLIP_MEAN[1]) / OPENAI_CLIP_STD[1]; + p_pixel_values[c0_index + 2] = (static_cast(elem_image[c0_index + 2]) / 255.f - OPENAI_CLIP_MEAN[2]) / OPENAI_CLIP_STD[2]; + } + } + + // Debug code to check the image parity + // auto rgb_image_ptr_debug = std::make_unique(h * w * c); + // Permute3DArray(p_pixel_values, rgb_image_ptr_debug.get(), h, w, c); + + cv::Mat hd_image(h, w, CV_32FC3, p_pixel_values); + // # create global image + // global_image = [torch.nn.functional.interpolate(im.unsqueeze(0).float(), size=(336, 336), mode='bicubic',).to(im.dtype) for im in hd_images] + cv::Mat global_image; + cv::resize(hd_image, global_image, {image_resized_height, image_resized_width}, 0.0, 0.0, cv::INTER_CUBIC); + + int64_t shape[2]; + // # [(3, h, w)], where h, w is multiple of 336 + // shapes = [[im.size(1), im.size(2)] for im in hd_images] + { + auto shapes = image_sizes.Allocate({2}); + shapes[0] = shape[0] = hd_image.rows; + shapes[1] = shape[1] = hd_image.cols; + } + // num_img_tokens = [int((h//336*w//336+1)*144 + 1 + (h//336+1)*12) for h, w in shapes] + { + auto n_tokens = num_img_takens.Allocate({1}); + auto [h_t, w_t] = std::make_tuple(image_sizes.Data()[0], image_sizes.Data()[1]); + auto num_t = (static_cast( + static_cast(h_t / image_resized_height) * w_t / image_resized_width) + + 1) * + 144 + + 1 + static_cast(h_t / image_resized_height + 1) * 12; + *n_tokens = static_cast(num_t); + } + // # reshape to channel dimension -> (num_images, num_crops, 3, 336, 336) + // # (1, 3, h//336, 336, w//336, 336) -> (1, h//336, w//336, 3, 336, 336) -> (h//336*w//336, 3, 336, 336) + // hd_images_reshape = [im.reshape(1, 3, h//336, 336, w//336, 336).permute(0,2,4,1,3,5).reshape(-1, 3, 336, 336).contiguous() for im, (h, w) in zip(hd_images, shapes)] + // # concat global image and local image + // hd_images_reshape = [torch.cat([_global_image] + [_im], dim=0) for _global_image, _im in zip(global_image, hd_images_reshape)] 
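  // Worked example of the num_img_tokens formula above (illustrative numbers, not taken from the tests):
  // an HD-transformed image with h = 672, w = 1344 gives h//336 = 2 and w//336 = 4,
  // so (2*4 + 1)*144 + 1 + (2 + 1)*12 = 1296 + 1 + 36 = 1333 image tokens.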
+ // # pad to max_num_crops + // image_transformed = [pad_to_max_num_crops_tensor(im, self.num_crops+1) for im in hd_images_reshape] + // image_transformed = torch.stack(image_transformed, dim=0) + // padded_images = image_transformed + std::vector padded_image_shape = {max_crops + 1, 3, image_resized_height, image_resized_width}; + float* output_pixel = pixel_values.Allocate(padded_image_shape); + // Copy the image pixel value from the global image + const int image_c_size = image_resized_height * image_resized_width * 3; + Permute3DArray(reinterpret_cast(global_image.data), output_pixel, image_resized_height, image_resized_width, 3); + auto num_crops = static_cast((shape[0] / image_resized_height) * (shape[1] / image_resized_width)); + float* image_transformed = reinterpret_cast(hd_image.data); + // for (int i = 0; i < num_crops; ++i) { + // Permute3DArray(image_transformed + i * image_c_size, output_pixel + (i + 1) * image_c_size, image_resized_height, image_resized_width, 3); + // } + + float* output_pixel_n_1 = output_pixel + image_c_size; + int m = static_cast(shape[0] / image_resized_height); + int n = static_cast(shape[1] / image_resized_width); + h = image_resized_height; + w = image_resized_width; + assert(m * n == num_crops); + for (int i = 0; i < m; ++i) { + for (int j = 0; j < n; ++j) { + int sub_index = (i * n + j) * image_c_size; + for (int x = 0; x < image_resized_height; ++x) { + for (int y = 0; y < image_resized_width; ++y) { + for (int k = 0; k < 3; ++k) { // Loop over channels + output_pixel_n_1[sub_index + k * h * w + x * w + y] = image_transformed[((i * h + x) * shape[1] + (j * w + y)) * 3 + k]; + } + } + } + } + } + + // padding the rest of the crops + // pad = torch.zeros(max_crops - B, 3, H, W, dtype=images.dtype, device=images.device) + memset(output_pixel_n_1 + num_crops * image_c_size, 0, image_c_size * (max_crops - num_crops) * sizeof(float)); + + // image_sizes = shapes + return {}; +} diff --git a/shared/api/runner.hpp b/shared/api/runner.hpp index ba5991400..3590190bb 100644 --- a/shared/api/runner.hpp +++ b/shared/api/runner.hpp @@ -7,6 +7,7 @@ #include #include #include +#include #include #include "nlohmann/json.hpp" @@ -16,6 +17,7 @@ namespace ort_extensions { using json = nlohmann::json; +using TensorPtr = std::unique_ptr; using TensorArgs = std::vector; class KernelDef { @@ -26,6 +28,9 @@ class KernelDef { virtual TensorArgs AllocateOutput(ortc::IAllocator* allocator) const = 0; virtual OrtxStatus Apply(TensorArgs& inputs, TensorArgs& output) const = 0; + using AttrType = std::variant>; + using AttrDict = std::unordered_map; + template using tuple_function_args = std::tuple::type*...>; @@ -50,14 +55,14 @@ class KernelDef { } template - static typename std::enable_if::value, ortc::TensorBase*>::type - AllocateTensor(ortc::IAllocator* allocator) { + static typename std::enable_if::value, ortc::TensorBase*>::type AllocateTensor( + ortc::IAllocator* allocator) { return nullptr; } template - static typename std::enable_if::value, ortc::TensorBase*>::type - AllocateTensor(ortc::IAllocator* allocator) { + static typename std::enable_if::value, ortc::TensorBase*>::type AllocateTensor( + ortc::IAllocator* allocator) { return std::make_unique(allocator).release(); } @@ -70,18 +75,17 @@ class KernelDef { static std::vector AllocateOutput(ortc::IAllocator* allocator) { using tuple_no_ref = std::tuple::type...>; auto result = AllocateTuple(allocator, (tuple_no_ref*)0); - return std::apply([](auto&&... 
-    return std::apply([](auto&&... elems) { return std::vector<ortc::TensorBase*>{std::forward(elems)...}; }, std::move(result));
+    return std::apply(
+        [](auto&&... elems) { return std::vector<ortc::TensorBase*>{std::forward(elems)...}; },
+        std::move(result));
   }
 
-  static auto CastOutputAllType(TensorArgs::iterator tensor) {
-    return std::make_tuple();
-  }
+  static auto CastOutputAllType(TensorArgs::iterator tensor) { return std::make_tuple(); }
 
   template <typename T, typename... Args>
   static auto CastOutputAllType(TensorArgs::iterator tensor, T& arg, Args&... args) {
     // return std::make_tuple(static_cast(*tensor), CastOutputAllType(args...));
-    return std::tuple_cat(CastOutputImpl(tensor),
-                          CastOutputAllType(tensor + 1, args...));
+    return std::tuple_cat(CastOutputImpl(tensor), CastOutputAllType(tensor + 1, args...));
   }
 
   template
@@ -115,15 +119,14 @@ class KernelFunction : public KernelDef {
     all_args.insert(all_args.end(), inputs.begin(), inputs.end());
     all_args.insert(all_args.end(), outputs.begin(), outputs.end());
     auto args_tuple = std::tuple_cat(CastTensors(all_args));
-    return std::apply([this](auto&&... args) { return this->Compute(std::forward(*args)...); }, std::move(args_tuple));
+    return std::apply([this](auto&&... args) { return this->Compute(std::forward(*args)...); },
+                      std::move(args_tuple));
   }
 
  private:
  std::function<OrtxStatus(Args...)> body_;
 
-  OrtxStatus Compute(Args... args) const {
-    return body_(std::forward(args)...);
-  }
+  OrtxStatus Compute(Args... args) const { return body_(std::forward(args)...); }
 };
 
 template
@@ -144,10 +147,34 @@ class KernelStruct : public KernelDef {
     return all_args;
   }
 
-  template <typename DT>
-  OrtxStatus Init(DT attr) {
+  OrtxStatus Init(std::string_view attr_str) override {
     instance_ = std::make_unique();
-    return instance_->Init(std::move(attr));
+
+    AttrDict attr_dict;
+    if (attr_str.empty()) {
+      return instance_->Init(attr_dict);
+    }
+
+    auto attr = json::parse(attr_str, nullptr, false);
+    if (attr.is_discarded()) {
+      return {kOrtxErrorCorruptData, "Failed to parse JSON for kernel attributes."};
+    }
+    attr_dict.reserve(attr.size());
+    for (auto& [key, value] : attr.items()) {
+      if (value.is_string()) {
+        attr_dict[key] = value.template get<std::string>();
+      } else if (value.is_number_integer() || value.is_number_unsigned()) {
+        attr_dict[key] = value.template get<int64_t>();
+      } else if (value.is_number_float()) {
+        attr_dict[key] = value.template get<float>();
+      } else if (value.is_array()) {
+        attr_dict[key] = value.template get<std::vector<float>>();
+      } else {
+        return {kOrtxErrorCorruptData, "Invalid attribute type."};
+      }
+    }
+
+    return instance_->Init(attr_dict);
   }
 
   OrtxStatus Apply(TensorArgs& inputs, TensorArgs& outputs) const override {
@@ -156,8 +183,9 @@
     all_args.insert(all_args.end(), inputs.begin(), inputs.end());
     all_args.insert(all_args.end(), outputs.begin(), outputs.end());
     auto args_tuple = std::tuple_cat(CastTensors(all_args));
-    return std::apply([this](auto&&... args) {
-      return (instance_.get()->*body_)(std::forward(*args)...); }, std::move(args_tuple));
+    return std::apply(
+        [this](auto&&... args) { return (instance_.get()->*body_)(std::forward(*args)...); },
+        std::move(args_tuple));
   }
 
  private:
@@ -207,32 +235,36 @@
     op_name_ = op_name;
     kernel_ = kernel_iter->second();
 
+    std::string attr_str;
     if (op_json.contains("attrs")) {
       auto attrs = op_json.at("attrs");
-      auto status = kernel_->Init(attrs.dump());
-      if (!status.IsOk()) {
-        return status;
-      }
+      attr_str = attrs.dump();
     }
 
-    return {};
+    return kernel_->Init(attr_str);
   }
 
-  virtual ~Operation() {
-    ResetTensors(allocator_);
-  }
+  virtual ~Operation() { ResetTensors(allocator_); }
 
-  std::tuple<OrtxStatus, std::vector<ortc::TensorBase*>>
-  Apply(ortc::IAllocator* allocator, std::vector<ortc::TensorBase*> inputs) {
+  std::tuple<OrtxStatus, std::vector<ortc::TensorBase*>> Apply(ortc::IAllocator* allocator,
+                                                               std::vector<ortc::TensorBase*> inputs) {
     auto outputs = kernel_->AllocateOutput(allocator);
     auto status = kernel_->Apply(inputs, outputs);
     return std::make_tuple(status, outputs);
   }
 
-  void ResetTensors(ortc::IAllocator* allocator) {
-    outputs_.clear();
+  std::vector<TensorPtr> AllocateOutputs(ortc::IAllocator* allocator) {
+    auto tensors = kernel_->AllocateOutput(allocator);
+    std::vector<TensorPtr> outputs;
+    for (auto& tensor : tensors) {
+      outputs.push_back(std::unique_ptr<ortc::TensorBase>(tensor));
+    }
+
+    return outputs;
   }
 
+  void ResetTensors(ortc::IAllocator* allocator) { outputs_.clear(); }
+
  private:
   std::vector<std::unique_ptr<ortc::TensorBase>> outputs_;
diff --git a/test/data/processor/clip_image.json b/test/data/processor/clip_image.json
new file mode 100644
index 000000000..6891de8e4
--- /dev/null
+++ b/test/data/processor/clip_image.json
@@ -0,0 +1,59 @@
+{
+  "processor": {
+    "name": "image_processing",
+    "transforms": [
+      {
+        "operation": {
+          "name": "decode_image",
+          "type": "DecodeImage",
+          "attrs": {
+            "color_space": "BGR"
+          }
+        }
+      },
+      {
+        "operation": {
+          "name": "convert_to_rgb",
+          "type": "ConvertRGB"
+        }
+      },
+      {
+        "operation": {
+          "name": "resize",
+          "type": "Resize",
+          "attrs": {
+            "interpolation": "CUBIC",
+            "width": 256,
+            "height": 256
+          }
+        }
+      },
+      {
+        "operation": {
+          "name": "center_crop",
+          "type": "CenterCrop",
+          "attrs": {
+            "width": 224,
+            "height": 224
+          }
+        }
+      },
+      {
+        "operation": {
+          "name": "re-scale",
+          "type": "Rescale"
+        }
+      },
+      {
+        "operation": {
+          "name": "normalize",
+          "type": "Normalize",
+          "attrs": {
+            "mean": [0.485, 0.456, 0.406],
+            "std": [0.229, 0.224, 0.225]
+          }
+        }
+      }
+    ]
+  }
+}
diff --git a/test/data/processor/image_processor.json b/test/data/processor/phi_3_image.json
similarity index 100%
rename from test/data/processor/image_processor.json
rename to test/data/processor/phi_3_image.json
diff --git a/test/pp_api_test/test_processor.cc b/test/pp_api_test/test_processor.cc
index 076c3b9d7..df06e54e8 100644
--- a/test/pp_api_test/test_processor.cc
+++ b/test/pp_api_test/test_processor.cc
@@ -7,6 +7,7 @@
 #include
 
 #include "gtest/gtest.h"
+#include "ortx_c_helper.h"
 #include "shared/api/image_processor.h"
 
 using namespace ort_extensions;
@@ -30,16 +31,13 @@ TEST(ProcessorTest, TestPhi3VImageProcessing) {
   auto [input_data, n_data] = ort_extensions::LoadRawImages(
       {"data/processor/standard_s.jpg", "data/processor/australia.jpg", "data/processor/exceltable.png"});
 
-  auto proc = OrtxObjectPtr(OrtxCreateProcessor, "data/processor/image_processor.json");
+  auto proc = OrtxObjectPtr(OrtxCreateProcessor, "data/processor/phi_3_image.json");
   ortc::Tensor<float>* pixel_values;
   ortc::Tensor<int64_t>* image_sizes;
   ortc::Tensor<int64_t>* num_img_tokens;
-  auto [status, r] = proc->PreProcess(
-      ort_extensions::span(input_data.get(), (size_t)n_data),
-      &pixel_values,
-      &image_sizes,
-      &num_img_tokens);
+  auto [status, r] = proc->PreProcess(ort_extensions::span(input_data.get(), (size_t)n_data), &pixel_values,
+                                      &image_sizes, &num_img_tokens);
   ASSERT_TRUE(status.IsOk());
 
   int64_t expected_image_size[] = {1344, 1344, 1008, 1344, 1008, 1680};
@@ -72,3 +70,33 @@
 
   proc->ClearOutputs(&r);
 }
+
+TEST(ProcessorTest, TestClipImageProcessing) {
+  const char* images_path[] = {"data/processor/standard_s.jpg", "data/processor/australia.jpg",
+                               "data/processor/exceltable.png"};
+  OrtxObjectPtr raw_images;
+  extError_t err = OrtxLoadImages(ort_extensions::ptr(raw_images), images_path, 3, nullptr);
+  ASSERT_EQ(err, kOrtxOK);
+
+  OrtxObjectPtr processor;
+  err = OrtxCreateProcessor(ort_extensions::ptr(processor), "data/processor/clip_image.json");
+  if (err != kOrtxOK) {
+    std::cout << "Error: " << OrtxGetLastErrorMessage() << std::endl;
+  }
+  ASSERT_EQ(err, kOrtxOK);
+
+  OrtxObjectPtr result;
+  err = OrtxImagePreProcess(processor.get(), raw_images.get(), ort_extensions::ptr(result));
+  ASSERT_EQ(err, kOrtxOK);
+
+  OrtxObjectPtr tensor;
+  err = OrtxImageGetTensorResult(result.get(), 0, ort_extensions::ptr(tensor));
+  ASSERT_EQ(err, kOrtxOK);
+
+  const float* data{};
+  const int64_t* shape{};
+  size_t num_dims;
+  err = OrtxGetTensorDataFloat(tensor.get(), &data, &shape, &num_dims);
+  ASSERT_EQ(err, kOrtxOK);
+  ASSERT_EQ(num_dims, 4);
+}

From 3b275b16bc7a11fb89f82546d1209a161ab057ca Mon Sep 17 00:00:00 2001
From: Wenbing Li <10278425+wenbingl@users.noreply.github.com>
Date: Thu, 20 Jun 2024 15:18:17 -0700
Subject: [PATCH 3/3] Upgrade pybind11 2.12 to support both numpy 1.x and 2.x (#750)

---
 cgmanifest.json                | 2 +-
 cmake/externals/pybind11.cmake | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/cgmanifest.json b/cgmanifest.json
index df867353f..7eabed5a8 100644
--- a/cgmanifest.json
+++ b/cgmanifest.json
@@ -144,7 +144,7 @@
       "component": {
         "type": "git",
         "git": {
-          "commitHash": "80dc998efced8ceb2be59756668a7e90e8bef917",
+          "commitHash": "3e9dfa2866941655c56877882565e7577de6fc7b",
           "repositoryUrl": "https://github.com/pybind/pybind11.git"
         },
         "comments": "v2.10.1"
diff --git a/cmake/externals/pybind11.cmake b/cmake/externals/pybind11.cmake
index c6460d06a..e8064bfaf 100644
--- a/cmake/externals/pybind11.cmake
+++ b/cmake/externals/pybind11.cmake
@@ -1,7 +1,7 @@
 FetchContent_Declare(
   pybind11
-  URL https://github.com/pybind/pybind11/archive/refs/tags/v2.10.1.zip
-  URL_HASH SHA1=769b6aa67a77f17a770960f604b727645b6f6a13
+  URL https://github.com/pybind/pybind11/archive/refs/tags/v2.12.0.zip
+  URL_HASH SHA1=8482f57ed55c7b100672815a311d5450858723fb
 )
 
 FetchContent_GetProperties(pybind11)
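
Usage note (not part of the patch series): the new C API introduced in patch 2 is driven in the order exercised by TestClipImageProcessing above — load raw images, create a processor from a JSON transform definition, run OrtxImagePreProcess, then read back the resulting tensor. The sketch below is illustrative only; the opaque handle type names given to OrtxObjectPtr (OrtxRawImages, OrtxProcessor, OrtxImageProcessorResult, OrtxTensor) are assumptions standing in for the template arguments lost in the listing above, the file paths are placeholders, and error handling is reduced to early returns.

#include <iostream>

#include "ortx_c_helper.h"
#include "ortx_processor.h"

// Minimal sketch, assuming the handle type names marked below; mirrors TestClipImageProcessing.
int run_clip_pipeline() {
  using namespace ort_extensions;

  // 1. Load the raw image bytes; the path is a placeholder.
  const char* image_paths[] = {"data/processor/standard_s.jpg"};
  OrtxObjectPtr<OrtxRawImages> raw_images;  // assumed handle type
  extError_t err = OrtxLoadImages(ptr(raw_images), image_paths, 1, nullptr);
  if (err != kOrtxOK) {
    std::cerr << "LoadImages: " << OrtxGetLastErrorMessage() << std::endl;
    return 1;
  }

  // 2. Build the processor from the JSON pipeline definition (see clip_image.json above).
  OrtxObjectPtr<OrtxProcessor> processor;  // assumed handle type
  err = OrtxCreateProcessor(ptr(processor), "data/processor/clip_image.json");
  if (err != kOrtxOK) {
    std::cerr << "CreateProcessor: " << OrtxGetLastErrorMessage() << std::endl;
    return 1;
  }

  // 3. Run the transforms and fetch the first output tensor of the result.
  OrtxObjectPtr<OrtxImageProcessorResult> result;  // assumed handle type
  err = OrtxImagePreProcess(processor.get(), raw_images.get(), ptr(result));
  if (err != kOrtxOK) return 1;

  OrtxObjectPtr<OrtxTensor> tensor;  // assumed handle type
  err = OrtxImageGetTensorResult(result.get(), 0, ptr(tensor));
  if (err != kOrtxOK) return 1;

  // 4. Read the float data and shape; the test above expects a 4-D tensor for this pipeline.
  const float* data{};
  const int64_t* shape{};
  size_t num_dims{};
  err = OrtxGetTensorDataFloat(tensor.get(), &data, &shape, &num_dims);
  if (err != kOrtxOK) return 1;

  for (size_t i = 0; i < num_dims; ++i) {
    std::cout << shape[i] << (i + 1 < num_dims ? " x " : "\n");
  }
  return 0;  // each OrtxObjectPtr releases its handle on scope exit
}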