From 370430239c6a40b70969236337263da687845847 Mon Sep 17 00:00:00 2001 From: vibhatha Date: Tue, 5 Sep 2023 23:20:40 +0530 Subject: [PATCH 01/12] feat: initial draft --- java/source/index.rst | 1 + java/source/python_java.rst | 152 ++++++++++++++++++++++++++++++++++++ 2 files changed, 153 insertions(+) create mode 100644 java/source/python_java.rst diff --git a/java/source/index.rst b/java/source/index.rst index 63f94c0d..04962cd0 100644 --- a/java/source/index.rst +++ b/java/source/index.rst @@ -43,6 +43,7 @@ This cookbook is tested with Apache Arrow |version|. data avro jdbc + python_java Indices and tables ================== diff --git a/java/source/python_java.rst b/java/source/python_java.rst new file mode 100644 index 00000000..72bd508a --- /dev/null +++ b/java/source/python_java.rst @@ -0,0 +1,152 @@ +.. _arrow-python-java: + +======================== +PyArrow Java Integration +======================== + +PyArrow for Python is a strong API which is useful for certain Java application development. + +.. contents:: + +Dictionary Data Roundtrip +========================= + + To demonstrate how dictionary data can be used in both Python and Java, consider the following. + + Data is created in Python, then it will be accessed in Java and data will be updated. updated + data will be again used in Python and validated for consistency through out the entire application. + +Python Component: +----------------- + +.. code-block:: python + + import jpype + import jpype.imports + from jpype.types import * + import pyarrow as pa + from pyarrow.cffi import ffi as arrow_c + + # Init the JVM and make MapValuesV2 class available to Python. + jpype.startJVM(classpath=[ "../arrow-java-playground/target/*"]) + java_c_package = jpype.JPackage("org").apache.arrow.c + MapValuesConsumer = JClass('io.arrow.playground.python.MapValuesConsumer') + CDataDictionaryProvider = JClass('org.apache.arrow.c.CDataDictionaryProvider') + + # Starting from Python and generating data + + # Create a Python DictionaryArray + + dictionary = pa.dictionary(pa.int64(), pa.utf8()) + array = pa.array(["A", "B", "C", "A", "D"], dictionary) + print("From Python") + print("Dictionary Created: ", array) + + # create the CDataDictionaryProvider instance which is + # required to create dictionary array precisely + c_provider = CDataDictionaryProvider() + + consumer = MapValuesConsumer(c_provider) + + # Export the Python array through C Data + c_array = arrow_c.new("struct ArrowArray*") + c_array_ptr = int(arrow_c.cast("uintptr_t", c_array)) + array._export_to_c(c_array_ptr) + + # Export the Schema of the Array through C Data + c_schema = arrow_c.new("struct ArrowSchema*") + c_schema_ptr = int(arrow_c.cast("uintptr_t", c_schema)) + array.type._export_to_c(c_schema_ptr) + + # Send Array and its Schema to the Java function + # that will update the dictionary + consumer.update(c_array_ptr, c_schema_ptr) + + # Importing updated values from Java to Python + + # Export the Python array through C Data + updated_c_array = arrow_c.new("struct ArrowArray*") + updated_c_array_ptr = int(arrow_c.cast("uintptr_t", updated_c_array)) + + # Export the Schema of the Array through C Data + updated_c_schema = arrow_c.new("struct ArrowSchema*") + updated_c_schema_ptr = int(arrow_c.cast("uintptr_t", updated_c_schema)) + + java_wrapped_array = java_c_package.ArrowArray.wrap(updated_c_array_ptr) + java_wrapped_schema = java_c_package.ArrowSchema.wrap(updated_c_schema_ptr) + + java_c_package.Data.exportVector( + consumer.getAllocatorForJavaConsumer(), + consumer.getVector(), + c_provider, + java_wrapped_array, + java_wrapped_schema + ) + + print("From Java back to Python") + updated_array = pa.Array._import_from_c(updated_c_array_ptr, updated_c_schema_ptr) + + # In Java and Python, the same memory is being accessed through the C Data interface. + # Since the array from Java and array created in Python should have same data. + assert updated_array.equals(array) + print("Updated Array: ", updated_array) + + del updated_array + +In Python component, the following steps are taken to demonstrate the roundtrip process. + +1. Create Data in Python +2. Access Data from Java +3. Update Data from Java +4. Access Data from Python +5. Validate the data change + + +Java Component: +--------------- + +.. code-block:: java + + import org.apache.arrow.c.ArrowArray; + import org.apache.arrow.c.ArrowSchema; + import org.apache.arrow.c.Data; + import org.apache.arrow.c.CDataDictionaryProvider; + import org.apache.arrow.memory.BufferAllocator; + import org.apache.arrow.memory.RootAllocator; + import org.apache.arrow.vector.FieldVector; + import org.apache.arrow.vector.BigIntVector; + + + public class MapValuesConsumer { + private final static BufferAllocator allocator = new RootAllocator(); + private final CDataDictionaryProvider provider; + private FieldVector vector; + + public MapValuesConsumer(CDataDictionaryProvider provider) { + this.provider = provider; + } + + public static BufferAllocator getAllocatorForJavaConsumer() { + return allocator; + } + + public FieldVector getVector() { + return this.vector; + } + + public void update(long c_array_ptr, long c_schema_ptr) { + ArrowArray arrow_array = ArrowArray.wrap(c_array_ptr); + ArrowSchema arrow_schema = ArrowSchema.wrap(c_schema_ptr); + this.vector = Data.importVector(allocator, arrow_array, arrow_schema, this.provider); + this.doWorkInJava(vector); + } + + private void doWorkInJava(FieldVector vector) { + System.out.println("Doing work in Java"); + BigIntVector bigIntVector = (BigIntVector)vector; + bigIntVector.setSafe(0, 2); + } + } + +Java component access the data from Python and update the vector, +and this is later accessed in Python component. From 71877c8ca5fb855c44757e138adf7d0b5d20e9d2 Mon Sep 17 00:00:00 2001 From: vibhatha Date: Tue, 5 Sep 2023 23:46:05 +0530 Subject: [PATCH 02/12] fix: format --- java/source/python_java.rst | 34 ++++++++++++++++++++++------------ 1 file changed, 22 insertions(+), 12 deletions(-) diff --git a/java/source/python_java.rst b/java/source/python_java.rst index 72bd508a..a3b4e5fa 100644 --- a/java/source/python_java.rst +++ b/java/source/python_java.rst @@ -4,21 +4,24 @@ PyArrow Java Integration ======================== -PyArrow for Python is a strong API which is useful for certain Java application development. +The PyArrow library offers a powerful API for Python that can be integrated with Java applications. +This document provides a guide on how to enable seamless data exchange between Python and Java components using PyArrow. .. contents:: Dictionary Data Roundtrip ========================= - To demonstrate how dictionary data can be used in both Python and Java, consider the following. + This section demonstrates a data roundtrip, where a dictionary array is created in Python, accessed and updated in Java, + and finally re-accessed and validated in Python for data consistency. - Data is created in Python, then it will be accessed in Java and data will be updated. updated - data will be again used in Python and validated for consistency through out the entire application. Python Component: ----------------- + The Python code uses jpype to start the JVM and make the Java class MapValuesConsumer available to Python. + Data is generated in PyArrow and exported through C Data to Java. + .. code-block:: python import jpype @@ -93,18 +96,19 @@ Python Component: del updated_array -In Python component, the following steps are taken to demonstrate the roundtrip process. +In the Python component, the following steps are executed to demonstrate the data roundtrip: -1. Create Data in Python -2. Access Data from Java -3. Update Data from Java -4. Access Data from Python -5. Validate the data change +1. Create data in Python +2. Export data to Java +3. Import updated data from Java +4. Validate the data consistency Java Component: --------------- + In the Java component, the MapValuesConsumer class receives data from the Python component through C Data. It then updates the data and sends it back to the Python component. + .. code-block:: java import org.apache.arrow.c.ArrowArray; @@ -148,5 +152,11 @@ Java Component: } } -Java component access the data from Python and update the vector, -and this is later accessed in Python component. +The Java component performs the following actions: + +1. Receives data from the Python component. +2. Updates the data. +3. Exports the updated data back to Python. + +By integrating PyArrow in Python and Java components, this example demonstrates that +a system can be created where data is shared and updated across both languages seamlessly. From 0b029e4279725475c91f3c7e232a74bbb7245b8a Mon Sep 17 00:00:00 2001 From: vibhatha Date: Tue, 5 Sep 2023 23:49:23 +0530 Subject: [PATCH 03/12] fix: text formatting --- java/source/python_java.rst | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/java/source/python_java.rst b/java/source/python_java.rst index a3b4e5fa..24a79e5a 100644 --- a/java/source/python_java.rst +++ b/java/source/python_java.rst @@ -107,7 +107,8 @@ In the Python component, the following steps are executed to demonstrate the dat Java Component: --------------- - In the Java component, the MapValuesConsumer class receives data from the Python component through C Data. It then updates the data and sends it back to the Python component. + In the Java component, the MapValuesConsumer class receives data from the Python component through C Data. + It then updates the data and sends it back to the Python component. .. code-block:: java From 740a13958439692ee67e6503e2ff27d442ad6e4d Mon Sep 17 00:00:00 2001 From: vibhatha Date: Tue, 5 Sep 2023 23:51:38 +0530 Subject: [PATCH 04/12] fix: python code cleanup --- java/source/python_java.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/java/source/python_java.rst b/java/source/python_java.rst index 24a79e5a..fa76cc15 100644 --- a/java/source/python_java.rst +++ b/java/source/python_java.rst @@ -31,9 +31,9 @@ Python Component: from pyarrow.cffi import ffi as arrow_c # Init the JVM and make MapValuesV2 class available to Python. - jpype.startJVM(classpath=[ "../arrow-java-playground/target/*"]) + jpype.startJVM(classpath=[ "../target/*"]) java_c_package = jpype.JPackage("org").apache.arrow.c - MapValuesConsumer = JClass('io.arrow.playground.python.MapValuesConsumer') + MapValuesConsumer = JClass('MapValuesConsumer') CDataDictionaryProvider = JClass('org.apache.arrow.c.CDataDictionaryProvider') # Starting from Python and generating data From d2b0491375ef24b864052cda98871560c047e283 Mon Sep 17 00:00:00 2001 From: vibhatha Date: Wed, 6 Sep 2023 00:02:09 +0530 Subject: [PATCH 05/12] fix: adding python output --- java/source/python_java.rst | 38 +++++++++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) diff --git a/java/source/python_java.rst b/java/source/python_java.rst index fa76cc15..8ebbe1bf 100644 --- a/java/source/python_java.rst +++ b/java/source/python_java.rst @@ -96,6 +96,44 @@ Python Component: del updated_array +.. code-block:: shell + + From Python + Dictionary Created: + -- dictionary: + [ + "A", + "B", + "C", + "D" + ] + -- indices: + [ + 0, + 1, + 2, + 0, + 3 + ] + Doing work in Java + From Java back to Python + Updated Array: + -- dictionary: + [ + "A", + "B", + "C", + "D" + ] + -- indices: + [ + 2, + 1, + 2, + 0, + 3 + ] + In the Python component, the following steps are executed to demonstrate the data roundtrip: 1. Create data in Python From c35eefda31f228df83a13e5b729e96f0caaf928b Mon Sep 17 00:00:00 2001 From: vibhatha Date: Wed, 6 Sep 2023 08:16:26 +0530 Subject: [PATCH 06/12] fix: minor wording issue --- java/source/python_java.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/java/source/python_java.rst b/java/source/python_java.rst index 8ebbe1bf..11917368 100644 --- a/java/source/python_java.rst +++ b/java/source/python_java.rst @@ -30,7 +30,7 @@ Python Component: import pyarrow as pa from pyarrow.cffi import ffi as arrow_c - # Init the JVM and make MapValuesV2 class available to Python. + # Init the JVM and make MapValuesConsumer class available to Python. jpype.startJVM(classpath=[ "../target/*"]) java_c_package = jpype.JPackage("org").apache.arrow.c MapValuesConsumer = JClass('MapValuesConsumer') From 46bba7484902c0ea9e1b38ce35ad5c590c03daaf Mon Sep 17 00:00:00 2001 From: vibhatha Date: Wed, 13 Sep 2023 10:10:37 +0530 Subject: [PATCH 07/12] fix: address reviews v1 --- java/source/c_data.rst | 12 ++++++++ java/source/index.rst | 2 +- java/source/python_java.rst | 55 ++++++++++++++++++++++++++++++++++++- 3 files changed, 67 insertions(+), 2 deletions(-) create mode 100644 java/source/c_data.rst diff --git a/java/source/c_data.rst b/java/source/c_data.rst new file mode 100644 index 00000000..2ccd84d6 --- /dev/null +++ b/java/source/c_data.rst @@ -0,0 +1,12 @@ +.. _c-data-java: + +================== +C Data Integration +================== + +C Data interface is an important aspect of supporting multiple languages in Apache Arrow. +A Java programme can seamlessly work with C++ and Python programmes. The following examples +demonstrates how it can be done. + +:ref:`arrow-python-java` +------------------------ diff --git a/java/source/index.rst b/java/source/index.rst index 04962cd0..ece43c38 100644 --- a/java/source/index.rst +++ b/java/source/index.rst @@ -43,7 +43,7 @@ This cookbook is tested with Apache Arrow |version|. data avro jdbc - python_java + c_data Indices and tables ================== diff --git a/java/source/python_java.rst b/java/source/python_java.rst index 11917368..54211e96 100644 --- a/java/source/python_java.rst +++ b/java/source/python_java.rst @@ -148,7 +148,7 @@ Java Component: In the Java component, the MapValuesConsumer class receives data from the Python component through C Data. It then updates the data and sends it back to the Python component. -.. code-block:: java +.. testcode:: java import org.apache.arrow.c.ArrowArray; import org.apache.arrow.c.ArrowSchema; @@ -164,6 +164,8 @@ Java Component: private final static BufferAllocator allocator = new RootAllocator(); private final CDataDictionaryProvider provider; private FieldVector vector; + private final static BigIntVector intVector = new BigIntVector("internal_test_vector", allocator); + public MapValuesConsumer(CDataDictionaryProvider provider) { this.provider = provider; @@ -184,13 +186,64 @@ Java Component: this.doWorkInJava(vector); } + public FieldVector updateFromJava(long c_array_ptr, long c_schema_ptr) { + ArrowArray arrow_array = ArrowArray.wrap(c_array_ptr); + ArrowSchema arrow_schema = ArrowSchema.wrap(c_schema_ptr); + vector = Data.importVector(allocator, arrow_array, arrow_schema, null); + this.doWorkInJava(vector); + return vector; + } + private void doWorkInJava(FieldVector vector) { System.out.println("Doing work in Java"); BigIntVector bigIntVector = (BigIntVector)vector; bigIntVector.setSafe(0, 2); } + + private static BigIntVector getIntVectorForJavaConsumers() { + intVector.allocateNew(3); + intVector.set(0, 1); + intVector.set(1, 7); + intVector.set(2, 93); + intVector.setValueCount(3); + return intVector; + } + + public static void simulateAsAJavaConsumers() { + CDataDictionaryProvider provider = new CDataDictionaryProvider(); + MapValueConsumerV2 mvc = new MapValueConsumerV2(provider);//FIXME! Use constructor with dictionary provider + try ( + ArrowArray arrowArray = ArrowArray.allocateNew(allocator); + ArrowSchema arrowSchema = ArrowSchema.allocateNew(allocator) + ) { + Data.exportVector(allocator, getIntVectorForJavaConsumers(), provider, arrowArray, arrowSchema); + FieldVector updatedVector = mvc.updateFromJava(arrowArray.memoryAddress(), arrowSchema.memoryAddress()); + try (ArrowArray usedArray = ArrowArray.allocateNew(allocator); + ArrowSchema usedSchema = ArrowSchema.allocateNew(allocator)) { + Data.exportVector(allocator, updatedVector, provider, usedArray, usedSchema); + try(FieldVector valueVectors = Data.importVector(allocator, usedArray, usedSchema, provider)) { + System.out.println(valueVectors); + } + } + } + } + + public static void close() { + intVector.close(); + } + + public static void main(String[] args) { + simulateAsAJavaConsumers(); + close(); + } } +.. testoutput:: + + Doing work in Java + [2, 7, 93] + + The Java component performs the following actions: 1. Receives data from the Python component. From 850805978d12cbe2437e67725166e7ebceb8cc8f Mon Sep 17 00:00:00 2001 From: vibhatha Date: Wed, 13 Sep 2023 10:53:55 +0530 Subject: [PATCH 08/12] fix: minor --- java/source/python_java.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/java/source/python_java.rst b/java/source/python_java.rst index 54211e96..99fd4b8d 100644 --- a/java/source/python_java.rst +++ b/java/source/python_java.rst @@ -148,7 +148,7 @@ Java Component: In the Java component, the MapValuesConsumer class receives data from the Python component through C Data. It then updates the data and sends it back to the Python component. -.. testcode:: java +.. testcode:: import org.apache.arrow.c.ArrowArray; import org.apache.arrow.c.ArrowSchema; From 9329cda0866479da5daa4b6ceaefbe1b334a3acf Mon Sep 17 00:00:00 2001 From: vibhatha Date: Thu, 14 Sep 2023 15:26:59 +0530 Subject: [PATCH 09/12] fix: addressing reviews --- java/source/python_java.rst | 67 ++++++++++++++++++++----------------- 1 file changed, 37 insertions(+), 30 deletions(-) diff --git a/java/source/python_java.rst b/java/source/python_java.rst index 99fd4b8d..b8f6cdce 100644 --- a/java/source/python_java.rst +++ b/java/source/python_java.rst @@ -158,20 +158,23 @@ Java Component: import org.apache.arrow.memory.RootAllocator; import org.apache.arrow.vector.FieldVector; import org.apache.arrow.vector.BigIntVector; + import org.apache.arrow.util.AutoCloseables; - public class MapValuesConsumer { - private final static BufferAllocator allocator = new RootAllocator(); + class MapValuesConsumer implements AutoCloseable { + private final BufferAllocator allocator; private final CDataDictionaryProvider provider; private FieldVector vector; - private final static BigIntVector intVector = new BigIntVector("internal_test_vector", allocator); + private final BigIntVector intVector; - public MapValuesConsumer(CDataDictionaryProvider provider) { + public MapValuesConsumer(CDataDictionaryProvider provider, BufferAllocator allocator) { this.provider = provider; + this.allocator = allocator; + this.intVector = new BigIntVector("internal_test_vector", allocator); } - public static BufferAllocator getAllocatorForJavaConsumer() { + public BufferAllocator getAllocatorForJavaConsumer() { return allocator; } @@ -189,7 +192,7 @@ Java Component: public FieldVector updateFromJava(long c_array_ptr, long c_schema_ptr) { ArrowArray arrow_array = ArrowArray.wrap(c_array_ptr); ArrowSchema arrow_schema = ArrowSchema.wrap(c_schema_ptr); - vector = Data.importVector(allocator, arrow_array, arrow_schema, null); + this.vector = Data.importVector(allocator, arrow_array, arrow_schema, this.provider); this.doWorkInJava(vector); return vector; } @@ -200,7 +203,7 @@ Java Component: bigIntVector.setSafe(0, 2); } - private static BigIntVector getIntVectorForJavaConsumers() { + public BigIntVector getIntVectorForJavaConsumer() { intVector.allocateNew(3); intVector.set(0, 1); intVector.set(1, 7); @@ -209,35 +212,39 @@ Java Component: return intVector; } - public static void simulateAsAJavaConsumers() { - CDataDictionaryProvider provider = new CDataDictionaryProvider(); - MapValueConsumerV2 mvc = new MapValueConsumerV2(provider);//FIXME! Use constructor with dictionary provider + @Override + public void close() throws Exception { + AutoCloseables.close(intVector); + } + } + try (BufferAllocator allocator = new RootAllocator()) { + CDataDictionaryProvider provider = new CDataDictionaryProvider(); + try (final MapValuesConsumer mvc = new MapValuesConsumer(provider, allocator)) { try ( - ArrowArray arrowArray = ArrowArray.allocateNew(allocator); - ArrowSchema arrowSchema = ArrowSchema.allocateNew(allocator) - ) { - Data.exportVector(allocator, getIntVectorForJavaConsumers(), provider, arrowArray, arrowSchema); - FieldVector updatedVector = mvc.updateFromJava(arrowArray.memoryAddress(), arrowSchema.memoryAddress()); - try (ArrowArray usedArray = ArrowArray.allocateNew(allocator); - ArrowSchema usedSchema = ArrowSchema.allocateNew(allocator)) { - Data.exportVector(allocator, updatedVector, provider, usedArray, usedSchema); - try(FieldVector valueVectors = Data.importVector(allocator, usedArray, usedSchema, provider)) { - System.out.println(valueVectors); + ArrowArray arrowArray = ArrowArray.allocateNew(allocator); + ArrowSchema arrowSchema = ArrowSchema.allocateNew(allocator) + ) { + Data.exportVector(allocator, mvc.getIntVectorForJavaConsumer(), provider, arrowArray, arrowSchema); + FieldVector updatedVector = mvc.updateFromJava(arrowArray.memoryAddress(), arrowSchema.memoryAddress()); + try (ArrowArray usedArray = ArrowArray.allocateNew(allocator); + ArrowSchema usedSchema = ArrowSchema.allocateNew(allocator)) { + Data.exportVector(allocator, updatedVector, provider, usedArray, usedSchema); + try(FieldVector valueVectors = Data.importVector(allocator, usedArray, usedSchema, provider)) { + System.out.println(valueVectors); + } } + updatedVector.close(); + } catch (Exception ex) { + ex.printStackTrace(); } - } - } - - public static void close() { - intVector.close(); - } - - public static void main(String[] args) { - simulateAsAJavaConsumers(); - close(); + } catch (Exception ex) { + ex.printStackTrace(); } + } catch (Exception ex) { + ex.printStackTrace(); } + .. testoutput:: Doing work in Java From 23617f064055889ed79da581c23b5c8b9c833b1b Mon Sep 17 00:00:00 2001 From: vibhatha Date: Thu, 21 Sep 2023 08:09:30 +0530 Subject: [PATCH 10/12] fix: address reviews v2 --- java/source/c_data.rst | 33 ++++++++++++++----- java/source/python_java.rst | 66 +++++++++++++++++++++++-------------- 2 files changed, 66 insertions(+), 33 deletions(-) diff --git a/java/source/c_data.rst b/java/source/c_data.rst index 2ccd84d6..851e8dec 100644 --- a/java/source/c_data.rst +++ b/java/source/c_data.rst @@ -1,12 +1,29 @@ -.. _c-data-java: +.. Licensed to the Apache Software Foundation (ASF) under one +.. or more contributor license agreements. See the NOTICE file +.. distributed with this work for additional information +.. regarding copyright ownership. The ASF licenses this file +.. to you under the Apache License, Version 2.0 (the +.. "License"); you may not use this file except in compliance +.. with the License. You may obtain a copy of the License at -================== -C Data Integration -================== +.. http://www.apache.org/licenses/LICENSE-2.0 -C Data interface is an important aspect of supporting multiple languages in Apache Arrow. -A Java programme can seamlessly work with C++ and Python programmes. The following examples -demonstrates how it can be done. +.. Unless required by applicable law or agreed to in writing, +.. software distributed under the License is distributed on an +.. "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +.. KIND, either express or implied. See the License for the +.. specific language governing permissions and limitations +.. under the License. -:ref:`arrow-python-java` +.. _c-data: + +================ +C Data Interface +================ + +The Arrow C Data Interface enables zero-copy sharing of Arrow data between language +runtimes. A Java programme can seamlessly work with C++ and Python programs. +The following examples demonstrates how it can be done. + +:ref:`Python Java ` ------------------------ diff --git a/java/source/python_java.rst b/java/source/python_java.rst index b8f6cdce..b6794641 100644 --- a/java/source/python_java.rst +++ b/java/source/python_java.rst @@ -1,3 +1,20 @@ +.. Licensed to the Apache Software Foundation (ASF) under one +.. or more contributor license agreements. See the NOTICE file +.. distributed with this work for additional information +.. regarding copyright ownership. The ASF licenses this file +.. to you under the Apache License, Version 2.0 (the +.. "License"); you may not use this file except in compliance +.. with the License. You may obtain a copy of the License at + +.. http://www.apache.org/licenses/LICENSE-2.0 + +.. Unless required by applicable law or agreed to in writing, +.. software distributed under the License is distributed on an +.. "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +.. KIND, either express or implied. See the License for the +.. specific language governing permissions and limitations +.. under the License. + .. _arrow-python-java: ======================== @@ -12,21 +29,28 @@ This document provides a guide on how to enable seamless data exchange between P Dictionary Data Roundtrip ========================= - This section demonstrates a data roundtrip, where a dictionary array is created in Python, accessed and updated in Java, - and finally re-accessed and validated in Python for data consistency. +This section demonstrates a data roundtrip where C Data interface is being used to provide +the seamless access to data across language boundaries. + + +Python Component +---------------- +In the Python-based component, the data roundtrip process is demonstrated through a sequential workflow. -Python Component: ------------------ +1. Create data in Python +2. Export data to Java +3. Import updated data from Java +4. Validate the data consistency - The Python code uses jpype to start the JVM and make the Java class MapValuesConsumer available to Python. - Data is generated in PyArrow and exported through C Data to Java. +The Python code uses `jpype `_ to start the JVM and make the Java class MapValuesConsumer available to Python. +Data is generated in PyArrow and exported through C Data to Java. .. code-block:: python import jpype import jpype.imports - from jpype.types import * + from jpype.types import JClass import pyarrow as pa from pyarrow.cffi import ffi as arrow_c @@ -43,7 +67,7 @@ Python Component: dictionary = pa.dictionary(pa.int64(), pa.utf8()) array = pa.array(["A", "B", "C", "A", "D"], dictionary) print("From Python") - print("Dictionary Created: ", array) + print("Dictionary Created:", array) # create the CDataDictionaryProvider instance which is # required to create dictionary array precisely @@ -62,7 +86,7 @@ Python Component: array.type._export_to_c(c_schema_ptr) # Send Array and its Schema to the Java function - # that will update the dictionary + # update values in Java consumer.update(c_array_ptr, c_schema_ptr) # Importing updated values from Java to Python @@ -92,7 +116,7 @@ Python Component: # In Java and Python, the same memory is being accessed through the C Data interface. # Since the array from Java and array created in Python should have same data. assert updated_array.equals(array) - print("Updated Array: ", updated_array) + print("Updated Array:", updated_array) del updated_array @@ -134,19 +158,17 @@ Python Component: 3 ] -In the Python component, the following steps are executed to demonstrate the data roundtrip: -1. Create data in Python -2. Export data to Java -3. Import updated data from Java -4. Validate the data consistency +Java Component +-------------- +In the Java-based component of the system, the following operations are executed: -Java Component: ---------------- +1. Receives data from the Python component. +2. Updates the data. +3. Exports the updated data back to Python. - In the Java component, the MapValuesConsumer class receives data from the Python component through C Data. - It then updates the data and sends it back to the Python component. +MapValuesConsumer class uses C Data interface to access the data created in Python. .. testcode:: @@ -251,11 +273,5 @@ Java Component: [2, 7, 93] -The Java component performs the following actions: - -1. Receives data from the Python component. -2. Updates the data. -3. Exports the updated data back to Python. - By integrating PyArrow in Python and Java components, this example demonstrates that a system can be created where data is shared and updated across both languages seamlessly. From 59ac6bb8221bd8d1517b4a6e25a9fe8cbd8c2222 Mon Sep 17 00:00:00 2001 From: vibhatha Date: Mon, 25 Sep 2023 13:12:14 +0530 Subject: [PATCH 11/12] fix: format --- java/source/c_data.rst | 2 +- java/source/python_java.rst | 119 +++++++++++++++++++----------------- 2 files changed, 64 insertions(+), 57 deletions(-) diff --git a/java/source/c_data.rst b/java/source/c_data.rst index 851e8dec..4716c1a7 100644 --- a/java/source/c_data.rst +++ b/java/source/c_data.rst @@ -21,7 +21,7 @@ C Data Interface ================ -The Arrow C Data Interface enables zero-copy sharing of Arrow data between language +The `Arrow C Data Interface `_ enables zero-copy sharing of Arrow data between language runtimes. A Java programme can seamlessly work with C++ and Python programs. The following examples demonstrates how it can be done. diff --git a/java/source/python_java.rst b/java/source/python_java.rst index b6794641..80c397ab 100644 --- a/java/source/python_java.rst +++ b/java/source/python_java.rst @@ -86,21 +86,20 @@ Data is generated in PyArrow and exported through C Data to Java. array.type._export_to_c(c_schema_ptr) # Send Array and its Schema to the Java function - # update values in Java - consumer.update(c_array_ptr, c_schema_ptr) + consumer.callToJava(c_array_ptr, c_schema_ptr) # Importing updated values from Java to Python # Export the Python array through C Data - updated_c_array = arrow_c.new("struct ArrowArray*") - updated_c_array_ptr = int(arrow_c.cast("uintptr_t", updated_c_array)) + c_array_from_java = arrow_c.new("struct ArrowArray*") + c_array_ptr_from_java = int(arrow_c.cast("uintptr_t", c_array_from_java)) # Export the Schema of the Array through C Data - updated_c_schema = arrow_c.new("struct ArrowSchema*") - updated_c_schema_ptr = int(arrow_c.cast("uintptr_t", updated_c_schema)) + c_schema_from_java = arrow_c.new("struct ArrowSchema*") + c_schema_ptr_from_java = int(arrow_c.cast("uintptr_t", c_schema_from_java)) - java_wrapped_array = java_c_package.ArrowArray.wrap(updated_c_array_ptr) - java_wrapped_schema = java_c_package.ArrowSchema.wrap(updated_c_schema_ptr) + java_wrapped_array = java_c_package.ArrowArray.wrap(c_array_ptr_from_java) + java_wrapped_schema = java_c_package.ArrowSchema.wrap(c_schema_ptr_from_java) java_c_package.Data.exportVector( consumer.getAllocatorForJavaConsumer(), @@ -111,14 +110,19 @@ Data is generated in PyArrow and exported through C Data to Java. ) print("From Java back to Python") - updated_array = pa.Array._import_from_c(updated_c_array_ptr, updated_c_schema_ptr) + array_from_java = pa.Array._import_from_c(c_array_ptr_from_java, c_schema_ptr_from_java) # In Java and Python, the same memory is being accessed through the C Data interface. # Since the array from Java and array created in Python should have same data. - assert updated_array.equals(array) - print("Updated Array:", updated_array) + assert array_from_java.equals(array) + print("Array from Java: ", array_from_java) + + # Releasing Java C Data source. + del array_from_java + + consumer.close() + jpype.shutdownJVM() - del updated_array .. code-block:: shell @@ -141,7 +145,7 @@ Data is generated in PyArrow and exported through C Data to Java. ] Doing work in Java From Java back to Python - Updated Array: + Array from Java: -- dictionary: [ "A", @@ -191,79 +195,82 @@ MapValuesConsumer class uses C Data interface to access the data created in Pyth public MapValuesConsumer(CDataDictionaryProvider provider, BufferAllocator allocator) { - this.provider = provider; - this.allocator = allocator; - this.intVector = new BigIntVector("internal_test_vector", allocator); + this.provider = provider; + this.allocator = allocator; + this.intVector = new BigIntVector("internal_test_vector", allocator); } public BufferAllocator getAllocatorForJavaConsumer() { - return allocator; + return allocator; } public FieldVector getVector() { - return this.vector; + return this.vector; } public void update(long c_array_ptr, long c_schema_ptr) { - ArrowArray arrow_array = ArrowArray.wrap(c_array_ptr); - ArrowSchema arrow_schema = ArrowSchema.wrap(c_schema_ptr); - this.vector = Data.importVector(allocator, arrow_array, arrow_schema, this.provider); - this.doWorkInJava(vector); + ArrowArray arrow_array = ArrowArray.wrap(c_array_ptr); + ArrowSchema arrow_schema = ArrowSchema.wrap(c_schema_ptr); + this.vector = Data.importVector(allocator, arrow_array, arrow_schema, this.provider); + this.doWorkInJava(vector); } public FieldVector updateFromJava(long c_array_ptr, long c_schema_ptr) { - ArrowArray arrow_array = ArrowArray.wrap(c_array_ptr); - ArrowSchema arrow_schema = ArrowSchema.wrap(c_schema_ptr); - this.vector = Data.importVector(allocator, arrow_array, arrow_schema, this.provider); - this.doWorkInJava(vector); - return vector; + ArrowArray arrow_array = ArrowArray.wrap(c_array_ptr); + ArrowSchema arrow_schema = ArrowSchema.wrap(c_schema_ptr); + this.vector = Data.importVector(allocator, arrow_array, arrow_schema, this.provider); + this.doWorkInJava(vector); + return vector; } private void doWorkInJava(FieldVector vector) { - System.out.println("Doing work in Java"); - BigIntVector bigIntVector = (BigIntVector)vector; - bigIntVector.setSafe(0, 2); + System.out.println("Doing work in Java"); + BigIntVector bigIntVector = (BigIntVector)vector; + bigIntVector.setSafe(0, 2); } public BigIntVector getIntVectorForJavaConsumer() { - intVector.allocateNew(3); - intVector.set(0, 1); - intVector.set(1, 7); - intVector.set(2, 93); - intVector.setValueCount(3); - return intVector; + intVector.allocateNew(3); + intVector.set(0, 1); + intVector.set(1, 7); + intVector.set(2, 93); + intVector.setValueCount(3); + return intVector; } @Override public void close() throws Exception { - AutoCloseables.close(intVector); + AutoCloseables.close(intVector); } } try (BufferAllocator allocator = new RootAllocator()) { - CDataDictionaryProvider provider = new CDataDictionaryProvider(); - try (final MapValuesConsumer mvc = new MapValuesConsumer(provider, allocator)) { - try ( + CDataDictionaryProvider provider = new CDataDictionaryProvider(); + try (final MapValuesConsumer mvc = new MapValuesConsumer(provider, allocator)) { + try ( ArrowArray arrowArray = ArrowArray.allocateNew(allocator); ArrowSchema arrowSchema = ArrowSchema.allocateNew(allocator) - ) { - Data.exportVector(allocator, mvc.getIntVectorForJavaConsumer(), provider, arrowArray, arrowSchema); - FieldVector updatedVector = mvc.updateFromJava(arrowArray.memoryAddress(), arrowSchema.memoryAddress()); - try (ArrowArray usedArray = ArrowArray.allocateNew(allocator); - ArrowSchema usedSchema = ArrowSchema.allocateNew(allocator)) { - Data.exportVector(allocator, updatedVector, provider, usedArray, usedSchema); - try(FieldVector valueVectors = Data.importVector(allocator, usedArray, usedSchema, provider)) { - System.out.println(valueVectors); - } - } - updatedVector.close(); - } catch (Exception ex) { - ex.printStackTrace(); - } + ) { + Data.exportVector(allocator, mvc.getIntVectorForJavaConsumer(), provider, arrowArray, + arrowSchema); + FieldVector updatedVector = mvc.updateFromJava(arrowArray.memoryAddress(), + arrowSchema.memoryAddress()); + try (ArrowArray usedArray = ArrowArray.allocateNew(allocator); + ArrowSchema usedSchema = ArrowSchema.allocateNew(allocator)) { + Data.exportVector(allocator, updatedVector, provider, usedArray, usedSchema); + try (FieldVector valueVectors = Data.importVector(allocator, usedArray, usedSchema, + provider)) { + System.out.println(valueVectors); + } + } + updatedVector.close(); } catch (Exception ex) { - ex.printStackTrace(); + ex.printStackTrace(); } - } catch (Exception ex) { + } catch (Exception ex) { ex.printStackTrace(); + } + } catch (Exception ex) { + ex.printStackTrace(); } From 33e2cbfcffead2d8bc113223f605189c9dba1bf4 Mon Sep 17 00:00:00 2001 From: vibhatha Date: Mon, 25 Sep 2023 13:20:35 +0530 Subject: [PATCH 12/12] fix: indentation --- java/source/python_java.rst | 411 ++++++++++++++++++------------------ 1 file changed, 203 insertions(+), 208 deletions(-) diff --git a/java/source/python_java.rst b/java/source/python_java.rst index 80c397ab..ba295f01 100644 --- a/java/source/python_java.rst +++ b/java/source/python_java.rst @@ -48,120 +48,115 @@ Data is generated in PyArrow and exported through C Data to Java. .. code-block:: python - import jpype - import jpype.imports - from jpype.types import JClass - import pyarrow as pa - from pyarrow.cffi import ffi as arrow_c - - # Init the JVM and make MapValuesConsumer class available to Python. - jpype.startJVM(classpath=[ "../target/*"]) - java_c_package = jpype.JPackage("org").apache.arrow.c - MapValuesConsumer = JClass('MapValuesConsumer') - CDataDictionaryProvider = JClass('org.apache.arrow.c.CDataDictionaryProvider') - - # Starting from Python and generating data - - # Create a Python DictionaryArray - - dictionary = pa.dictionary(pa.int64(), pa.utf8()) - array = pa.array(["A", "B", "C", "A", "D"], dictionary) - print("From Python") - print("Dictionary Created:", array) - - # create the CDataDictionaryProvider instance which is - # required to create dictionary array precisely - c_provider = CDataDictionaryProvider() - - consumer = MapValuesConsumer(c_provider) - - # Export the Python array through C Data - c_array = arrow_c.new("struct ArrowArray*") - c_array_ptr = int(arrow_c.cast("uintptr_t", c_array)) - array._export_to_c(c_array_ptr) - - # Export the Schema of the Array through C Data - c_schema = arrow_c.new("struct ArrowSchema*") - c_schema_ptr = int(arrow_c.cast("uintptr_t", c_schema)) - array.type._export_to_c(c_schema_ptr) - - # Send Array and its Schema to the Java function - consumer.callToJava(c_array_ptr, c_schema_ptr) - - # Importing updated values from Java to Python - - # Export the Python array through C Data - c_array_from_java = arrow_c.new("struct ArrowArray*") - c_array_ptr_from_java = int(arrow_c.cast("uintptr_t", c_array_from_java)) - - # Export the Schema of the Array through C Data - c_schema_from_java = arrow_c.new("struct ArrowSchema*") - c_schema_ptr_from_java = int(arrow_c.cast("uintptr_t", c_schema_from_java)) - - java_wrapped_array = java_c_package.ArrowArray.wrap(c_array_ptr_from_java) - java_wrapped_schema = java_c_package.ArrowSchema.wrap(c_schema_ptr_from_java) - - java_c_package.Data.exportVector( - consumer.getAllocatorForJavaConsumer(), - consumer.getVector(), - c_provider, - java_wrapped_array, - java_wrapped_schema - ) - - print("From Java back to Python") - array_from_java = pa.Array._import_from_c(c_array_ptr_from_java, c_schema_ptr_from_java) - - # In Java and Python, the same memory is being accessed through the C Data interface. - # Since the array from Java and array created in Python should have same data. - assert array_from_java.equals(array) - print("Array from Java: ", array_from_java) - - # Releasing Java C Data source. - del array_from_java - - consumer.close() - jpype.shutdownJVM() + import jpype + import jpype.imports + from jpype.types import JClass + import pyarrow as pa + from pyarrow.cffi import ffi as arrow_c + + # Init the JVM and make MapValuesConsumer class available to Python. + jpype.startJVM(classpath=[ "../target/*"]) + java_c_package = jpype.JPackage("org").apache.arrow.c + MapValuesConsumer = JClass('MapValuesConsumer') + CDataDictionaryProvider = JClass('org.apache.arrow.c.CDataDictionaryProvider') + + # Starting from Python and generating data + # Create a Python DictionaryArray + dictionary = pa.dictionary(pa.int64(), pa.utf8()) + array = pa.array(["A", "B", "C", "A", "D"], dictionary) + print("From Python") + print("Dictionary Created:", array) + + # create the CDataDictionaryProvider instance which is + # required to create dictionary array precisely + c_provider = CDataDictionaryProvider() + consumer = MapValuesConsumer(c_provider) + + # Export the Python array through C Data + c_array = arrow_c.new("struct ArrowArray*") + c_array_ptr = int(arrow_c.cast("uintptr_t", c_array)) + array._export_to_c(c_array_ptr) + + # Export the Schema of the Array through C Data + c_schema = arrow_c.new("struct ArrowSchema*") + c_schema_ptr = int(arrow_c.cast("uintptr_t", c_schema)) + array.type._export_to_c(c_schema_ptr) + + # Send Array and its Schema to the Java function + consumer.callToJava(c_array_ptr, c_schema_ptr) + + # Importing updated values from Java to Python + # Export the Python array through C Data + c_array_from_java = arrow_c.new("struct ArrowArray*") + c_array_ptr_from_java = int(arrow_c.cast("uintptr_t", c_array_from_java)) + + # Export the Schema of the Array through C Data + c_schema_from_java = arrow_c.new("struct ArrowSchema*") + c_schema_ptr_from_java = int(arrow_c.cast("uintptr_t", c_schema_from_java)) + java_wrapped_array = java_c_package.ArrowArray.wrap(c_array_ptr_from_java) + java_wrapped_schema = java_c_package.ArrowSchema.wrap(c_schema_ptr_from_java) + java_c_package.Data.exportVector( + consumer.getAllocatorForJavaConsumer(), + consumer.getVector(), + c_provider, + java_wrapped_array, + java_wrapped_schema + ) + + print("From Java back to Python") + array_from_java = pa.Array._import_from_c(c_array_ptr_from_java, c_schema_ptr_from_java) + + # In Java and Python, the same memory is being accessed through the C Data interface. + # Since the array from Java and array created in Python should have same data. + + assert array_from_java.equals(array) + print("Array from Java: ", array_from_java) + + # Releasing Java C Data source. + del array_from_java + + consumer.close() + + jpype.shutdownJVM() .. code-block:: shell - From Python - Dictionary Created: - -- dictionary: - [ - "A", - "B", - "C", - "D" - ] - -- indices: - [ - 0, - 1, - 2, - 0, - 3 - ] - Doing work in Java - From Java back to Python - Array from Java: - -- dictionary: - [ - "A", - "B", - "C", - "D" - ] - -- indices: - [ - 2, - 1, - 2, - 0, - 3 - ] - + From Python + Dictionary Created: + -- dictionary: + [ + "A", + "B", + "C", + "D" + ] + -- indices: + [ + 0, + 1, + 2, + 0, + 3 + ] + Doing work in Java + From Java back to Python + Array from Java: + -- dictionary: + [ + "A", + "B", + "C", + "D" + ] + -- indices: + [ + 2, + 1, + 2, + 0, + 3 + ] Java Component -------------- @@ -176,108 +171,108 @@ MapValuesConsumer class uses C Data interface to access the data created in Pyth .. testcode:: - import org.apache.arrow.c.ArrowArray; - import org.apache.arrow.c.ArrowSchema; - import org.apache.arrow.c.Data; - import org.apache.arrow.c.CDataDictionaryProvider; - import org.apache.arrow.memory.BufferAllocator; - import org.apache.arrow.memory.RootAllocator; - import org.apache.arrow.vector.FieldVector; - import org.apache.arrow.vector.BigIntVector; - import org.apache.arrow.util.AutoCloseables; - - - class MapValuesConsumer implements AutoCloseable { - private final BufferAllocator allocator; - private final CDataDictionaryProvider provider; - private FieldVector vector; - private final BigIntVector intVector; - - - public MapValuesConsumer(CDataDictionaryProvider provider, BufferAllocator allocator) { - this.provider = provider; - this.allocator = allocator; - this.intVector = new BigIntVector("internal_test_vector", allocator); - } - - public BufferAllocator getAllocatorForJavaConsumer() { - return allocator; - } - - public FieldVector getVector() { - return this.vector; - } - - public void update(long c_array_ptr, long c_schema_ptr) { - ArrowArray arrow_array = ArrowArray.wrap(c_array_ptr); - ArrowSchema arrow_schema = ArrowSchema.wrap(c_schema_ptr); - this.vector = Data.importVector(allocator, arrow_array, arrow_schema, this.provider); - this.doWorkInJava(vector); - } - - public FieldVector updateFromJava(long c_array_ptr, long c_schema_ptr) { - ArrowArray arrow_array = ArrowArray.wrap(c_array_ptr); - ArrowSchema arrow_schema = ArrowSchema.wrap(c_schema_ptr); - this.vector = Data.importVector(allocator, arrow_array, arrow_schema, this.provider); - this.doWorkInJava(vector); - return vector; - } - - private void doWorkInJava(FieldVector vector) { - System.out.println("Doing work in Java"); - BigIntVector bigIntVector = (BigIntVector)vector; - bigIntVector.setSafe(0, 2); - } - - public BigIntVector getIntVectorForJavaConsumer() { - intVector.allocateNew(3); - intVector.set(0, 1); - intVector.set(1, 7); - intVector.set(2, 93); - intVector.setValueCount(3); - return intVector; - } - - @Override - public void close() throws Exception { - AutoCloseables.close(intVector); - } - } - try (BufferAllocator allocator = new RootAllocator()) { - CDataDictionaryProvider provider = new CDataDictionaryProvider(); - try (final MapValuesConsumer mvc = new MapValuesConsumer(provider, allocator)) { - try ( - ArrowArray arrowArray = ArrowArray.allocateNew(allocator); - ArrowSchema arrowSchema = ArrowSchema.allocateNew(allocator) - ) { - Data.exportVector(allocator, mvc.getIntVectorForJavaConsumer(), provider, arrowArray, - arrowSchema); - FieldVector updatedVector = mvc.updateFromJava(arrowArray.memoryAddress(), - arrowSchema.memoryAddress()); - try (ArrowArray usedArray = ArrowArray.allocateNew(allocator); - ArrowSchema usedSchema = ArrowSchema.allocateNew(allocator)) { - Data.exportVector(allocator, updatedVector, provider, usedArray, usedSchema); - try (FieldVector valueVectors = Data.importVector(allocator, usedArray, usedSchema, - provider)) { - System.out.println(valueVectors); - } - } - updatedVector.close(); - } catch (Exception ex) { - ex.printStackTrace(); - } - } catch (Exception ex) { - ex.printStackTrace(); - } - } catch (Exception ex) { - ex.printStackTrace(); - } + import org.apache.arrow.c.ArrowArray; + import org.apache.arrow.c.ArrowSchema; + import org.apache.arrow.c.Data; + import org.apache.arrow.c.CDataDictionaryProvider; + import org.apache.arrow.memory.BufferAllocator; + import org.apache.arrow.memory.RootAllocator; + import org.apache.arrow.vector.FieldVector; + import org.apache.arrow.vector.BigIntVector; + import org.apache.arrow.util.AutoCloseables; + + + class MapValuesConsumer implements AutoCloseable { + private final BufferAllocator allocator; + private final CDataDictionaryProvider provider; + private FieldVector vector; + private final BigIntVector intVector; + + + public MapValuesConsumer(CDataDictionaryProvider provider, BufferAllocator allocator) { + this.provider = provider; + this.allocator = allocator; + this.intVector = new BigIntVector("internal_test_vector", allocator); + } + + public BufferAllocator getAllocatorForJavaConsumer() { + return allocator; + } + + public FieldVector getVector() { + return this.vector; + } + + public void update(long c_array_ptr, long c_schema_ptr) { + ArrowArray arrow_array = ArrowArray.wrap(c_array_ptr); + ArrowSchema arrow_schema = ArrowSchema.wrap(c_schema_ptr); + this.vector = Data.importVector(allocator, arrow_array, arrow_schema, this.provider); + this.doWorkInJava(vector); + } + + public FieldVector updateFromJava(long c_array_ptr, long c_schema_ptr) { + ArrowArray arrow_array = ArrowArray.wrap(c_array_ptr); + ArrowSchema arrow_schema = ArrowSchema.wrap(c_schema_ptr); + this.vector = Data.importVector(allocator, arrow_array, arrow_schema, this.provider); + this.doWorkInJava(vector); + return vector; + } + + private void doWorkInJava(FieldVector vector) { + System.out.println("Doing work in Java"); + BigIntVector bigIntVector = (BigIntVector)vector; + bigIntVector.setSafe(0, 2); + } + + public BigIntVector getIntVectorForJavaConsumer() { + intVector.allocateNew(3); + intVector.set(0, 1); + intVector.set(1, 7); + intVector.set(2, 93); + intVector.setValueCount(3); + return intVector; + } + + @Override + public void close() throws Exception { + AutoCloseables.close(intVector); + } + } + try (BufferAllocator allocator = new RootAllocator()) { + CDataDictionaryProvider provider = new CDataDictionaryProvider(); + try (final MapValuesConsumer mvc = new MapValuesConsumer(provider, allocator)) { + try ( + ArrowArray arrowArray = ArrowArray.allocateNew(allocator); + ArrowSchema arrowSchema = ArrowSchema.allocateNew(allocator) + ) { + Data.exportVector(allocator, mvc.getIntVectorForJavaConsumer(), provider, arrowArray, + arrowSchema); + FieldVector updatedVector = mvc.updateFromJava(arrowArray.memoryAddress(), + arrowSchema.memoryAddress()); + try (ArrowArray usedArray = ArrowArray.allocateNew(allocator); + ArrowSchema usedSchema = ArrowSchema.allocateNew(allocator)) { + Data.exportVector(allocator, updatedVector, provider, usedArray, usedSchema); + try (FieldVector valueVectors = Data.importVector(allocator, usedArray, usedSchema, + provider)) { + System.out.println(valueVectors); + } + } + updatedVector.close(); + } catch (Exception ex) { + ex.printStackTrace(); + } + } catch (Exception ex) { + ex.printStackTrace(); + } + } catch (Exception ex) { + ex.printStackTrace(); + } .. testoutput:: - Doing work in Java - [2, 7, 93] + Doing work in Java + [2, 7, 93] By integrating PyArrow in Python and Java components, this example demonstrates that