From fa17f68540b682259ca7de80f6dbc7a6a85b1a88 Mon Sep 17 00:00:00 2001 From: universalmind303 Date: Tue, 30 Jul 2024 13:56:35 -0500 Subject: [PATCH 1/4] accept "iterable[pa.Table]" for from_arrow --- Makefile | 2 +- daft/convert.py | 4 ++-- daft/daft | 1 + daft/dataframe/dataframe.py | 4 +++- 4 files changed, 7 insertions(+), 4 deletions(-) create mode 120000 daft/daft diff --git a/Makefile b/Makefile index 1c2277c31c..d9ae11c52e 100644 --- a/Makefile +++ b/Makefile @@ -16,7 +16,7 @@ endif .venv: ## Set up virtual environment - python3 -m venv $(VENV) + python3.11 -m venv $(VENV) $(VENV_BIN)/python -m pip install --upgrade uv ## Hacks to deal with grpcio compile errors on m1 macs ifeq ($(IS_M1), 1) diff --git a/daft/convert.py b/daft/convert.py index 4bebd7220b..e734d0a176 100644 --- a/daft/convert.py +++ b/daft/convert.py @@ -1,6 +1,6 @@ # isort: dont-add-import: from __future__ import annotations -from typing import TYPE_CHECKING, Any, Dict, List, Union +from typing import TYPE_CHECKING, Any, Dict, Iterable, List, Union from daft.api_annotations import PublicAPI @@ -55,7 +55,7 @@ def from_pydict(data: Dict[str, InputListType]) -> "DataFrame": @PublicAPI -def from_arrow(data: Union["pa.Table", List["pa.Table"]]) -> "DataFrame": +def from_arrow(data: Union["pa.Table", List["pa.Table"], Iterable["pa.Table"]]) -> "DataFrame": """Creates a DataFrame from a pyarrow Table. Example: diff --git a/daft/daft b/daft/daft new file mode 120000 index 0000000000..4afc3550f5 --- /dev/null +++ b/daft/daft @@ -0,0 +1 @@ +../Daft/daft \ No newline at end of file diff --git a/daft/dataframe/dataframe.py b/daft/dataframe/dataframe.py index c5d9a6ad01..c5a1948bf0 100644 --- a/daft/dataframe/dataframe.py +++ b/daft/dataframe/dataframe.py @@ -312,8 +312,10 @@ def _from_pydict(cls, data: Dict[str, InputListType]) -> "DataFrame": return cls._from_tables(data_vpartition) @classmethod - def _from_arrow(cls, data: Union["pyarrow.Table", List["pyarrow.Table"]]) -> "DataFrame": + def _from_arrow(cls, data: Union["pyarrow.Table", List["pyarrow.Table"], Iterable["pyarrow.Table"]]) -> "DataFrame": """Creates a DataFrame from a `pyarrow Table `__.""" + if isinstance(data, Iterable): + data = list(data) if not isinstance(data, list): data = [data] data_vpartitions = [MicroPartition.from_arrow(table) for table in data] From b0e8c8c87749c8c234394450de65dcfd5384d8f7 Mon Sep 17 00:00:00 2001 From: universalmind303 Date: Tue, 30 Jul 2024 13:57:24 -0500 Subject: [PATCH 2/4] revert makefile --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index d9ae11c52e..1c2277c31c 100644 --- a/Makefile +++ b/Makefile @@ -16,7 +16,7 @@ endif .venv: ## Set up virtual environment - python3.11 -m venv $(VENV) + python3 -m venv $(VENV) $(VENV_BIN)/python -m pip install --upgrade uv ## Hacks to deal with grpcio compile errors on m1 macs ifeq ($(IS_M1), 1) From 91fec0f4075849445145c5e69276fd556be5831d Mon Sep 17 00:00:00 2001 From: universalmind303 Date: Tue, 30 Jul 2024 14:03:16 -0500 Subject: [PATCH 3/4] add test case --- tests/table/test_from_py.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/tests/table/test_from_py.py b/tests/table/test_from_py.py index 95ef19b884..216b66dae2 100644 --- a/tests/table/test_from_py.py +++ b/tests/table/test_from_py.py @@ -9,6 +9,7 @@ import pyarrow.compute as pac import pytest +import daft from daft import DataType, TimeUnit from daft.context import get_context from daft.series import Series @@ -649,3 +650,17 @@ def test_nested_struct_dates(levels: int) -> None: assert back_again.to_arrow().type == expected_arrow_type assert back_again.to_pylist() == data + + +def test_from_arrow_iterable() -> None: + class CustomIterable: + def __iter__(self): + yield pa.table({"text": ["foo1", "bar2"]}) + yield pa.table({"text": ["foo2", "bar2"]}) + yield pa.table({"text": ["foo3", "bar3"]}) + + my_iter = CustomIterable() + + table = daft.from_arrow(my_iter) + tbl = table.to_pydict() + assert tbl == {"text": ["foo1", "bar2", "foo2", "bar2", "foo3", "bar3"]} From 2eeb3277f79415b297dc659f690f59b22021063d Mon Sep 17 00:00:00 2001 From: universalmind303 Date: Tue, 30 Jul 2024 14:10:08 -0500 Subject: [PATCH 4/4] whoops --- daft/daft | 1 - 1 file changed, 1 deletion(-) delete mode 120000 daft/daft diff --git a/daft/daft b/daft/daft deleted file mode 120000 index 4afc3550f5..0000000000 --- a/daft/daft +++ /dev/null @@ -1 +0,0 @@ -../Daft/daft \ No newline at end of file