Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix divide by zero error when pyarrow table size comes out 0 #368

Merged
merged 3 commits into from
Oct 25, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion deltacat/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@

deltacat.logs.configure_deltacat_logger(logging.getLogger(__name__))

__version__ = "1.1.26"
__version__ = "1.1.27"


__all__ = [
Expand Down
2 changes: 1 addition & 1 deletion deltacat/compute/resource_estimation/delta.py
Original file line number Diff line number Diff line change
Expand Up @@ -188,7 +188,7 @@ def _estimate_resources_required_to_process_delta_using_file_sampling(
sampled_on_disk_size += delta.manifest.entries[entry_index].meta.content_length
sampled_num_rows += len(tbl)

if not sampled_on_disk_size:
if not sampled_on_disk_size or not sampled_in_memory_size:
return EstimatedResources.of(
memory_bytes=0,
statistics=Statistics.of(
Expand Down
37 changes: 37 additions & 0 deletions deltacat/tests/compute/resource_estimation/test_delta.py
Original file line number Diff line number Diff line change
Expand Up @@ -437,6 +437,43 @@ def test_delta_manifest_parquet_when_file_sampling(
== parquet_delta_with_manifest.meta.content_length
)

def test_parquet_delta_when_file_sampling_and_arrow_size_zero(
    self,
    local_deltacat_storage_kwargs,
    parquet_delta_with_manifest: Delta,
    monkeypatch,
):
    """File sampling must yield a zero memory estimate for zero-sized tables.

    Every sampled manifest entry is replaced with a stand-in object that
    reports ``nbytes == 0`` and ``len() == 0``. The estimate is then expected
    to report ``memory_bytes == 0`` while the on-disk size still equals the
    delta's ``meta.content_length``.
    """

    class _ZeroSizedTable:
        # Stand-in for a downloaded table: no in-memory bytes, no rows.
        nbytes = 0

        def __len__(self):
            return 0

    def _fake_download(*args, **kwargs):
        # Accepts and ignores whatever the estimator passes in.
        return _ZeroSizedTable()

    monkeypatch.setattr(ds, "download_delta_manifest_entry", _fake_download)

    sampling_params = EstimateResourcesParams.of(
        resource_estimation_method=ResourceEstimationMethod.FILE_SAMPLING,
        max_files_to_sample=2,
    )

    estimate = estimate_resources_required_to_process_delta(
        delta=parquet_delta_with_manifest,
        operation_type=OperationType.PYARROW_DOWNLOAD,
        deltacat_storage=ds,
        deltacat_storage_kwargs=local_deltacat_storage_kwargs,
        estimate_resources_params=sampling_params,
    )

    expected_on_disk_size = parquet_delta_with_manifest.meta.content_length

    assert parquet_delta_with_manifest.manifest is not None
    assert estimate.memory_bytes == 0
    assert estimate.statistics.on_disk_size_bytes == expected_on_disk_size

def test_delta_manifest_utsv_when_file_sampling(
self, local_deltacat_storage_kwargs, utsv_delta_with_manifest: Delta
):
Expand Down
Loading