Skip to content

Commit

Permalink
🐛 Fix concatenation when trailing result chunk is smaller (#28)
Browse files: browse the repository at this point in the history
  • Loading branch information
ddelange authored Sep 7, 2022
1 parent d997507 commit 019ee26
Show file tree
Hide file tree
Showing 2 changed files with 9 additions and 5 deletions.
6 changes: 1 addition & 5 deletions src/mapply/mapply.py
Original file line number Diff line number Diff line change
Expand Up @@ -116,11 +116,7 @@ def run_apply(func, df_or_series, args=(), **kwargs):
)
)

- if (
- isseries
- or len(results) == 1
- or len(results[0]) * len(results) in df_or_series.shape
- ):
+ if isseries or len(results) == 1 or sum(map(len, results)) in df_or_series.shape:
return concat(results, copy=False)

return concat(results, axis=1, copy=False)
8 changes: 8 additions & 0 deletions tests/test_mapply.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,14 @@ def test_df_mapply():
df.mapply(lambda x: x**2),
)

+ # not all result chunks have equal size (trailing chunk)
+ mapply.init(progressbar=False, chunk_size=100, n_workers=2)
+ df = pd.DataFrame(np.random.randint(2, size=(5, 201)))
+ pd.testing.assert_series_equal(
+ df.apply(np.var),
+ df.mapply(np.var),
+ )
+
# concat for only one result
mapply.init(progressbar=False, chunk_size=100, n_workers=2)
df = pd.DataFrame(list(range(1, 200))) # (199, 1)
Expand Down

0 comments on commit 019ee26

Please sign in to comment.