Skip to content

Commit

Permalink
fix: only keep a few batches alive per worker instead of the full chunk
Browse files Browse the repository at this point in the history
  • Loading branch information
percevalw committed Jul 18, 2024
1 parent fc88947 commit d30b7d7
Showing 1 changed file with 2 additions and 5 deletions.
7 changes: 2 additions & 5 deletions edsnlp/processing/multiprocessing.py
Original file line number Diff line number Diff line change
Expand Up @@ -458,11 +458,8 @@ def read_tasks():
expect_new_tasks = True

while expect_new_tasks or len(active_batches) > 0:
- # Check that there are no more than `chunk_size` docs being processed.
- # If there is still room, we can process new batches
- has_room_for_new_batches = (
- sum(len(ab[0]) for ab in active_batches.values()) < lc.chunk_size
- )
+ # Check that there are no more than a few batches active (say 8)
+ has_room_for_new_batches = len(active_batches) < 8

# if new_batch_iterator is not None and len(active_batches) == 0:
# yield next(new_batch_iterator)
Expand Down

0 comments on commit d30b7d7

Please sign in to comment.