From 4af533128199f88d87a4c1aa40a61306d3aabeee Mon Sep 17 00:00:00 2001
From: Jeff Kinnison
Date: Tue, 26 Sep 2023 18:01:20 -0400
Subject: [PATCH] fix: The final batch of an epoch is skipped when batch size
 is 1 (#3653)

Co-authored-by: Justin Zhao
---
 ludwig/data/batcher/random_access.py |  9 +++++++--
 tests/integration_tests/test_api.py  | 28 ++++++++++++++++++++++++++++
 2 files changed, 35 insertions(+), 2 deletions(-)

diff --git a/ludwig/data/batcher/random_access.py b/ludwig/data/batcher/random_access.py
index 91c53b9eacc..108f4247c2d 100644
--- a/ludwig/data/batcher/random_access.py
+++ b/ludwig/data/batcher/random_access.py
@@ -63,6 +63,10 @@ def next_batch(self):
         return sub_batch
 
     def last_batch(self):
+        """Returns whether we've exhausted all batches for this epoch.
+
+        If False, then there is at least 1 more batch available with next_batch().
+        """
         # If our current index in the dataset exceeds the size of the dataset,
         # we've finished the epoch and can indicate that this is the last batch
         if self.index >= self.total_size:
@@ -71,8 +75,9 @@ def last_batch(self):
         # For e.g., batch size = 128 but the dataset only has 100 rows.
         elif self.ignore_last and self.step:
             # index += batch_size after each epoch. So, if our current index in total dataset is 1 less than the total
-            # dataset size, then the last batch will only have 1 row. Drop it if this happens.
-            if self.index - self.total_size == -1:
+            # dataset size, then the last batch will only have 1 row.
+            # If this happens, we drop the last batch, unless batch_size is 1.
+            if self.batch_size > 1 and self.index - self.total_size == -1:
                 logger.info("Last batch in epoch only has 1 sample and will be dropped.")
                 return True
         return False
diff --git a/tests/integration_tests/test_api.py b/tests/integration_tests/test_api.py
index 232217ab7f4..ec556348e04 100644
--- a/tests/integration_tests/test_api.py
+++ b/tests/integration_tests/test_api.py
@@ -589,6 +589,34 @@ def test_api_callbacks_fixed_train_steps(tmpdir, csv_filename):
         assert mock_callback.on_epoch_start.call_count == 10
 
 
+def test_api_callbacks_batch_size_1(tmpdir, csv_filename):
+    epochs = 2
+    batch_size = 1
+    num_examples = 80
+    mock_callback = mock.Mock(wraps=Callback())
+
+    input_features = [sequence_feature(encoder={"reduce_output": "sum"})]
+    output_features = [category_feature(decoder={"vocab_size": 5}, reduce_input="sum")]
+    config = {
+        "input_features": input_features,
+        "output_features": output_features,
+        "combiner": {"type": "concat", "output_size": 14},
+        TRAINER: {"epochs": epochs, "batch_size": batch_size},
+    }
+    model = LudwigModel(config, callbacks=[mock_callback])
+    model.train(
+        training_set=generate_data(
+            input_features, output_features, os.path.join(tmpdir, csv_filename), num_examples=num_examples
+        )
+    )
+
+    # There are exactly 2 epoch starts, even with batch_size = 1.
+    assert mock_callback.on_epoch_start.call_count == 2
+    assert mock_callback.on_epoch_end.call_count == 2
+    assert mock_callback.on_batch_start.call_count == 160
+    assert mock_callback.on_batch_end.call_count == 160
+
+
 def test_api_callbacks_fixed_train_steps_less_than_one_epoch(tmpdir, csv_filename):
     # If train_steps is set manually, epochs is ignored.
     train_steps = total_batches = 6
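
Note for reviewers (not part of the patch): the off-by-one fixed here can be
reproduced with a minimal, self-contained sketch. FakeBatcher and
batches_per_epoch below are hypothetical stand-ins, not Ludwig APIs; only the
attribute names (index, total_size, batch_size, ignore_last, step) mirror
random_access.py, and `step` is reduced to a plain truthy flag.

# Minimal repro of the bug this patch fixes. FakeBatcher is a hypothetical
# stand-in for the batcher in ludwig/data/batcher/random_access.py.
class FakeBatcher:
    def __init__(self, total_size, batch_size, fixed=True):
        self.total_size = total_size
        self.batch_size = batch_size
        self.ignore_last = True  # training-time setting that drops 1-row batches
        self.step = True         # stand-in: truthy, as in the `elif` guard above
        self.fixed = fixed       # True = patched check, False = old check
        self.index = 0

    def next_batch(self):
        # The real next_batch() returns rows; for the repro we only need the
        # index arithmetic: index advances by batch_size per batch served.
        self.index += self.batch_size

    def last_batch(self):
        if self.index >= self.total_size:
            return True
        elif self.ignore_last and self.step:
            one_row_left = self.index - self.total_size == -1
            if self.fixed:
                # Patched check: only drop the 1-row remainder when batches
                # normally hold more than one row.
                return self.batch_size > 1 and one_row_left
            # Old check: with batch_size == 1 every batch has one row, so this
            # fired on the final sample of every epoch and skipped it.
            return one_row_left
        return False


def batches_per_epoch(batcher):
    batcher.index = 0
    count = 0
    while not batcher.last_batch():
        batcher.next_batch()
        count += 1
    return count


# 80 rows, batch_size=1, as in test_api_callbacks_batch_size_1:
print(batches_per_epoch(FakeBatcher(80, 1, fixed=False)))    # 79: last row skipped
print(batches_per_epoch(FakeBatcher(80, 1, fixed=True)))     # 80: all rows seen
print(batches_per_epoch(FakeBatcher(100, 128, fixed=True)))  # 1: batch > dataset

With the old check the loop yields 79 batches for an 80-row epoch, which is why
on_batch_start came up short; with the guard it yields 80 per epoch, matching
the 160 calls asserted over 2 epochs in the new test.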