Skip to content

Commit

Permalink
Add train finished run event (#2714)
Browse files Browse the repository at this point in the history
* adding train finished

* adding train finished

* moved MosaicML logger before the RemoteUploaderDownloader (RUD)

* added comment about RUD
  • Loading branch information
jjanezhang authored Nov 16, 2023
1 parent 83a40f5 commit 3cf73cc
Show file tree
Hide file tree
Showing 3 changed files with 9 additions and 5 deletions.
2 changes: 2 additions & 0 deletions composer/loggers/mosaicml_logger.py
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,8 @@ def epoch_end(self, state: State, logger: Logger) -> None:
self._flush_metadata()

def fit_end(self, state: State, logger: Logger) -> None:
# Log the time at which model training finished, for run events
self._log_metadata({'train_finished_time': time.time()})
training_progress_data = self._get_training_progress_metrics(state)
log.debug(f'\nLogging FINAL training progress data to metadata:\n{dict_to_str(training_progress_data)}')
self._log_metadata(training_progress_data)
Expand Down
11 changes: 6 additions & 5 deletions composer/trainer/trainer.py
Original file line number Diff line number Diff line change
Expand Up @@ -1078,18 +1078,19 @@ def __init__(
loggers.append(
ConsoleLogger(stream=console_stream, log_interval=console_log_interval, log_traces=log_traces))

if save_folder is not None:
remote_ud = maybe_create_remote_uploader_downloader_from_uri(save_folder, loggers)
if remote_ud is not None:
loggers.append(remote_ud)

# MosaicML Logger
# Keep the MosaicML logger above the RemoteUploaderDownloader (i.e. appended to `loggers` first) so that fit end is reported before the final checkpoint begins uploading.
if os.environ.get(MOSAICML_PLATFORM_ENV_VAR, 'false').lower() == 'true' and os.environ.get(
MOSAICML_ACCESS_TOKEN_ENV_VAR) is not None and not any(isinstance(x, MosaicMLLogger) for x in loggers):
log.info('Detected run on MosaicML platform. Adding MosaicMLLogger to loggers.')
mosaicml_logger = MosaicMLLogger()
loggers.append(mosaicml_logger)

if save_folder is not None:
remote_ud = maybe_create_remote_uploader_downloader_from_uri(save_folder, loggers)
if remote_ud is not None:
loggers.append(remote_ud)

# Logger
self.logger = Logger(state=self.state, destinations=loggers)

Expand Down
1 change: 1 addition & 0 deletions tests/loggers/test_mosaicml_logger.py
Original file line number Diff line number Diff line change
Expand Up @@ -279,6 +279,7 @@ def test_run_events_logged(monkeypatch):
assert 'mosaicml/training_progress' in metadata
assert metadata['mosaicml/training_progress'] == '[batch=4/4]'
assert 'mosaicml/training_sub_progress' not in metadata
assert isinstance(metadata['mosaicml/train_finished_time'], float)


def test_token_training_progress_metrics():
Expand Down

0 comments on commit 3cf73cc

Please sign in to comment.