Skip to content

Commit

Permalink
feat: Example log controller yaml with training state (#296)
Browse files Browse the repository at this point in the history
Signed-off-by: Padmanabha V Seshadri <[email protected]>
  • Loading branch information
seshapad authored Aug 14, 2024
1 parent 78909af commit 2d1c17c
Showing 1 changed file with 33 additions and 0 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
# Metrics declared for this controller configuration.
# Each entry pairs a `name` with the metric `class` behind it. The names are
# the identifiers referenced elsewhere in this file: `trainer_state`,
# `training_loss` and `per_process_state` appear in the log formats, and
# `training_loss` also appears in a controller rule.
controller_metrics:
  - name: trainer_state
    class: TrainingState
  - name: training_loss
    class: Loss
  - name: per_process_state
    class: PerProcessState
# Named operations that controllers can invoke.
# Both entries use the `LogControl` class and take a `log_format` template
# plus a `log_level`. The `{...}` placeholders refer to metric names declared
# under `controller_metrics` (and to `args.output_dir`).
# NOTE(review): the placeholder syntax looks like Python `str.format` fields;
# confirm against the LogControl implementation.
operations:
  # One-line progress message: epoch, global step, process rank and loss.
  - name: logger_on_log
    class: LogControl
    arguments:
      log_format: 'Epoch: {trainer_state[epoch]:.0f}, Step: {trainer_state[global_step]}, Rank: {per_process_state[rank]}, loss = {training_loss}'
      log_level: info
  # Two-line message naming the step and the checkpoint directory.
  - name: logger_on_save
    class: LogControl
    arguments:
      # Literal block scalar (`|`): the line break between the two lines is
      # kept and the value ends with a single trailing newline.
      log_format: |
        Saving model in huggingface format at step: {trainer_state[global_step]}
        Model saved in {args.output_dir}/checkpoint-{trainer_state[global_step]}
      log_level: info
# Controllers tie trigger events to operations through a rule.
# Each entry lists the `triggers` it reacts to, a `rule` expression, and the
# `operations` to run, written as `<operation name>.<action>`. The operation
# names match the entries declared under `operations`.
# NOTE(review): presumably the operations run only when the rule evaluates
# truthy; confirm against the controller framework.
controllers:
  # On `on_log`: log progress, guarded by the rule that a loss value exists.
  - name: ctrl-on-log-format
    triggers:
      - on_log
    rule: 'training_loss != None'
    operations:
      - logger_on_log.should_log
  # On `on_save`: the rule is the constant 'True', i.e. no extra condition.
  - name: ctrl-on-save-format
    triggers:
      - on_save
    rule: 'True'
    operations:
      - logger_on_save.should_log

0 comments on commit 2d1c17c

Please sign in to comment.