Skip to content

Commit

Permalink
fix: Add dataset_probabilities
Browse files (browse the repository at this point in the history)
  • Loading branch information
saattrupdan committed Oct 22, 2024
1 parent 2ffd1d4 commit 1b77ef9
Showing 1 changed file with 29 additions and 0 deletions.
29 changes: 29 additions & 0 deletions makefile
Original file line number | Diff line number | Diff line change
Expand Up @@ -124,6 +124,7 @@ roest-315m: ## Train the Røst-315M model
src/scripts/finetune_asr_model.py \
model=wav2vec2-small \
datasets=[coral,common_voice_17] \
dataset_probabilities=[0.95,0.05] \
decoder_datasets=[wikipedia,common_voice,reddit] \
push_to_hub=true \
dataloader_num_workers=4 \
Expand All @@ -137,9 +138,37 @@ roest-1b: ## Train the Røst-1B model
src/scripts/finetune_asr_model.py \
model=wav2vec2-medium \
datasets=[coral,common_voice_17] \
dataset_probabilities=[0.95,0.05] \
decoder_datasets=[wikipedia,common_voice,reddit] \
push_to_hub=true \
dataloader_num_workers=4 \
model_id=roest-1b \
private=true \
per_device_batch_size=64

# Train the Røst-1.5B model by running src/scripts/finetune_asr_model.py
# through `accelerate launch --use-deepspeed` with model=whisper-large.
#
# Overrides passed to the script:
#   - datasets / dataset_probabilities: two datasets (coral, common_voice_17)
#     and two values (0.95, 0.05) in the same order. Presumably these are the
#     per-dataset sampling weights used when mixing -- confirm against the
#     finetuning script.
#   - push_to_hub=true, private=true, model_id=roest-1.5b: presumably the
#     result is uploaded as a private model under this id -- confirm.
#   - dataloader_num_workers=4, per_device_batch_size=64.
#
# NOTE(review): unlike the wav2vec2-based roest-* targets in this file, no
# decoder_datasets override is passed here. Presumably the Whisper model does
# not use the external decoder -- confirm this omission is intentional.
#
# The leading `@` stops make from echoing the command before running it.
roest-1.5b: ## Train the Røst-1.5B model
@accelerate launch \
--use-deepspeed \
src/scripts/finetune_asr_model.py \
model=whisper-large \
datasets=[coral,common_voice_17] \
dataset_probabilities=[0.95,0.05] \
push_to_hub=true \
dataloader_num_workers=4 \
model_id=roest-1.5b \
private=true \
per_device_batch_size=64

# Train the Røst-2B model by running src/scripts/finetune_asr_model.py
# through `accelerate launch --use-deepspeed` with model=wav2vec2-large.
#
# Overrides passed to the script:
#   - datasets / dataset_probabilities: two datasets (coral, common_voice_17)
#     and two values (0.95, 0.05) in the same order. Presumably these are the
#     per-dataset sampling weights used when mixing -- confirm against the
#     finetuning script.
#   - decoder_datasets: wikipedia, common_voice, reddit. Presumably the text
#     sources for the model's decoder -- confirm.
#   - push_to_hub=true, private=true, model_id=roest-2b: presumably the
#     result is uploaded as a private model under this id -- confirm.
#   - dataloader_num_workers=4, per_device_batch_size=64.
#
# The leading `@` stops make from echoing the command before running it.
roest-2b: ## Train the Røst-2B model
@accelerate launch \
--use-deepspeed \
src/scripts/finetune_asr_model.py \
model=wav2vec2-large \
datasets=[coral,common_voice_17] \
dataset_probabilities=[0.95,0.05] \
decoder_datasets=[wikipedia,common_voice,reddit] \
push_to_hub=true \
dataloader_num_workers=4 \
model_id=roest-2b \
private=true \
per_device_batch_size=64

0 comments on commit 1b77ef9

Please sign in to comment.