You signed in with another tab or window. Reload to refresh your session. You signed out in another tab or window. Reload to refresh your session. You switched accounts on another tab or window. Reload to refresh your session. Dismiss alert
I'm now trying to use it to train on the JSUT dataset. preprocess.py worked well (I got the extracted features: mel-spectrograms and linear spectrograms). But when I run train.py, an error occurs:
Note that I copied the "en" preset, renamed it to "deepvoice3_jp.json", and used it in the commands for preprocess.py and train.py. I also set frontend = "jp" in both the .json file and hparams.py.
My command is: python train.py --preset=presets/deepvoice3_jp.json --data-root=./preprocess_result/
[Error message]:
Command line args:
{'--checkpoint': None,
'--checkpoint-dir': 'checkpoints',
'--checkpoint-postnet': None,
'--checkpoint-seq2seq': None,
'--data-root': './preprocess_result/',
'--help': False,
'--hparams': '',
'--load-embedding': None,
'--log-event-path': None,
'--preset': 'presets/deepvoice3_jp.json',
'--reset-optimizer': False,
'--restore-parts': None,
'--speaker-id': None,
'--train-postnet-only': False,
'--train-seq2seq-only': False}
Training whole model
Training seq2seq model
Hyperparameters:
adam_beta1: 0.5
adam_beta2: 0.9
adam_eps: 1e-06
allow_clipping_in_normalization: True
amsgrad: False
batch_size: 8
binary_divergence_weight: 0.1
builder: deepvoice3
checkpoint_interval: 10000
clip_thresh: 0.1
converter_channels: 256
decoder_channels: 256
downsample_step: 4
dropout: 0.050000000000000044
embedding_weight_std: 0.1
encoder_channels: 512
eval_interval: 10000
fft_size: 1024
fmax: 7600
fmin: 125
force_monotonic_attention: True
freeze_embedding: False
frontend: jp
guided_attention_sigma: 0.4
hop_size: 256
ignore_recognition_level: 2
initial_learning_rate: 0.0005
kernel_size: 3
key_position_rate: 7.6
key_projection: True
lr_schedule: noam_learning_rate_decay
lr_schedule_kwargs: {}
masked_loss_weight: 0.5
max_positions: 512
min_level_db: -100
min_text: 20
n_speakers: 1
name: deepvoice3
nepochs: 2000
num_mels: 80
num_workers: 2
outputs_per_step: 1
padding_idx: 0
pin_memory: True
power: 1.4
preemphasis: 0.97
priority_freq: 3000
priority_freq_weight: 0.0
process_only_htk_aligned: False
query_position_rate: 2.0
ref_level_db: 20
replace_pronunciation_prob: 0.5
rescaling: False
rescaling_max: 0.999
sample_rate: 22050
save_optimizer_state: True
speaker_embed_dim: 16
speaker_embedding_weight_std: 0.05
text_embed_dim: 256
trainable_positional_encodings: False
use_decoder_state_for_postnet_input: True
use_guided_attention: True
use_memory_mask: True
value_projection: True
weight_decay: 0.0
window_ahead: 3
window_backward: 1
Log event path: log/run-test2020-08-03_12:52:24.910593
0it [00:00, ?it/s]
Traceback (most recent call last):
File "train.py", line 1016, in
train_seq2seq=train_seq2seq, train_postnet=train_postnet)
File "train.py", line 623, in train
in tqdm(enumerate(data_loader)):
File "/root/anaconda3/envs/pytorch/lib/python3.6/site-packages/tqdm/std.py", line 1130, in iter
for obj in iterable:
File "/root/anaconda3/envs/pytorch/lib/python3.6/site-packages/torch/utils/data/dataloader.py", line 345, in next
data = self._next_data()
File "/root/anaconda3/envs/pytorch/lib/python3.6/site-packages/torch/utils/data/dataloader.py", line 856, in _next_data
return self._process_data(data)
File "/root/anaconda3/envs/pytorch/lib/python3.6/site-packages/torch/utils/data/dataloader.py", line 881, in _process_data
data.reraise()
File "/root/anaconda3/envs/pytorch/lib/python3.6/site-packages/torch/_utils.py", line 394, in reraise
raise self.exc_type(msg)
ValueError: Caught ValueError in DataLoader worker process 0.
Original Traceback (most recent call last):
File "/root/anaconda3/envs/pytorch/lib/python3.6/site-packages/torch/utils/data/_utils/worker.py", line 178, in _worker_loop
data = fetcher.fetch(index)
File "/root/anaconda3/envs/pytorch/lib/python3.6/site-packages/torch/utils/data/_utils/fetch.py", line 44, in fetch
data = [self.dataset[idx] for idx in possibly_batched_index]
File "/root/anaconda3/envs/pytorch/lib/python3.6/site-packages/torch/utils/data/_utils/fetch.py", line 44, in
data = [self.dataset[idx] for idx in possibly_batched_index]
File "train.py", line 255, in getitem
return self.X[idx], self.Mel[idx], self.Y[idx]
File "/root/anaconda3/envs/pytorch/lib/python3.6/site-packages/nnmnkwii/datasets/init.py", line 146, in getitem
return self.__collect_features(paths)
File "/root/anaconda3/envs/pytorch/lib/python3.6/site-packages/nnmnkwii/datasets/init.py", line 131, in _collect_features
return self.file_data_source.collect_features(*paths)
File "train.py", line 134, in collect_features
seq = frontend.text_to_sequence(text, p=hparams.replace_pronunciation_prob)
File "/root/AI/deepvoice3_pytorch/deepvoice3_pytorch/frontend/jp/init.py", line 71, in text_to_sequence
text = mix_pronunciation(text, p)
File "/root/AI/deepvoice3_pytorch/deepvoice3_pytorch/frontend/jp/init.py", line 42, in mix_pronunciation
tokens, yomis = _yomi(tagger.parse(text))
File "/root/AI/deepvoice3_pytorch/deepvoice3_pytorch/frontend/jp/init.py", line 22, in _yomi
token, rest = s
ValueError: too many values to unpack (expected 2)
How does this problem occur? Thanks, and I look forward to your reply!
The text was updated successfully, but these errors were encountered:
Hi, thanks for your implementation of deepvoice3!
I'm now trying to use it to train on the JSUT dataset. preprocess.py worked well (I got the extracted features: mel-spectrograms and linear spectrograms). But when I run train.py, an error occurs:
Note that I copied the "en" preset, renamed it to "deepvoice3_jp.json", and used it in the commands for preprocess.py and train.py. I also set frontend = "jp" in both the .json file and hparams.py.
My command is : python train.py --preset=presets/deepvoice3_jp.json --data-root=./preprocess_result/
[Error message]:
Command line args:
{'--checkpoint': None,
'--checkpoint-dir': 'checkpoints',
'--checkpoint-postnet': None,
'--checkpoint-seq2seq': None,
'--data-root': './preprocess_result/',
'--help': False,
'--hparams': '',
'--load-embedding': None,
'--log-event-path': None,
'--preset': 'presets/deepvoice3_jp.json',
'--reset-optimizer': False,
'--restore-parts': None,
'--speaker-id': None,
'--train-postnet-only': False,
'--train-seq2seq-only': False}
Training whole model
Training seq2seq model
Hyperparameters:
adam_beta1: 0.5
adam_beta2: 0.9
adam_eps: 1e-06
allow_clipping_in_normalization: True
amsgrad: False
batch_size: 8
binary_divergence_weight: 0.1
builder: deepvoice3
checkpoint_interval: 10000
clip_thresh: 0.1
converter_channels: 256
decoder_channels: 256
downsample_step: 4
dropout: 0.050000000000000044
embedding_weight_std: 0.1
encoder_channels: 512
eval_interval: 10000
fft_size: 1024
fmax: 7600
fmin: 125
force_monotonic_attention: True
freeze_embedding: False
frontend: jp
guided_attention_sigma: 0.4
hop_size: 256
ignore_recognition_level: 2
initial_learning_rate: 0.0005
kernel_size: 3
key_position_rate: 7.6
key_projection: True
lr_schedule: noam_learning_rate_decay
lr_schedule_kwargs: {}
masked_loss_weight: 0.5
max_positions: 512
min_level_db: -100
min_text: 20
n_speakers: 1
name: deepvoice3
nepochs: 2000
num_mels: 80
num_workers: 2
outputs_per_step: 1
padding_idx: 0
pin_memory: True
power: 1.4
preemphasis: 0.97
priority_freq: 3000
priority_freq_weight: 0.0
process_only_htk_aligned: False
query_position_rate: 2.0
ref_level_db: 20
replace_pronunciation_prob: 0.5
rescaling: False
rescaling_max: 0.999
sample_rate: 22050
save_optimizer_state: True
speaker_embed_dim: 16
speaker_embedding_weight_std: 0.05
text_embed_dim: 256
trainable_positional_encodings: False
use_decoder_state_for_postnet_input: True
use_guided_attention: True
use_memory_mask: True
value_projection: True
weight_decay: 0.0
window_ahead: 3
window_backward: 1
Log event path: log/run-test2020-08-03_12:52:24.910593
0it [00:00, ?it/s]
Traceback (most recent call last):
File "train.py", line 1016, in
train_seq2seq=train_seq2seq, train_postnet=train_postnet)
File "train.py", line 623, in train
in tqdm(enumerate(data_loader)):
File "/root/anaconda3/envs/pytorch/lib/python3.6/site-packages/tqdm/std.py", line 1130, in iter
for obj in iterable:
File "/root/anaconda3/envs/pytorch/lib/python3.6/site-packages/torch/utils/data/dataloader.py", line 345, in next
data = self._next_data()
File "/root/anaconda3/envs/pytorch/lib/python3.6/site-packages/torch/utils/data/dataloader.py", line 856, in _next_data
return self._process_data(data)
File "/root/anaconda3/envs/pytorch/lib/python3.6/site-packages/torch/utils/data/dataloader.py", line 881, in _process_data
data.reraise()
File "/root/anaconda3/envs/pytorch/lib/python3.6/site-packages/torch/_utils.py", line 394, in reraise
raise self.exc_type(msg)
ValueError: Caught ValueError in DataLoader worker process 0.
Original Traceback (most recent call last):
File "/root/anaconda3/envs/pytorch/lib/python3.6/site-packages/torch/utils/data/_utils/worker.py", line 178, in _worker_loop
data = fetcher.fetch(index)
File "/root/anaconda3/envs/pytorch/lib/python3.6/site-packages/torch/utils/data/_utils/fetch.py", line 44, in fetch
data = [self.dataset[idx] for idx in possibly_batched_index]
File "/root/anaconda3/envs/pytorch/lib/python3.6/site-packages/torch/utils/data/_utils/fetch.py", line 44, in
data = [self.dataset[idx] for idx in possibly_batched_index]
File "train.py", line 255, in getitem
return self.X[idx], self.Mel[idx], self.Y[idx]
File "/root/anaconda3/envs/pytorch/lib/python3.6/site-packages/nnmnkwii/datasets/init.py", line 146, in getitem
return self.__collect_features(paths)
File "/root/anaconda3/envs/pytorch/lib/python3.6/site-packages/nnmnkwii/datasets/init.py", line 131, in _collect_features
return self.file_data_source.collect_features(*paths)
File "train.py", line 134, in collect_features
seq = frontend.text_to_sequence(text, p=hparams.replace_pronunciation_prob)
File "/root/AI/deepvoice3_pytorch/deepvoice3_pytorch/frontend/jp/init.py", line 71, in text_to_sequence
text = mix_pronunciation(text, p)
File "/root/AI/deepvoice3_pytorch/deepvoice3_pytorch/frontend/jp/init.py", line 42, in mix_pronunciation
tokens, yomis = _yomi(tagger.parse(text))
File "/root/AI/deepvoice3_pytorch/deepvoice3_pytorch/frontend/jp/init.py", line 22, in _yomi
token, rest = s
ValueError: too many values to unpack (expected 2)
How does this problem occur? Thanks, and I look forward to your reply!
The text was updated successfully, but these errors were encountered: