Added Complete Experimentation Tracking. #146

Open · wants to merge 9 commits into master
28 changes: 18 additions & 10 deletions README.md
@@ -119,19 +119,27 @@ python3 train.py -p config/LJSpeech/preprocess.yaml -m config/LJSpeech/model.yam

The model takes fewer than 10k training steps (less than 1 hour on my GTX 1080 Ti GPU) to generate audio samples with acceptable quality, which is much more efficient than autoregressive models such as Tacotron2.

-# TensorBoard
+# Visualizing insights with Aim

-Use
-```
-tensorboard --logdir output/log/LJSpeech
-```
-to serve TensorBoard on your localhost.
-The loss curves, synthesized mel-spectrograms, and audios are shown.
-
-![](./img/tensorboard_loss.png)
-![](./img/tensorboard_spec.png)
-![](./img/tensorboard_audio.png)
+[Aim](https://github.com/aimhubio/aim) is an open-source experiment tracker that logs your training runs, provides a beautiful UI to compare them, and an API to query them programmatically.
+
+Execute `aim up` to run the Aim UI on your server.
+
+**View all tracked runs, the last tracked value of each metric, and the tracked hyperparameters in the Runs Dashboard:**
+
+<img width="1579" alt="Screen Shot 2022-03-02 at 19 41 11" src="https://user-images.githubusercontent.com/13848158/156395419-5b9659c7-dbad-4a8c-a557-660a8315bcd8.png">
+
+**Compare loss curves with the Metrics Explorer - group and aggregate by any hyperparameter to easily compare runs:**
+
+<img width="1576" alt="Screen Shot 2022-03-02 at 19 43 56" src="https://user-images.githubusercontent.com/13848158/156396025-a82987b5-9da2-497b-8fe6-9b6b7c1acf6a.png">
+
+**Compare and debug spectrograms across training runs via the Images Explorer:**
+
+<img width="1580" alt="Screen Shot 2022-03-02 at 21 12 26" src="https://user-images.githubusercontent.com/13848158/156412758-28c395de-6571-4b41-be7e-77d120b99e7e.png">
+
+**Deeply explore the results of each run on a single page:**
+
+<img width="1580" alt="Screen Shot 2022-03-02 at 21 14 37" src="https://user-images.githubusercontent.com/13848158/156413072-cad16296-3a87-454c-9b37-e223c051b775.png">

# Implementation Issues

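The README section added above shows the UI side. As a minimal sketch of the tracking side, here is how a training script could log hyperparameters and metrics with Aim (the experiment name, hyperparameter values, and loss values are illustrative, not taken from this PR):

```python
from aim import Run

# Create a run and attach hyperparameters (illustrative values).
run = Run(experiment="LJSpeech")
run["hparams"] = {"total_step": 900000, "batch_size": 16}

# Inside the training loop, track each scalar metric by name and step.
for step, loss in enumerate([0.91, 0.74, 0.58]):
    run.track(loss, name="total_loss", step=step, context={"subset": "train"})
```

Runs tracked this way show up in the dashboards pictured in the screenshots above.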
2 changes: 1 addition & 1 deletion config/LJSpeech/preprocess.yaml
@@ -1,7 +1,7 @@
dataset: "LJSpeech"

path:
-  corpus_path: "/home/ming/Data/LJSpeech-1.1"
+  corpus_path: "./data/LJSpeech-1.1"
  lexicon_path: "lexicon/librispeech-lexicon.txt"
  raw_path: "./raw_data/LJSpeech"
  preprocessed_path: "./preprocessed_data/LJSpeech"
4 changes: 2 additions & 2 deletions config/LJSpeech/train.yaml
@@ -15,6 +15,6 @@ optimizer:
step:
  total_step: 900000
  log_step: 100
-  synth_step: 1000
+  synth_step: 100
  val_step: 1000
-  save_step: 100000
+  save_step: 10000
2 changes: 1 addition & 1 deletion config/LJSpeech_paper/preprocess.yaml
@@ -1,7 +1,7 @@
dataset: "LJSpeech_paper"

path:
-  corpus_path: "/home/ming/Data/LJSpeech-1.1"
+  corpus_path: "./data/LJSpeech-1.1"
  lexicon_path: "lexicon/librispeech-lexicon.txt"
  raw_path: "./raw_data/LJSpeech"
  preprocessed_path: "./preprocessed_data/LJSpeech_paper"
4 changes: 2 additions & 2 deletions model/modules.py
@@ -121,7 +121,7 @@ def forward(
            x = x + pitch_embedding
        if self.energy_feature_level == "phoneme_level":
            energy_prediction, energy_embedding = self.get_energy_embedding(
-                x, energy_target, src_mask, p_control
+                x, energy_target, src_mask, e_control
            )
            x = x + energy_embedding

@@ -143,7 +143,7 @@ def forward(
            x = x + pitch_embedding
        if self.energy_feature_level == "frame_level":
            energy_prediction, energy_embedding = self.get_energy_embedding(
-                x, energy_target, mel_mask, p_control
+                x, energy_target, mel_mask, e_control
            )
            x = x + energy_embedding

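The two hunks above fix what looks like a copy-paste bug: the energy embedding was scaled by the pitch control value `p_control` instead of the energy control value `e_control`. To illustrate why the right knob matters, here is a runnable, simplified sketch of the control mechanism (the function, bin, and embedding names are hypothetical stand-ins for this repo's `get_energy_embedding`):

```python
import torch

def apply_control(prediction, control, bins, embedding):
    # At inference, the control scalar rescales the predicted energy
    # before it is quantized into bins and embedded; passing p_control
    # here would wrongly couple the energy embedding to the pitch knob.
    scaled = prediction * control
    return embedding(torch.bucketize(scaled, bins))

bins = torch.linspace(-1.0, 8.0, steps=255)  # quantization boundaries
emb = torch.nn.Embedding(256, 4)             # one entry per bucket
pred = torch.randn(2, 5)                     # (batch, time) energies
out = apply_control(pred, 1.2, bins, emb)    # 1.2 = +20% energy
print(out.shape)  # torch.Size([2, 5, 4])
```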
2 changes: 1 addition & 1 deletion preprocessed_data/LJSpeech/stats.json
@@ -1 +1 @@
{"pitch": [-2.917079304729967, 11.391254536985784, 207.6309860026605, 46.77559025098988], "energy": [-1.431044578552246, 8.184337615966797, 37.32621679053821, 26.044180782835863]}
{"pitch": [-2.879620383246412, 10.701647317596422, 207.4988362093985, 46.75837075025294], "energy": [-1.4311870336532593, 8.16907787322998, 37.39676954370247, 26.08419376725949]}
20,295 changes: 7,815 additions & 12,480 deletions preprocessed_data/LJSpeech/train.txt

Large diffs are not rendered by default.

1,006 changes: 503 additions & 503 deletions preprocessed_data/LJSpeech/val.txt

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion preprocessor/preprocessor.py
@@ -66,7 +66,7 @@ def build_from_path(self):
        speakers = {}
        for i, speaker in enumerate(tqdm(os.listdir(self.in_dir))):
            speakers[speaker] = i
-            for wav_name in os.listdir(os.path.join(self.in_dir, speaker)):
+            for wav_name in tqdm(os.listdir(os.path.join(self.in_dir, speaker))):
                if ".wav" not in wav_name:
                    continue

34 changes: 34 additions & 0 deletions track_utils.py
@@ -0,0 +1,34 @@
+import io
+
+import PIL.Image
+
+
+def fig_to_img(fig):
+    """Convert a matplotlib figure to a PIL image."""
+    buf = io.BytesIO()
+    fig.savefig(buf)
+    buf.seek(0)
+    return PIL.Image.open(buf)
+
+
+def track_model_graph(model) -> dict:
+    """Collect meta-information about a model's parameters.
+
+    Arguments:
+        model: The torch model being tracked.
+    Returns:
+        dict: A mapping from parameter name to tensor shape.
+    """
+    model_metadata = {}
+    try:
+        state_dict = model.state_dict()
+        for param_tensor in state_dict:
+            model_metadata[param_tensor] = state_dict[param_tensor].size()
+    except RuntimeError as e:
+        print("Unable to track model graph and hyperparams with error:", e)
+    return model_metadata
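A hypothetical usage sketch for these helpers together with Aim (assumes `matplotlib`, `torch`, and `aim` are installed; the model and figure are placeholders):

```python
import matplotlib.pyplot as plt
import torch.nn as nn
from aim import Image, Run

from track_utils import fig_to_img, track_model_graph

run = Run(experiment="demo")
model = nn.Linear(4, 2)

# Store each parameter's shape as run metadata
# (torch.Size is converted to a plain list so it serializes cleanly).
run["model_graph"] = {k: list(v) for k, v in track_model_graph(model).items()}

# Render a figure, convert it to a PIL image, and track it with Aim.
fig, ax = plt.subplots()
ax.plot([0, 1], [1, 0])
run.track(Image(fig_to_img(fig)), name="example_figure", step=0)
```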