Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix(bin): log to stdout in cli tools #217

Merged
merged 2 commits into from
Dec 17, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion TTS/bin/compute_attention_masks.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import importlib
import logging
import os
import sys
from argparse import RawTextHelpFormatter

import numpy as np
Expand All @@ -18,7 +19,7 @@
from TTS.utils.generic_utils import ConsoleFormatter, setup_logger

if __name__ == "__main__":
setup_logger("TTS", level=logging.INFO, screen=True, formatter=ConsoleFormatter())
setup_logger("TTS", level=logging.INFO, stream=sys.stdout, formatter=ConsoleFormatter())

# pylint: disable=bad-option-value
parser = argparse.ArgumentParser(
Expand Down
3 changes: 2 additions & 1 deletion TTS/bin/compute_embeddings.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import argparse
import logging
import os
import sys
from argparse import RawTextHelpFormatter

import torch
Expand Down Expand Up @@ -102,7 +103,7 @@ def compute_embeddings(


if __name__ == "__main__":
setup_logger("TTS", level=logging.INFO, screen=True, formatter=ConsoleFormatter())
setup_logger("TTS", level=logging.INFO, stream=sys.stdout, formatter=ConsoleFormatter())

parser = argparse.ArgumentParser(
description="""Compute embedding vectors for each audio file in a dataset and store them keyed by `{dataset_name}#{file_path}` in a .pth file\n\n"""
Expand Down
3 changes: 2 additions & 1 deletion TTS/bin/compute_statistics.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import glob
import logging
import os
import sys

import numpy as np
from tqdm import tqdm
Expand All @@ -18,7 +19,7 @@

def main():
"""Run preprocessing process."""
setup_logger("TTS", level=logging.INFO, screen=True, formatter=ConsoleFormatter())
setup_logger("TTS", level=logging.INFO, stream=sys.stderr, formatter=ConsoleFormatter())

parser = argparse.ArgumentParser(description="Compute mean and variance of spectrogtram features.")
parser.add_argument("config_path", type=str, help="TTS config file path to define audio processin parameters.")
Expand Down
3 changes: 2 additions & 1 deletion TTS/bin/eval_encoder.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import argparse
import logging
import sys
from argparse import RawTextHelpFormatter

import torch
Expand Down Expand Up @@ -53,7 +54,7 @@ def compute_encoder_accuracy(dataset_items, encoder_manager):


if __name__ == "__main__":
setup_logger("TTS", level=logging.INFO, screen=True, formatter=ConsoleFormatter())
setup_logger("TTS", level=logging.INFO, stream=sys.stdout, formatter=ConsoleFormatter())

parser = argparse.ArgumentParser(
description="""Compute the accuracy of the encoder.\n\n"""
Expand Down
3 changes: 2 additions & 1 deletion TTS/bin/extract_tts_spectrograms.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import argparse
import logging
import os
import sys

import numpy as np
import torch
Expand Down Expand Up @@ -273,7 +274,7 @@ def main(args): # pylint: disable=redefined-outer-name


if __name__ == "__main__":
setup_logger("TTS", level=logging.INFO, screen=True, formatter=ConsoleFormatter())
setup_logger("TTS", level=logging.INFO, stream=sys.stdout, formatter=ConsoleFormatter())

parser = argparse.ArgumentParser()
parser.add_argument("--config_path", type=str, help="Path to config file for training.", required=True)
Expand Down
3 changes: 2 additions & 1 deletion TTS/bin/find_unique_chars.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

import argparse
import logging
import sys
from argparse import RawTextHelpFormatter

from TTS.config import load_config
Expand All @@ -10,7 +11,7 @@


def main():
setup_logger("TTS", level=logging.INFO, screen=True, formatter=ConsoleFormatter())
setup_logger("TTS", level=logging.INFO, stream=sys.stdout, formatter=ConsoleFormatter())

# pylint: disable=bad-option-value
parser = argparse.ArgumentParser(
Expand Down
3 changes: 2 additions & 1 deletion TTS/bin/find_unique_phonemes.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import argparse
import logging
import multiprocessing
import sys
from argparse import RawTextHelpFormatter

from tqdm.contrib.concurrent import process_map
Expand All @@ -20,7 +21,7 @@ def compute_phonemes(item):


def main():
setup_logger("TTS", level=logging.INFO, screen=True, formatter=ConsoleFormatter())
setup_logger("TTS", level=logging.INFO, stream=sys.stdout, formatter=ConsoleFormatter())

# pylint: disable=W0601
global c, phonemizer
Expand Down
3 changes: 2 additions & 1 deletion TTS/bin/remove_silence_using_vad.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import multiprocessing
import os
import pathlib
import sys

import torch
from tqdm import tqdm
Expand Down Expand Up @@ -77,7 +78,7 @@ def preprocess_audios():


if __name__ == "__main__":
setup_logger("TTS", level=logging.INFO, screen=True, formatter=ConsoleFormatter())
setup_logger("TTS", level=logging.INFO, stream=sys.stdout, formatter=ConsoleFormatter())

parser = argparse.ArgumentParser(
description="python TTS/bin/remove_silence_using_vad.py -i=VCTK-Corpus/ -o=VCTK-Corpus-removed-silence/ -g=wav48_silence_trimmed/*/*_mic1.flac --trim_just_beginning_and_end"
Expand Down
3 changes: 2 additions & 1 deletion TTS/bin/synthesize.py
Original file line number Diff line number Diff line change
Expand Up @@ -311,8 +311,9 @@ def parse_args() -> argparse.Namespace:

def main() -> None:
"""Entry point for `tts` command line interface."""
setup_logger("TTS", level=logging.INFO, screen=True, formatter=ConsoleFormatter())
args = parse_args()
stream = sys.stderr if args.pipe_out else sys.stdout
setup_logger("TTS", level=logging.INFO, stream=stream, formatter=ConsoleFormatter())

pipe_out = sys.stdout if args.pipe_out else None

Expand Down
2 changes: 1 addition & 1 deletion TTS/bin/train_encoder.py
Original file line number Diff line number Diff line change
Expand Up @@ -322,7 +322,7 @@ def main(args): # pylint: disable=redefined-outer-name


if __name__ == "__main__":
setup_logger("TTS", level=logging.INFO, screen=True, formatter=ConsoleFormatter())
setup_logger("TTS", level=logging.INFO, stream=sys.stdout, formatter=ConsoleFormatter())

args, c, OUT_PATH, AUDIO_PATH, c_logger, dashboard_logger = init_training()

Expand Down
3 changes: 2 additions & 1 deletion TTS/bin/train_tts.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import logging
import os
import sys
from dataclasses import dataclass, field

from trainer import Trainer, TrainerArgs
Expand All @@ -17,7 +18,7 @@ class TrainTTSArgs(TrainerArgs):

def main():
"""Run `tts` model training directly by a `config.json` file."""
setup_logger("TTS", level=logging.INFO, screen=True, formatter=ConsoleFormatter())
setup_logger("TTS", level=logging.INFO, stream=sys.stdout, formatter=ConsoleFormatter())

# init trainer args
train_args = TrainTTSArgs()
Expand Down
3 changes: 2 additions & 1 deletion TTS/bin/train_vocoder.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import logging
import os
import sys
from dataclasses import dataclass, field

from trainer import Trainer, TrainerArgs
Expand All @@ -18,7 +19,7 @@ class TrainVocoderArgs(TrainerArgs):

def main():
"""Run `tts` model training directly by a `config.json` file."""
setup_logger("TTS", level=logging.INFO, screen=True, formatter=ConsoleFormatter())
setup_logger("TTS", level=logging.INFO, stream=sys.stdout, formatter=ConsoleFormatter())

# init trainer args
train_args = TrainVocoderArgs()
Expand Down
3 changes: 2 additions & 1 deletion TTS/bin/tune_wavegrad.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

import argparse
import logging
import sys
from itertools import product as cartesian_product

import numpy as np
Expand All @@ -17,7 +18,7 @@
from TTS.vocoder.models import setup_model

if __name__ == "__main__":
setup_logger("TTS", level=logging.INFO, screen=True, formatter=ConsoleFormatter())
setup_logger("TTS", level=logging.INFO, stream=sys.stdout, formatter=ConsoleFormatter())

parser = argparse.ArgumentParser()
parser.add_argument("--model_path", type=str, help="Path to model checkpoint.")
Expand Down
2 changes: 1 addition & 1 deletion TTS/encoder/utils/prepare_voxceleb.py
Original file line number Diff line number Diff line change
Expand Up @@ -216,7 +216,7 @@ def processor(directory, subset, force_process):


if __name__ == "__main__":
setup_logger("TTS", level=logging.INFO, screen=True, formatter=ConsoleFormatter())
setup_logger("TTS", level=logging.INFO, stream=sys.stdout, formatter=ConsoleFormatter())
if len(sys.argv) != 4:
print("Usage: python prepare_data.py save_directory user password")
sys.exit()
Expand Down
2 changes: 1 addition & 1 deletion TTS/server/server.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@
from TTS.utils.synthesizer import Synthesizer

logger = logging.getLogger(__name__)
setup_logger("TTS", level=logging.INFO, screen=True, formatter=ConsoleFormatter())
setup_logger("TTS", level=logging.INFO, stream=sys.stdout, formatter=ConsoleFormatter())


def create_argparser() -> argparse.ArgumentParser:
Expand Down
24 changes: 17 additions & 7 deletions TTS/utils/generic_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,10 @@
import datetime
import importlib
import logging
import os
import re
from pathlib import Path
from typing import Any, Callable, Dict, Optional, TypeVar, Union
from typing import Any, Callable, Dict, Optional, TextIO, TypeVar, Union

import torch
from packaging.version import Version
Expand Down Expand Up @@ -107,25 +108,34 @@ def setup_logger(
level: int = logging.INFO,
*,
formatter: Optional[logging.Formatter] = None,
screen: bool = False,
tofile: bool = False,
log_dir: str = "logs",
stream: Optional[TextIO] = None,
log_dir: Optional[Union[str, os.PathLike[Any]]] = None,
log_name: str = "log",
) -> None:
"""Set up a logger.

Args:
logger_name: Name of the logger to set up
level: Logging level
formatter: Formatter for the logger
stream: Add a StreamHandler for the given stream, e.g. sys.stderr or sys.stdout
log_dir: Folder to write the log file (no file created if None)
log_name: Prefix of the log file name
"""
lg = logging.getLogger(logger_name)
if formatter is None:
formatter = logging.Formatter(
"%(asctime)s.%(msecs)03d - %(levelname)-8s - %(name)s: %(message)s", datefmt="%y-%m-%d %H:%M:%S"
)
lg.setLevel(level)
if tofile:
if log_dir is not None:
Path(log_dir).mkdir(exist_ok=True, parents=True)
log_file = Path(log_dir) / f"{log_name}_{get_timestamp()}.log"
fh = logging.FileHandler(log_file, mode="w")
fh.setFormatter(formatter)
lg.addHandler(fh)
if screen:
sh = logging.StreamHandler()
if stream is not None:
sh = logging.StreamHandler(stream)
sh.setFormatter(formatter)
lg.addHandler(sh)

Expand Down
18 changes: 12 additions & 6 deletions docs/source/models/xtts.md
Original file line number Diff line number Diff line change
Expand Up @@ -163,12 +163,13 @@ from TTS.api import TTS
tts = TTS("tts_models/multilingual/multi-dataset/xtts_v2").to("cuda")

# generate speech by cloning a voice using default settings
tts.tts_to_file(text="It took me quite a long time to develop a voice, and now that I have it I'm not going to be silent.",
file_path="output.wav",
speaker="Ana Florence",
language="en",
split_sentences=True
)
tts.tts_to_file(
text="It took me quite a long time to develop a voice, and now that I have it I'm not going to be silent.",
file_path="output.wav",
speaker="Ana Florence",
language="en",
split_sentences=True
)
```


Expand Down Expand Up @@ -230,6 +231,11 @@ out = model.inference(
torchaudio.save("xtts.wav", torch.tensor(out["wav"]).unsqueeze(0), 24000)
```

You can also use the Coqui speakers:

```python
gpt_cond_latent, speaker_embedding = model.speaker_manager.speakers["Ana Florence"].values()
```

#### Streaming manually

Expand Down
Loading