Skip to content

Commit

Permalink
Merge branch 'master' into tts-emilia
Browse files Browse the repository at this point in the history
  • Loading branch information
pzelasko authored Oct 21, 2024
2 parents 7c6aa94 + a30720b commit ae81d19
Show file tree
Hide file tree
Showing 7 changed files with 667 additions and 0 deletions.
2 changes: 2 additions & 0 deletions docs/corpus.rst
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,8 @@ a CLI tool that create the manifests given a corpus directory.
- :func:`lhotse.recipes.prepare_fisher_english`
* - Fisher Spanish
- :func:`lhotse.recipes.prepare_fisher_spanish`
* - FLEURS
- :func:`lhotse.recipes.prepare_fleurs`
* - Fluent Speech Commands
- :func:`lhotse.recipes.slu`
* - GALE Arabic Broadcast Speech
Expand Down
2 changes: 2 additions & 0 deletions lhotse/bin/modes/recipes/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@
from .eval2000 import *
from .fisher_english import *
from .fisher_spanish import *
from .fleurs import *
from .gale_arabic import *
from .gale_mandarin import *
from .gigaspeech import *
Expand Down Expand Up @@ -66,6 +67,7 @@
from .nsc import *
from .peoples_speech import *
from .primewords import *
from .radio import *
from .reazonspeech import *
from .rir_noise import *
from .sbcsae import *
Expand Down
68 changes: 68 additions & 0 deletions lhotse/bin/modes/recipes/fleurs.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
from typing import Optional, Sequence, Union

import click

from lhotse.bin.modes import download, prepare
from lhotse.recipes.fleurs import download_fleurs, prepare_fleurs
from lhotse.utils import Pathlike

__all__ = ["fleurs"]


# Click command attached to the ``prepare`` group; click derives the command
# name from the function name (``fleurs``).
# CORPUS_DIR must already exist (enforced by ``click.Path(exists=True)``);
# OUTPUT_DIR is forwarded to the recipe as-is.
@prepare.command(context_settings=dict(show_default=True))
@click.argument("corpus_dir", type=click.Path(exists=True, dir_okay=True))
@click.argument("output_dir", type=click.Path())
@click.option(
    "-j",
    "--num-jobs",
    type=int,
    default=1,
    help="How many threads to use (can give good speed-ups with slow disks).",
)
@click.option(
    "-l",
    "--lang",
    multiple=True,
    default=["all"],
    # The example must name this recipe and use FLEURS-style language codes;
    # the previous text was copied from another recipe and misspelled "lhotse".
    help="Specify which languages to prepare, e.g., "
    " lhotse prepare fleurs corpus_dir output_dir -l hi_in -l en_us ",
)
def fleurs(
    corpus_dir: Pathlike,
    output_dir: Pathlike,
    num_jobs: int,
    lang: Optional[Union[str, Sequence[str]]],
):
    """Fleurs ASR data preparation."""
    # NOTE: the docstring above is what click prints as the command help, so it
    # is kept short; parameter notes live in comments instead.
    # ``--lang`` is declared with ``multiple=True``, so ``lang`` arrives as a
    # sequence of codes (the declared default is the single entry "all").
    prepare_fleurs(
        corpus_dir,
        output_dir=output_dir,
        num_jobs=num_jobs,
        languages=lang,
    )


# Click command attached to the ``download`` group; click derives the command
# name from the function name (``fleurs``). TARGET_DIR is a required
# positional argument and is forwarded to the recipe as-is.
@download.command(context_settings=dict(show_default=True))
@click.argument("target_dir", type=click.Path())
@click.option(
    "-l",
    "--lang",
    multiple=True,
    default=["all"],
    # Each example is quoted and includes the required TARGET_DIR; the
    # previous text ran two invocations together and the second one omitted
    # the positional argument.
    help="Specify which languages to download, e.g., "
    "'lhotse download fleurs . -l hi_in -l en_us'. "
    "With the default ('all'): 'lhotse download fleurs .'",
)
@click.option(
    "--force-download",
    type=bool,
    is_flag=True,
    default=False,
    help="Specify whether to overwrite an existing archive",
)
def fleurs(
    target_dir: Pathlike,
    lang: Optional[Union[str, Sequence[str]]],
    force_download: bool = False,
):
    """FLEURS download."""
    # NOTE: the docstring above is what click prints as the command help, so it
    # is kept short; parameter notes live in comments instead.
    # ``--lang`` is declared with ``multiple=True``, so ``lang`` arrives as a
    # sequence of codes (the declared default is the single entry "all").
    download_fleurs(
        target_dir,
        languages=lang,
        force_download=force_download,
    )
41 changes: 41 additions & 0 deletions lhotse/bin/modes/recipes/radio.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
from typing import List, Optional, Sequence, Tuple, Union

import click

from lhotse.bin.modes import prepare
from lhotse.recipes.radio import prepare_radio
from lhotse.utils import Pathlike

__all__ = ["radio"]


# Click command attached to the ``prepare`` group; click derives the command
# name from the function name (``radio``). Neither path argument is required
# to exist beforehand (no ``exists=True`` on either ``click.Path``).
@prepare.command(context_settings={"show_default": True})
@click.argument("corpus_dir", type=click.Path(dir_okay=True))
@click.argument("output_dir", type=click.Path(dir_okay=True))
@click.option(
    "-d", "--min-seg-dur", type=float, default=0.5,
    help="The minimum segment duration",
)
@click.option(
    "-j", "--num-jobs", type=int, default=4,
    help="The number of parallel threads to use for data preparation",
)
def radio(
    corpus_dir: Pathlike,
    output_dir: Pathlike,
    min_seg_dur: float = 0.5,
    num_jobs: int = 4,
):
    """Data preparation"""
    # The CLI spells the option ``--min-seg-dur``; the recipe function takes
    # the same value under the keyword ``min_segment_duration``.
    recipe_kwargs = {
        "output_dir": output_dir,
        "num_jobs": num_jobs,
        "min_segment_duration": min_seg_dur,
    }
    prepare_radio(corpus_dir, **recipe_kwargs)
5 changes: 5 additions & 0 deletions lhotse/recipes/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
from .eval2000 import prepare_eval2000
from .fisher_english import prepare_fisher_english
from .fisher_spanish import prepare_fisher_spanish
from .fleurs import download_fleurs, prepare_fleurs
from .gale_arabic import prepare_gale_arabic
from .gale_mandarin import prepare_gale_mandarin
from .gigaspeech import prepare_gigaspeech
Expand Down Expand Up @@ -66,6 +67,7 @@
from .musan import download_musan, prepare_musan
from .nsc import prepare_nsc
from .peoples_speech import prepare_peoples_speech
from .radio import prepare_radio
from .reazonspeech import download_reazonspeech, prepare_reazonspeech
from .rir_noise import download_rir_noise, prepare_rir_noise
from .sbcsae import download_sbcsae, prepare_sbcsae
Expand Down Expand Up @@ -145,6 +147,8 @@
"prepare_eval2000",
"prepare_fisher_english",
"prepare_fisher_spanish",
"download_fleurs",
"prepare_fleurs",
"prepare_gale_arabic",
"prepare_gale_mandarin",
"prepare_gigaspeech",
Expand Down Expand Up @@ -194,6 +198,7 @@
"prepare_peoples_speech",
"download_reazonspeech",
"prepare_reazonspeech",
"prepare_radio",
"download_rir_noise",
"prepare_rir_noise",
"prepare_slu",
Expand Down
Loading

0 comments on commit ae81d19

Please sign in to comment.