Skip to content

Commit

Permalink
add manifests for whisper
Browse files Browse the repository at this point in the history
  • Loading branch information
yuekaizhang committed Feb 22, 2024
1 parent be001a8 commit 910e5db
Show file tree
Hide file tree
Showing 5 changed files with 115 additions and 54 deletions.
20 changes: 14 additions & 6 deletions egs/aishell4/ASR/local/compute_fbank_aishell4.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@
from pathlib import Path

import torch
from lhotse import ChunkedLilcomHdf5Writer, CutSet, Fbank, FbankConfig
from lhotse import ChunkedLilcomHdf5Writer, CutSet, WhisperFbank, WhisperFbankConfig, Fbank, FbankConfig
from lhotse.recipes.utils import read_manifests_if_cached

from icefall.utils import get_executor, str2bool
Expand All @@ -42,10 +42,10 @@
torch.set_num_interop_threads(1)


def compute_fbank_aishell4(num_mel_bins: int = 80, perturb_speed: bool = False):
def compute_fbank_aishell4(num_mel_bins: int = 80, perturb_speed: bool = False, whisper_fbank: bool = False):
src_dir = Path("data/manifests/aishell4")
output_dir = Path("data/fbank")
num_jobs = min(15, os.cpu_count())
num_jobs = min(8, os.cpu_count())

dataset_parts = (
"train_S",
Expand All @@ -70,7 +70,10 @@ def compute_fbank_aishell4(num_mel_bins: int = 80, perturb_speed: bool = False):
dataset_parts,
)

extractor = Fbank(FbankConfig(num_mel_bins=num_mel_bins))
if whisper_fbank:
extractor = WhisperFbank(WhisperFbankConfig(num_filters=num_mel_bins, device='cuda'))
else:
extractor = Fbank(FbankConfig(num_mel_bins=num_mel_bins))

with get_executor() as ex: # Initialize the executor only once.
for partition, m in manifests.items():
Expand Down Expand Up @@ -121,7 +124,12 @@ def get_args():
default=False,
help="Enable 0.9 and 1.1 speed perturbation for data augmentation. Default: False.",
)

parser.add_argument(
"--whisper-fbank",
type=str2bool,
default=False,
help="Use WhisperFbank instead of Fbank. Default: False.",
)
return parser.parse_args()


Expand All @@ -132,5 +140,5 @@ def get_args():

args = get_args()
compute_fbank_aishell4(
num_mel_bins=args.num_mel_bins, perturb_speed=args.perturb_speed
num_mel_bins=args.num_mel_bins, perturb_speed=args.perturb_speed, whisper_fbank=args.whisper_fbank
)
27 changes: 14 additions & 13 deletions egs/aishell4/ASR/prepare.sh
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,8 @@ export PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=python

set -eou pipefail

stage=-1
stop_stage=100
stage=20
stop_stage=20
perturb_speed=true


Expand Down Expand Up @@ -76,14 +76,24 @@ if [ $stage -le 1 ] && [ $stop_stage -ge 1 ]; then
fi

if [ $stage -le 2 ] && [ $stop_stage -ge 2 ]; then
log "Stage 2: Process aishell4"
log "Stage 2: Compute fbank for aishell4"
if [ ! -f data/fbank/aishell4/.fbank.done ]; then
mkdir -p data/fbank/aishell4
./local/compute_fbank_aishell4.py --perturb-speed ${perturb_speed}
touch data/fbank/aishell4/.fbank.done
fi
fi

# Value passed as --num-mel-bins when computing Whisper features in Stage 20.
whisper_mel_bins=80
# Stage 20: runs the same feature script as Stage 2, but with
# --whisper-fbank true so the script selects its Whisper extractor.
if [ $stage -le 20 ] && [ $stop_stage -ge 20 ]; then
log "Stage 20: Compute whisper fbank for aishell4"
# NOTE(review): this is the same marker file that Stage 2 checks and touches,
# so this stage is skipped once Stage 2 has completed (and vice versa).
# Confirm the two feature types are meant to be mutually exclusive.
if [ ! -f data/fbank/aishell4/.fbank.done ]; then
mkdir -p data/fbank/aishell4
./local/compute_fbank_aishell4.py --perturb-speed ${perturb_speed} --num-mel-bins ${whisper_mel_bins} --whisper-fbank true
touch data/fbank/aishell4/.fbank.done
fi
fi

if [ $stage -le 3 ] && [ $stop_stage -ge 3 ]; then
log "Stage 3: Prepare musan manifest"
# We assume that you have downloaded the musan corpus
Expand All @@ -106,16 +116,7 @@ if [ $stage -le 4 ] && [ $stop_stage -ge 4 ]; then
fi

if [ $stage -le 5 ] && [ $stop_stage -ge 5 ]; then
log "Stage 5: Compute fbank for aishell4"
if [ ! -f data/fbank/.aishell4.done ]; then
mkdir -p data/fbank
./local/compute_fbank_aishell4.py --perturb-speed ${perturb_speed}
touch data/fbank/.aishell4.done
fi
fi

if [ $stage -le 6 ] && [ $stop_stage -ge 6 ]; then
log "Stage 6: Prepare char based lang"
log "Stage 5: Prepare char based lang"
lang_char_dir=data/lang_char
mkdir -p $lang_char_dir

Expand Down
20 changes: 14 additions & 6 deletions egs/alimeeting/ASR/local/compute_fbank_alimeeting.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@
from pathlib import Path

import torch
from lhotse import CutSet, Fbank, FbankConfig, LilcomChunkyWriter
from lhotse import CutSet, WhisperFbank, WhisperFbankConfig, Fbank, FbankConfig, LilcomChunkyWriter
from lhotse.recipes.utils import read_manifests_if_cached

from icefall.utils import get_executor, str2bool
Expand All @@ -42,10 +42,10 @@
torch.set_num_interop_threads(1)


def compute_fbank_alimeeting(num_mel_bins: int = 80, perturb_speed: bool = False):
def compute_fbank_alimeeting(num_mel_bins: int = 80, perturb_speed: bool = False, whisper_fbank: bool = False):
src_dir = Path("data/manifests/alimeeting")
output_dir = Path("data/fbank")
num_jobs = min(15, os.cpu_count())
num_jobs = min(8, os.cpu_count())

dataset_parts = (
"train",
Expand All @@ -70,7 +70,10 @@ def compute_fbank_alimeeting(num_mel_bins: int = 80, perturb_speed: bool = False
dataset_parts,
)

extractor = Fbank(FbankConfig(num_mel_bins=num_mel_bins))
if whisper_fbank:
extractor = WhisperFbank(WhisperFbankConfig(num_filters=num_mel_bins, device='cuda'))
else:
extractor = Fbank(FbankConfig(num_mel_bins=num_mel_bins))

with get_executor() as ex: # Initialize the executor only once.
for partition, m in manifests.items():
Expand Down Expand Up @@ -121,7 +124,12 @@ def get_args():
default=False,
help="Enable 0.9 and 1.1 speed perturbation for data augmentation. Default: False.",
)

parser.add_argument(
"--whisper-fbank",
type=str2bool,
default=False,
help="Use the Whisper Fbank feature extractor. Default: False.",
)
return parser.parse_args()


Expand All @@ -132,5 +140,5 @@ def get_args():

args = get_args()
compute_fbank_alimeeting(
num_mel_bins=args.num_mel_bins, perturb_speed=args.perturb_speed
num_mel_bins=args.num_mel_bins, perturb_speed=args.perturb_speed, whisper_fbank=args.whisper_fbank
)
22 changes: 11 additions & 11 deletions egs/alimeeting/ASR/prepare.sh
Original file line number Diff line number Diff line change
Expand Up @@ -66,13 +66,22 @@ if [ $stage -le 1 ] && [ $stop_stage -ge 1 ]; then
fi

if [ $stage -le 2 ] && [ $stop_stage -ge 2 ]; then
log "Stage 2: Process alimeeting"
log "Stage 2: compute fbank for alimeeting"
if [ ! -f data/fbank/alimeeting/.fbank.done ]; then
mkdir -p data/fbank/alimeeting
./local/compute_fbank_alimeeting.py --perturb-speed ${perturb_speed}
fi
fi

# Value passed as --num-mel-bins when computing Whisper features in Stage 20.
whisper_mel_bins=80
# Stage 20: compute Whisper-style fbank features for alimeeting by running
# the regular feature script with --whisper-fbank true.
if [ $stage -le 20 ] && [ $stop_stage -ge 20 ]; then
  log "Stage 20: compute whisper fbank for alimeeting"
  if [ ! -f data/fbank/alimeeting/.fbank.done ]; then
    mkdir -p data/fbank/alimeeting
    ./local/compute_fbank_alimeeting.py --perturb-speed ${perturb_speed} --num-mel-bins ${whisper_mel_bins} --whisper-fbank true
    # Record completion so the guard above can skip the extraction on
    # re-runs; without this marker the features are recomputed every time.
    touch data/fbank/alimeeting/.fbank.done
  fi
fi

if [ $stage -le 3 ] && [ $stop_stage -ge 3 ]; then
log "Stage 3: Prepare musan manifest"
# We assume that you have downloaded the musan corpus
Expand All @@ -95,16 +104,7 @@ if [ $stage -le 4 ] && [ $stop_stage -ge 4 ]; then
fi

if [ $stage -le 5 ] && [ $stop_stage -ge 5 ]; then
log "Stage 5: Compute fbank for alimeeting"
if [ ! -f data/fbank/.alimeeting.done ]; then
mkdir -p data/fbank
./local/compute_fbank_alimeeting.py --perturb-speed True
touch data/fbank/.alimeeting.done
fi
fi

if [ $stage -le 6 ] && [ $stop_stage -ge 6 ]; then
log "Stage 6: Prepare char based lang"
log "Stage 5: Prepare char based lang"
lang_char_dir=data/lang_char
mkdir -p $lang_char_dir

Expand Down
80 changes: 62 additions & 18 deletions egs/multi_zh-hans/ASR/prepare.sh
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ if [ $stage -le 2 ] && [ $stop_stage -ge 2 ]; then

if [ ! -f data/fbank/.thchs30.done ]; then
mkdir -p data/fbank
./local/compute_fbank_thchs30.py
./local/compute_fbank_thchs30.py --speed-perturb true
touch data/fbank/.thchs30.done
fi
fi
Expand Down Expand Up @@ -137,7 +137,7 @@ if [ $stage -le 6 ] && [ $stop_stage -ge 6 ]; then

if [ ! -f data/fbank/.stcmds.done ]; then
mkdir -p data/fbank
./local/compute_fbank_stcmds.py
./local/compute_fbank_stcmds.py --speed-perturb true
touch data/fbank/.stcmds.done
fi
fi
Expand All @@ -151,15 +151,15 @@ if [ $stage -le 7 ] && [ $stop_stage -ge 7 ]; then
lhotse download primewords $dl_dir/primewords
fi

if [ ! -f data/manifests/.stcmds.done ]; then
if [ ! -f data/manifests/.primewords.done ]; then
mkdir -p data/manifests
lhotse prepare stcmds $dl_dir/primewords data/manifests/primewords
lhotse prepare primewords $dl_dir/primewords data/manifests/primewords
touch data/manifests/.primewords.done
fi

if [ ! -f data/fbank/.primewords.done ]; then
mkdir -p data/fbank
./local/compute_fbank_primewords.py
./local/compute_fbank_primewords.py --speed-perturb true
touch data/fbank/.primewords.done
fi
fi
Expand All @@ -180,7 +180,7 @@ if [ $stage -le 8 ] && [ $stop_stage -ge 8 ]; then

if [ ! -f data/fbank/.magicdata.done ]; then
mkdir -p data/fbank
./local/compute_fbank_magicdata.py
./local/compute_fbank_magicdata.py --speed-perturb true
touch data/fbank/.magicdata.done
fi
fi
Expand Down Expand Up @@ -291,10 +291,10 @@ if [ $stage -le 12 ] && [ $stop_stage -ge 12 ]; then
fi

log "Compute KeSpeech fbank for train_phase1"
./local/compute_fbank_kespeech_splits.py --num-splits ${num_splits} --training-subset train_phase1
./local/compute_fbank_kespeech_splits.py --speed-perturb true --num-splits ${num_splits} --training-subset train_phase1

log "Compute KeSpeech fbank for train_phase2"
./local/compute_fbank_kespeech_splits.py --num-splits ${num_splits} --training-subset train_phase2
./local/compute_fbank_kespeech_splits.py --speed-perturb true --num-splits ${num_splits} --training-subset train_phase2

log "Compute KeSpeech fbank for test/dev"
./local/compute_fbank_kespeech_dev_test.py
Expand Down Expand Up @@ -344,10 +344,10 @@ if [ $stage -le 120 ] && [ $stop_stage -ge 120 ]; then
fi

log "Compute KeSpeech fbank for train_phase1"
./local/compute_fbank_kespeech_splits.py --num-splits ${num_splits} --training-subset train_phase1 --num-mel-bins ${whisper_mel_bins} --whisper-fbank true
./local/compute_fbank_kespeech_splits.py --speed-perturb true --num-splits ${num_splits} --training-subset train_phase1 --num-mel-bins ${whisper_mel_bins} --whisper-fbank true

log "Compute KeSpeech fbank for train_phase2"
./local/compute_fbank_kespeech_splits.py --num-splits ${num_splits} --training-subset train_phase2 --num-mel-bins ${whisper_mel_bins} --whisper-fbank true
./local/compute_fbank_kespeech_splits.py --speed-perturb true --num-splits ${num_splits} --training-subset train_phase2 --num-mel-bins ${whisper_mel_bins} --whisper-fbank true

log "Compute KeSpeech fbank for test/dev"
./local/compute_fbank_kespeech_dev_test.py --num-mel-bins ${whisper_mel_bins} --whisper-fbank true
Expand All @@ -356,19 +356,63 @@ if [ $stage -le 120 ] && [ $stop_stage -ge 120 ]; then
fi
fi

if [ $stage -le 121 ] && [ $stop_stage -ge 121 ]; then
log "Stage 121: tmp"
log "Compute KeSpeech fbank for train_phase1"
./local/compute_fbank_kespeech_splits.py --num-splits ${num_splits} --stop 1 --training-subset train_phase1 --num-mel-bins ${whisper_mel_bins} --whisper-fbank true
if [ $stage -le 122 ] && [ $stop_stage -ge 122 ]; then
log "Stage 122: Prepare speed perturb versionKeSpeech for whisper"
./local/compute_fbank_kespeech_splits.py --speed-perturb true --num-splits ${num_splits} --training-subset train_phase1 --num-mel-bins ${whisper_mel_bins} --whisper-fbank true

log "Compute KeSpeech fbank for train_phase2"
./local/compute_fbank_kespeech_splits.py --num-splits ${num_splits} --training-subset train_phase2 --num-mel-bins ${whisper_mel_bins} --whisper-fbank true
./local/compute_fbank_kespeech_splits.py --speed-perturb true --num-splits ${num_splits} --training-subset train_phase2 --num-mel-bins ${whisper_mel_bins} --whisper-fbank true
fi

log "Compute KeSpeech fbank for test/dev"
./local/compute_fbank_kespeech_dev_test.py --num-mel-bins ${whisper_mel_bins} --whisper-fbank true
if [ $stage -le 121 ] && [ $stop_stage -ge 121 ]; then
log "Stage 121: Prepare MagicData, Primewords, ST-CMDS, THCHS-30 for whisper"

touch data/fbank/.kespeech.done
if [ ! -f data/manifests/.magicdata.done ]; then
mkdir -p data/manifests
lhotse prepare magicdata $dl_dir/magicdata data/manifests/magicdata
touch data/manifests/.magicdata.done
fi

if [ ! -f data/manifests/.primewords.done ]; then
mkdir -p data/manifests
lhotse prepare primewords $dl_dir/primewords data/manifests/primewords
touch data/manifests/.primewords.done
fi
if [ ! -f data/manifests/.stcmds.done ]; then
mkdir -p data/manifests
lhotse prepare stcmds $dl_dir/stcmds data/manifests/stcmds
touch data/manifests/.stcmds.done
fi

if [ ! -f data/manifests/.thchs30.done ]; then
mkdir -p data/manifests
lhotse prepare thchs-30 $dl_dir/thchs30 data/manifests/thchs30
touch data/manifests/.thchs30.done
fi

if [ ! -f data/fbank/.thchs30.done ]; then
mkdir -p data/fbank
./local/compute_fbank_thchs30.py --speed-perturb true --num-mel-bins ${whisper_mel_bins} --whisper-fbank true
touch data/fbank/.thchs30.done
fi

if [ ! -f data/fbank/.stcmds.done ]; then
mkdir -p data/fbank
./local/compute_fbank_stcmds.py --speed-perturb true --num-mel-bins ${whisper_mel_bins} --whisper-fbank true
touch data/fbank/.stcmds.done
fi
if [ ! -f data/fbank/.magicdata.done ]; then
mkdir -p data/fbank
./local/compute_fbank_magicdata.py --speed-perturb true --num-mel-bins ${whisper_mel_bins} --whisper-fbank true
touch data/fbank/.magicdata.done
fi

if [ ! -f data/fbank/.primewords.done ]; then
mkdir -p data/fbank
./local/compute_fbank_primewords.py --speed-perturb true --num-mel-bins ${whisper_mel_bins} --whisper-fbank true
touch data/fbank/.primewords.done
fi

fi


Expand Down

0 comments on commit 910e5db

Please sign in to comment.