Skip to content

Commit

Permalink
Add zh and code-switch support for gradio
Browse files Browse the repository at this point in the history
  • Loading branch information
SWivid committed Oct 15, 2024
1 parent ce5fc58 commit 21900ba
Showing 1 changed file with 7 additions and 6 deletions.
13 changes: 7 additions & 6 deletions gradio_app.py
Original file line number Diff line number Diff line change
Expand Up @@ -112,15 +112,15 @@ def chunk_text(text, max_chars=135):
chunks = []
current_chunk = ""
# Split the text into sentences based on punctuation followed by whitespace
sentences = re.split(r'(?<=[;:,.!?])\s+', text)
sentences = re.split(r'(?<=[;:,.!?])\s+|(?<=[；：，。！？])', text)

for sentence in sentences:
if len(current_chunk) + len(sentence) <= max_chars:
current_chunk += sentence + " "
if len(current_chunk.encode('utf-8')) + len(sentence.encode('utf-8')) <= max_chars:
current_chunk += sentence + " " if sentence and len(sentence[-1].encode('utf-8')) == 1 else sentence
else:
if current_chunk:
chunks.append(current_chunk.strip())
current_chunk = sentence + " "
current_chunk = sentence + " " if sentence and len(sentence[-1].encode('utf-8')) == 1 else sentence

if current_chunk:
chunks.append(current_chunk.strip())
Expand Down Expand Up @@ -258,7 +258,7 @@ def infer(ref_audio_orig, ref_text, gen_text, exp_name, remove_silence, cross_fa
aseg = AudioSegment.from_file(ref_audio_orig)

non_silent_segs = silence.split_on_silence(
aseg, min_silence_len=1000, silence_thresh=-50, keep_silence=500
aseg, min_silence_len=1000, silence_thresh=-50, keep_silence=1000
)
non_silent_wave = AudioSegment.silent(duration=0)
for non_silent_seg in non_silent_segs:
Expand Down Expand Up @@ -295,7 +295,8 @@ def infer(ref_audio_orig, ref_text, gen_text, exp_name, remove_silence, cross_fa
audio, sr = torchaudio.load(ref_audio)

# Use the new chunk_text function to split gen_text
gen_text_batches = chunk_text(gen_text, max_chars=135)
max_chars = int(len(ref_text.encode('utf-8')) / (audio.shape[-1] / sr) * (25 - audio.shape[-1] / sr))
gen_text_batches = chunk_text(gen_text, max_chars=max_chars)
print('ref_text', ref_text)
for i, batch_text in enumerate(gen_text_batches):
print(f'gen_text {i}', batch_text)
Expand Down

0 comments on commit 21900ba

Please sign in to comment.