-
Notifications
You must be signed in to change notification settings - Fork 111
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Doc: Add more paraformer models. (#553)
- Loading branch information
1 parent
0cf5db8
commit f67a245
Showing
12 changed files
with
817 additions
and
40 deletions.
There are no files selected for viewing
24 changes: 24 additions & 0 deletions
24
...d_models/offline-paraformer/code-paraformer/sherpa-onnx-paraformer-en-2024-03-09-int8.txt
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,24 @@ | ||
/project/sherpa-onnx/csrc/parse-options.cc:Read:361 sherpa-onnx-offline --tokens=./sherpa-onnx-paraformer-en-2024-03-09/tokens.txt --paraformer=./sherpa-onnx-paraformer-en-2024-03-09/model.int8.onnx ./sherpa-onnx-paraformer-en-2024-03-09/test_wavs/0.wav ./sherpa-onnx-paraformer-en-2024-03-09/test_wavs/1.wav ./sherpa-onnx-paraformer-en-2024-03-09/test_wavs/8k.wav | ||
|
||
OfflineRecognizerConfig(feat_config=OfflineFeatureExtractorConfig(sampling_rate=16000, feature_dim=80), model_config=OfflineModelConfig(transducer=OfflineTransducerModelConfig(encoder_filename="", decoder_filename="", joiner_filename=""), paraformer=OfflineParaformerModelConfig(model="./sherpa-onnx-paraformer-en-2024-03-09/model.int8.onnx"), nemo_ctc=OfflineNemoEncDecCtcModelConfig(model=""), whisper=OfflineWhisperModelConfig(encoder="", decoder="", language="", task="transcribe", tail_paddings=-1), tdnn=OfflineTdnnModelConfig(model=""), zipformer_ctc=OfflineZipformerCtcModelConfig(model=""), wenet_ctc=OfflineWenetCtcModelConfig(model=""), tokens="./sherpa-onnx-paraformer-en-2024-03-09/tokens.txt", num_threads=2, debug=False, provider="cpu", model_type=""), lm_config=OfflineLMConfig(model="", scale=0.5), ctc_fst_decoder_config=OfflineCtcFstDecoderConfig(graph="", max_active=3000), decoding_method="greedy_search", max_active_paths=4, hotwords_file="", hotwords_score=1.5, blank_penalty=0) | ||
Creating recognizer ... | ||
Started | ||
/project/sherpa-onnx/csrc/offline-stream.cc:AcceptWaveformImpl:119 Creating a resampler: | ||
in_sample_rate: 8000 | ||
output_sample_rate: 16000 | ||
|
||
Done! | ||
|
||
./sherpa-onnx-paraformer-en-2024-03-09/test_wavs/0.wav | ||
{"text": " after early nightfall the yellow lamps would light up here and there the squalid quarter of the brothels", "timestamps": [], "tokens":["after", "early", "ni@@", "ght@@", "fall", "the", "yel@@", "low", "la@@", "mp@@", "s", "would", "light", "up", "here", "and", "there", "the", "squ@@", "al@@", "id", "quarter", "of", "the", "bro@@", "the@@", "ls"]} | ||
---- | ||
./sherpa-onnx-paraformer-en-2024-03-09/test_wavs/1.wav | ||
{"text": " god as a direct consequence of the sin which man thus punished had given her a lovely child whose place was 'on' that same dishonoured bosom to connect her parent for ever with the race and descent of mortals and to be finally a blessed soul in heaven", "timestamps": [], "tokens":["god", "as", "a", "direct", "con@@", "sequence", "of", "the", "sin", "which", "man", "thus", "p@@", "uni@@", "shed", "had", "given", "her", "a", "lo@@", "vely", "child", "whose", "place", "was", "'on'", "that", "same", "di@@", "sh@@", "on@@", "ou@@", "red", "bo@@", "so@@", "m", "to", "connect", "her", "paren@@", "t", "for", "ever", "with", "the", "race", "and", "des@@", "cent", "of", "mor@@", "tal@@", "s", "and", "to", "be", "finally", "a", "bl@@", "essed", "soul", "in", "hea@@", "ven"]} | ||
---- | ||
./sherpa-onnx-paraformer-en-2024-03-09/test_wavs/8k.wav | ||
{"text": " yet these thoughts affected hester prynne less with hope than apprehension", "timestamps": [], "tokens":["yet", "these", "thoughts", "aff@@", "ected", "he@@", "ster", "pr@@", "y@@", "n@@", "ne", "less", "with", "hope", "than", "ap@@", "pre@@", "hen@@", "sion"]} | ||
---- | ||
num threads: 2 | ||
decoding method: greedy_search | ||
Elapsed seconds: 5.492 s | ||
Real time factor (RTF): 5.492 / 28.165 = 0.195 |
24 changes: 24 additions & 0 deletions
24
...rained_models/offline-paraformer/code-paraformer/sherpa-onnx-paraformer-en-2024-03-09.txt
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,24 @@ | ||
/project/sherpa-onnx/csrc/parse-options.cc:Read:361 sherpa-onnx-offline --tokens=./sherpa-onnx-paraformer-en-2024-03-09/tokens.txt --paraformer=./sherpa-onnx-paraformer-en-2024-03-09/model.onnx ./sherpa-onnx-paraformer-en-2024-03-09/test_wavs/0.wav ./sherpa-onnx-paraformer-en-2024-03-09/test_wavs/1.wav ./sherpa-onnx-paraformer-en-2024-03-09/test_wavs/8k.wav | ||
|
||
OfflineRecognizerConfig(feat_config=OfflineFeatureExtractorConfig(sampling_rate=16000, feature_dim=80), model_config=OfflineModelConfig(transducer=OfflineTransducerModelConfig(encoder_filename="", decoder_filename="", joiner_filename=""), paraformer=OfflineParaformerModelConfig(model="./sherpa-onnx-paraformer-en-2024-03-09/model.onnx"), nemo_ctc=OfflineNemoEncDecCtcModelConfig(model=""), whisper=OfflineWhisperModelConfig(encoder="", decoder="", language="", task="transcribe", tail_paddings=-1), tdnn=OfflineTdnnModelConfig(model=""), zipformer_ctc=OfflineZipformerCtcModelConfig(model=""), wenet_ctc=OfflineWenetCtcModelConfig(model=""), tokens="./sherpa-onnx-paraformer-en-2024-03-09/tokens.txt", num_threads=2, debug=False, provider="cpu", model_type=""), lm_config=OfflineLMConfig(model="", scale=0.5), ctc_fst_decoder_config=OfflineCtcFstDecoderConfig(graph="", max_active=3000), decoding_method="greedy_search", max_active_paths=4, hotwords_file="", hotwords_score=1.5, blank_penalty=0) | ||
Creating recognizer ... | ||
Started | ||
/project/sherpa-onnx/csrc/offline-stream.cc:AcceptWaveformImpl:119 Creating a resampler: | ||
in_sample_rate: 8000 | ||
output_sample_rate: 16000 | ||
|
||
Done! | ||
|
||
./sherpa-onnx-paraformer-en-2024-03-09/test_wavs/0.wav | ||
{"text": " after early nightfall the yellow lamps would light up here and there the squalid quarter of the brothels", "timestamps": [], "tokens":["after", "early", "ni@@", "ght@@", "fall", "the", "yel@@", "low", "la@@", "mp@@", "s", "would", "light", "up", "here", "and", "there", "the", "squ@@", "al@@", "id", "quarter", "of", "the", "bro@@", "the@@", "ls"]} | ||
---- | ||
./sherpa-onnx-paraformer-en-2024-03-09/test_wavs/1.wav | ||
{"text": " god as a direct consequence of the sin which man thus punished had given her a lovely child whose place was 'on' that same dishonoured bosom to connect her parent for ever with the race and descent of mortals and to be finally a blessed soul in heaven", "timestamps": [], "tokens":["god", "as", "a", "direct", "con@@", "sequence", "of", "the", "sin", "which", "man", "thus", "p@@", "uni@@", "shed", "had", "given", "her", "a", "lo@@", "vely", "child", "whose", "place", "was", "'on'", "that", "same", "di@@", "sh@@", "on@@", "ou@@", "red", "bo@@", "so@@", "m", "to", "connect", "her", "paren@@", "t", "for", "ever", "with", "the", "race", "and", "des@@", "cent", "of", "mor@@", "tal@@", "s", "and", "to", "be", "finally", "a", "bl@@", "essed", "soul", "in", "hea@@", "ven"]} | ||
---- | ||
./sherpa-onnx-paraformer-en-2024-03-09/test_wavs/8k.wav | ||
{"text": " yet these thoughts affected hester prynne less with hope than apprehension", "timestamps": [], "tokens":["yet", "these", "thoughts", "aff@@", "ected", "he@@", "ster", "pr@@", "y@@", "n@@", "ne", "less", "with", "hope", "than", "ap@@", "pre@@", "hen@@", "sion"]} | ||
---- | ||
num threads: 2 | ||
decoding method: greedy_search | ||
Elapsed seconds: 7.173 s | ||
Real time factor (RTF): 7.173 / 28.165 = 0.255 |
35 changes: 35 additions & 0 deletions
35
...ine-paraformer/code-paraformer/sherpa-onnx-paraformer-trilingual-zh-cantonese-en-int8.txt
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,35 @@ | ||
/project/sherpa-onnx/csrc/parse-options.cc:Read:361 sherpa-onnx-offline --tokens=./sherpa-onnx-paraformer-trilingual-zh-cantonese-en/tokens.txt --paraformer=./sherpa-onnx-paraformer-trilingual-zh-cantonese-en/model.int8.onnx ./sherpa-onnx-paraformer-trilingual-zh-cantonese-en/test_wavs/1.wav ./sherpa-onnx-paraformer-trilingual-zh-cantonese-en/test_wavs/2.wav ./sherpa-onnx-paraformer-trilingual-zh-cantonese-en/test_wavs/3-sichuan.wav ./sherpa-onnx-paraformer-trilingual-zh-cantonese-en/test_wavs/4-tianjin.wav ./sherpa-onnx-paraformer-trilingual-zh-cantonese-en/test_wavs/5-henan.wav ./sherpa-onnx-paraformer-trilingual-zh-cantonese-en/test_wavs/6-zh-en.wav | ||
|
||
OfflineRecognizerConfig(feat_config=OfflineFeatureExtractorConfig(sampling_rate=16000, feature_dim=80), model_config=OfflineModelConfig(transducer=OfflineTransducerModelConfig(encoder_filename="", decoder_filename="", joiner_filename=""), paraformer=OfflineParaformerModelConfig(model="./sherpa-onnx-paraformer-trilingual-zh-cantonese-en/model.int8.onnx"), nemo_ctc=OfflineNemoEncDecCtcModelConfig(model=""), whisper=OfflineWhisperModelConfig(encoder="", decoder="", language="", task="transcribe", tail_paddings=-1), tdnn=OfflineTdnnModelConfig(model=""), zipformer_ctc=OfflineZipformerCtcModelConfig(model=""), wenet_ctc=OfflineWenetCtcModelConfig(model=""), tokens="./sherpa-onnx-paraformer-trilingual-zh-cantonese-en/tokens.txt", num_threads=2, debug=False, provider="cpu", model_type=""), lm_config=OfflineLMConfig(model="", scale=0.5), ctc_fst_decoder_config=OfflineCtcFstDecoderConfig(graph="", max_active=3000), decoding_method="greedy_search", max_active_paths=4, hotwords_file="", hotwords_score=1.5, blank_penalty=0) | ||
Creating recognizer ... | ||
Started | ||
/project/sherpa-onnx/csrc/offline-paraformer-greedy-search-decoder.cc:Decode:65 time stamp for batch: 0, 13 vs -1 | ||
/project/sherpa-onnx/csrc/offline-paraformer-greedy-search-decoder.cc:Decode:65 time stamp for batch: 1, 15 vs -1 | ||
/project/sherpa-onnx/csrc/offline-paraformer-greedy-search-decoder.cc:Decode:65 time stamp for batch: 2, 40 vs -1 | ||
/project/sherpa-onnx/csrc/offline-paraformer-greedy-search-decoder.cc:Decode:65 time stamp for batch: 3, 41 vs -1 | ||
/project/sherpa-onnx/csrc/offline-paraformer-greedy-search-decoder.cc:Decode:65 time stamp for batch: 4, 37 vs -1 | ||
/project/sherpa-onnx/csrc/offline-paraformer-greedy-search-decoder.cc:Decode:65 time stamp for batch: 5, 16 vs -1 | ||
Done! | ||
|
||
./sherpa-onnx-paraformer-trilingual-zh-cantonese-en/test_wavs/1.wav | ||
{"text": "有无人知道湾仔活道系点去㗎", "timestamps": [], "tokens":["有", "无", "人", "知", "道", "湾", "仔", "活", "道", "系", "点", "去", "㗎"]} | ||
---- | ||
./sherpa-onnx-paraformer-trilingual-zh-cantonese-en/test_wavs/2.wav | ||
{"text": "我喺黄大仙九龙塘联合道荡失路啊", "timestamps": [], "tokens":["我", "喺", "黄", "大", "仙", "九", "龙", "塘", "联", "合", "道", "荡", "失", "路", "啊"]} | ||
---- | ||
./sherpa-onnx-paraformer-trilingual-zh-cantonese-en/test_wavs/3-sichuan.wav | ||
{"text": "自己就是在那个在那个就是在情节里面就是感觉是演得特别好就是好像很真实一样你知道吧", "timestamps": [], "tokens":["自", "己", "就", "是", "在", "那", "个", "在", "那", "个", "就", "是", "在", "情", "节", "里", "面", "就", "是", "感", "觉", "是", "演", "得", "特", "别", "好", "就", "是", "好", "像", "很", "真", "实", "一", "样", "你", "知", "道", "吧"]} | ||
---- | ||
./sherpa-onnx-paraformer-trilingual-zh-cantonese-en/test_wavs/4-tianjin.wav | ||
{"text": "其实他就是怕每个人都可以守法就这意思法律意识太单薄了而且就是嗯也不顾及到别人的感受", "timestamps": [], "tokens":["其", "实", "他", "就", "是", "怕", "每", "个", "人", "都", "可", "以", "守", "法", "就", "这", "意", "思", "法", "律", "意", "识", "太", "单", "薄", "了", "而", "且", "就", "是", "嗯", "也", "不", "顾", "及", "到", "别", "人", "的", "感", "受"]} | ||
---- | ||
./sherpa-onnx-paraformer-trilingual-zh-cantonese-en/test_wavs/5-henan.wav | ||
{"text": "它这个管一下都通到有时候都通到七八层楼高然后它这管一下就可以浇到那那柱子上", "timestamps": [], "tokens":["它", "这", "个", "管", "一", "下", "都", "通", "到", "有", "时", "候", "都", "通", "到", "七", "八", "层", "楼", "高", "然", "后", "它", "这", "管", "一", "下", "就", "可", "以", "浇", "到", "那", "那", "柱", "子", "上"]} | ||
---- | ||
./sherpa-onnx-paraformer-trilingual-zh-cantonese-en/test_wavs/6-zh-en.wav | ||
{"text": " yesterday was 星期一 today is tuesday 明天是星期三", "timestamps": [], "tokens":["yesterday", "was", "星", "期", "一", "today", "is", "tu@@", "es@@", "day", "明", "天", "是", "星", "期", "三"]} | ||
---- | ||
num threads: 2 | ||
decoding method: greedy_search | ||
Elapsed seconds: 6.290 s | ||
Real time factor (RTF): 6.290 / 42.054 = 0.150 |
35 changes: 35 additions & 0 deletions
35
.../offline-paraformer/code-paraformer/sherpa-onnx-paraformer-trilingual-zh-cantonese-en.txt
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,35 @@ | ||
/project/sherpa-onnx/csrc/parse-options.cc:Read:361 sherpa-onnx-offline --tokens=./sherpa-onnx-paraformer-trilingual-zh-cantonese-en/tokens.txt --paraformer=./sherpa-onnx-paraformer-trilingual-zh-cantonese-en/model.onnx ./sherpa-onnx-paraformer-trilingual-zh-cantonese-en/test_wavs/1.wav ./sherpa-onnx-paraformer-trilingual-zh-cantonese-en/test_wavs/2.wav ./sherpa-onnx-paraformer-trilingual-zh-cantonese-en/test_wavs/3-sichuan.wav ./sherpa-onnx-paraformer-trilingual-zh-cantonese-en/test_wavs/4-tianjin.wav ./sherpa-onnx-paraformer-trilingual-zh-cantonese-en/test_wavs/5-henan.wav ./sherpa-onnx-paraformer-trilingual-zh-cantonese-en/test_wavs/6-zh-en.wav | ||
|
||
OfflineRecognizerConfig(feat_config=OfflineFeatureExtractorConfig(sampling_rate=16000, feature_dim=80), model_config=OfflineModelConfig(transducer=OfflineTransducerModelConfig(encoder_filename="", decoder_filename="", joiner_filename=""), paraformer=OfflineParaformerModelConfig(model="./sherpa-onnx-paraformer-trilingual-zh-cantonese-en/model.onnx"), nemo_ctc=OfflineNemoEncDecCtcModelConfig(model=""), whisper=OfflineWhisperModelConfig(encoder="", decoder="", language="", task="transcribe", tail_paddings=-1), tdnn=OfflineTdnnModelConfig(model=""), zipformer_ctc=OfflineZipformerCtcModelConfig(model=""), wenet_ctc=OfflineWenetCtcModelConfig(model=""), tokens="./sherpa-onnx-paraformer-trilingual-zh-cantonese-en/tokens.txt", num_threads=2, debug=False, provider="cpu", model_type=""), lm_config=OfflineLMConfig(model="", scale=0.5), ctc_fst_decoder_config=OfflineCtcFstDecoderConfig(graph="", max_active=3000), decoding_method="greedy_search", max_active_paths=4, hotwords_file="", hotwords_score=1.5, blank_penalty=0) | ||
Creating recognizer ... | ||
Started | ||
/project/sherpa-onnx/csrc/offline-paraformer-greedy-search-decoder.cc:Decode:65 time stamp for batch: 0, 13 vs -1 | ||
/project/sherpa-onnx/csrc/offline-paraformer-greedy-search-decoder.cc:Decode:65 time stamp for batch: 1, 15 vs -1 | ||
/project/sherpa-onnx/csrc/offline-paraformer-greedy-search-decoder.cc:Decode:65 time stamp for batch: 2, 40 vs -1 | ||
/project/sherpa-onnx/csrc/offline-paraformer-greedy-search-decoder.cc:Decode:65 time stamp for batch: 3, 41 vs -1 | ||
/project/sherpa-onnx/csrc/offline-paraformer-greedy-search-decoder.cc:Decode:65 time stamp for batch: 4, 37 vs -1 | ||
/project/sherpa-onnx/csrc/offline-paraformer-greedy-search-decoder.cc:Decode:65 time stamp for batch: 5, 16 vs -1 | ||
Done! | ||
|
||
./sherpa-onnx-paraformer-trilingual-zh-cantonese-en/test_wavs/1.wav | ||
{"text": "有无人知道湾仔活道系点去㗎", "timestamps": [], "tokens":["有", "无", "人", "知", "道", "湾", "仔", "活", "道", "系", "点", "去", "㗎"]} | ||
---- | ||
./sherpa-onnx-paraformer-trilingual-zh-cantonese-en/test_wavs/2.wav | ||
{"text": "我喺黄大仙九龙塘联合道荡失路啊", "timestamps": [], "tokens":["我", "喺", "黄", "大", "仙", "九", "龙", "塘", "联", "合", "道", "荡", "失", "路", "啊"]} | ||
---- | ||
./sherpa-onnx-paraformer-trilingual-zh-cantonese-en/test_wavs/3-sichuan.wav | ||
{"text": "自己就是在那个在那个就是在情节里面就是感觉是演得特别好就是好像很真实一样你知道吧", "timestamps": [], "tokens":["自", "己", "就", "是", "在", "那", "个", "在", "那", "个", "就", "是", "在", "情", "节", "里", "面", "就", "是", "感", "觉", "是", "演", "得", "特", "别", "好", "就", "是", "好", "像", "很", "真", "实", "一", "样", "你", "知", "道", "吧"]} | ||
---- | ||
./sherpa-onnx-paraformer-trilingual-zh-cantonese-en/test_wavs/4-tianjin.wav | ||
{"text": "其实他就是怕每个人都可以守法就这意思法律意识太单薄了而且就是嗯也不顾及到别人的感受", "timestamps": [], "tokens":["其", "实", "他", "就", "是", "怕", "每", "个", "人", "都", "可", "以", "守", "法", "就", "这", "意", "思", "法", "律", "意", "识", "太", "单", "薄", "了", "而", "且", "就", "是", "嗯", "也", "不", "顾", "及", "到", "别", "人", "的", "感", "受"]} | ||
---- | ||
./sherpa-onnx-paraformer-trilingual-zh-cantonese-en/test_wavs/5-henan.wav | ||
{"text": "它这个管一下都通到有时候都通到七八层楼高然后它这管一下就可以浇到那那柱子上", "timestamps": [], "tokens":["它", "这", "个", "管", "一", "下", "都", "通", "到", "有", "时", "候", "都", "通", "到", "七", "八", "层", "楼", "高", "然", "后", "它", "这", "管", "一", "下", "就", "可", "以", "浇", "到", "那", "那", "柱", "子", "上"]} | ||
---- | ||
./sherpa-onnx-paraformer-trilingual-zh-cantonese-en/test_wavs/6-zh-en.wav | ||
{"text": " yesterday was 星期一 today is tuesday 明天是星期三", "timestamps": [], "tokens":["yesterday", "was", "星", "期", "一", "today", "is", "tu@@", "es@@", "day", "明", "天", "是", "星", "期", "三"]} | ||
---- | ||
num threads: 2 | ||
decoding method: greedy_search | ||
Elapsed seconds: 6.871 s | ||
Real time factor (RTF): 6.871 / 42.054 = 0.163 |
Oops, something went wrong.