From a5df43c9632b6271cb0678d257a4205609ab0fae Mon Sep 17 00:00:00 2001 From: Ben Foley Date: Wed, 21 Sep 2022 16:36:05 +1000 Subject: [PATCH] Ben transcribe audioname (#311) Harry already approved it, I just closed without merging doh. --- docs/wiki/elpis-workshop.md | 3 +-- elpis/engines/common/utilities/resampling.py | 2 +- elpis/engines/hft/objects/transcription.py | 12 ++++++++---- elpis/test/test_pipeline.py | 2 +- 4 files changed, 11 insertions(+), 8 deletions(-) diff --git a/docs/wiki/elpis-workshop.md b/docs/wiki/elpis-workshop.md index 88fd31d2..9cf69cb6 100644 --- a/docs/wiki/elpis-workshop.md +++ b/docs/wiki/elpis-workshop.md @@ -286,8 +286,7 @@ The transcription text may be shown in a range of tones from black to light grey ![](assets/latest/145-transcribed-confidence.png) -Listen in Elan. -> If you are using your own audio, rename the audio to `audio.wav`. +Listen in Elan. You will need to move the audio file into the same location as the Elan file for Elan to link to it. ![](assets/latest/150-elan.png) diff --git a/elpis/engines/common/utilities/resampling.py b/elpis/engines/common/utilities/resampling.py index 60380ecc..702f2761 100644 --- a/elpis/engines/common/utilities/resampling.py +++ b/elpis/engines/common/utilities/resampling.py @@ -8,7 +8,7 @@ from werkzeug.datastructures import FileStorage -ORIGINAL_SOUND_FILE_DIRECTORY = Path("/tmp/origial_sound_files/") +ORIGINAL_SOUND_FILE_DIRECTORY = Path("/tmp/original_sound_files/") def load_audio(file: Path, target_sample_rate: int = None) -> Tuple[np.ndarray, int]: diff --git a/elpis/engines/hft/objects/transcription.py b/elpis/engines/hft/objects/transcription.py index 6d5a5bf1..2d5f5851 100644 --- a/elpis/engines/hft/objects/transcription.py +++ b/elpis/engines/hft/objects/transcription.py @@ -38,7 +38,8 @@ class HFTTranscription(BaseTranscription): def __init__(self, **kwargs) -> None: super().__init__(**kwargs) # Setup paths - self.audio_file_path = self.path.joinpath("audio.wav") + self.audio_filename = None + self.audio_file_path = None self.test_labels_path = self.path / "test-labels-path.txt" self.text_path = self.path / "one-best-hypothesis.txt" self.xml_path = self.path / "transcription.xml" @@ -195,7 +196,7 @@ def _save_utterances(self, utterances) -> None: """ result = pympi.Elan.Eaf(author="elpis") - result.add_linked_file("audio.wav") + result.add_linked_file(self.audio_filename) result.add_tier("default") to_millis = lambda seconds: int(seconds * 1000) @@ -205,8 +206,11 @@ def _save_utterances(self, utterances) -> None: pympi.Elan.to_eaf(self.elan_path, result) - def prepare_audio(self, audio: Path, on_complete: callable = None): - logger.info(f"=== Prepare audio {audio} {self.audio_file_path}") + def prepare_audio(self, audio: FileStorage, on_complete: callable = None): + logger.info(f"=== Prepare audio for transcription {audio}") + self.audio_filename = audio.filename + self.audio_file_path = self.path.joinpath(self.audio_filename) + resampler.resample_from_file_storage(audio, self.audio_file_path, HFTModel.SAMPLING_RATE) if on_complete is not None: on_complete() diff --git a/elpis/test/test_pipeline.py b/elpis/test/test_pipeline.py index cf562aea..b0d3ed41 100644 --- a/elpis/test/test_pipeline.py +++ b/elpis/test/test_pipeline.py @@ -92,6 +92,6 @@ def pipeline_upto_step_4(pipeline_upto_step_3): # Make a transcription interface and transcribe unseen audio to elan. t = kaldi.new_transcription("transcription_w") t.link(m) - t.transcribe_algin("/recordings/untranscribed/audio.wav") + t.transcribe_align("/recordings/untranscribed/audio.wav") return (kaldi, ds, pd, m, t)