-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathdata.py
37 lines (33 loc) · 1.95 KB
/
data.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
# data.py
from dataclasses import dataclass
from typing import Any, List, Optional, Tuple, Union, Dict
import numpy as np
@dataclass
class Word:
    """A single word together with its start/end times and a probability."""

    word: str  # Text of the word
    start: float  # Start time of the word (presumably seconds -- confirm with producer)
    end: float  # End time of the word (same unit as ``start``)
    probability: float  # Probability value reported for this word
@dataclass
class TextSegment:
    """A piece of text with start/end times and optional per-word detail."""

    text: str  # Text of the segment
    start: float  # Start time of the segment
    end: float  # End time of the segment
    words: Optional[List[Word]] = None  # Words making up the segment, if provided
    # Probability of the word detected at this point in the audio,
    # not how likely the word is to be correct.
    probability: Optional[float] = None
@dataclass
class AudioData:
    """Container for one audio chunk and the results derived from it.

    Only ``raw_audio_data`` is required; every other field defaults to
    ``None``.
    """

    raw_audio_data: bytes  # Current audio chunk from stream
    audio_buffer: Optional[bytes] = None  # Buffer of n seconds of raw audio data
    audio_buffer_time: Optional[float] = None  # Time duration of the audio buffer
    # Offset of the audio buffer start from the start of the audio stream
    audio_buffer_start_after: Optional[float] = None
    audio_data: Optional[np.ndarray] = None  # Audio data converted to mono waveform
    audio_data_sample_rate: Optional[int] = None  # Sample rate of the audio data after conversion
    # Voice activity detection result.  Spelled with ``Union`` (rather than
    # ``float | ...``) to match the typing-module style of the other fields
    # and to stay importable on Python versions before 3.10.
    vad_result: Optional[
        List[Dict[str, Union[float, List[Tuple[float, float]]]]]
    ] = None
    # vad_audio_result: Optional[np.ndarray] = None  # Voice activity detection result
    # Detected language of the audio data (language code, probability)
    language: Optional[Tuple[str, float]] = None
    # Transcribed segments as text with timestamps
    transcribed_segments: Optional[List[TextSegment]] = None
    # cleaned_words: Optional[List[str]] = None  # List of transcribed words. Hallucinations removed.
    confirmed_words: Optional[List[Word]] = None  # List of confirmed words
    unconfirmed_words: Optional[List[Word]] = None  # List of unconfirmed words
    translations: Optional[Dict[str, str]] = None  # Translations to target languages