Skip to content

Commit

Permalink
Update subgen.py
Browse files Browse the repository at this point in the history
  • Loading branch information
McCloudS authored Dec 4, 2024
1 parent 5b14057 commit 2c7f526
Showing 1 changed file with 46 additions and 6 deletions.
52 changes: 46 additions & 6 deletions subgen.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ def convert_to_bool(in_bool):
lrc_for_audio_files = convert_to_bool(os.getenv('LRC_FOR_AUDIO_FILES', True))
custom_regroup = os.getenv('CUSTOM_REGROUP', 'cm_sl=84_sl=42++++++1')
detect_language_length = int(os.getenv('DETECT_LANGUAGE_LENGTH', 30))
detect_language_offset = int(os.getenv('DETECT_LANGUAGE_START_OFFSET', 90))
detect_language_offset = int(os.getenv('DETECT_LANGUAGE_START_OFFSET', 0))
skipifexternalsub = convert_to_bool(os.getenv('SKIPIFEXTERNALSUB', False))
skip_if_to_transcribe_sub_already_exist = convert_to_bool(os.getenv('SKIP_IF_TO_TRANSCRIBE_SUB_ALREADY_EXIST', True))
skipifinternalsublang = LanguageCode.from_string(os.getenv('SKIPIFINTERNALSUBLANG', ''))
Expand Down Expand Up @@ -440,7 +440,7 @@ async def detect_language(
"detected_language": force_detected_language_to.to_name(),
"language_code": force_detected_language_to.to_iso_639_1()
}

global detect_language_length, detect_language_offset
detected_language = LanguageCode.NONE
language_code = 'und'
Expand All @@ -464,8 +464,8 @@ async def detect_language(
task_id = { 'path': f"Bazarr-detect-language-{random_name}" }
task_queue.put(task_id)
args = {}
sample_rate = next(stream.rate for stream in av.open(audio_file.file).streams if stream.type == 'audio')
logging.info(f"Sample rate is: {sample_rate}")
#sample_rate = next(stream.rate for stream in av.open(audio_file.file).streams if stream.type == 'audio')
#logging.info(f"Sample rate is: {sample_rate}")
audio_file.file.seek(0)
args['progress_callback'] = progress

Expand All @@ -474,7 +474,7 @@ async def detect_language(
args['input_sr'] = 16000
else:
#args['audio'] = whisper.pad_or_trim(np.frombuffer(audio_file.file.read(), np.int16).flatten().astype(np.float32) / 32768.0, args['input_sr'] * int(detect_language_length))
args['audio'] = extract_audio_segment_to_memory(audio_file, detect_language_offset, detect_language_length).read()
args['audio'] = await get_audio_chunk(audio_file, detect_lang_offset, detect_lang_length)
args['input_sr'] = 16000

args.update(kwargs)
Expand All @@ -488,12 +488,45 @@ async def detect_language(
logging.info(f"Error processing or transcribing Bazarr {audio_file.filename}: {e}")

finally:
await audio_file.close()
#await audio_file.close()
task_queue.task_done()
delete_model()

return {"detected_language": detected_language.to_name(), "language_code": language_code}

async def get_audio_chunk(audio_file, offset=detect_language_offset, length=detect_language_length, sample_rate=16000, audio_format=np.int16):
"""
Extract a chunk of audio from a file, starting at the given offset and of the given length.
:param audio_file: The audio file (UploadFile or file-like object).
:param offset: The offset in seconds to start the extraction.
:param length: The length in seconds for the chunk to be extracted.
:param sample_rate: The sample rate of the audio (default 16000).
:param audio_format: The audio format to interpret (default int16, 2 bytes per sample).
:return: A numpy array containing the extracted audio chunk.
"""

# Number of bytes per sample (for int16, 2 bytes per sample)
bytes_per_sample = np.dtype(audio_format).itemsize

# Calculate the start byte based on offset and sample rate
start_byte = offset * sample_rate * bytes_per_sample

# Calculate the length in bytes based on the length in seconds
length_in_bytes = length * sample_rate * bytes_per_sample

# Seek to the start position (this assumes the audio_file is a file-like object)
await audio_file.seek(start_byte)

# Read the required chunk of audio (length_in_bytes)
chunk = await audio_file.read(length_in_bytes)

# Convert the chunk into a numpy array (normalized to float32)
audio_data = np.frombuffer(chunk, dtype=audio_format).flatten().astype(np.float32) / 32768.0

return audio_data

def detect_language_task(path):
detected_language = LanguageCode.NONE
language_code = 'und'
Expand Down Expand Up @@ -570,6 +603,13 @@ def extract_audio_segment_to_memory(input_file, start_time, duration):
logging.error(f"Error: {str(e)}")
return None

except ffmpeg.Error as e:
logging.error(f"FFmpeg error: {e.stderr.decode()}")
return None
except Exception as e:
logging.error(f"Error: {str(e)}")
return None

def start_model():
global model
if model is None:
Expand Down

0 comments on commit 2c7f526

Please sign in to comment.