Skip to content

Commit

Permalink
playht streaming fix by detecting mp3 idtag and removing it
Browse files Browse the repository at this point in the history
  • Loading branch information
skirdey committed Dec 3, 2023
1 parent 6f94a7b commit 72fc379
Show file tree
Hide file tree
Showing 2 changed files with 66 additions and 3 deletions.
2 changes: 1 addition & 1 deletion poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

67 changes: 65 additions & 2 deletions vocode/streaming/synthesizer/miniaudio_worker.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
from __future__ import annotations

import io
import queue

from typing import Optional, Tuple, Union
Expand All @@ -11,6 +13,51 @@
from vocode.streaming.utils.worker import ThreadAsyncWorker, logger


class ID3TagProcessor:
    """Strip a leading ID3v2 tag from an MP3 byte stream delivered in chunks.

    Feed every incoming chunk to :meth:`process_chunk`. While the presence or
    the full extent of a leading tag is still unknown, the data is buffered
    and an empty ``bytearray`` is returned. Once the tag has been skipped (or
    the stream is known not to start with one), buffered and subsequent bytes
    are passed through unchanged.
    """

    # An ID3v2 header is 10 bytes: "ID3", 2 version bytes, 1 flags byte and a
    # 4-byte sync-safe size.
    ID3_MAGIC = b"ID3"
    ID3_HEADER_SIZE = 10
    # ID3v2.4 "footer present" flag; a footer is a 10-byte copy of the header
    # that is NOT counted in the size field.
    ID3_FOOTER_FLAG = 0x10
    ID3_FOOTER_SIZE = 10

    def __init__(self):
        # Bytes received but not yet handed back to the caller.
        self.buffer = bytearray()
        # Total tag length in bytes (header included), once it is known.
        self.id3_tag_size = 0
        # True once the tag question is settled (tag skipped or absent).
        self.id3_tag_processed = False

    def process_chunk(self, chunk):
        """Consume ``chunk`` and return the audio bytes that are ready.

        Args:
            chunk: The next bytes-like piece of the MP3 stream.

        Returns:
            A ``bytearray`` with tag-free audio data. It is empty while the
            leading ID3 tag is still being detected or skipped.
        """
        self.buffer += chunk

        if not self.id3_tag_processed and not self._try_skip_tag():
            # Still waiting for more bytes to decide or to finish skipping.
            return bytearray()

        # Hand over everything buffered so far and start a fresh buffer.
        audio_data = self.buffer
        self.buffer = bytearray()
        return audio_data

    def _try_skip_tag(self):
        """Try to settle the tag question; return True once it is settled."""
        magic_len = len(self.ID3_MAGIC)

        if len(self.buffer) < magic_len:
            # Too short to compare against the full magic. Keep waiting only
            # while the bytes seen so far could still become "ID3"; an empty
            # buffer is undecided as well.
            if self.ID3_MAGIC.startswith(bytes(self.buffer)):
                return False
            self.id3_tag_processed = True
            return True

        if not self.buffer.startswith(self.ID3_MAGIC):
            # The stream does not begin with a tag: pass everything through.
            self.id3_tag_processed = True
            return True

        if len(self.buffer) < self.ID3_HEADER_SIZE:
            # Need the full header before the tag size can be read.
            return False

        self.id3_tag_size = self.calculate_id3_size(
            self.buffer[: self.ID3_HEADER_SIZE]
        )
        if len(self.buffer) < self.id3_tag_size:
            # The tag body has not fully arrived yet.
            return False

        # Drop the whole tag; whatever follows is audio.
        self.buffer = self.buffer[self.id3_tag_size :]
        self.id3_tag_processed = True
        return True

    def calculate_id3_size(self, header):
        """Return the total byte length of the ID3v2 tag described by ``header``.

        Args:
            header: The first 10 bytes of the tag.

        Returns:
            The sync-safe size stored in bytes 6-9 plus the 10-byte header,
            plus a 10-byte footer when an ID3v2.4 header flags one.
        """
        # The size is sync-safe: only the low 7 bits of each byte are used.
        tag_size = 0
        for byte in header[6:10]:
            tag_size = (tag_size << 7) | (byte & 0x7F)

        # The stored size excludes the 10-byte header itself.
        total_size = tag_size + self.ID3_HEADER_SIZE

        # Only v2.4+ defines the footer flag; the same bit is undefined in
        # earlier versions, so it is ignored there.
        has_footer = (
            len(header) > 5
            and header[3] >= 4
            and bool(header[5] & self.ID3_FOOTER_FLAG)
        )
        if has_footer:
            total_size += self.ID3_FOOTER_SIZE

        return total_size


class MiniaudioWorker(ThreadAsyncWorker[Union[bytes, None]]):
def __init__(
self,
Expand All @@ -27,6 +74,7 @@ def __init__(

def _run_loop(self):
# tracks the mp3 so far
id3_processor = ID3TagProcessor()
current_mp3_buffer = bytearray()
# tracks the wav so far
current_wav_buffer = bytearray()
Expand All @@ -47,15 +95,30 @@ def _run_loop(self):
current_wav_output_buffer.clear()
continue
try:
current_mp3_buffer.extend(mp3_chunk)
output_bytes = decode_mp3(bytes(current_mp3_buffer))
processed_chunk = id3_processor.process_chunk(mp3_chunk)
if processed_chunk:
current_mp3_buffer.extend(processed_chunk)
output_bytes_io = io.BytesIO(bytes(current_mp3_buffer))
# Ensure the stream is at the start
output_bytes_io.seek(0)
# Check if there is data in the stream
if output_bytes_io.getbuffer().nbytes > 0:
output_bytes = decode_mp3(output_bytes_io.read())
# Further processing...
else:
# Handle the case where there is no data
continue
else:
# Handle empty processed_chunk
continue
except miniaudio.DecodeError as e:
# TODO: better logging
logger.exception("MiniaudioWorker error: " + str(e), exc_info=True)
self.output_janus_queue.sync_q.put(
(bytes(current_wav_output_buffer), True)
) # sentinel
continue

converted_output_bytes = convert_wav(
output_bytes,
output_sample_rate=self.synthesizer_config.sampling_rate,
Expand Down

0 comments on commit 72fc379

Please sign in to comment.