playht streaming fix by detecting mp3 idtag and removing it

vocodedev · Dec 3, 2023 · 72fc379 · 72fc379
1 parent 6f94a7b
commit 72fc379
Show file tree

Hide file tree

Showing 2 changed files with 66 additions and 3 deletions.
diff --git a/poetry.lock b/poetry.lock
diff --git a/vocode/streaming/synthesizer/miniaudio_worker.py b/vocode/streaming/synthesizer/miniaudio_worker.py
@@ -1,4 +1,6 @@
 from __future__ import annotations
+
+import io
 import queue
 
 from typing import Optional, Tuple, Union
@@ -11,6 +13,51 @@
 from vocode.streaming.utils.worker import ThreadAsyncWorker, logger
 
 
+class ID3TagProcessor:
+    def __init__(self):
+        self.buffer = bytearray()
+        self.id3_tag_size = 0
+        self.id3_tag_processed = False
+
+    def process_chunk(self, chunk):
+        print(chunk)
+        self.buffer += chunk
+
+        if not self.id3_tag_processed:
+            if self.buffer.startswith(b"ID3"):
+                if len(self.buffer) >= 10:
+                    self.id3_tag_size = self.calculate_id3_size(self.buffer[:10])
+                    if len(self.buffer) >= self.id3_tag_size:
+                        # Skip the ID3 tag
+                        self.buffer = self.buffer[self.id3_tag_size :]
+                        self.id3_tag_processed = True
+            else:
+                self.id3_tag_processed = True
+
+        if self.id3_tag_processed:
+            # Return the audio data and clear the buffer
+            audio_data = self.buffer
+            self.buffer = bytearray()
+            return audio_data
+
+        return bytearray()  # Return an empty bytearray if still processing the tag
+
+    def calculate_id3_size(self, header):
+        # Extract the four bytes that represent the size
+        size_bytes = header[6:10]
+        print(size_bytes)
+
+        # Calculate the size (each byte is only 7 bits)
+        tag_size = 0
+        for byte in size_bytes:
+            tag_size = (tag_size << 7) | (byte & 0x7F)
+
+        print(tag_size)
+
+        # The size does not include the 10-byte header
+        return tag_size + 10
+
+
 class MiniaudioWorker(ThreadAsyncWorker[Union[bytes, None]]):
     def __init__(
         self,
@@ -27,6 +74,7 @@ def __init__(
 
     def _run_loop(self):
         # tracks the mp3 so far
+        id3_processor = ID3TagProcessor()
         current_mp3_buffer = bytearray()
         # tracks the wav so far
         current_wav_buffer = bytearray()
@@ -47,15 +95,30 @@ def _run_loop(self):
                 current_wav_output_buffer.clear()
                 continue
             try:
-                current_mp3_buffer.extend(mp3_chunk)
-                output_bytes = decode_mp3(bytes(current_mp3_buffer))
+                processed_chunk = id3_processor.process_chunk(mp3_chunk)
+                if processed_chunk:
+                    current_mp3_buffer.extend(processed_chunk)
+                    output_bytes_io = io.BytesIO(bytes(current_mp3_buffer))
+                    # Ensure the stream is at the start
+                    output_bytes_io.seek(0)
+                    # Check if there is data in the stream
+                    if output_bytes_io.getbuffer().nbytes > 0:
+                        output_bytes = decode_mp3(output_bytes_io.read())
+                        # Further processing...
+                    else:
+                        # Handle the case where there is no data
+                        continue
+                else:
+                    # Handle empty processed_chunk
+                    continue
             except miniaudio.DecodeError as e:
                 # TODO: better logging
                 logger.exception("MiniaudioWorker error: " + str(e), exc_info=True)
                 self.output_janus_queue.sync_q.put(
                     (bytes(current_wav_output_buffer), True)
                 )  # sentinel
                 continue
+
             converted_output_bytes = convert_wav(
                 output_bytes,
                 output_sample_rate=self.synthesizer_config.sampling_rate,