Comparing changes

base repository: SevaSk/ecoute (base: main)
head repository: nickotto/ecoute (compare: main)

Able to merge. These branches can be automatically merged.
  • 1 commit
  • 8 files changed
  • 1 contributor

Commits on Nov 9, 2023

  1. annotate and suggest

    nickmatsumoto committed Nov 9, 2023
    SHA: 88e98d4
Showing with 483 additions and 54 deletions.
  1. +2 −1 AudioRecorder.py
  2. +19 −21 AudioTranscriber.py
  3. +69 −9 GPTResponder.py
  4. +2 −1 TranscriberModels.py
  5. +176 −0 annotater.ipynb
  6. +44 −20 main.py
  7. +153 −0 main_minimum.py
  8. +18 −2 prompts.py
3 changes: 2 additions & 1 deletion AudioRecorder.py
@@ -1,5 +1,6 @@
 import custom_speech_recognition as sr
-import pyaudiowpatch as pyaudio
+# import pyaudiowpatch as pyaudio
+import pyaudio
 from datetime import datetime
 
 RECORD_TIMEOUT = 3
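
This import swap replaces pyaudiowpatch (a Windows-only PyAudio fork that exposes WASAPI loopback devices, which ecoute uses to capture speaker output) with stock pyaudio, which only sees ordinary capture devices. Together with the AudioTranscriber changes below, this drops speaker capture and leaves the app mic-only. A minimal sketch for checking what stock pyaudio can still see; the helper below is illustrative, not part of this commit:

```python
# Hypothetical helper, not part of this commit: enumerate the capture devices
# visible to stock pyaudio (the WASAPI loopback devices that pyaudiowpatch
# exposes will no longer appear).
import pyaudio

def list_input_devices():
    p = pyaudio.PyAudio()
    try:
        for i in range(p.get_device_count()):
            info = p.get_device_info_by_index(i)
            if info.get("maxInputChannels", 0) > 0:
                print(f"{i}: {info['name']} ({info['maxInputChannels']} ch)")
    finally:
        p.terminate()

if __name__ == "__main__":
    list_input_devices()
```
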
40 changes: 19 additions & 21 deletions AudioTranscriber.py
@@ -7,18 +7,21 @@
 import custom_speech_recognition as sr
 import io
 from datetime import timedelta
-import pyaudiowpatch as pyaudio
+# import pyaudiowpatch as pyaudio
+import pyaudio
 from heapq import merge
+import time
 
 PHRASE_TIMEOUT = 3.05
 
-MAX_PHRASES = 10
+MAX_PHRASES = 100
 
 class AudioTranscriber:
-    def __init__(self, mic_source, speaker_source, model):
-        self.transcript_data = {"You": [], "Speaker": []}
+    def __init__(self, mic_source, model):
+        self.transcript_data = {"You": []}
         self.transcript_changed_event = threading.Event()
         self.audio_model = model
+        self.audio = pyaudio.PyAudio()
         self.audio_sources = {
             "You": {
                 "sample_rate": mic_source.SAMPLE_RATE,
@@ -28,17 +31,9 @@ def __init__(self, mic_source, speaker_source, model):
                 "last_spoken": None,
                 "new_phrase": True,
                 "process_data_func": self.process_mic_data
-            },
-            "Speaker": {
-                "sample_rate": speaker_source.SAMPLE_RATE,
-                "sample_width": speaker_source.SAMPLE_WIDTH,
-                "channels": speaker_source.channels,
-                "last_sample": bytes(),
-                "last_spoken": None,
-                "new_phrase": True,
-                "process_data_func": self.process_speaker_data
             }
         }
+        self.log_file = open(str(time.time()) + ".log", "a")
 
     def transcribe_audio_queue(self, audio_queue):
         while True:
@@ -58,6 +53,9 @@ def transcribe_audio_queue(self, audio_queue):
                 os.unlink(path)
 
             if text != '' and text.lower() != 'you':
                # append onto log file
+                self.log_file.write(f"{text}\n")
+                self.log_file.flush()
                 self.update_transcript(who_spoke, text, time_spoken)
                 self.transcript_changed_event.set()
 
@@ -93,23 +91,23 @@ def update_transcript(self, who_spoke, text, time_spoken):
         if source_info["new_phrase"] or len(transcript) == 0:
             if len(transcript) > MAX_PHRASES:
                 transcript.pop(-1)
-            transcript.insert(0, (f"{who_spoke}: [{text}]\n\n", time_spoken))
+            transcript.insert(0, (f"{text}\n\n", time_spoken))
         else:
-            transcript[0] = (f"{who_spoke}: [{text}]\n\n", time_spoken)
+            transcript[0] = (f"{text}\n\n", time_spoken)
 
-    def get_transcript(self):
+    def get_transcript(self, max_phrases=MAX_PHRASES):
         combined_transcript = list(merge(
-            self.transcript_data["You"], self.transcript_data["Speaker"],
+            self.transcript_data["You"],
             key=lambda x: x[1], reverse=True))
-        combined_transcript = combined_transcript[:MAX_PHRASES]
+        combined_transcript = combined_transcript[:max_phrases]
         return "".join([t[0] for t in combined_transcript])
 
     def clear_transcript_data(self):
         self.transcript_data["You"].clear()
-        self.transcript_data["Speaker"].clear()
+        # self.transcript_data["Speaker"].clear()
 
         self.audio_sources["You"]["last_sample"] = bytes()
-        self.audio_sources["Speaker"]["last_sample"] = bytes()
+        # self.audio_sources["Speaker"]["last_sample"] = bytes()
 
         self.audio_sources["You"]["new_phrase"] = True
-        self.audio_sources["Speaker"]["new_phrase"] = True
+        # self.audio_sources["Speaker"]["new_phrase"] = True
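
With the "Speaker" stream removed, heapq.merge now receives a single iterable, so get_transcript effectively just returns the newest max_phrases entries of the "You" transcript. A standalone sketch of that reworked logic, with stand-in data (the tuples and values are invented for illustration):

```python
# Stand-in data: (text, time_spoken) tuples kept newest-first, as
# update_transcript inserts at index 0.
from heapq import merge

you = [("how are you\n\n", 7.0), ("hello\n\n", 3.0)]

# With one input iterable, merge() is a pass-through; it only matters again
# if a second stream (e.g. "Speaker") is ever restored.
combined = list(merge(you, key=lambda x: x[1], reverse=True))
print("".join(t[0] for t in combined[:100]))  # 100 mirrors the new MAX_PHRASES
```
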
78 changes: 69 additions & 9 deletions GPTResponder.py
@@ -1,30 +1,67 @@
 import openai
 from keys import OPENAI_API_KEY
-from prompts import create_prompt, INITIAL_RESPONSE
+from prompts import create_suggestion, create_summarization
 import time
 
 openai.api_key = OPENAI_API_KEY
 
-def generate_response_from_transcript(transcript):
+# def generate_response_from_transcript(transcript):
+#     try:
+#         response = openai.ChatCompletion.create(
+#             model="gpt-3.5-turbo",
+#             messages=[{"role": "system", "content": create_prompt(transcript)}],
+#             temperature = 0.0
+#         )
+#     except Exception as e:
+#         print(e)
+#         return ''
+#     full_response = response.choices[0].message.content
+#     print(response.choices[0])
+#     try:
+#         return full_response.split('[')[1].split(']')[0]
+#     except:
+#         return ''
+def generate_suggestion_from_transcript(transcript):
     try:
         response = openai.ChatCompletion.create(
-            model="gpt-3.5-turbo-0301",
-            messages=[{"role": "system", "content": create_prompt(transcript)}],
+            model="gpt-3.5-turbo",
+            messages=[{"role": "system", "content": create_suggestion(transcript)}],
             temperature = 0.0
         )
     except Exception as e:
         print(e)
         return ''
     full_response = response.choices[0].message.content
+    print("suggestion:" + full_response)
     try:
-        return full_response.split('[')[1].split(']')[0]
+        return full_response
     except:
         return ''
 
+def generate_summarization_from_transcript(transcript):
+    try:
+        response = openai.ChatCompletion.create(
+            model="gpt-3.5-turbo",
+            messages=[{"role": "system", "content": create_summarization(transcript)}],
+            temperature = 0.0
+        )
+    except Exception as e:
+        print(e)
+        return ''
+    full_response = response.choices[0].message.content
+    print("summarization:" + full_response)
+    try:
+        return full_response
+    except:
+        return ''
+
 class GPTResponder:
     def __init__(self):
         self.response = INITIAL_RESPONSE
-        self.response_interval = 2
+        self.responses = []
+        self.response_interval = 30
+        self.summarization_interval = 90
+        self.summarization = ''
+        self.summarizations = []
 
     def respond_to_transcriber(self, transcriber):
         while True:
@@ -33,19 +70,42 @@ def respond_to_transcriber(self, transcriber):
 
                 transcriber.transcript_changed_event.clear()
                 transcript_string = transcriber.get_transcript()
-                response = generate_response_from_transcript(transcript_string)
+                response = generate_suggestion_from_transcript(transcript_string)
 
                 end_time = time.time()  # Measure end time
                 execution_time = end_time - start_time  # Calculate the time it took to execute the function
 
                 if response != '':
-                    self.response = response
+                    if response not in self.responses:
+                        self.responses.append(response)
 
                 remaining_time = self.response_interval - execution_time
                 if remaining_time > 0:
                     time.sleep(remaining_time)
                 else:
                     time.sleep(0.3)
 
+    def summarize_to_transcriber(self, transcriber):
+        while True:
+            if transcriber.transcript_changed_event.is_set():
+                start_time = time.time()
+
+                transcriber.transcript_changed_event.clear()
+                transcript_string = transcriber.get_transcript()
+                response = generate_summarization_from_transcript(transcript_string)
+
+                end_time = time.time()  # Measure end time
+                execution_time = end_time - start_time  # Calculate the time it took to execute the function
+
+                if response != '':
+                    self.summarization = response
+                    self.summarizations.append(response)
+
+                remaining_time = self.summarization_interval - execution_time
+                if remaining_time > 0:
+                    time.sleep(remaining_time)
+                else:
+                    time.sleep(0.3)
+
     def update_response_interval(self, interval):
         self.response_interval = interval
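
The diffs for main.py and main_minimum.py are not shown on this page, but a plausible wiring of the two loops (an assumption, not the author's confirmed code) is one daemon thread per loop:

```python
# Sketch under the assumption that main.py starts both loops; `transcriber`
# would be the AudioTranscriber instance built in main.py.
import threading

responder = GPTResponder()
threading.Thread(target=responder.respond_to_transcriber,
                 args=(transcriber,), daemon=True).start()
threading.Thread(target=responder.summarize_to_transcriber,
                 args=(transcriber,), daemon=True).start()
```

Note that both loops clear the same transcript_changed_event, so whichever thread wakes first consumes the flag and the other waits for the next transcript update.
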
3 changes: 2 additions & 1 deletion TranscriberModels.py
@@ -11,7 +11,8 @@ def get_model(use_api):
 
 class WhisperTranscriber:
     def __init__(self):
-        self.audio_model = whisper.load_model(os.path.join(os.getcwd(), 'tiny.en.pt'))
+        self.audio_model = whisper.load_model(os.path.join(os.getcwd(), 'base.en.pt'))
+        # self.audio_model = whisper.load_model(os.path.join(os.getcwd(), 'large-v2.pt'))
         print(f"[INFO] Whisper using GPU: " + str(torch.cuda.is_available()))
 
     def get_transcription(self, wav_file_path):
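
The checkpoint moves from tiny.en to base.en (better accuracy at roughly twice the inference cost), with large-v2 left as a commented-out option. whisper.load_model accepts either a local checkpoint path, as used here, or a model name that it downloads on first use; a minimal sketch (test.wav is a placeholder file):

```python
import os
import whisper

# Local checkpoint in the working directory, matching the diff above.
model = whisper.load_model(os.path.join(os.getcwd(), "base.en.pt"))
# model = whisper.load_model("base.en")  # or by name; whisper downloads it

result = model.transcribe("test.wav", fp16=False)  # fp16=False avoids a warning on CPU
print(result["text"])
```
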