Comparing changes

base repository: SevaSk/ecoute (base: main)
head repository: nickotto/ecoute (compare: main)

Able to merge. These branches can be automatically merged.
  • 1 commit
  • 8 files changed
  • 1 contributor

Commits on Nov 9, 2023

  1. annotate and suggest

    nickmatsumoto committed Nov 9, 2023
    SHA: 88e98d4
Showing with 483 additions and 54 deletions.
  1. +2 −1 AudioRecorder.py
  2. +19 −21 AudioTranscriber.py
  3. +69 −9 GPTResponder.py
  4. +2 −1 TranscriberModels.py
  5. +176 −0 annotater.ipynb
  6. +44 −20 main.py
  7. +153 −0 main_minimum.py
  8. +18 −2 prompts.py
3 changes: 2 additions & 1 deletion AudioRecorder.py
@@ -1,5 +1,6 @@
 import custom_speech_recognition as sr
-import pyaudiowpatch as pyaudio
+# import pyaudiowpatch as pyaudio
+import pyaudio
 from datetime import datetime
 
 RECORD_TIMEOUT = 3
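
This import swap replaces pyaudiowpatch (a Windows-only PyAudio fork that exposes WASAPI loopback devices, which ecoute uses to capture speaker output) with stock pyaudio, which only sees ordinary capture devices. Together with the AudioTranscriber changes below, this drops speaker capture and leaves the app mic-only. A minimal sketch for checking what stock pyaudio can still see; the helper below is illustrative, not part of this commit:

```python
# Hypothetical helper, not part of this commit: enumerate the capture devices
# visible to stock pyaudio (the WASAPI loopback devices that pyaudiowpatch
# exposes will no longer appear).
import pyaudio

def list_input_devices():
    p = pyaudio.PyAudio()
    try:
        for i in range(p.get_device_count()):
            info = p.get_device_info_by_index(i)
            if info.get("maxInputChannels", 0) > 0:
                print(f"{i}: {info['name']} ({info['maxInputChannels']} ch)")
    finally:
        p.terminate()

if __name__ == "__main__":
    list_input_devices()
```
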
40 changes: 19 additions & 21 deletions AudioTranscriber.py
@@ -7,18 +7,21 @@
 import custom_speech_recognition as sr
 import io
 from datetime import timedelta
-import pyaudiowpatch as pyaudio
+# import pyaudiowpatch as pyaudio
+import pyaudio
 from heapq import merge
+import time
 
 PHRASE_TIMEOUT = 3.05
 
-MAX_PHRASES = 10
+MAX_PHRASES = 100
 
 class AudioTranscriber:
-    def __init__(self, mic_source, speaker_source, model):
-        self.transcript_data = {"You": [], "Speaker": []}
+    def __init__(self, mic_source, model):
+        self.transcript_data = {"You": []}
         self.transcript_changed_event = threading.Event()
         self.audio_model = model
+        self.audio = pyaudio.PyAudio()
         self.audio_sources = {
             "You": {
                 "sample_rate": mic_source.SAMPLE_RATE,
@@ -28,17 +31,9 @@ def __init__(self, mic_source, speaker_source, model):
                 "last_spoken": None,
                 "new_phrase": True,
                 "process_data_func": self.process_mic_data
-            },
-            "Speaker": {
-                "sample_rate": speaker_source.SAMPLE_RATE,
-                "sample_width": speaker_source.SAMPLE_WIDTH,
-                "channels": speaker_source.channels,
-                "last_sample": bytes(),
-                "last_spoken": None,
-                "new_phrase": True,
-                "process_data_func": self.process_speaker_data
             }
         }
+        self.log_file = open(str(time.time()) + ".log", "a")
 
     def transcribe_audio_queue(self, audio_queue):
         while True:
@@ -58,6 +53,9 @@ def transcribe_audio_queue(self, audio_queue):
                 os.unlink(path)
 
             if text != '' and text.lower() != 'you':
                # append onto log file
+                self.log_file.write(f"{text}\n")
+                self.log_file.flush()
                 self.update_transcript(who_spoke, text, time_spoken)
                 self.transcript_changed_event.set()
 
@@ -93,23 +91,23 @@ def update_transcript(self, who_spoke, text, time_spoken):
         if source_info["new_phrase"] or len(transcript) == 0:
             if len(transcript) > MAX_PHRASES:
                 transcript.pop(-1)
-            transcript.insert(0, (f"{who_spoke}: [{text}]\n\n", time_spoken))
+            transcript.insert(0, (f"{text}\n\n", time_spoken))
         else:
-            transcript[0] = (f"{who_spoke}: [{text}]\n\n", time_spoken)
+            transcript[0] = (f"{text}\n\n", time_spoken)
 
-    def get_transcript(self):
+    def get_transcript(self, max_phrases=MAX_PHRASES):
         combined_transcript = list(merge(
-            self.transcript_data["You"], self.transcript_data["Speaker"],
+            self.transcript_data["You"],
             key=lambda x: x[1], reverse=True))
-        combined_transcript = combined_transcript[:MAX_PHRASES]
+        combined_transcript = combined_transcript[:max_phrases]
         return "".join([t[0] for t in combined_transcript])
 
     def clear_transcript_data(self):
         self.transcript_data["You"].clear()
-        self.transcript_data["Speaker"].clear()
+        # self.transcript_data["Speaker"].clear()
 
         self.audio_sources["You"]["last_sample"] = bytes()
-        self.audio_sources["Speaker"]["last_sample"] = bytes()
+        # self.audio_sources["Speaker"]["last_sample"] = bytes()
 
         self.audio_sources["You"]["new_phrase"] = True
-        self.audio_sources["Speaker"]["new_phrase"] = True
+        # self.audio_sources["Speaker"]["new_phrase"] = True
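
With the "Speaker" stream removed, heapq.merge now receives a single iterable, so get_transcript effectively just returns the newest max_phrases entries of the "You" transcript. A standalone sketch of that reworked logic, with stand-in data (the tuples and values are invented for illustration):

```python
# Stand-in data: (text, time_spoken) tuples kept newest-first, as
# update_transcript inserts at index 0.
from heapq import merge

you = [("how are you\n\n", 7.0), ("hello\n\n", 3.0)]

# With one input iterable, merge() is a pass-through; it only matters again
# if a second stream (e.g. "Speaker") is ever restored.
combined = list(merge(you, key=lambda x: x[1], reverse=True))
print("".join(t[0] for t in combined[:100]))  # 100 mirrors the new MAX_PHRASES
```
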
78 changes: 69 additions & 9 deletions GPTResponder.py
@@ -1,30 +1,67 @@
 import openai
 from keys import OPENAI_API_KEY
-from prompts import create_prompt, INITIAL_RESPONSE
+from prompts import create_suggestion, create_summarization
 import time
 
 openai.api_key = OPENAI_API_KEY
 
-def generate_response_from_transcript(transcript):
+# def generate_response_from_transcript(transcript):
+#     try:
+#         response = openai.ChatCompletion.create(
+#             model="gpt-3.5-turbo",
+#             messages=[{"role": "system", "content": create_prompt(transcript)}],
+#             temperature = 0.0
+#         )
+#     except Exception as e:
+#         print(e)
+#         return ''
+#     full_response = response.choices[0].message.content
+#     print(response.choices[0])
+#     try:
+#         return full_response.split('[')[1].split(']')[0]
+#     except:
+#         return ''
+def generate_suggestion_from_transcript(transcript):
     try:
         response = openai.ChatCompletion.create(
-            model="gpt-3.5-turbo-0301",
-            messages=[{"role": "system", "content": create_prompt(transcript)}],
+            model="gpt-3.5-turbo",
+            messages=[{"role": "system", "content": create_suggestion(transcript)}],
             temperature = 0.0
         )
     except Exception as e:
         print(e)
         return ''
     full_response = response.choices[0].message.content
+    print("suggestion:" + full_response)
     try:
-        return full_response.split('[')[1].split(']')[0]
+        return full_response
     except:
         return ''
 
+def generate_summarization_from_transcript(transcript):
+    try:
+        response = openai.ChatCompletion.create(
+            model="gpt-3.5-turbo",
+            messages=[{"role": "system", "content": create_summarization(transcript)}],
+            temperature = 0.0
+        )
+    except Exception as e:
+        print(e)
+        return ''
+    full_response = response.choices[0].message.content
+    print("summarization:" + full_response)
+    try:
+        return full_response
+    except:
+        return ''
+
 class GPTResponder:
     def __init__(self):
         self.response = INITIAL_RESPONSE
-        self.response_interval = 2
+        self.responses = []
+        self.response_interval = 30
+        self.summarization_interval = 90
+        self.summarization = ''
+        self.summarizations = []
 
     def respond_to_transcriber(self, transcriber):
         while True:
@@ -33,19 +70,42 @@ def respond_to_transcriber(self, transcriber):
 
                 transcriber.transcript_changed_event.clear()
                 transcript_string = transcriber.get_transcript()
-                response = generate_response_from_transcript(transcript_string)
+                response = generate_suggestion_from_transcript(transcript_string)
 
                 end_time = time.time()  # Measure end time
                 execution_time = end_time - start_time  # Calculate the time it took to execute the function
 
                 if response != '':
-                    self.response = response
+                    if response not in self.responses:
+                        self.responses.append(response)
 
                 remaining_time = self.response_interval - execution_time
                 if remaining_time > 0:
                     time.sleep(remaining_time)
                 else:
                     time.sleep(0.3)
 
+    def summarize_to_transcriber(self, transcriber):
+        while True:
+            if transcriber.transcript_changed_event.is_set():
+                start_time = time.time()
+
+                transcriber.transcript_changed_event.clear()
+                transcript_string = transcriber.get_transcript()
+                response = generate_summarization_from_transcript(transcript_string)
+
+                end_time = time.time()  # Measure end time
+                execution_time = end_time - start_time  # Calculate the time it took to execute the function
+
+                if response != '':
+                    self.summarization = response
+                    self.summarizations.append(response)
+
+                remaining_time = self.summarization_interval - execution_time
+                if remaining_time > 0:
+                    time.sleep(remaining_time)
+                else:
+                    time.sleep(0.3)
+
     def update_response_interval(self, interval):
         self.response_interval = interval
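
The diffs for main.py and main_minimum.py are not shown on this page, but a plausible wiring of the two loops (an assumption, not the author's confirmed code) is one daemon thread per loop:

```python
# Sketch under the assumption that main.py starts both loops; `transcriber`
# would be the AudioTranscriber instance built in main.py.
import threading

responder = GPTResponder()
threading.Thread(target=responder.respond_to_transcriber,
                 args=(transcriber,), daemon=True).start()
threading.Thread(target=responder.summarize_to_transcriber,
                 args=(transcriber,), daemon=True).start()
```

Note that both loops clear the same transcript_changed_event, so whichever thread wakes first consumes the flag and the other waits for the next transcript update.
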
3 changes: 2 additions & 1 deletion TranscriberModels.py
@@ -11,7 +11,8 @@ def get_model(use_api):
 
 class WhisperTranscriber:
     def __init__(self):
-        self.audio_model = whisper.load_model(os.path.join(os.getcwd(), 'tiny.en.pt'))
+        self.audio_model = whisper.load_model(os.path.join(os.getcwd(), 'base.en.pt'))
+        # self.audio_model = whisper.load_model(os.path.join(os.getcwd(), 'large-v2.pt'))
         print(f"[INFO] Whisper using GPU: " + str(torch.cuda.is_available()))
 
     def get_transcription(self, wav_file_path):
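
The checkpoint moves from tiny.en to base.en (better accuracy at roughly twice the inference cost), with large-v2 left as a commented-out option. whisper.load_model accepts either a local checkpoint path, as used here, or a model name that it downloads on first use; a minimal sketch (test.wav is a placeholder file):

```python
import os
import whisper

# Local checkpoint in the working directory, matching the diff above.
model = whisper.load_model(os.path.join(os.getcwd(), "base.en.pt"))
# model = whisper.load_model("base.en")  # or by name; whisper downloads it

result = model.transcribe("test.wav", fp16=False)  # fp16=False avoids a warning on CPU
print(result["text"])
```
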