main2.py
from fastapi import FastAPI, WebSocket, WebSocketDisconnect
from fastapi.middleware.cors import CORSMiddleware
import websockets
import asyncio
import json
import os
import traceback
from dotenv import load_dotenv
from pydub import AudioSegment
import io
import base64
from rich import print

load_dotenv()

app = FastAPI()

# Add CORS middleware
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],  # Adjust this in production
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
OPENAI_WEBSOCKET_URL = "wss://api.openai.com/v1/realtime?model=gpt-4o-realtime-preview-2024-10-01"


def process_audio(base64_audio):
    """Convert base64-encoded MP3 audio to base64-encoded 24 kHz mono 16-bit PCM."""
    # Decode base64 to raw audio data
    raw_audio = base64.b64decode(base64_audio)
    # Convert to AudioSegment (input is expected to be MP3)
    audio = AudioSegment.from_mp3(io.BytesIO(raw_audio))
    # Resample to 24 kHz
    audio = audio.set_frame_rate(24000)
    # Convert to mono
    audio = audio.set_channels(1)
    # Convert to 16-bit PCM
    audio = audio.set_sample_width(2)
    # Get raw PCM data
    raw_pcm = audio.raw_data
    # Encode back to base64
    return base64.b64encode(raw_pcm).decode()


async def openai_websocket_proxy(websocket: WebSocket):
    """Bridge a client WebSocket to the OpenAI realtime WebSocket in both directions."""
    try:
        async with websockets.connect(
            OPENAI_WEBSOCKET_URL,
            extra_headers={
                "Authorization": f"Bearer {OPENAI_API_KEY}",
                "OpenAI-Beta": "realtime=v1",
            },
        ) as openai_ws:
            client_to_openai = asyncio.create_task(forward_messages(websocket, openai_ws))
            openai_to_client = asyncio.create_task(forward_messages(openai_ws, websocket))
            # Run both directions until either side finishes, then cancel the other
            await asyncio.wait(
                [client_to_openai, openai_to_client],
                return_when=asyncio.FIRST_COMPLETED,
            )
            for task in [client_to_openai, openai_to_client]:
                if not task.done():
                    task.cancel()
                    try:
                        await task
                    except asyncio.CancelledError:
                        pass
    except Exception as e:
        print(f"Error in openai_websocket_proxy: {str(e)}")
    finally:
        print("WebSocket connection closed")


async def forward_messages(source, destination):
    """Relay JSON messages from source to destination, re-encoding audio payloads."""
    try:
        while True:
            if isinstance(source, WebSocket):
                message = await source.receive_text()
            else:
                message = await source.recv()
            data = json.loads(message)
            print(f"[green]Received message: {data}[/green]")
            if data.get("type") == "input_audio_buffer.append":
                # Re-encode the client's MP3 audio into the PCM format expected upstream,
                # replacing the payload in the event's "audio" field
                audio_data = data["audio"]
                processed_audio = process_audio(audio_data)
                data["audio"] = processed_audio
                message = json.dumps(data)
            print(f"[red]Forwarding message: {message}[/red]")
            if isinstance(destination, WebSocket):
                await destination.send_text(message)
            else:
                await destination.send(message)
    except WebSocketDisconnect:
        print("WebSocket disconnected")
    except Exception as e:
        print(f"Error in forward_messages: {str(e)}")
        traceback.print_exc()


@app.websocket("/ws/openai")
async def websocket_endpoint(websocket: WebSocket):
    await websocket.accept()
    try:
        await openai_websocket_proxy(websocket)
    except WebSocketDisconnect:
        print("Client disconnected")

if __name__ == "__main__":
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=8000)