diff --git a/clients/sony_spresense/firmware/README.md b/clients/sony_spresense/firmware/README.md new file mode 100644 index 00000000..46a611c8 --- /dev/null +++ b/clients/sony_spresense/firmware/README.md @@ -0,0 +1,21 @@ +## About Sony Spresense + +The Sony Spresense is a compact, low-power board designed. It has a Sony CXD5602 microcontroller, a 6-core ARM® Cortex®-M4F processor that operates up to 156MHz. + +### Key Features + +- **High-Performance CPU:** Multi-core ARM® Cortex®-M4F processors. +- **Low Power Consumption:** Designed for battery-operated IoT devices, so can support extended operational times. +- **High-Quality Image Sensor:** 5M pixel Sony CMOS image sensor for high-quality image capture. +- **Audio Processing:** Includes support for MP3 audio codecs. +- **LTE-M:** Capable of capturing untethered with the LTE extension board. +- **GNSS Receiver:** Integrated GPS for applications requiring location tracking. + +### Limitations + +- LTE-M throughput is limited. +- Requires soldering a microphone for audio input. + +## Getting Started + +See the [Sony documentation](https://developer.sony.com/spresense/) for instructions on how to use the board with Arduino the IDE. \ No newline at end of file diff --git a/clients/sony_spresense/firmware/spresense/spresense.ino b/clients/sony_spresense/firmware/spresense/spresense.ino new file mode 100644 index 00000000..60221f17 --- /dev/null +++ b/clients/sony_spresense/firmware/spresense/spresense.ino @@ -0,0 +1,105 @@ +#include +#include +#include +#include +#include +#include + +AudioClass *theAudio; +const int mic_channel_num = 1; +#define APP_LTE_APN "iot.truphone.com" +#define APP_LTE_IP_TYPE (LTE_NET_IPTYPE_V4V6) +#define APP_LTE_AUTH_TYPE (LTE_NET_AUTHTYPE_NONE) +#define APP_LTE_RAT (LTE_NET_RAT_CATM) + +// host configuration +char serverAddress[] = ""; +int port = 8001; + +LTE lteAccess; +LTEUDP udp; + +static void audio_attention_cb(const ErrorAttentionParam *atprm) { + puts("Attention!"); + + if (atprm->error_code >= AS_ATTENTION_CODE_WARNING) { + + theAudio->startRecorder(); + } +} + +void setup() { + char apn[LTE_NET_APN_MAXLEN] = APP_LTE_APN; + LTENetworkAuthType authtype = APP_LTE_AUTH_TYPE; + + Serial.begin(115200); + while (!Serial) + ; + + Serial.println("Starting LTE client setup."); + + Serial.println("=========== APN information ==========="); + Serial.print("Access Point Name : "); + Serial.println(apn); + Serial.print("Authentication Type: "); + Serial.println((authtype == LTE_NET_AUTHTYPE_CHAP) ? "CHAP" : (authtype == LTE_NET_AUTHTYPE_NONE) ? "NONE" : "PAP"); + + while (true) { + if (lteAccess.begin() != LTE_SEARCHING) { + Serial.println("Could not transition to LTE_SEARCHING."); + Serial.println("Please check the status of the LTE board."); + for (;;) { + sleep(1); + } + } + + if (lteAccess.attach(APP_LTE_RAT, + apn, + "", + "", + authtype, + APP_LTE_IP_TYPE) + == LTE_READY) { + Serial.println("attach succeeded."); + + break; + } + } + + if (!udp.begin(port)) { + Serial.println("Failed to start UDP"); + while (true) + ; + } + + Serial.println("UDP started."); + + Serial.println("Init Audio Library"); + theAudio = AudioClass::getInstance(); + theAudio->begin(audio_attention_cb); + + Serial.println("Init Audio Recorder"); + theAudio->setRecorderMode(AS_SETRECDR_STS_INPUTDEVICE_MIC, 10, 200 * 1024); + + uint8_t channel = AS_CHANNEL_MONO; + theAudio->initRecorder(AS_CODECTYPE_MP3, "/mnt/sd0/BIN", AS_SAMPLINGRATE_16000, channel); + + theAudio->startRecorder(); + + Serial.println("Rec start!"); +} + +void loop() { + static const size_t bufferSize = 4096; + char buffer[bufferSize]; + uint32_t readSize; + int err = theAudio->readFrames(buffer, bufferSize, &readSize); + + if (readSize > 0) { + if (readSize > 0) { + udp.beginPacket(serverAddress, port); + udp.write(buffer, readSize); + udp.endPacket(); + } + } +} diff --git a/untitledai/config.yaml b/untitledai/config.yaml index 54fc9a0c..49b7b325 100644 --- a/untitledai/config.yaml +++ b/untitledai/config.yaml @@ -45,3 +45,6 @@ conversation_endpointing: notification: apn_team_id: "" +udp: + enabled: false + diff --git a/untitledai/core/config.py b/untitledai/core/config.py index f0f8d51d..3d40afd1 100644 --- a/untitledai/core/config.py +++ b/untitledai/core/config.py @@ -51,6 +51,12 @@ class ConversationEndpointingConfiguration(BaseModel): class NotificationConfiguration(BaseModel): apn_team_id: str | None +class UDPConfiguration(BaseModel): + enabled: bool + host: str | None + port: int | None + + class Configuration(BaseModel): @classmethod @@ -81,4 +87,5 @@ def load_config_yaml(cls, config_file_path: str) -> 'Configuration': user: UserConfiguration database: DatabaseConfiguration conversation_endpointing: ConversationEndpointingConfiguration - notification: NotificationConfiguration \ No newline at end of file + notification: NotificationConfiguration + udp: UDPConfiguration \ No newline at end of file diff --git a/untitledai/server/main.py b/untitledai/server/main.py index ae4c6daf..929efd0b 100644 --- a/untitledai/server/main.py +++ b/untitledai/server/main.py @@ -17,6 +17,7 @@ from .routes.capture import router as capture_router from .routes.conversations import router as conversations_router from .capture_socket import CaptureSocketApp +from .udp_capture_socket import UDPCaptureSocketApp from ..services import LLMService, ConversationService, NotificationService from ..database.database import Database from ..services.stt.asynchronous.async_transcription_service_factory import AsyncTranscriptionServiceFactory @@ -27,6 +28,8 @@ from colorama import init, Fore, Style, Back from fastapi import Depends +from .streaming_capture_handler import StreamingCaptureHandler + logger = logging.getLogger(__name__) # TODO: How to handle logging configuration? @@ -103,6 +106,11 @@ async def startup_event(): # Initialize the database app.state._app_state.database.init_db() asyncio.create_task(process_queue(app.state._app_state)) + if config.udp.enabled: + loop = asyncio.get_running_loop() + await loop.create_datagram_endpoint( + lambda: UDPCaptureSocketApp(app.state._app_state), local_addr=(config.udp.host, config.udp.port) + ) @app.on_event("shutdown") async def shutdown_event(): diff --git a/untitledai/server/udp_capture_socket.py b/untitledai/server/udp_capture_socket.py new file mode 100644 index 00000000..4683a6b6 --- /dev/null +++ b/untitledai/server/udp_capture_socket.py @@ -0,0 +1,49 @@ +# +# udp_capture_socket.py +# +# UDP socket handlers for streaming audio capture. +# +# UDP is necessary for some LTE-M boards, such as the Spresense, where bandwidth is limited. +# + +import asyncio +import uuid +from .app_state import AppState +from .streaming_capture_handler import StreamingCaptureHandler +import logging + +logger = logging.getLogger(__name__) + +class UDPCaptureSocketApp(asyncio.DatagramProtocol): + def __init__(self, app_state: AppState, timeout_seconds=5): + self._app_state = app_state + self._transport = None + self._capture_uuid = None + self._timeout_seconds = timeout_seconds + self._timeout_handle = None + + def connection_made(self, transport: asyncio.DatagramTransport) -> None: + self._transport = transport + # Create a new capture session with id generated by the server since we don't have a UPD protocol for this + self._capture_uuid = uuid.uuid1().hex + self._app_state.capture_handlers[self._capture_uuid] = StreamingCaptureHandler( + self._app_state, "spresense", self._capture_uuid, "mp3" + ) + + def datagram_received(self, data: bytes, addr): + if self._timeout_handle: + self._timeout_handle.cancel() + self._timeout_handle = asyncio.get_running_loop().call_later( + self._timeout_seconds, self.connection_timed_out) + + asyncio.create_task(self.send_info_to_client(data)) + + async def send_info_to_client(self, data: bytes): + await self._app_state.capture_handlers[self._capture_uuid].handle_audio_data(data) + + def connection_timed_out(self): + if self._capture_uuid not in self._app_state.capture_handlers: + logger.error(f"Capture session not found: {self._capture_uuid}") + return + capture_handler = self._app_state.capture_handlers[self._capture_uuid] + capture_handler.finish_capture_session()