diff --git a/corpus/__pycache__/extrawords.cpython-37.pyc b/corpus/__pycache__/extrawords.cpython-37.pyc new file mode 100644 index 0000000..bf5b48b Binary files /dev/null and b/corpus/__pycache__/extrawords.cpython-37.pyc differ diff --git a/experiments/speechrecog.ipynb b/experiments/speechrecog.ipynb index 20e4c01..d687dbf 100644 --- a/experiments/speechrecog.ipynb +++ b/experiments/speechrecog.ipynb @@ -139,6 +139,195 @@ "source": [ "Task: Explore potential in implementation" ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Part 2: Enhance accuracy and speed\n", + "Before 2023, the speech recognition module is built based on the google API. Its capabilities are limited by internet speed and microphone accuracy. \n", + "\n", + "Below, we will be attempting to use Vosk to understand the possibilities of offline speech recognition and its related accuracy." + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Traditionally, the code will include something like this:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import speech_recognition as sr\n", + "import pyaudio #for mic access\n", + "def listen():\n", + " r = sr.Recognizer()\n", + " with sr.Microphone() as source:\n", + " print(\"Listening>>>\")\n", + " r.pause_threshold = 1\n", + " r.adjust_for_ambient_noise(source)\n", + " audio = r.listen(source)\n", + "\n", + " try:\n", + " print(\"Recognizing: \")\n", + " query = r.recognize_google(audio, language='en-in')\n", + " print(f\"User: {query}\\n\")\n", + " except Exception as e:\n", + " print(e)\n", + " print(\"Audio not heard, plesae try again\")\n", + " return \"None\"\n", + " if query is None:\n", + " print(\"audio not heard at thres 2\")\n", + " else:\n", + " return query" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Here below we will keep trying 
the implementation of vosk:\n" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "usage: ipykernel_launcher.py [-h] [-l] [-f FILENAME] [-d DEVICE]\n", + " [-r SAMPLERATE] [-m MODEL]\n", + "ipykernel_launcher.py: error: unrecognized arguments: --ip=127.0.0.1 --stdin=9003 --control=9001 --hb=9000 --Session.signature_scheme=\"hmac-sha256\" --Session.key=b\"e6b71672-5503-452a-aa99-f0136f0665bd\" --shell=9002 --transport=\"tcp\" --iopub=9004\n" + ] + }, + { + "ename": "SystemExit", + "evalue": "2", + "output_type": "error", + "traceback": [ + "An exception has occurred, use %tb to see the full traceback.\n", + "\u001b[1;31mSystemExit\u001b[0m\u001b[1;31m:\u001b[0m 2\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "c:\\Users\\Gabriel\\AppData\\Local\\Programs\\Python\\Python37\\lib\\site-packages\\IPython\\core\\interactiveshell.py:3561: UserWarning: To exit: use 'exit', 'quit', or Ctrl-D.\n", + " warn(\"To exit: use 'exit', 'quit', or Ctrl-D.\", stacklevel=1)\n" + ] + } + ], + "source": [ + "#!/usr/bin/env python3\n", + "\n", + "# prerequisites: as described in https://alphacephei.com/vosk/install and also python module `sounddevice` (simply run command `pip install sounddevice`)\n", + "# Example usage using Dutch (nl) recognition model: `python test_microphone.py -m nl`\n", + "# For more help run: `python test_microphone.py -h`\n", + "\n", + "import argparse\n", + "import queue\n", + "import sys\n", + "import sounddevice as sd\n", + "\n", + "from vosk import Model, KaldiRecognizer\n", + "\n", + "q = queue.Queue()\n", + "\n", + "def int_or_str(text):\n", + " \"\"\"Helper function for argument parsing.\"\"\"\n", + " try:\n", + " return int(text)\n", + " except ValueError:\n", + " return text\n", + "\n", + "def callback(indata, frames, time, status):\n", + " \"\"\"This is called (from a separate thread) for each audio block.\"\"\"\n", + 
" if status:\n", + " print(status, file=sys.stderr)\n", + " q.put(bytes(indata))\n", + "\n", + "parser = argparse.ArgumentParser(add_help=False)\n", + "parser.add_argument(\n", + " \"-l\", \"--list-devices\", action=\"store_true\",\n", + " help=\"show list of audio devices and exit\")\n", + "args, remaining = parser.parse_known_args()\n", + "if args.list_devices:\n", + " print(sd.query_devices())\n", + " parser.exit(0)\n", + "parser = argparse.ArgumentParser(\n", + " description=__doc__,\n", + " formatter_class=argparse.RawDescriptionHelpFormatter,\n", + " parents=[parser])\n", + "parser.add_argument(\n", + " \"-f\", \"--filename\", type=str, metavar=\"FILENAME\",\n", + " help=\"audio file to store recording to\")\n", + "parser.add_argument(\n", + " \"-d\", \"--device\", type=int_or_str,\n", + " help=\"input device (numeric ID or substring)\")\n", + "parser.add_argument(\n", + " \"-r\", \"--samplerate\", type=int, help=\"sampling rate\")\n", + "parser.add_argument(\n", + " \"-m\", \"--model\", type=str, help=\"language model; e.g. 
en-us, fr, nl; default is en-us\")\n", + "args = parser.parse_args(remaining)\n", + "\n", + "try:\n", + " if args.samplerate is None:\n", + " device_info = sd.query_devices(args.device, \"input\")\n", + " # soundfile expects an int, sounddevice provides a float:\n", + " args.samplerate = int(device_info[\"default_samplerate\"])\n", + " \n", + " if args.model is None:\n", + " model = Model(lang=\"en-us\")\n", + " else:\n", + " model = Model(lang=args.model)\n", + "\n", + " if args.filename:\n", + " dump_fn = open(args.filename, \"wb\")\n", + " else:\n", + " dump_fn = None\n", + "\n", + " with sd.RawInputStream(samplerate=args.samplerate, blocksize = 8000, device=args.device,\n", + " dtype=\"int16\", channels=1, callback=callback):\n", + " print(\"#\" * 80)\n", + " print(\"Press Ctrl+C to stop the recording\")\n", + " print(\"#\" * 80)\n", + "\n", + " rec = KaldiRecognizer(model, args.samplerate)\n", + " while True:\n", + " data = q.get()\n", + " if rec.AcceptWaveform(data):\n", + " print(rec.Result())\n", + " else:\n", + " print(rec.PartialResult())\n", + " if dump_fn is not None:\n", + " dump_fn.write(data)\n", + "\n", + "except KeyboardInterrupt:\n", + " print(\"\\nDone\")\n", + " parser.exit(0)\n", + "except Exception as e:\n", + " parser.exit(type(e).__name__ + \": \" + str(e))" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "It works." 
+ ] } ], "metadata": { diff --git a/experiments/test.py b/experiments/test.py new file mode 100644 index 0000000..951864d --- /dev/null +++ b/experiments/test.py @@ -0,0 +1,89 @@ +#!/usr/bin/env python3 + +# prerequisites: as described in https://alphacephei.com/vosk/install and also python module `sounddevice` (simply run command `pip install sounddevice`) +# Example usage using Dutch (nl) recognition model: `python test_microphone.py -m nl` +# For more help run: `python test_microphone.py -h` + +import argparse +import queue +import sys +import sounddevice as sd + +from vosk import Model, KaldiRecognizer + +q = queue.Queue() + +def int_or_str(text): + """Helper function for argument parsing.""" + try: + return int(text) + except ValueError: + return text + +def callback(indata, frames, time, status): + """This is called (from a separate thread) for each audio block.""" + if status: + print(status, file=sys.stderr) + q.put(bytes(indata)) + +parser = argparse.ArgumentParser(add_help=False) +parser.add_argument( + "-l", "--list-devices", action="store_true", + help="show list of audio devices and exit") +args, remaining = parser.parse_known_args() +if args.list_devices: + print(sd.query_devices()) + parser.exit(0) +parser = argparse.ArgumentParser( + description=__doc__, + formatter_class=argparse.RawDescriptionHelpFormatter, + parents=[parser]) +parser.add_argument( + "-f", "--filename", type=str, metavar="FILENAME", + help="audio file to store recording to") +parser.add_argument( + "-d", "--device", type=int_or_str, + help="input device (numeric ID or substring)") +parser.add_argument( + "-r", "--samplerate", type=int, help="sampling rate") +parser.add_argument( + "-m", "--model", type=str, help="language model; e.g. 
en-us, fr, nl; default is en-us") +args = parser.parse_args(remaining) + +try: + if args.samplerate is None: + device_info = sd.query_devices(args.device, "input") + # soundfile expects an int, sounddevice provides a float: + args.samplerate = int(device_info["default_samplerate"]) + + if args.model is None: + model = Model(lang="en-us") + else: + model = Model(lang=args.model) + + if args.filename: + dump_fn = open(args.filename, "wb") + else: + dump_fn = None + + with sd.RawInputStream(samplerate=args.samplerate, blocksize = 8000, device=args.device, + dtype="int16", channels=1, callback=callback): + print("#" * 80) + print("Press Ctrl+C to stop the recording") + print("#" * 80) + + rec = KaldiRecognizer(model, args.samplerate) + while True: + data = q.get() + if rec.AcceptWaveform(data): + print(rec.Result()) + else: + print(rec.PartialResult()) + if dump_fn is not None: + dump_fn.write(data) + +except KeyboardInterrupt: + print("\nDone") + parser.exit(0) +except Exception as e: + parser.exit(type(e).__name__ + ": " + str(e)) \ No newline at end of file diff --git a/modules/__pycache__/bootloader.cpython-37.pyc b/modules/__pycache__/bootloader.cpython-37.pyc index 4ae237d..ee8c29d 100644 Binary files a/modules/__pycache__/bootloader.cpython-37.pyc and b/modules/__pycache__/bootloader.cpython-37.pyc differ diff --git a/modules/__pycache__/display.cpython-37.pyc b/modules/__pycache__/display.cpython-37.pyc index 3b7de65..2e32943 100644 Binary files a/modules/__pycache__/display.cpython-37.pyc and b/modules/__pycache__/display.cpython-37.pyc differ diff --git a/modules/__pycache__/sense.cpython-37.pyc b/modules/__pycache__/sense.cpython-37.pyc index 4e77905..46ce7fb 100644 Binary files a/modules/__pycache__/sense.cpython-37.pyc and b/modules/__pycache__/sense.cpython-37.pyc differ diff --git a/modules/bootloader.py b/modules/bootloader.py index 88133f1..9919e89 100644 --- a/modules/bootloader.py +++ b/modules/bootloader.py @@ -1,4 +1,6 @@ #DO NOT RUN THIS MODULE 
INDIVIDUALLY +#This is the main startup function +#It only runs when the program is started import json from os import path from modules.sense import speak diff --git a/modules/display.py b/modules/display.py index aa403de..211f3b5 100644 --- a/modules/display.py +++ b/modules/display.py @@ -1,4 +1,5 @@ - +#this function is to display words on the terminal +#cannot be observed on front end import sys import time import pyfiglet diff --git a/modules/mainsystem.py b/modules/mainsystem.py index 04dc547..5fe13b0 100644 --- a/modules/mainsystem.py +++ b/modules/mainsystem.py @@ -1,3 +1,4 @@ +#this file is code for any functions related to the windows operating system from pyautogui import screenshot import psutil from sense import speak diff --git a/modules/search.py b/modules/search.py index 08c8f27..06aa955 100644 --- a/modules/search.py +++ b/modules/search.py @@ -1,3 +1,4 @@ +#this searches any definitions or other kinds of factual information on the internet from cv2 import mean import wikipedia from sense import speak diff --git a/modules/sense.py b/modules/sense.py index 814d7b8..46b95c4 100644 --- a/modules/sense.py +++ b/modules/sense.py @@ -1,3 +1,6 @@ +#speak +#listen +#notify import time import pyttsx3 import speech_recognition as sr @@ -9,7 +12,8 @@ def speak(audio): engine.say(audio) engine.runAndWait() -#listen +#listen +#reimplement another system def listen(): r = sr.Recognizer() with sr.Microphone() as source: