restarting this project

privacyrespected · Jan 28, 2023 · d2c8fc1 · d2c8fc1
1 parent 573ddcc
commit d2c8fc1
Show file tree

Hide file tree

Showing 11 changed files with 289 additions and 2 deletions.
diff --git a/corpus/__pycache__/extrawords.cpython-37.pyc b/corpus/__pycache__/extrawords.cpython-37.pyc
diff --git a/experiments/speechrecog.ipynb b/experiments/speechrecog.ipynb
@@ -139,6 +139,195 @@
    "source": [
     "Task: Explore potential in implementation"
    ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Part 2: Enhance accuracy and speed\n",
+    "Before 2023, the speech recognition module was built on the Google API. Its capabilities were limited by internet speed and microphone accuracy. \n",
+    "\n",
+    "Below, we try out Vosk to explore the possibilities of offline speech recognition and how accurate it is."
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Traditionally, the code would include something like this:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import speech_recognition as sr\n",
+    "import pyaudio #for mic access\n",
+    "def listen():\n",
+    "    r = sr.Recognizer()\n",
+    "    with sr.Microphone() as source:\n",
+    "        print(\"Listening>>>\")\n",
+    "        r.pause_threshold = 1\n",
+    "        r.adjust_for_ambient_noise(source)\n",
+    "        audio = r.listen(source)\n",
+    "\n",
+    "    try:\n",
+    "        print(\"Recognizing: \")\n",
+    "        query = r.recognize_google(audio, language='en-in')\n",
+    "        print(f\"User:  {query}\\n\")\n",
+    "    except Exception as e:\n",
+    "        print(e)\n",
+    "        print(\"Audio not heard, plesae try again\")\n",
+    "        return \"None\"\n",
+    "    if query is None:\n",
+    "        print(\"audio not heard at thres 2\")\n",
+    "    else:\n",
+    "        return query"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Below, we try out an implementation using Vosk:\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "usage: ipykernel_launcher.py [-h] [-l] [-f FILENAME] [-d DEVICE]\n",
+      "                             [-r SAMPLERATE] [-m MODEL]\n",
+      "ipykernel_launcher.py: error: unrecognized arguments: --ip=127.0.0.1 --stdin=9003 --control=9001 --hb=9000 --Session.signature_scheme=\"hmac-sha256\" --Session.key=b\"e6b71672-5503-452a-aa99-f0136f0665bd\" --shell=9002 --transport=\"tcp\" --iopub=9004\n"
+     ]
+    },
+    {
+     "ename": "SystemExit",
+     "evalue": "2",
+     "output_type": "error",
+     "traceback": [
+      "An exception has occurred, use %tb to see the full traceback.\n",
+      "\u001b[1;31mSystemExit\u001b[0m\u001b[1;31m:\u001b[0m 2\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "c:\\Users\\Gabriel\\AppData\\Local\\Programs\\Python\\Python37\\lib\\site-packages\\IPython\\core\\interactiveshell.py:3561: UserWarning: To exit: use 'exit', 'quit', or Ctrl-D.\n",
+      "  warn(\"To exit: use 'exit', 'quit', or Ctrl-D.\", stacklevel=1)\n"
+     ]
+    }
+   ],
+   "source": [
+    "#!/usr/bin/env python3\n",
+    "\n",
+    "# prerequisites: as described in https://alphacephei.com/vosk/install and also python module `sounddevice` (simply run command `pip install sounddevice`)\n",
+    "# Example usage using Dutch (nl) recognition model: `python test_microphone.py -m nl`\n",
+    "# For more help run: `python test_microphone.py -h`\n",
+    "\n",
+    "import argparse\n",
+    "import queue\n",
+    "import sys\n",
+    "import sounddevice as sd\n",
+    "\n",
+    "from vosk import Model, KaldiRecognizer\n",
+    "\n",
+    "q = queue.Queue()\n",
+    "\n",
+    "def int_or_str(text):\n",
+    "    \"\"\"Helper function for argument parsing.\"\"\"\n",
+    "    try:\n",
+    "        return int(text)\n",
+    "    except ValueError:\n",
+    "        return text\n",
+    "\n",
+    "def callback(indata, frames, time, status):\n",
+    "    \"\"\"This is called (from a separate thread) for each audio block.\"\"\"\n",
+    "    if status:\n",
+    "        print(status, file=sys.stderr)\n",
+    "    q.put(bytes(indata))\n",
+    "\n",
+    "parser = argparse.ArgumentParser(add_help=False)\n",
+    "parser.add_argument(\n",
+    "    \"-l\", \"--list-devices\", action=\"store_true\",\n",
+    "    help=\"show list of audio devices and exit\")\n",
+    "args, remaining = parser.parse_known_args()\n",
+    "if args.list_devices:\n",
+    "    print(sd.query_devices())\n",
+    "    parser.exit(0)\n",
+    "parser = argparse.ArgumentParser(\n",
+    "    description=__doc__,\n",
+    "    formatter_class=argparse.RawDescriptionHelpFormatter,\n",
+    "    parents=[parser])\n",
+    "parser.add_argument(\n",
+    "    \"-f\", \"--filename\", type=str, metavar=\"FILENAME\",\n",
+    "    help=\"audio file to store recording to\")\n",
+    "parser.add_argument(\n",
+    "    \"-d\", \"--device\", type=int_or_str,\n",
+    "    help=\"input device (numeric ID or substring)\")\n",
+    "parser.add_argument(\n",
+    "    \"-r\", \"--samplerate\", type=int, help=\"sampling rate\")\n",
+    "parser.add_argument(\n",
+    "    \"-m\", \"--model\", type=str, help=\"language model; e.g. en-us, fr, nl; default is en-us\")\n",
+    "args = parser.parse_args(remaining)\n",
+    "\n",
+    "try:\n",
+    "    if args.samplerate is None:\n",
+    "        device_info = sd.query_devices(args.device, \"input\")\n",
+    "        # soundfile expects an int, sounddevice provides a float:\n",
+    "        args.samplerate = int(device_info[\"default_samplerate\"])\n",
+    "        \n",
+    "    if args.model is None:\n",
+    "        model = Model(lang=\"en-us\")\n",
+    "    else:\n",
+    "        model = Model(lang=args.model)\n",
+    "\n",
+    "    if args.filename:\n",
+    "        dump_fn = open(args.filename, \"wb\")\n",
+    "    else:\n",
+    "        dump_fn = None\n",
+    "\n",
+    "    with sd.RawInputStream(samplerate=args.samplerate, blocksize = 8000, device=args.device,\n",
+    "            dtype=\"int16\", channels=1, callback=callback):\n",
+    "        print(\"#\" * 80)\n",
+    "        print(\"Press Ctrl+C to stop the recording\")\n",
+    "        print(\"#\" * 80)\n",
+    "\n",
+    "        rec = KaldiRecognizer(model, args.samplerate)\n",
+    "        while True:\n",
+    "            data = q.get()\n",
+    "            if rec.AcceptWaveform(data):\n",
+    "                print(rec.Result())\n",
+    "            else:\n",
+    "                print(rec.PartialResult())\n",
+    "            if dump_fn is not None:\n",
+    "                dump_fn.write(data)\n",
+    "\n",
+    "except KeyboardInterrupt:\n",
+    "    print(\"\\nDone\")\n",
+    "    parser.exit(0)\n",
+    "except Exception as e:\n",
+    "    parser.exit(type(e).__name__ + \": \" + str(e))"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "It works."
+   ]
   }
  ],
  "metadata": {

diff --git a/experiments/test.py b/experiments/test.py
@@ -0,0 +1,89 @@
+#!/usr/bin/env python3
+
+# prerequisites: as described in https://alphacephei.com/vosk/install and also python module `sounddevice` (simply run command `pip install sounddevice`)
+# Example usage using Dutch (nl) recognition model: `python test.py -m nl`
+# For more help run: `python test.py -h`
+
+import argparse
+import queue
+import sys
+import sounddevice as sd
+
+from vosk import Model, KaldiRecognizer
+
+q = queue.Queue()  # audio blocks: filled by callback(), drained by the recognition loop below
+
+def int_or_str(text):  # used as the argparse `type=` converter for -d/--device
+    """Helper function for argument parsing."""
+    try:
+        return int(text)  # text parses as an integer: return the int
+    except ValueError:
+        return text  # not an integer: return the text unchanged
+
+def callback(indata, frames, time, status):  # passed as callback= to sd.RawInputStream below
+    """This is called (from a separate thread) for each audio block."""
+    if status:
+        print(status, file=sys.stderr)  # report a truthy status on stderr and carry on
+    q.put(bytes(indata))  # copy the block into bytes and queue it for the recognition loop
+
+parser = argparse.ArgumentParser(add_help=False)  # first-stage parser: knows only -l, no -h yet
+parser.add_argument(
+    "-l", "--list-devices", action="store_true",
+    help="show list of audio devices and exit")
+args, remaining = parser.parse_known_args()  # options this parser does not know go to `remaining`
+if args.list_devices:
+    print(sd.query_devices())
+    parser.exit(0)  # listing devices is a complete run of its own
+parser = argparse.ArgumentParser(  # second-stage parser: inherits -l through parents= and adds -h
+    description=__doc__,  # NOTE(review): this script has no module docstring, so __doc__ is None here
+    formatter_class=argparse.RawDescriptionHelpFormatter,
+    parents=[parser])
+parser.add_argument(
+    "-f", "--filename", type=str, metavar="FILENAME",
+    help="audio file to store recording to")
+parser.add_argument(
+    "-d", "--device", type=int_or_str,
+    help="input device (numeric ID or substring)")
+parser.add_argument(
+    "-r", "--samplerate", type=int, help="sampling rate")
+parser.add_argument(
+    "-m", "--model", type=str, help="language model; e.g. en-us, fr, nl; default is en-us")
+args = parser.parse_args(remaining)  # strict parse: any argument still unrecognized is an error
+
+try:
+    if args.samplerate is None:
+        device_info = sd.query_devices(args.device, "input")
+        # soundfile expects an int, sounddevice provides a float:
+        args.samplerate = int(device_info["default_samplerate"])
+
+    if args.model is None:
+        model = Model(lang="en-us")
+    else:
+        model = Model(lang=args.model)
+
+    if args.filename:
+        dump_fn = open(args.filename, "wb")
+    else:
+        dump_fn = None
+
+    with sd.RawInputStream(samplerate=args.samplerate, blocksize = 8000, device=args.device,
+            dtype="int16", channels=1, callback=callback):
+        print("#" * 80)
+        print("Press Ctrl+C to stop the recording")
+        print("#" * 80)
+
+        rec = KaldiRecognizer(model, args.samplerate)
+        while True:
+            data = q.get()
+            if rec.AcceptWaveform(data):
+                print(rec.Result())
+            else:
+                print(rec.PartialResult())
+            if dump_fn is not None:
+                dump_fn.write(data)
+
+except KeyboardInterrupt:
+    print("\nDone")
+    parser.exit(0)
+except Exception as e:
+    parser.exit(type(e).__name__ + ": " + str(e))
diff --git a/modules/__pycache__/bootloader.cpython-37.pyc b/modules/__pycache__/bootloader.cpython-37.pyc
diff --git a/modules/__pycache__/display.cpython-37.pyc b/modules/__pycache__/display.cpython-37.pyc
diff --git a/modules/__pycache__/sense.cpython-37.pyc b/modules/__pycache__/sense.cpython-37.pyc
diff --git a/modules/bootloader.py b/modules/bootloader.py
@@ -1,4 +1,6 @@
 #DO NOT RUN THIS MODULE INDIVIDUALLY
+#This is the main startup function
+#It only runs when the program is started
 import json
 from os import path
 from modules.sense import speak

diff --git a/modules/display.py b/modules/display.py
@@ -1,4 +1,5 @@
-
+#this function is to display words on the terminal
+#cannot be observed on front end
 import sys
 import time
 import pyfiglet

diff --git a/modules/mainsystem.py b/modules/mainsystem.py
@@ -1,3 +1,4 @@
+#this file is code for any functions related to the windows operating system
 from pyautogui import screenshot
 import psutil
 from sense import speak

diff --git a/modules/search.py b/modules/search.py
@@ -1,3 +1,4 @@
+#this searches any definitions or other kinds of factual information on the internet
 from cv2 import mean
 import wikipedia
 from sense import speak

diff --git a/modules/sense.py b/modules/sense.py
@@ -1,3 +1,6 @@
+#speak
+#listen
+#notify
 import time
 import pyttsx3
 import speech_recognition as sr
@@ -9,7 +12,8 @@ def speak(audio):
     engine.say(audio)
     engine.runAndWait()
 
-#listen
+#listen 
+#reimplement another system
 def listen():
     r = sr.Recognizer()
     with sr.Microphone() as source: