Skip to content

Commit

Permalink
restarting this project
Browse files Browse the repository at this point in the history
  • Loading branch information
tutor-temple committed Jan 28, 2023
1 parent 573ddcc commit d2c8fc1
Show file tree
Hide file tree
Showing 11 changed files with 289 additions and 2 deletions.
Binary file added corpus/__pycache__/extrawords.cpython-37.pyc
Binary file not shown.
189 changes: 189 additions & 0 deletions experiments/speechrecog.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -139,6 +139,195 @@
"source": [
"Task: Explore potential in implementation"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"### Part 2: Enhance accuracy and speed\n",
"Before 2023, the speech recognition module is built based on the google API. Its capabilities are limited by internet speed and microphone accuracy. \n",
"\n",
"Below, we will be attempting to use Vosk to understand the possibilities of offline speech recognition and its related accuracy."
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"Traditionally, the code will include something like this:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import speech_recognition as sr\n",
"import pyaudio #for mic access\n",
"def listen():\n",
" r = sr.Recognizer()\n",
" with sr.Microphone() as source:\n",
" print(\"Listening>>>\")\n",
" r.pause_threshold = 1\n",
" r.adjust_for_ambient_noise(source)\n",
" audio = r.listen(source)\n",
"\n",
" try:\n",
" print(\"Recognizing: \")\n",
" query = r.recognize_google(audio, language='en-in')\n",
" print(f\"User: {query}\\n\")\n",
" except Exception as e:\n",
" print(e)\n",
" print(\"Audio not heard, plesae try again\")\n",
" return \"None\"\n",
" if query is None:\n",
" print(\"audio not heard at thres 2\")\n",
" else:\n",
" return query"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"Here below we will keep trying the implementation of vosk:\n"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"usage: ipykernel_launcher.py [-h] [-l] [-f FILENAME] [-d DEVICE]\n",
" [-r SAMPLERATE] [-m MODEL]\n",
"ipykernel_launcher.py: error: unrecognized arguments: --ip=127.0.0.1 --stdin=9003 --control=9001 --hb=9000 --Session.signature_scheme=\"hmac-sha256\" --Session.key=b\"e6b71672-5503-452a-aa99-f0136f0665bd\" --shell=9002 --transport=\"tcp\" --iopub=9004\n"
]
},
{
"ename": "SystemExit",
"evalue": "2",
"output_type": "error",
"traceback": [
"An exception has occurred, use %tb to see the full traceback.\n",
"\u001b[1;31mSystemExit\u001b[0m\u001b[1;31m:\u001b[0m 2\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"c:\\Users\\Gabriel\\AppData\\Local\\Programs\\Python\\Python37\\lib\\site-packages\\IPython\\core\\interactiveshell.py:3561: UserWarning: To exit: use 'exit', 'quit', or Ctrl-D.\n",
" warn(\"To exit: use 'exit', 'quit', or Ctrl-D.\", stacklevel=1)\n"
]
}
],
"source": [
"#!/usr/bin/env python3\n",
"\n",
"# prerequisites: as described in https://alphacephei.com/vosk/install and also python module `sounddevice` (simply run command `pip install sounddevice`)\n",
"# Example usage using Dutch (nl) recognition model: `python test_microphone.py -m nl`\n",
"# For more help run: `python test_microphone.py -h`\n",
"\n",
"import argparse\n",
"import queue\n",
"import sys\n",
"import sounddevice as sd\n",
"\n",
"from vosk import Model, KaldiRecognizer\n",
"\n",
"q = queue.Queue()\n",
"\n",
"def int_or_str(text):\n",
" \"\"\"Helper function for argument parsing.\"\"\"\n",
" try:\n",
" return int(text)\n",
" except ValueError:\n",
" return text\n",
"\n",
"def callback(indata, frames, time, status):\n",
" \"\"\"This is called (from a separate thread) for each audio block.\"\"\"\n",
" if status:\n",
" print(status, file=sys.stderr)\n",
" q.put(bytes(indata))\n",
"\n",
"parser = argparse.ArgumentParser(add_help=False)\n",
"parser.add_argument(\n",
" \"-l\", \"--list-devices\", action=\"store_true\",\n",
" help=\"show list of audio devices and exit\")\n",
"args, remaining = parser.parse_known_args()\n",
"if args.list_devices:\n",
" print(sd.query_devices())\n",
" parser.exit(0)\n",
"parser = argparse.ArgumentParser(\n",
" description=__doc__,\n",
" formatter_class=argparse.RawDescriptionHelpFormatter,\n",
" parents=[parser])\n",
"parser.add_argument(\n",
" \"-f\", \"--filename\", type=str, metavar=\"FILENAME\",\n",
" help=\"audio file to store recording to\")\n",
"parser.add_argument(\n",
" \"-d\", \"--device\", type=int_or_str,\n",
" help=\"input device (numeric ID or substring)\")\n",
"parser.add_argument(\n",
" \"-r\", \"--samplerate\", type=int, help=\"sampling rate\")\n",
"parser.add_argument(\n",
" \"-m\", \"--model\", type=str, help=\"language model; e.g. en-us, fr, nl; default is en-us\")\n",
"args = parser.parse_args(remaining)\n",
"\n",
"try:\n",
" if args.samplerate is None:\n",
" device_info = sd.query_devices(args.device, \"input\")\n",
" # soundfile expects an int, sounddevice provides a float:\n",
" args.samplerate = int(device_info[\"default_samplerate\"])\n",
" \n",
" if args.model is None:\n",
" model = Model(lang=\"en-us\")\n",
" else:\n",
" model = Model(lang=args.model)\n",
"\n",
" if args.filename:\n",
" dump_fn = open(args.filename, \"wb\")\n",
" else:\n",
" dump_fn = None\n",
"\n",
" with sd.RawInputStream(samplerate=args.samplerate, blocksize = 8000, device=args.device,\n",
" dtype=\"int16\", channels=1, callback=callback):\n",
" print(\"#\" * 80)\n",
" print(\"Press Ctrl+C to stop the recording\")\n",
" print(\"#\" * 80)\n",
"\n",
" rec = KaldiRecognizer(model, args.samplerate)\n",
" while True:\n",
" data = q.get()\n",
" if rec.AcceptWaveform(data):\n",
" print(rec.Result())\n",
" else:\n",
" print(rec.PartialResult())\n",
" if dump_fn is not None:\n",
" dump_fn.write(data)\n",
"\n",
"except KeyboardInterrupt:\n",
" print(\"\\nDone\")\n",
" parser.exit(0)\n",
"except Exception as e:\n",
" parser.exit(type(e).__name__ + \": \" + str(e))"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"It works."
]
}
],
"metadata": {
Expand Down
89 changes: 89 additions & 0 deletions experiments/test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
#!/usr/bin/env python3

# prerequisites: as described in https://alphacephei.com/vosk/install and also python module `sounddevice` (simply run command `pip install sounddevice`)
# Example usage using Dutch (nl) recognition model: `python test_microphone.py -m nl`
# For more help run: `python test_microphone.py -h`

import argparse
import queue
import sys
import sounddevice as sd

from vosk import Model, KaldiRecognizer

q = queue.Queue()

def int_or_str(text):
    """Helper function for argument parsing.

    Return ``int(text)`` when *text* parses as an integer; otherwise return
    *text* unchanged. Used as the ``type=`` converter for ``--device`` so the
    option accepts either a numeric ID or a substring.
    """
    try:
        return int(text)
    except ValueError:
        # Not an integer literal: keep the raw string.
        return text

def callback(indata, frames, time, status):
    """This is called (from a separate thread) for each audio block.

    Passed as ``callback=`` to ``sd.RawInputStream``. Any truthy *status* is
    reported on stderr, and the block's data is copied into a ``bytes`` object
    and pushed onto the module-level queue ``q`` for the main loop to consume.

    *frames* and *time* are part of the callback signature but are not used
    here.
    """
    if status:
        print(status, file=sys.stderr)
    # bytes(...) copies the buffer before it is queued.
    q.put(bytes(indata))

# Stage 1: a minimal parser that only knows -l/--list-devices. It uses
# parse_known_args() so every other option is left in ``remaining``, and it
# has add_help=False so that -h is handled by the full parser below.
parser = argparse.ArgumentParser(add_help=False)
parser.add_argument(
    "-l", "--list-devices", action="store_true",
    help="show list of audio devices and exit")
args, remaining = parser.parse_known_args()
if args.list_devices:
    print(sd.query_devices())
    parser.exit(0)

# Stage 2: the full parser. It inherits -l from stage 1 through ``parents``
# and parses only the arguments stage 1 did not consume.
parser = argparse.ArgumentParser(
    description=__doc__,
    formatter_class=argparse.RawDescriptionHelpFormatter,
    parents=[parser])
parser.add_argument(
    "-f", "--filename", type=str, metavar="FILENAME",
    help="audio file to store recording to")
parser.add_argument(
    "-d", "--device", type=int_or_str,
    help="input device (numeric ID or substring)")
parser.add_argument(
    "-r", "--samplerate", type=int, help="sampling rate")
parser.add_argument(
    "-m", "--model", type=str, help="language model; e.g. en-us, fr, nl; default is en-us")
args = parser.parse_args(remaining)

# Main recognition loop: read audio blocks queued by ``callback``, feed them to
# the Vosk recognizer, print full/partial results, and optionally dump the raw
# audio to a file. Runs until interrupted with Ctrl+C.
try:
    if args.samplerate is None:
        device_info = sd.query_devices(args.device, "input")
        # soundfile expects an int, sounddevice provides a float:
        args.samplerate = int(device_info["default_samplerate"])

    # Fall back to the en-us model when -m/--model was not given.
    if args.model is None:
        model = Model(lang="en-us")
    else:
        model = Model(lang=args.model)

    if args.filename:
        dump_fn = open(args.filename, "wb")
    else:
        dump_fn = None

    try:
        with sd.RawInputStream(samplerate=args.samplerate, blocksize=8000, device=args.device,
                               dtype="int16", channels=1, callback=callback):
            print("#" * 80)
            print("Press Ctrl+C to stop the recording")
            print("#" * 80)

            rec = KaldiRecognizer(model, args.samplerate)
            while True:
                # Blocks until the audio callback has queued the next chunk.
                data = q.get()
                if rec.AcceptWaveform(data):
                    print(rec.Result())
                else:
                    print(rec.PartialResult())
                if dump_fn is not None:
                    dump_fn.write(data)
    finally:
        # Close the dump file on every exit path (Ctrl+C or error) so the
        # recorded audio is flushed and the handle is not leaked.
        if dump_fn is not None:
            dump_fn.close()

except KeyboardInterrupt:
    print("\nDone")
    parser.exit(0)
except Exception as e:
    # The message is passed as parser.exit()'s first argument (``status``),
    # which is handed to sys.exit(); a string there is printed to stderr and
    # the process exits with status 1.
    parser.exit(type(e).__name__ + ": " + str(e))
Binary file modified modules/__pycache__/bootloader.cpython-37.pyc
Binary file not shown.
Binary file modified modules/__pycache__/display.cpython-37.pyc
Binary file not shown.
Binary file modified modules/__pycache__/sense.cpython-37.pyc
Binary file not shown.
2 changes: 2 additions & 0 deletions modules/bootloader.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
#DO NOT RUN THIS MODULE INDIVIDUALLY
#This is the main startup function
#It only runs when the program is started
import json
from os import path
from modules.sense import speak
Expand Down
3 changes: 2 additions & 1 deletion modules/display.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@

#this function is to display words on the terminal
#cannot be observed on front end
import sys
import time
import pyfiglet
Expand Down
1 change: 1 addition & 0 deletions modules/mainsystem.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
#this file is code for any functions related to the windows operating system
from pyautogui import screenshot
import psutil
from sense import speak
Expand Down
1 change: 1 addition & 0 deletions modules/search.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
#this searches any definitions or other kinds of factual information on the internet
from cv2 import mean
import wikipedia
from sense import speak
Expand Down
6 changes: 5 additions & 1 deletion modules/sense.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
#speak
#listen
#notify
import time
import pyttsx3
import speech_recognition as sr
Expand All @@ -9,7 +12,8 @@ def speak(audio):
engine.say(audio)
engine.runAndWait()

#listen
#listen
#reimplement another system
def listen():
r = sr.Recognizer()
with sr.Microphone() as source:
Expand Down

0 comments on commit d2c8fc1

Please sign in to comment.