-
Notifications
You must be signed in to change notification settings - Fork 0
/
barney_test_voice.py
201 lines (179 loc) · 6.92 KB
/
barney_test_voice.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
import asyncio
import time
from threading import Thread

import discord
import speech_recognition as sr
# Credentials used by this script: TOKEN is passed to client.run() and
# WIT_AI_KEY is passed to recognize_wit().
# NOTE(review): the values below look like placeholders - supply real ones
# before running, and avoid committing real secrets to source control.
TOKEN = 'whats a token'
WIT_AI_KEY = "what"
# Load the Opus library as soon as the module is imported.
# NOTE(review): this is a machine-specific absolute Windows path - confirm it
# exists on the machine the bot runs on.
discord.opus.load_opus('C:/Users/andre/documents/Barney Bot/libopus-0.x64.dll')
# we need a sink for the listen function, so we just define our own
# extremely simple: just appends data to a byte array buffer
class BufSink(discord.reader.AudioSink):
    """Minimal audio sink that appends everything it receives to one buffer.

    The buffer is filled by ``write`` and drained by whoever calls
    ``freshen`` (in this file, the ``poster`` thread). Both methods mutate
    the same ``bytearray`` object in place, so a trim can never discard data
    that was appended concurrently and an append can never undo a trim.
    """

    def __init__(self):
        # bytes received so far that have not been consumed yet
        self.bytearr_buf = bytearray()
        # sample width in bytes, as handed to speech_recognition
        self.sample_width = 2
        # 48000Hz sampling rate, doubled: speech_recognition needs mono and
        # the incoming audio is stereo, so the data is treated as mono at
        # twice the rate
        self.sample_rate = 96000
        # bytes per second (sample_rate * sample_width); used to work out how
        # many bytes make up a slice of a given duration
        self.bytes_ps = 192000

    def write(self, data):
        """Append the payload of one received packet to the buffer.

        ``data.data`` is assumed to be a bytes-like object - TODO confirm
        against the voice-receive API in use.
        """
        # extend() mutates in place, so the attribute is never rebound
        self.bytearr_buf.extend(data.data)

    def freshen(self, idx):
        """Drop the first ``idx`` bytes of the buffer (the part already read).

        Keeps the buffer from growing without bound.
        """
        # In-place deletion instead of rebinding to a sliced copy: rebinding
        # could race with a concurrent write() and lose data.
        del self.bytearr_buf[:idx]
# Module-level stop signal shared between the bot and the poster thread:
# the bot sets it to True when handling $close / $leave, and the poster loop
# exits once it sees the flag set.
close_flag = False
# client bot class
class Deffy(discord.Client):
    """Discord client that listens in a voice channel and posts transcripts.

    Commands handled in ``on_message``:
      $here  - join (or move to) the author's voice channel, start the
               analysis thread if needed, and listen to the author
      $leave - disconnect from voice and stop the analysis thread
      $close - disconnect, stop the analysis thread and shut the bot down
    """

    def __init__(self):
        super().__init__()
        # text channel the transcripts are posted to
        self.target_channel = None
        # thread running poster(), or None while no analysis is active
        self.post_thread = None
        # sink that collects the received audio
        self.buffer = BufSink()

    async def on_ready(self):
        """Print some sanity information once the bot is logged in."""
        print()
        print("Logged in as")
        print(self.user.name)
        print(self.user.id)
        print("----------")
        print("discord.py version")
        print(discord.__version__)
        print("----------")
        print()

    def _start_poster(self):
        """Start the analysis thread unless one is already running."""
        if self.post_thread is not None and self.post_thread.is_alive():
            return
        self.post_thread = Thread(
            target=poster,
            args=(self, self.buffer, self.target_channel))
        self.post_thread.start()

    def _stop_poster(self):
        """Signal the analysis thread to stop, wait for it, and reset state.

        Does nothing when no thread was ever started, so $close / $leave are
        safe to use before any $here.
        """
        global close_flag
        if self.post_thread is None:
            return
        close_flag = True
        # NOTE: join() blocks the event loop until the thread notices the
        # flag, exactly as the original code did.
        self.post_thread.join()
        # Reset so that a later $here can start a fresh thread.
        self.post_thread = None
        close_flag = False

    async def _disconnect_voice(self):
        """Properly close every active voice connection."""
        for vc in self.voice_clients:
            await vc.disconnect()

    async def _summon(self, message):
        """Handle $here: join the author's voice channel and start listening."""
        # Not every author object has a voice attribute, so read it with
        # getattr; a missing attribute is treated like "not in a voice channel".
        voice_state = getattr(message.author, "voice", None)
        if voice_state is None:
            await message.channel.send("Sorry, you're not in a voice channel.")
            return

        # store the channel we need to post our output to
        self.target_channel = message.channel
        # ack the command and inform the user
        await message.channel.send(
            "Got it, moving to voice channel " + voice_state.channel.name
            + " and directing output to " + self.target_channel.name + ".")

        if self.voice_clients:
            # reuse the existing voice connection and just move it
            await self.voice_clients[0].move_to(voice_state.channel)
        else:
            # no active connection yet, create a new one
            await voice_state.channel.connect()

        self._start_poster()
        # start listening - the user filter only passes audio of this user
        self.voice_clients[0].listen(
            discord.reader.UserFilter(self.buffer, message.author))

    async def on_message(self, message):
        """Dispatch the $close, $leave and $here text commands.

        Returns False for the bot's own messages and None otherwise.
        """
        # don't respond to ourselves
        if message.author == self.user:
            return False

        command = message.content.lower()

        if command.startswith("$close"):
            # ack the command, then tear everything down in order
            await message.channel.send("Got it, shutting down...")
            await self._disconnect_voice()
            self._stop_poster()
            await self.close()
            quit()
        elif command.startswith("$leave"):
            if self.voice_clients:
                await self._disconnect_voice()
                self._stop_poster()
            else:
                await message.channel.send(
                    "Sorry, you're not in a voice channel.")
        elif command.startswith("$here"):
            await self._summon(message)
# thread that handles message posting and voice analysis
def poster(bot, buffer, target_channel):
    """Thread body: transcribe buffered voice data and post the text.

    Runs until the module-level ``close_flag`` becomes true. Once more than
    960000 bytes are buffered (5 seconds at BufSink's 192000 bytes/s), the
    first 3 seconds are cut out, leading silence is trimmed, and the clip is
    sent to Wit.ai. A non-empty transcript is posted to the text channel.

    Args:
        bot: the client; its ``loop`` is used to schedule the send, and its
            ``target_channel`` attribute (when set) selects the channel.
        buffer: sink exposing ``bytearr_buf``, ``bytes_ps``, ``sample_rate``,
            ``sample_width`` and ``freshen(idx)``.
        target_channel: fallback channel used when ``bot`` does not provide
            a ``target_channel``.
    """
    print('started posting')
    # instantiate our recognizer object
    recog = sr.Recognizer()
    # size in bytes of the clip analysed per round (3 seconds of audio)
    window = buffer.bytes_ps * 3

    # close_flag is only read here, so no `global` declaration is needed
    while not close_flag:
        # wait until enough data has been buffered; sleep instead of
        # spinning so the thread does not burn a full CPU core
        if len(buffer.bytearr_buf) <= 960000:
            time.sleep(0.1)
            continue

        clip = buffer.bytearr_buf[:window]
        # number of leading zero bytes (silence) in the clip
        lead = len(clip) - len(clip.lstrip(b"\x00"))

        # only bother the recognizer if the clip isn't all zeroes
        if lead < len(clip):
            # trim leading silence, but only in whole samples: cutting an
            # odd number of bytes would misalign every following sample
            lead -= lead % buffer.sample_width
            if lead:
                buffer.freshen(lead)
                clip = buffer.bytearr_buf[:window]

            # AudioData is what the speech_recognition lib works with
            audio = sr.AudioData(bytes(clip), buffer.sample_rate,
                                 buffer.sample_width)
            with open("audio_file.wav", "wb") as new_file:
                new_file.write(audio.get_wav_data())

            # reset every round so a failed request can neither raise
            # NameError nor re-post the previous transcript
            msg = None
            try:
                msg = recog.recognize_wit(audio, WIT_AI_KEY)
            except sr.UnknownValueError:
                print("ERROR: Couldn't understand.")
            except sr.RequestError as e:
                # keep the thread alive when the service is unreachable
                print("ERROR: Could not request results from Wit.ai "
                      "service; {0}".format(e))

            # unintelligible audio can yield an empty message - skip those
            if msg:
                # prefer the bot's current channel so a later $here from a
                # different text channel redirects the output
                channel = getattr(bot, "target_channel", None) or target_channel
                # hand the send over to the bot's event loop
                asyncio.run_coroutine_threadsafe(channel.send(msg), bot.loop)

        # cut the part we just read from the buffer
        buffer.freshen(window)
# Script entry point: create the bot and start it with the configured token.
# NOTE: these statements execute whenever this module is run or imported.
client = Deffy()
client.run(TOKEN)