-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathplugin_fuzzy_sklearn.py
101 lines (78 loc) · 3.76 KB
/
plugin_fuzzy_sklearn.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
import os
from vacore import VACore
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from pymorphy2 import MorphAnalyzer
# Name of this plugin module: the final path component of this file with its
# last three characters removed (presumably the ".py" suffix).
modname = os.path.split(__file__)[1][: -len(".py")]
def get_command_key_from_context(predicted_command, context):
    """Return the full context key that lists ``predicted_command``.

    The current implementation of the API requires a command *key* to be
    returned, not a specific command. A key may bundle several alternative
    commands separated by ``|``; this function finds the key whose
    alternatives include the predicted command.

    Args:
        predicted_command: A single command alternative to look up.
        context: Mapping whose keys are ``|``-separated command alternatives.

    Returns:
        The first key of ``context`` (in iteration order) that has
        ``predicted_command`` among its alternatives, or ``None`` if no key
        contains it.
    """
    matching_keys = (
        full_key
        for full_key in context.keys()
        if predicted_command in full_key.split("|")
    )
    return next(matching_keys, None)
class Plugin:
    """Fuzzy command matcher built on TF-IDF vectors and cosine similarity.

    ``prepare`` lemmatizes and vectorizes every command alternative found in
    ``core.commands``; ``predict`` compares a lemmatized user phrase against
    those vectors and reports the best-matching command key.
    """

    def __init__(self):
        self.morph = MorphAnalyzer()
        # Command alternatives in their original (non-lemmatized) form.
        # Row ``i`` of ``commands_vectors`` is the vector of ``commands[i]``.
        self.commands = []
        self.commands_vectors = None
        # Create a Tf-Idf vectorizer
        self.vectorizer = TfidfVectorizer()

    def _normalize_words(self, words):
        """Return the normal form of each word, taken from its first parse."""
        return [self.morph.parse(word)[0].normal_form for word in words]

    def prepare(self, core: VACore):
        """Collect, lemmatize and vectorize all commands known to ``core``.

        Every key of ``core.commands`` is split on ``|`` and each alternative
        is stored separately. The vectorizer is (re)fitted on the lemmatized
        alternatives.

        Args:
            core: Object exposing a ``commands`` mapping whose keys are
                ``|``-separated command alternatives.
        """
        commands = [
            key
            for keyall in core.commands.keys()
            for key in keyall.split("|")
        ]
        # preprocessing step
        morph_commands = [
            " ".join(self._normalize_words(key.split())) for key in commands
        ]

        # Vectorize the commands
        commands_vectors = self.vectorizer.fit_transform(morph_commands)

        # Replace the stored commands instead of extending them, so that a
        # repeated call keeps list indexes aligned with the vector rows.
        self.commands = commands
        self.commands_vectors = commands_vectors

    def predict(self, core: VACore, command: str, context: dict):
        """Find the known command most similar to the start of ``command``.

        The first iteration scores the full phrase. Each following iteration
        drops the last word and checks that the predicted command is
        unchanged; the search stops as soon as it changes. The prefix with
        the highest score marks where the command ends, which allows the rest
        of the phrase to be extracted.

        Args:
            core: Unused here; part of the fuzzy-processor call signature.
            command: Phrase to recognize (split on whitespace).
            context: Mapping whose keys are ``|``-separated command
                alternatives; used to translate the predicted alternative
                into its full key.

        Returns:
            A tuple ``(command_key, score, end_of_phrase)``:
            ``command_key`` is the matching key of ``context`` or ``None``
            when the predicted command is not listed there (or ``command``
            has no words); ``score`` is the best cosine similarity found
            (``0`` if nothing scored higher); ``end_of_phrase`` is the words
            following the best-scoring prefix, joined by single spaces with
            no trailing whitespace.
        """
        words = command.split()
        # Lemmatize every word once; each prefix below reuses these forms.
        normal_forms = self._normalize_words(words)

        best_score = 0
        best_predicted_command = None
        # Number of leading words covered by the best-scoring prefix.
        # Defaults to the whole phrase, so the remainder is empty when no
        # prefix scores above zero.
        best_length = len(words)

        for length in range(len(words), 0, -1):
            command_to_predict = " ".join(normal_forms[:length])

            # Vectorize the string to predict
            command_to_predict_vector = self.vectorizer.transform([command_to_predict])

            # Cosine similarity between the string to predict and each command
            similarities = cosine_similarity(command_to_predict_vector, self.commands_vectors)

            # Index and score of the most similar command
            most_similar_index = similarities.argmax()
            most_similar_prob = similarities.max()

            predicted_command = self.commands[most_similar_index]
            if best_predicted_command is None:
                best_predicted_command = predicted_command
            elif predicted_command != best_predicted_command:
                # A shorter prefix matches a different command: stop here.
                break

            if best_score < most_similar_prob:
                best_score = most_similar_prob
                best_length = length

        # Everything after the best-scoring prefix is the rest of the phrase.
        end_of_phrase = " ".join(words[best_length:])

        command_key = get_command_key_from_context(best_predicted_command, context)
        return (command_key, best_score, end_of_phrase)
# Shared module-level Plugin instance; start() registers its bound
# prepare/predict methods in the manifest.
plugin = Plugin()
def start(core: VACore):
    """Startup function: build and return the plugin manifest.

    Args:
        core: Not used by this function.

    Returns:
        A dict with the plugin name, version, the ``require_online`` flag and
        a ``fuzzy_processor`` entry that maps ``"sklearn_fuzzy"`` to the
        ``(prepare, predict)`` methods of the module-level ``plugin``.
    """
    fuzzy_processor = {
        # first function performs initialization, the second does the processing
        "sklearn_fuzzy": (plugin.prepare, plugin.predict),
    }
    return {
        "name": "Fuzzy input processing with sklearn",
        "version": "1.0",
        "require_online": False,
        "fuzzy_processor": fuzzy_processor,
    }