Skip to content

Commit

Permalink
Merge pull request #1 from mrkmcknz/QOL/single-file-init
Browse files: browse the repository at this point in the history.
[WIP] Single JSON file initialization.
  • Loading branch information
mrkmcknz committed May 12, 2016
2 parents 9eda077 + 2a9d62c commit 760a765
Show file tree
Hide file tree
Showing 3 changed files with 45 additions and 22 deletions.
24 changes: 6 additions & 18 deletions isac/engine.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import re
import heapq

from isac.training_engine import Trainer
from isac.entity_engine import Entity
from isac.intent_engine import IntentBuilder
from isac.parser_engine import Parser
Expand All @@ -10,12 +11,14 @@

class Engine(object):

def __init__(self, training, tokenizer=None, trie=None):
def __init__(self, training_data=None, tokenizer=None, trie=None):
self.tokenizer = tokenizer or Tokenizer()
self.trie = trie or Trie()
self.training = training
self.tagger = Entity(self.trie, self.tokenizer, self.training)
self.training_data = training_data
self.tagger = Entity(self.trie, self.tokenizer, self.training_data)
self.intent_parsers = []
if training_data:
Trainer(self).build()

def _max_intent(self, parse_result, utterance):

Expand Down Expand Up @@ -56,21 +59,6 @@ def training_nouns(self, value, entity_type):
if entity_type not in self.nn_training:
self.nn_training.append(entity_type)

def pos_training(self, data):

for tag in data['tags']:

for pos in self.tokenizer.tagger(data['text']):
if pos[0] == tag['value']:
tag['pos'] = pos[1]

return data

def register_training(self, data):

for tag in data.get('tags'):
self.register_entity(tag['value'], tag['key'])

def register_intent_parser(self, intent_parser):

if hasattr(intent_parser, 'validate') and \
Expand Down
10 changes: 6 additions & 4 deletions isac/entity_engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,9 @@

class Entity(object):

def __init__(self, trie, tokenizer, training, max_tokens=20):
def __init__(self, trie, tokenizer, training_data, max_tokens=20):
self.trie = trie
self.training = training
self.training_data = training_data
self.tokenizer = tokenizer
self.max_tokens = max_tokens

Expand All @@ -28,8 +28,10 @@ def _sort_and_merge_tags(self, tags):
def unknown_entities(self, attr, utterance):

pos_tokens = []
for _t in self.training:
pos_tokens += [t['pos'] for t in _t['tags'] if t['key'] == attr]
for intent in self.training_data:
for _t in intent:
pos_tokens += [t['pos'] for t in _t['tags']
if t['key'] == attr]

# TODO: replace this most-common-POS heuristic with an RF classifier or similar.
most_common_pos = most_common(pos_tokens)
Expand Down
33 changes: 33 additions & 0 deletions isac/training_engine.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
from isac.intent_engine import IntentBuilder


class Trainer(object):
    """Populate an engine's entities and intents from its training data.

    The training data is read from ``engine.training_data`` and is iterated
    as a sequence of intent dicts.  The keys this class reads are:

    * ``intent['name']``, ``intent['required']``, ``intent['optional']`` --
      forwarded unchanged to ``engine.register_intent``.
    * ``intent['training']`` -- an iterable of sample dicts, each with a
      ``'text'`` entry and an optional ``'tags'`` list.
    * each tag -- a dict with ``'value'`` and ``'key'`` entries; a ``'pos'``
      entry is added by :meth:`pos_training_data` when a token matches.
    """

    def __init__(self, engine):
        """Keep references to *engine* and the collaborators read from it."""
        self.training_data = engine.training_data
        self.engine = engine
        self.tokenizer = engine.tokenizer
        self.tagger = engine.tagger

    def register_entities(self, intent):
        """Register every tagged value of *intent* with the engine.

        Calls ``engine.register_entity(value, key)`` once per tag of every
        training sample.  Samples without a ``'tags'`` entry (or with an
        empty/``None`` one) are skipped instead of raising ``TypeError``.
        """
        for sample in intent['training']:
            for entity in sample.get('tags') or []:
                self.engine.register_entity(entity['value'], entity['key'])

    def pos_training_data(self):
        """Annotate training tags in place with a ``'pos'`` entry.

        For each sample, ``tokenizer.tagger(text)`` is called once and its
        items are compared against every tag: when ``item[0]`` equals the
        tag's ``'value'``, ``item[1]`` is stored as ``tag['pos']``.  If
        several items match, the last one wins.  Tags with no matching item
        are left without a ``'pos'`` key.
        """
        for intent in self.training_data or []:
            for sample in intent['training']:
                tags = sample.get('tags') or []
                if not tags:
                    # Nothing to annotate; do not run the tagger at all.
                    continue
                # The tagger output depends only on the sample text, so
                # compute it once per sample rather than once per tag.
                # list() keeps it re-iterable if the tagger is lazy.
                tagged = list(self.tokenizer.tagger(sample['text']))
                for tag in tags:
                    for token in tagged:
                        if token[0] == tag['value']:
                            tag['pos'] = token[1]

    def build(self):
        """Run the full training pass over the engine's training data.

        First annotates all tags via :meth:`pos_training_data`, then, for
        each intent in order, registers its entities and finally the intent
        itself through ``engine.register_intent(name, required, optional)``.
        Does nothing when there is no training data.
        """
        self.pos_training_data()
        for intent in self.training_data or []:
            self.register_entities(intent)
            self.engine.register_intent(
                intent['name'], intent['required'], intent['optional'])

0 comments on commit 760a765

Please sign in to comment.