# markov.py -- forked from barrucadu/markov
# (Web-page chrome and the 1-87 line-number gutter from the repository
# file view were removed; the original file is 87 lines, 70 loc, 2.25 KB.)
import random
import pickle
import os
import sys
class Markov:
    """N-gram Markov chain over arbitrary string tokens.

    ``self.data`` maps a context (a tuple of up to ``n`` preceding tokens,
    with ``()`` meaning "no context") to a two-element list
    ``[total, {token: count}]`` where ``total`` is the sum of the counts.

    The instance is its own iterator: after ``train`` (or ``load``) each
    ``next()`` call yields one generated token.  The iterator never raises
    ``StopIteration``; the caller decides when to stop.
    """

    def __init__(self, n=3):
        """Create an empty model that uses at most ``n`` tokens of context."""
        self.n = n
        self.p = 0
        self.seed = None
        # Start with an empty context so the model can be iterated even if
        # reset() was never called.
        self.prev = ()
        self.data = {}

    def train(self, training_data):
        """Add the token sequence ``training_data`` to the frequency tables.

        Each token is counted once under every suffix of the current
        context, from the longest one down to the empty context ``()``.
        The context starts empty on every call.
        """
        context = ()
        for token in training_data:
            # Tokens repeat heavily; interning lets equal strings share one
            # object.
            token = sys.intern(token)

            for start in range(len(context) + 1):
                entry = self.data.setdefault(context[start:], [0, {}])
                counts = entry[1]
                counts[token] = counts.get(token, 0) + 1
                entry[0] += 1

            context += (token,)
            if len(context) > self.n:
                context = context[1:]

    def load(self, filename):
        """Replace the model with the one pickled in ``filename``.

        ``filename`` may start with ``~``.  Returns True on success and
        False if the file content could not be unpickled into an
        ``(n, data)`` pair.  Errors from opening the file (for example
        ``FileNotFoundError``) propagate to the caller.

        If the stored ``n`` is smaller than the current one, ``self.n`` is
        lowered to match and a warning is printed.
        """
        with open(os.path.expanduser(filename), "rb") as f:
            try:
                # NOTE(security): unpickling can execute arbitrary code.
                # Only load data files that come from a trusted source.
                n, self.data = pickle.load(f)
            except Exception:
                print("Loading data file failed!")
                return False

        if self.n > n:
            print("warning: changing n value to", n)
            self.n = n
        return True

    def dump(self, filename):
        """Pickle ``(n, data)`` to ``filename``; return True on success.

        ``filename`` may start with ``~``.  Any failure to open or write
        the file is reported on stdout and signalled by returning False.
        """
        try:
            with open(os.path.expanduser(filename), "wb") as f:
                pickle.dump((self.n, self.data), f)
            return True
        except Exception:
            print("Could not dump to file.")
            return False

    def reset(self, seed, prob, prev):
        """Prepare for generation.

        seed -- value passed to ``random.seed`` (module-global generator).
        prob -- probability, per generated token, of ignoring the current
                context and sampling from the empty context instead.
        prev -- initial context; expected to be a tuple of tokens.
        """
        self.seed = seed
        self.p = prob
        self.prev = prev
        random.seed(seed)

    def __iter__(self):
        """Return the model itself; it is its own iterator."""
        return self

    def __next__(self):
        """Generate and return the next token, then advance the context.

        Raises ``KeyError`` if the model holds no data at all.
        """
        # The short-circuit matters: random.random() is consumed only when
        # there is a non-empty context.
        if self.prev == () or random.random() < self.p:
            token = self._choose(self.data[()])
        else:
            try:
                token = self._choose(self.data[self.prev])
            except (KeyError, TypeError):
                # Unseen context (or an unhashable one, e.g. a list handed
                # to reset()): drop it and sample without context.
                self.prev = ()
                token = self._choose(self.data[self.prev])

        self.prev += (token,)
        if len(self.prev) > self.n:
            self.prev = self.prev[1:]

        return token

    def _choose(self, freqdict):
        """Pick a token at random, weighted by its count.

        ``freqdict`` is one ``self.data`` entry: ``(total, {token: count})``.
        Returns None only if ``total`` exceeds the sum of the counts, which
        a table built by ``train`` never does.
        """
        total, choices = freqdict
        idx = random.randrange(total)  # uniform in [0, total)

        for token, freq in choices.items():
            # Strictly less-than: a token with count ``freq`` owns exactly
            # ``freq`` of the ``total`` index slots.
            if idx < freq:
                return token
            idx -= freq
        return None