-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathzmdp_util.py
52 lines (43 loc) · 1.58 KB
/
zmdp_util.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
# -*- coding: utf-8 -*-
"""
pypomdp.util.zmdp
~~~~~~~~~~~~~~~~~
:copyright: (c) 2012 by Bastian Migge, Santiago Droll
:license: BSD3, see LICENSE for more details.
:description: zMDP utilities (http://www.cs.cmu.edu/~trey/zmdp/)
"""
import io
import re
import yaml
def read_zmdp_policy(filename, state_count):
    """
    Read a zMDP policy file and return its plane actions and alpha vectors.

    zMDP info: http://www.cs.cmu.edu/~trey/zmdp/

    The file content is turned into YAML by replacing every ``=>`` with
    ``:`` and is then parsed with PyYAML.

    arguments:
        filename     path of the zMDP policy file to read
        state_count  length of every returned alpha vector

    returns:
        tuple ``(alpha_vector_actions, alpha_vectors)``:
        ``alpha_vector_actions[i]`` is the integer action of plane ``i``
        and ``alpha_vectors[i]`` is a list of ``state_count`` floats.
        States that a plane does not list get the value 0.0.

    raises:
        AssertionError  if ``policyType`` is not ``MaxPlanesLowerBound``
        IndexError      if an entry's state index is not a valid index
                        for a list of length ``state_count``
    """
    # read and convert zMDP policy format to YAML
    with open(filename, 'r') as f:
        yaml_str = f.read().replace('=>', ':')

    # safe_load only builds plain scalars, lists and dicts, which is all
    # this function reads from the policy; unlike a bare yaml.load() it
    # needs no explicit Loader argument.
    data_map = yaml.safe_load(yaml_str)

    # sanity check; raised explicitly (same exception type as the former
    # assert statement) so it is not stripped when running with "-O"
    if data_map['policyType'] != 'MaxPlanesLowerBound':
        raise AssertionError('unrecognized policy')

    alpha_vector_actions, alpha_vectors = [], []
    for plane in data_map['planes']:
        alpha_vector_actions.append(int(plane['action']))

        # a plane lists values only for some states, so start from an
        # all-zero vector and overwrite the states that are listed
        alpha_vector = [0.0] * state_count

        # 'entries' is a flat list: state index, value, state index, ...
        # (a trailing unpaired element is ignored, as zip() truncates)
        entries = plane['entries']
        for state_id, mdp_value in zip(entries[0::2], entries[1::2]):
            alpha_vector[int(state_id)] = float(mdp_value)

        alpha_vectors.append(alpha_vector)

    return alpha_vector_actions, alpha_vectors