-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathindex.js
125 lines (102 loc) · 3.86 KB
/
index.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
var setReward = require('./reward.jsm');
var q = require('./q.jsm');
var environment = require('./environment.jsm');
var MAT_STATES, INITIAL_STATE, GAMMA;
({MAT_STATES, INITIAL_STATE, GAMMA} = environment);
/*
Generates the reward matrix:
the state matrix (MAT_STATES) taken from ./environment.jsm is handed to
setReward, the function exported by ./reward.jsm, and the result is kept
for every later call to q.
*/
var reward = setReward(MAT_STATES);
/*
Builds a brand-new matrix shaped like `mat` (same number of rows, and each
row as long as the matching row of `mat`) in which every cell is the number 0.
The argument itself is not modified.
*/
var zeroTable = (mat) => {
  const toZero = () => 0;
  return mat.map((row) => row.map(toZero));
};
/*
STRATEGIES DEFINITIONS:
Strategy1:
The action will be a random neighbor, and Q Table will be updated (change: true).
Strategy2:
With equal probability,
EITHER the action will be a random neighbor, and Q Table will be updated (change: true)
OR the action will be the neighbor with the greatest Q value, and Q Table will NOT be updated (change: false).
Strategy3:
The action will be the neighbor with the greatest Q value, and Q Table will NOT be updated (change: false).
*/
/*
Strategy1: pure exploration.
Picks one of the given neighbors uniformly at random and flags the step
with `change: true`.

neighbors: array of candidate actions (an empty array yields `action: undefined`).
qtxy:      accepted so that every strategy shares one signature; not read here.
Returns:   { action, change }.
*/
const strategy1 = (neighbors, qtxy) => {
  // Math.random() * length reaches every index with the same probability for
  // any array size. The previous (10 * random) % length form favoured the low
  // indexes and could never select an index of 10 or more.
  const pick = Math.floor(Math.random() * neighbors.length);
  return {
    action: neighbors[pick],
    change: true
  };
};
/*
Strategy2: coin flip between exploring and exploiting.
- When the first Math.random() draw is >= 0.5, a neighbor is picked uniformly
  at random and the step is flagged with `change: true`.
- Otherwise the neighbor with the greatest value in `qtxy` is picked (the
  later neighbor wins a tie) and the step is flagged with `change: false`.

neighbors: array of candidate actions, each read as { x, y }.
qtxy:      matrix indexed as qtxy[x][y]; only read in the greedy branch.
Returns:   { action, change }.
*/
const strategy2 = (neighbors, qtxy) => {
  if (Math.random() >= 0.5) {
    // Uniform index for any list length; the previous
    // (10 * random) % length form was biased toward the low indexes.
    const pick = Math.floor(Math.random() * neighbors.length);
    return {
      action: neighbors[pick],
      change: true
    };
  }
  return {
    action: neighbors.reduce((best, candidate) =>
      qtxy[best.x][best.y] > qtxy[candidate.x][candidate.y] ? best : candidate
    ),
    change: false
  };
};
/*
Strategy3: pure exploitation.
Selects the neighbor whose entry in `qtxy` (read as qtxy[x][y]) is the
greatest and flags the step with `change: false`. When two neighbors compare
as equal, the one that appears later in the list is kept.
*/
var strategy3 = (neighbors, qtxy) => {
  const qValue = ({ x, y }) => qtxy[x][y];
  const keepGreater = (leader, challenger) =>
    qValue(leader) > qValue(challenger) ? leader : challenger;
  const winner = neighbors.reduce(keepGreater);
  return { action: winner, change: false };
};
/*
Runs once the training episodes are over.
Launches one more pass of q from INITIAL_STATE, this time driven by strategy3,
with the episode and step counters both passed as 0 and the text
"(x,y) => " of the starting state as the initial path string.
When q reports back, the step count and the path text it supplies are logged.
NOTE(review): q lives in ./q.jsm; how it extends the path string is assumed
from the log message, not visible here.
*/
var walk = (qTable, c, INITIAL_STATE) => {
  const pathStart = '(' + INITIAL_STATE.x + ',' + INITIAL_STATE.y + ') => ';
  const alwaysGreedy = (nearby, values) => strategy3(nearby, values);
  const reportPath = (i, qTable, q, reward, c, steps) =>
    console.log('\n\nBEST PATH has ' + c + ' steps: ' + steps);

  q(MAT_STATES, INITIAL_STATE, GAMMA, reward, qTable, 0, alwaysGreedy, 0, pathStart, reportPath);
};
/*
Prints out the Q-Table.

For every cell (x,y) of qTable a block is logged: a header with the cell
coordinates and the column indexes, one line per row of the matrix stored in
that cell, and a closing rule. Zero values are printed as 0; everything else is
printed with four decimals.

qTable:   table of cells, each cell being a matrix (array of rows).
c:        passed through, untouched, to callback.
callback: invoked exactly once, after everything has been printed, as
          callback(qTable, c, INITIAL_STATE), where INITIAL_STATE is the
          module-level starting state.

Changes from the earlier version:
- the column header and the row values follow the real width of each matrix
  instead of always assuming ten columns (ten-column output is unchanged);
- the callback no longer depends on reaching the last cell inside the loop, so
  it also fires for an empty table or an empty last row.
*/
const print = (qTable, c, callback) => {
  const format = (value) => (value == 0 ? 0 : parseFloat(value).toFixed(4));

  qTable.forEach((line, x) => line.forEach((cell, y) => {
    const rows = cell.map((row) => row.map(format));
    // The widest row decides how many column indexes the header shows.
    const width = rows.reduce((widest, row) => Math.max(widest, row.length), 0);
    const columns = Array.from({ length: width }, (_, n) => n).join("\t");

    console.log("\n(" + x + "," + y + ")\n\t\t" + columns + "\n\t ---------------------------------------------------------------------------------------");
    rows.forEach((row, rowN) =>
      console.log('\t' + rowN + " [\t" + row.join("\t") + " \t]")
    );
    console.log("\t ---------------------------------------------------------------------------------------");
  }));

  callback(qTable, c, INITIAL_STATE);
};
/*
Initial Q Table, all zeroes:
there is one entry for each cell of the MAT_STATES matrix, and every entry is
its own freshly built zero matrix with the dimensions of MAT_STATES.
*/
var qTable = MAT_STATES.map((row) => row.map(() => zeroTable(MAT_STATES)));
/*
Setting initial strategy.
`strategy` holds the strategy currently in use. It starts as strategy1 and is
reassigned by check (to strategy2 and later strategy3) as the episode counter
advances, which is why it is a reassignable binding.
*/
var strategy = strategy1;
/*
Will set the environment each time a GOAL STATE has been reached.

i is the episode counter handed back by q:
- at episode 50 the module-level GAMMA becomes 0.5 and strategy2 takes over;
- at episode 100 GAMMA becomes 0.2 and strategy3 takes over;
- while i is below 150 another run of q is scheduled through a 1 ms timer,
  with this function as its continuation;
- otherwise the episodes are over and the Q Table is printed in a
  human-friendly form, passing `callback` on to print.

GAMMA and strategy are read when the timer fires / when the action is chosen,
not when the run is scheduled.
*/
var check = (i, qTable, q, reward, c, callback) => {
  if (i == 50) {
    GAMMA = 0.5;
    strategy = strategy2;
  } else if (i == 100) {
    GAMMA = 0.2;
    strategy = strategy3;
  }

  // Negated on purpose: anything that is not "less than 150" ends the training.
  const moreEpisodes = i < 150;
  if (!moreEpisodes) {
    print(qTable, c, callback);
    return;
  }

  const chooseAction = (nearby, values) => strategy(nearby, values);
  const onEpisodeEnd = (i, qTable, q, reward, c) =>
    check(i, qTable, q, reward, c, callback);
  const runEpisode = () =>
    q(MAT_STATES, INITIAL_STATE, GAMMA, reward, qTable, i, chooseAction, c, '', onEpisodeEnd);

  setTimeout(runEpisode, 1);
};
// LET'S DO IT!
// Starts the first run of q (episode counter 0, step counter 0, empty path
// text) with whatever `strategy` currently holds. Each time q reports back,
// check decides whether to schedule another run or to print the Q Table and
// then walk the best path.
// The strategy wrapper forwards BOTH arguments (neighbors and qtxy), matching
// the wrapper built inside check: dropping qtxy only worked because strategy1
// never reads it, and would fail with any strategy that consults the Q values.
q(MAT_STATES, INITIAL_STATE, GAMMA, reward, qTable, 0, (neighbors, qtxy) => strategy(neighbors, qtxy), 0, '', (i, qTable, q, reward, c, steps) =>
  check(i, qTable, q, reward, c, (qTable, c, INITIAL_STATE) => walk(qTable, c, INITIAL_STATE))
);
//reward.forEach(line => console.log(line));
//console.log('',result);
//console.log('',reward[5][9]);
//console.log('',reward);