% Assignment4p4.m
%% read csv
% Load cached results if available; otherwise parse the CSV files and cache them.
try
    load('GridWorldData');   % num/txt/raw from test1Results.csv
    load('GridWorldData4');  % num1/txt1/raw1 from smallGridWorld.csv
catch
    [num, txt, raw]    = xlsread('test1Results.csv');
    [num1, txt1, raw1] = xlsread('smallGridWorld.csv');
    save('GridWorldData', 'num', 'txt', 'raw');
    save('GridWorldData4', 'num1', 'txt1', 'raw1');
end
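% Assumed layout of both CSVs (inferred from the indexing below, not verified
% against the files themselves): each metric block starts with a row whose first
% cell is 'Iterations', followed by three rows of values for Value Iteration,
% Policy Iteration, and Q-Learning at matching iteration counts. The row indices
% found in raw/raw1 are also assumed to line up with the same rows in num/num1,
% i.e. xlsread is assumed not to strip any leading text-only rows.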
%% graph
close all
% Find the start row of each metric block in the small grid world data.
[r, c] = size(raw1);
locations = zeros(1, 3);
count = 1;
for i = 1:r
    if strcmp(raw1{i,1}, 'Iterations')
        locations(count) = i;
        count = count + 1;
    end
end
% One figure per metric block: iterations on x, one curve per algorithm.
for i = 1:length(locations)
    figure(i)
    hold on
    plot(num1(locations(i), :), num1(locations(i)+1, :), 'LineWidth', 2);
    plot(num1(locations(i), :), num1(locations(i)+2, :), 'LineWidth', 2);
    plot(num1(locations(i), :), num1(locations(i)+3, :), 'LineWidth', 2);
    xlabel('Iterations');
    legend('Value Iteration', 'Policy Iteration', 'Q-Learning');
    hold off
end
figure(1)
ylabel('Number of steps/actions to reach terminal state');
figure(2)
ylabel('Milliseconds required to generate optimal policy');
figure(3)
ylabel('Reward gained by the optimal policy per iteration');
figure(4)
ylabel('Reward gained by the optimal policy per iteration');
%% compare
close all
% Repeat the block search for the other grid world's data.
[r, c] = size(raw);
locations2 = zeros(1, 3);
count = 1;
for i = 1:r
    if strcmp(raw{i,1}, 'Iterations')
        locations2(count) = i;
        count = count + 1;
    end
end
figNum = 1;
% For each metric block, compare the two grid worlds algorithm by algorithm
% (assumes both files contain the same number of 'Iterations' blocks).
for i = 1:length(locations)
    % Value Iteration
    figure(figNum);
    figNum = figNum + 1;
    hold on
    plot(num1(locations(i), :), num1(locations(i)+1, :), 'LineWidth', 2);
    plot(num(locations2(i), :), num(locations2(i)+1, :), 'LineWidth', 2);
    hold off
    title('Value Iteration Comparison');
    xlabel('Iterations');
    legend('Large grid world', 'Small grid world');
    % Policy Iteration
    figure(figNum);
    figNum = figNum + 1;
    hold on
    plot(num1(locations(i), :), num1(locations(i)+2, :), 'LineWidth', 2);
    plot(num(locations2(i), :), num(locations2(i)+2, :), 'LineWidth', 2);
    hold off
    title('Policy Iteration Comparison');
    xlabel('Iterations');
    legend('Large grid world', 'Small grid world');
    % Q-Learning
    figure(figNum);
    figNum = figNum + 1;
    hold on
    plot(num1(locations(i), :), num1(locations(i)+3, :), 'LineWidth', 2);
    plot(num(locations2(i), :), num(locations2(i)+3, :), 'LineWidth', 2);
    hold off
    title('Q-Learning Comparison');
    xlabel('Iterations');
    legend('Large grid world', 'Small grid world');
end
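% The axis labels below assume three metric blocks per file, so the comparison
% loop produces nine figures: 1-3 steps to the terminal state, 4-6 policy
% generation time, 7-9 reward per iteration (one figure per algorithm in each group).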
figure(1)
ylabel('Number of steps/actions to reach terminal state');
figure(2)
ylabel('Number of steps/actions to reach terminal state');
figure(3)
ylabel('Number of steps/actions to reach terminal state');
figure(4)
ylabel('Milliseconds required to generate optimal policy');
figure(5)
ylabel('Milliseconds required to generate optimal policy');
figure(6)
ylabel('Milliseconds required to generate optimal policy');
figure(7)
ylabel('Reward gained by the optimal policy per iteration');
figure(8)
ylabel('Reward gained by the optimal policy per iteration');
figure(9)
ylabel('Reward gained by the optimal policy per iteration');