-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathQlearning2.csv
We can make this file beautiful and searchable if this error is corrected: It looks like row 3 should actually have 2 columns, instead of 101 in line 2.
40 lines (35 loc) · 6.05 KB
/
Qlearning2.csv
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
epsilon rate, 0.1
actions,
Q Learning,542,303,1331,393,74,218,151,236,89,71,135,58,147,37,32,914,156,47,37,86,58,45,74,33,571,87,109,59,96,44,58,212,126,100,64,35,45,95,140,93,50,86,61,994,148,113,49,110,90,65,158,92,124,31,48,226,41,143,75,81,127,163,48,125,54,86,71,76,75,55,61,113,28,46,178,52,43,79,43,258,143,36,67,29,48,83,59,47,125,248,262,48,56,56,89,57,197,54,108,80
time,
Q Learning,23,53,31,23,16,54,30,38,50,34,19,35,25,25,23,34,28,28,33,19,20,31,27,37,86,46,62,27,23,26,36,41,27,25,27,20,48,26,38,21,17,35,29,38,49,34,33,27,26,28,26,43,32,36,33,31,29,26,27,36,40,22,30,35,44,29,39,34,41,52,26,76,39,59,48,57,43,34,57,86,39,62,44,37,42,50,37,54,67,70,55,42,38,56,42,31,54,48,48,38
reward,
Q Learning Rewards,-534,-95,-345,-357,35,20,-64,-138,47,41,30,106,33,62,63,-258,-106,1,70,37,44,70,75,81,-123,11,63,43,41,44,64,141,20,-23,45,74,67,0,-74,13,102,74,53,-239,0,34,67,-4,12,69,-43,-113,-10,77,58,26,69,50,31,68,-21,86,51,19,45,74,54,37,101,68,75,16,78,40,-71,75,57,16,54,-96,-46,58,73,66,58,43,40,39,79,-247,-39,58,54,76,45,74,-13,68,90,-26
epsilon rate, 0.2
actions,
Q Learning,361,557,207,133,184,222,198,216,228,153,198,326,38,146,261,103,116,44,44,80,77,143,95,101,87,36,118,67,55,108,55,32,62,41,45,29,57,71,93,86,69,44,120,104,118,80,34,37,60,43,29,44,49,38,41,64,206,50,84,77,303,98,70,66,117,51,50,30,46,71,41,28,34,47,83,136,39,78,41,51,49,68,101,42,49,30,48,39,27,53,52,200,26,50,34,49,182,34,110,47
Milliseconds,
Q Learning,35,30,18,23,31,18,79,22,18,19,77,17,23,64,28,24,67,26,12,19,14,15,16,20,20,18,15,17,20,23,13,25,24,23,21,29,26,21,20,36,36,28,30,51,22,28,33,21,16,22,25,26,26,24,17,26,38,28,38,29,42,30,23,23,22,22,22,37,19,29,23,24,32,22,31,31,50,51,42,35,50,35,32,39,33,27,38,32,24,30,31,37,33,34,29,33,30,30,32,36
reward:,
Q Learning Rewards,-358.0,-491.0,-114.0,-40.0,-118.0,-129.0,-114.0,-204.0,-207.0,-60.0,-159.0,-233.0,64.0,-116.0,-159.0,-1.0,-14.0,58.0,58.0,22.0,25.0,-41.0,-2.0,-8.0,15.0,57.0,-16.0,17.0,29.0,-6.0,47.0,70.0,13.0,61.0,57.0,73.0,45.0,31.0,9.0,7.0,33.0,58.0,-18.0,-11.0,-16.0,22.0,68.0,65.0,42.0,50.0,64.0,58.0,53.0,64.0,61.0,-25.0,-104.0,52.0,9.0,16.0,-246.0,4.0,14.0,27.0,-24.0,51.0,52.0,72.0,56.0,4.0,43.0,74.0,68.0,55.0,19.0,-34.0,63.0,24.0,61.0,51.0,26.0,34.0,1.0,60.0,53.0,72.0,54.0,54.0,75.0,40.0,14.0,-98.0,76.0,52.0,68.0,53.0,-80.0,68.0,-8.0,55.0
epsilon rate, 0.5
actions,
Q Learning,1018,323,539,244,147,193,451,159,130,69,85,182,102,147,251,67,74,51,79,110,105,134,94,104,37,80,162,70,138,90,139,151,77,114,82,57,93,98,85,56,54,63,64,49,77,129,77,66,87,54,160,95,62,88,39,138,88,210,196,109,53,90,123,67,55,86,62,31,77,96,149,95,344,129,61,125,60,98,68,68,192,98,53,40,39,87,134,73,35,133,72,51,68,59,71,93,126,53,59,51
time,
Q Learning,139,66,26,22,35,50,68,48,31,38,33,36,28,27,30,24,34,30,29,20,46,33,34,50,28,23,28,17,27,23,32,48,40,39,38,51,41,51,54,43,44,43,40,37,42,44,51,49,51,70,52,44,55,104,70,37,41,32,25,22,37,27,29,30,29,27,26,32,30,31,30,24,30,29,27,32,33,33,31,34,32,34,37,35,33,64,34,31,37,36,35,33,37,35,45,38,38,45,40,36
reward,
Q Learning Rewards,-1258.0,-365.0,-617.0,-178.0,-135.0,-145.0,-412.0,-120.0,-28.0,15.0,-1.0,-125.0,-36.0,-54.0,-221.0,8.0,-8.0,-21.0,-4.0,-26.0,-39.0,-68.0,-37.0,-11.0,47.0,22.0,-60.0,32.0,-63.0,3.0,-109.0,-49.0,25.0,-12.0,11.0,45.0,9.0,-68.0,17.0,46.0,48.0,39.0,11.0,53.0,25.0,-27.0,7.0,-27.0,15.0,48.0,-58.0,7.0,40.0,14.0,63.0,-63.0,14.0,-135.0,-94.0,-7.0,49.0,3.0,-39.0,35.0,47.0,16.0,40.0,71.0,25.0,-30.0,-56.0,7.0,-278.0,-27.0,-4.0,-32.0,42.0,-23.0,25.0,34.0,-234.0,4.0,49.0,62.0,63.0,-3.0,-32.0,20.0,13.0,-58.0,12.0,42.0,34.0,43.0,13.0,9.0,-24.0,13.0,43.0,51.0
epsilon rate, 0.7
actions,
Q Learning,462,511,135,494,625,69,464,149,97,175,44,260,123,140,205,47,160,269,333,164,113,82,333,145,73,181,39,141,140,468,273,41,196,45,82,100,74,198,88,162,75,154,147,539,158,98,397,90,40,144,278,173,124,154,98,99,177,199,155,54,313,137,130,101,69,114,69,140,263,158,175,197,91,86,109,134,151,119,96,79,132,71,155,45,109,58,147,102,132,73,317,82,93,97,110,228,132,174,89,202
time,
Q Learning,114,51,45,54,48,51,90,54,43,38,34,32,30,34,36,36,34,49,40,44,39,30,58,33,37,53,62,44,48,45,44,38,41,31,38,42,41,42,49,48,43,59,63,65,50,65,50,60,53,68,58,65,51,51,58,36,33,33,31,32,40,37,32,31,34,36,37,31,51,39,67,36,40,40,37,36,41,34,40,38,41,41,44,47,45,54,61,55,40,45,43,43,47,44,49,49,51,48,50,47
reward,
Q Learning Rewards,-477.0,-481.0,-78.0,-545.0,-730.0,15.0,-479.0,-119.0,-13.0,-100.0,13.0,-266.0,-21.0,-128.0,-184.0,55.0,-112.0,-311.0,-330.0,-71.0,-137.0,-61.0,-402.0,-115.0,-16.0,-115.0,27.0,-111.0,-38.0,-564.0,-243.0,16.0,-112.0,3.0,-7.0,-34.0,-8.0,-96.0,14.0,-78.0,18.0,-52.0,-54.0,-779.0,-164.0,-50.0,-430.0,3.0,35.0,-42.0,-275.0,-89.0,-31.0,-169.0,4.0,-15.0,-174.0,-250.0,-107.0,30.0,-229.0,-143.0,-73.0,1.0,24.0,-30.0,6.0,-74.0,-242.0,-110.0,-118.0,-230.0,-16.0,16.0,-7.0,-32.0,-49.0,-17.0,6.0,23.0,-93.0,31.0,-107.0,57.0,-7.0,17.0,-198.0,-63.0,-30.0,29.0,-422.0,20.0,9.0,5.0,-8.0,-198.0,-102.0,-72.0,-5.0,-271.0
epsilon rate, 0.02
actions,
Q Learning,699,707,237,141,182,246,552,81,200,75,112,166,103,143,123,124,153,53,227,54,36,73,84,62,83,92,93,36,34,65,132,47,107,45,124,57,70,53,28,81,31,31,409,22,29,106,67,160,44,153,28,99,35,66,104,45,35,119,37,92,41,53,72,39,39,70,77,72,32,90,119,28,30,190,35,24,30,134,43,224,29,74,19,33,174,39,33,67,31,66,32,39,70,53,36,57,72,118,36,49
time,
Q Learning,147,74,37,27,45,46,77,32,43,33,33,47,32,24,26,29,32,27,27,21,25,22,35,21,37,26,23,23,26,23,29,34,40,26,65,32,29,22,17,30,35,30,39,42,40,38,39,41,35,42,28,38,37,37,46,53,44,40,34,51,31,55,31,35,56,36,30,34,42,34,45,36,36,37,30,30,36,58,54,64,38,38,34,30,25,37,25,48,34,51,56,29,35,46,37,38,40,38,39,37
reward,
Q Learning Rewards,-822.0,-668.0,-144.0,-120.0,-152.0,-198.0,-594.0,-15.0,-152.0,27.0,-28.0,-100.0,-19.0,-41.0,-21.0,-31.0,-51.0,40.0,-134.0,30.0,66.0,20.0,18.0,40.0,10.0,10.0,9.0,66.0,59.0,37.0,-30.0,55.0,-5.0,57.0,-22.0,45.0,14.0,49.0,74.0,3.0,71.0,71.0,-307.0,80.0,73.0,-22.0,17.0,-58.0,58.0,-51.0,74.0,-6.0,67.0,36.0,-29.0,57.0,67.0,-26.0,65.0,10.0,61.0,49.0,30.0,63.0,63.0,32.0,16.0,30.0,70.0,12.0,-17.0,74.0,54.0,-115.0,67.0,78.0,72.0,-59.0,59.0,-140.0,73.0,1.0,83.0,69.0,-126.0,63.0,69.0,35.0,71.0,36.0,61.0,63.0,32.0,49.0,66.0,45.0,30.0,-25.0,66.0,53.0