@article{cite:1,
author = {Bellman, Richard},
issn = {00959057, 19435274},
journal = {Journal of Mathematics and Mechanics},
number = {5},
pages = {679--684},
publisher = {Indiana University Mathematics Department},
title = {{A Markovian Decision Process}},
url = {http://www.jstor.org/stable/24900506},
volume = {6},
year = {1957}
}
@misc{wiki:1,
author = {{Wikipedia contributors}},
title = {Markov decision process --- {Wikipedia}{,} The Free Encyclopedia},
year = {2020},
howpublished = {\url{https://en.wikipedia.org/w/index.php?title=Markov_decision_process&oldid=995233484}},
note = {[Online; accessed 7-January-2021]}
}
@article{egorov2017pomdps,
author = {Maxim Egorov and Zachary N. Sunberg and Edward Balaban and Tim A. Wheeler and Jayesh K. Gupta and Mykel J. Kochenderfer},
title = {{POMDP}s.jl: A Framework for Sequential Decision Making under Uncertainty},
journal = {Journal of Machine Learning Research},
year = {2017},
volume = {18},
number = {26},
pages = {1--5},
url = {http://jmlr.org/papers/v18/16-300.html}
}
@inbook{russel2010,
author = {Russell, Stuart and Norvig, Peter},
title = {Artificial Intelligence: A Modern Approach},
edition = {3},
publisher = {Prentice Hall},
pages = {42--44},
year = {2010}
}
@article{Kolobov2012,
abstract = {Markov Decision Processes (MDPs) are widely popular in Artificial Intelligence for modeling sequential decision-making scenarios with probabilistic dynamics. They are the framework of choice when designing an intelligent agent that needs to act for long periods of time in an environment where its actions could have uncertain outcomes. MDPs are actively researched in two related subareas of AI, probabilistic planning and reinforcement learning. Probabilistic planning assumes known models for the agent's goals and domain dynamics, and focuses on determining how the agent should behave to achieve its objectives. On the other hand, reinforcement learning additionally learns these models based on the feedback the agent gets from the environment. This book provides a concise introduction to the use of MDPs for solving probabilistic planning problems, with an emphasis on the algorithmic perspective. It covers the whole spectrum of the field, from the basics to state-of-the-art optimal and approximation algorithms. We first describe the theoretical foundations of MDPs and the fundamental solution techniques for them. We then discuss modern optimal algorithms based on heuristic search and the use of structured representations. A major focus of the book is on the numerous approximation schemes for MDPs that have been developed in the AI literature. These include determinization-based approaches, sampling techniques, heuristic functions, dimensionality reduction, and hierarchical representations. Finally, we briefly introduce several extensions of the standard MDP classes that model and solve even more complex planning problems. Copyright {\textcopyright} 2012 by Morgan {\&} Claypool.},
author = {Mausam and Kolobov, Andrey},
doi = {10.2200/S00426ED1V01Y201206AIM017},
isbn = {9781608458868},
issn = {19394608},
journal = {Synthesis Lectures on Artificial Intelligence and Machine Learning},
keywords = {AI planning,MDP,probabilistic planning,reinforcement learning,sequential decision making under uncertainty,uncertainty in AI},
pages = {1--203},
publisher = {Morgan {\&} Claypool publishers},
title = {{Planning with Markov Decision Processes: An AI Perspective}},
url = {https://doi.org/10.2200/S00426ED1V01Y201206AIM017},
volume = {17},
year = {2012}
}
@misc{JuliaPOMDP,
author = {Keith, Andrew and Egorov, Maxim and Peters, Lasse and Kochenderfer, Mykel},
title = {JuliaPOMDP},
year = {2020},
publisher = {GitHub},
howpublished = {\url{https://juliapomdp.github.io/POMDPs.jl/latest/}},
note = {[Online; accessed 10-January-2021]}
}
@misc{FHPOMDP,
author = {Omasta, Tomas},
title = {JuliaPOMDP/FiniteHorizonPOMDPs.jl},
year = {2020},
publisher = {GitHub},
howpublished = {\url{https://github.com/JuliaPOMDP/FiniteHorizonPOMDPs.jl}},
note = {[Online; accessed 10-January-2021]}
}
@misc{JuliaLang,
author = {JuliaLang},
title = {JuliaLang/julia},
year = {2020},
publisher = {GitHub},
howpublished = {\url{https://github.com/JuliaLang/julia}},
note = {[Online; accessed 10-January-2021]}
}
@misc{DVI,
author = {Egorov, Maxim},
title = {JuliaPOMDP/DiscreteValueIteration.jl},
year = {2020},
publisher = {GitHub},
howpublished = {\url{https://github.com/JuliaPOMDP/DiscreteValueIteration.jl}},
note = {[Online; accessed 10-January-2021]}
}
@misc{JuliaStars,
author = {Qian, Tim},
year = {2015},
title = {History of stars at {JuliaLang/julia}},
url = {https://star-history.t9t.io/#JuliaLang/julia}
}
@misc{JuliaHistory,
author = {Tung, Liam},
title = {Is Julia fastest-growing new programming language? Stats chart rapid rise in 2018},
howpublished = {ZDNet},
month = {January},
year = {2019},
url = {https://www.zdnet.com/article/is-julia-fastest-growing-new-programming-language-stats-chart-rapid-rise-in-2018/},
note = {[Online; posted 24-January-2019]},
}
@misc{JuliaLangorg,
author = {JuliaLang},
title = {Julia home page},
year = {2020},
url = {https://julialang.org/},
urldate = {2021-01-10}
}
@Article{Shani2013,
author={Shani, Guy
and Pineau, Joelle
and Kaplow, Robert},
title={A survey of point-based POMDP solvers},
journal={Autonomous Agents and Multi-Agent Systems},
year={2013},
month={Jul},
day={01},
volume={27},
number={1},
pages={1--51},
abstract={The past decade has seen a significant breakthrough in research on solving partially observable Markov decision processes (POMDPs). Where past solvers could not scale beyond perhaps a dozen states, modern solvers can handle complex domains with many thousands of states. This breakthrough was mainly due to the idea of restricting value function computations to a finite subset of the belief space, permitting only local value updates for this subset. This approach, known as point-based value iteration, avoids the exponential growth of the value function, and is thus applicable for domains with longer horizons, even with relatively large state spaces. Many extensions were suggested to this basic idea, focusing on various aspects of the algorithm---mainly the selection of the belief space subset, and the order of value function updates. In this survey, we walk the reader through the fundamentals of point-based value iteration, explaining the main concepts and ideas. Then, we survey the major extensions to the basic algorithm, discussing their merits. Finally, we include an extensive empirical analysis using well known benchmarks, in order to shed light on the strengths and limitations of the various approaches.},
issn={1573-7454},
doi={10.1007/s10458-012-9200-2},
url={https://doi.org/10.1007/s10458-012-9200-2}
}
@article{Walraven19,
author = {Walraven, Erwin and Spaan, Matthijs T. J.},
title = {Point-Based Value Iteration for Finite-Horizon POMDPs},
year = {2019},
issue_date = {May 2019},
publisher = {AI Access Foundation},
address = {El Segundo, CA, USA},
volume = {65},
number = {1},
issn = {1076-9757},
url = {https://doi.org/10.1613/jair.1.11324},
doi = {10.1613/jair.1.11324},
abstract = {Partially Observable Markov Decision Processes (POMDPs) are a popular formalism for sequential decision making in partially observable environments. Since solving POMDPs to optimality is a difficult task, point-based value iteration methods are widely used. These methods compute an approximate POMDP solution, and in some cases they even provide guarantees on the solution quality, but these algorithms have been designed for problems with an infinite planning horizon. In this paper we discuss why state-of-the-art point-based algorithms cannot be easily applied to finite-horizon problems that do not include discounting. Subsequently, we present a general point-based value iteration algorithm for finite-horizon problems which provides solutions with guarantees on solution quality. Furthermore, we introduce two heuristics to reduce the number of belief points considered during execution, which lowers the computational requirements. In experiments we demonstrate that the algorithm is an effective method for solving finite-horizon POMDPs.},
journal = {Journal of Artificial Intelligence Research},
month = may,
pages = {307--341},
numpages = {35}
}
@article{10.2307/171496,
ISSN = {0030364X, 15265463},
URL = {http://www.jstor.org/stable/171496},
abstract = {A partially observed Markov decision process (POMDP) is a sequential decision problem where information concerning parameters of interest is incomplete, and possible actions include sampling, surveying, or otherwise collecting additional information. Such problems can theoretically be solved as dynamic programs, but the relevant state space is infinite, which inhibits algorithmic solution. This paper explains how to approximate the state space by a finite grid of points, and use that grid to construct upper and lower value function bounds, generate approximate nonstationary and stationary policies, and bound the value loss relative to optimal for using these policies in the decision problem. A numerical example illustrates the methodology.},
author = {William S. Lovejoy},
journal = {Operations Research},
number = {1},
pages = {162--175},
publisher = {INFORMS},
title = {Computationally Feasible Bounds for Partially Observed Markov Decision Processes},
volume = {39},
year = {1991}
}
@inproceedings{pbvi,
author = {Pineau, Joelle and Gordon, Geoffrey and Thrun, Sebastian},
year = {2003},
month = {01},
pages = {1025--1032},
title = {Point-Based Value Iteration: An Anytime Algorithm for {POMDP}s},
booktitle = {Proc. Int. Joint Conf. Artif. Intell.}
}
@article{Hauskrecht00,
author = {Hauskrecht, Milos},
title = {Value-Function Approximations for Partially Observable Markov Decision Processes},
journal = {Journal of Artificial Intelligence Research},
volume = {13},
pages = {33--94},
url = {http://dblp.uni-trier.de/db/journals/jair/jair13.html#Hauskrecht00},
year = {2000}
}
@inproceedings{Littman,
author = {Littman, Michael L. and Cassandra, Anthony R. and Kaelbling, Leslie Pack},
title = {Learning Policies for Partially Observable Environments: Scaling Up},
year = {1995},
isbn = {1558603778},
publisher = {Morgan Kaufmann Publishers Inc.},
address = {San Francisco, CA, USA},
booktitle = {Proceedings of the Twelfth International Conference on International Conference on Machine Learning},
pages = {362--370},
numpages = {9},
location = {Tahoe City, California, USA},
series = {ICML'95}
}
@inproceedings{sarsop,
title = {{SARSOP}: Efficient Point-Based {POMDP} Planning by Approximating Optimally Reachable Belief Spaces},
author = {Kurniawati, Hanna and Hsu, David and Lee, Wee Sun},
booktitle = {Robotics: Science and Systems},
year = {2008}
}
@article{perseus,
author = {Spaan, Matthijs T. J. and Vlassis, Nikos},
title = {Perseus: Randomized Point-Based Value Iteration for POMDPs},
year = {2005},
issue_date = {July 2005},
publisher = {AI Access Foundation},
address = {El Segundo, CA, USA},
volume = {24},
number = {1},
issn = {1076-9757},
abstract = {Partially observable Markov decision processes (POMDPs) form an attractive and principled framework for agent planning under uncertainty. Point-based approximate techniques for POMDPs compute a policy based on a finite set of points collected in advance from the agent's belief space. We present a randomized point-based value iteration algorithm called PERSEUS. The algorithm performs approximate value backup stages, ensuring that in each backup stage the value of each point in the belief set is improved; the key observation is that a single backup may improve the value of many belief points. Contrary to other point-based methods, PERSEUS backs up only a (randomly selected) subset of points in the belief set, sufficient for improving the value of each belief point in the set. We show how the same idea can be extended to dealing with continuous action spaces. Experimental results show the potential of PERSEUS in large scale POMDP problems.},
journal = {Journal of Artificial Intelligence Research},
month = aug,
pages = {195--220},
numpages = {26}
}
@article{hsvi,
author = {Smith, Trey and Simmons, Reid G.},
title = {Heuristic Search Value Iteration for {POMDP}s},
journal = {CoRR},
volume = {abs/1207.4166},
year = {2012},
url = {http://arxiv.org/abs/1207.4166},
archivePrefix = {arXiv},
eprint = {1207.4166}
}
@inproceedings{fsvi,
author = {Shani, Guy and Brafman, Ronen and Shimony, Solomon},
year = {2007},
month = {01},
pages = {2619--2624},
title = {Forward Search Value Iteration for {POMDP}s},
booktitle = {Proceedings of the International Joint Conference on Artificial Intelligence}
}
@article{Hauskrecht_2000,
title={Value-Function Approximations for Partially Observable Markov Decision Processes},
volume={13},
ISSN={1076-9757},
url={http://dx.doi.org/10.1613/jair.678},
DOI={10.1613/jair.678},
journal={Journal of Artificial Intelligence Research},
publisher={AI Access Foundation},
author={Hauskrecht, M.},
year={2000},
month={Aug},
pages={33--94}
}
@misc{cassandra_1999,
title = {Tony's {POMDP} Page},
url = {https://cs.brown.edu/research/ai/pomdp/index.html},
author = {Cassandra, Anthony R.},
year = {1999},
month = {Jan},
note = {[Online; accessed 7-January-2021]}
}