references.bib

% Encoding: UTF-8

@String{BSTJ = "Bell System Technical Journal"}

@Misc{Abbeel2019,
  author = {Pieter Abbeel},
  title  = {Lecture notes for {CS} 287: Solving Continuous {MDPs} with Discretization},
  year   = {2019},
  url    = {https://people.eecs.berkeley.edu/~pabbeel/cs287-fa19/slides/Lec3-discretization-of-continuous-state-space-MDPs.pdf},
}

@Article{Adell2006,
  author    = {Adell, José A. and Jodrá, P.},
  title     = {Exact {Kolmogorov} and total variation distances between some familiar discrete distributions},
  journal   = {Journal of Inequalities and Applications},
  volume    = {2006},
  pages     = {1--8},
  year      = {2006},
  publisher = {Springer Science and Business Media LLC},
  issn      = {1029-242X},
  doi       = {10.1155/jia/2006/64307},
}

@Article{Afshari2023,
  author    = {Mohammad Afshari and Aditya Mahajan},
  title     = {Decentralized Linear Quadratic Systems With Major and Minor Agents and Non-{Gaussian} Noise},
  journal   = {{IEEE} Transactions on Automatic Control},
  volume    = {68},
  number    = {8},
  pages     = {4666--4681},
  month     = aug,
  year      = {2023},
  publisher = {Institute of Electrical and Electronics Engineers ({IEEE})},
  doi       = {10.1109/tac.2022.3210049},
}

@InProceedings{Agarwal2020,
  author    = {Agarwal, Alekh and Kakade, Sham M. and Lee, Jason D. and Mahajan, Gaurav},
  title     = {Optimality and Approximation with Policy Gradient Methods in {Markov} Decision Processes},
  booktitle = {Proceedings of Thirty Third Conference on Learning Theory},
  editor    = {Abernethy, Jacob and Agarwal, Shivani},
  series    = {Proceedings of Machine Learning Research},
  volume    = {125},
  pages     = {64--66},
  month     = {09--12 Jul},
  year      = {2020},
  publisher = {PMLR},
  url       = {https://proceedings.mlr.press/v125/agarwal20a.html},
}

@Article{Altman1992,
  author    = {Altman, Eitan and Nain, Philippe},
  title     = {Closed-loop control with delayed information},
  journal   = {ACM SIGMETRICS Performance Evaluation Review},
  volume    = {20},
  number    = {1},
  pages     = {193--204},
  month     = jun,
  year      = {1992},
  publisher = {Association for Computing Machinery (ACM)},
  issn      = {0163-5999},
  doi       = {10.1145/149439.133106},
  url       = {http://dx.doi.org/10.1145/149439.133106},
}

@Book{Altman1999,
  author    = {Altman, Eitan},
  title     = {Constrained {Markov} decision processes},
  year      = {1999},
  publisher = {CRC Press},
  url       = {http://www-sop.inria.fr/members/Eitan.Altman/TEMP/h.pdf},
}

@InCollection{Altman2002,
  author    = {Eitan Altman},
  title     = {Applications of {Markov} Decision Processes in Communication Networks},
  booktitle = {Handbook of {Markov} Decision Processes: Methods and Applications},
  editor    = {Feinberg, Eugene A. and Shwartz, Adam},
  series    = {International Series in Operations Research {\&} Management Science},
  volume    = {40},
  pages     = {489--536},
  year      = {2002},
  publisher = {Springer {US}},
  doi       = {10.1007/978-1-4615-0805-2_16},
}

@InProceedings{Altman2007,
  author    = {Altman, Eitan and Avrachenkov, Konstantin and Garnaev, Andrey},
  title     = {A jamming game in wireless networks with transmission cost},
  booktitle = {EuroFGI International Conference on Network Control and Optimization (NET-COOP)},
  pages     = {1--12},
  month     = jun,
  year      = {2007},
  publisher = {Springer},
  address   = {Avignon, France},
  url       = {https://www-sop.inria.fr/members/Eitan.Altman/PAPERS/andrey-lncs.pdf},
}

@InProceedings{Altman2008,
  author    = {Altman, Eitan and Avrachenkov, Konstantin and Garnaev, Andrey},
  title     = {Closed form solutions for symmetric water filling games},
  booktitle = {IEEE INFOCOM Conference on Computer Communications},
  pages     = {673--681},
  year      = {2008},
  publisher = {IEEE},
  doi       = {10.1109/INFOCOM.2008.117},
}

@InProceedings{Altman2009,
  author    = {Altman, Eitan and Avrachenkov, Konstantin and Garnaev, Andrey},
  title     = {Jamming in wireless networks: The case of several jammers},
  booktitle = {2009 International Conference on Game Theory for Networks},
  pages     = {585--592},
  year      = {2009},
  publisher = {IEEE},
}

@InProceedings{Altman2009a,
  author    = {Altman, Eitan and Avrachenkov, Konstantin and Garnaev, Andrey},
  title     = {Jamming game with incomplete information about the jammer},
  booktitle = {Proceedings of the Fourth International ICST Conference on Performance Evaluation Methodologies and Tools},
  pages     = {1--9},
  year      = {2009},
}

@Article{Altman2011,
  author    = {Altman, Eitan and Avrachenkov, Konstantin and Garnaev, Andrey},
  title     = {Closed form solutions for water-filling problems in optimization and game frameworks},
  journal   = {Telecommunication Systems},
  volume    = {47},
  pages     = {153--164},
  year      = {2011},
  publisher = {Springer},
}

@Article{Amir2005,
  author    = {Rabah Amir},
  title     = {Supermodularity and Complementarity in Economics: An Elementary Survey},
  journal   = {Southern Economic Journal},
  volume    = {71},
  number    = {3},
  pages     = {636--660},
  year      = {2005},
  publisher = {Southern Economic Association},
  issn      = {0038-4038},
  doi       = {10.2307/20062066},
}

@Article{Amir2017,
  author    = {Rabah Amir and Sergei Belkov and Igor V. Evstigneev},
  title     = {Correlated equilibrium in a nutshell},
  journal   = {Theory and Decision},
  volume    = {83},
  number    = {4},
  pages     = {457--468},
  month     = jun,
  year      = {2017},
  publisher = {Springer Science and Business Media {LLC}},
  doi       = {10.1007/s11238-017-9609-9},
}

@InProceedings{Arabneydi2014,
  author    = {Arabneydi, Jalal and Mahajan, Aditya},
  title     = {Team optimal control of coupled subsystems with mean-field sharing},
  booktitle = {53rd IEEE Conference on Decision and Control},
  month     = dec,
  year      = {2014},
  publisher = {IEEE},
  doi       = {10.1109/cdc.2014.7039639},
}

@InProceedings{Arabneydi2015,
  author    = {Arabneydi, Jalal and Mahajan, Aditya},
  title     = {Reinforcement learning in decentralized stochastic control systems with partial history sharing},
  booktitle = {2015 American Control Conference (ACC)},
  month     = jul,
  year      = {2015},
  publisher = {IEEE},
  doi       = {10.1109/acc.2015.7172192},
  url       = {http://dx.doi.org/10.1109/ACC.2015.7172192},
}

@Misc{Arabneydi2016,
  author = {Jalal Arabneydi and Aditya Mahajan},
  title  = {Linear Quadratic Mean Field Teams: Optimal and Approximately Optimal Decentralized Solutions},
  month  = aug,
  year   = {2016},
  note   = {arXiv:1609.00056v2},
  url    = {https://arxiv.org/abs/1609.00056v2},
  file   = {online:http\://arxiv.org/pdf/1609.00056v2:PDF},
}

@Article{Arapostathis1993,
  author  = {Aristotle Arapostathis and Vivek S. Borkar and Emmanuel Fernández-Gaucherand and Mrinal K. Ghosh and Steven I. Marcus},
  title   = {Discrete-time controlled {Markov} processes with average cost criterion: A survey},
  journal = {{SIAM} Journal on Control and Optimization},
  volume  = {31},
  number  = {2},
  pages   = {282--344},
  month   = mar,
  year    = {1993},
}

@Article{Arrow1949,
  author    = {K. J. Arrow and D. Blackwell and M. A. Girshick},
  title     = {{Bayes} and Minimax Solutions of Sequential Decision Problems},
  journal   = {Econometrica},
  volume    = {17},
  number    = {3/4},
  pages     = {213--244},
  month     = jul,
  year      = {1949},
  publisher = {{JSTOR}},
  doi       = {10.2307/1905525},
}

@Article{Arrow1951,
  author    = {Arrow, Kenneth J. and Harris, Theodore and Marschak, Jacob},
  title     = {Optimal inventory policy},
  journal   = {Econometrica},
  volume    = {19},
  number    = {3},
  pages     = {250--272},
  month     = jul,
  year      = {1951},
  publisher = {JSTOR},
  doi       = {10.2307/1906813},
}

@Book{Arthur1994,
  author    = {Arthur, W. Brian},
  title     = {Increasing Returns and Path Dependence in the Economy},
  year      = {1994},
  publisher = {University of Michigan Press},
  doi       = {10.3998/mpub.10029},
}

@Article{Artzrouni1986,
  author    = {Artzrouni, Marc},
  title     = {On the convergence of infinite products of matrices},
  journal   = {Linear Algebra and its Applications},
  volume    = {74},
  pages     = {11--21},
  month     = feb,
  year      = {1986},
  publisher = {Elsevier BV},
  issn      = {0024-3795},
  doi       = {10.1016/0024-3795(86)90112-6},
}

@InProceedings{Asadi2018,
  author    = {Asadi, Kavosh and Misra, Dipendra and Littman, Michael},
  title     = {{Lipschitz} Continuity in Model-based Reinforcement Learning},
  booktitle = {Proceedings of the 35th International Conference on Machine Learning},
  series    = {Proceedings of Machine Learning Research},
  volume    = {80},
  pages     = {264--273},
  month     = jul,
  year      = {2018},
  publisher = {PMLR},
  url       = {https://proceedings.mlr.press/v80/asadi18a.html},
  file      = {asadi18a.pdf:http\://proceedings.mlr.press/v80/asadi18a/asadi18a.pdf:PDF},
}

@Article{Astrom1965,
  author    = {Karl J. {\AA}str{\"o}m},
  title     = {Optimal control of {Markov} processes with incomplete state information},
  journal   = {Journal of Mathematical Analysis and Applications},
  volume    = {10},
  number    = {1},
  pages     = {174--205},
  month     = feb,
  year      = {1965},
  publisher = {Elsevier {BV}},
  doi       = {10.1016/0022-247x(65)90154-x},
}

@Book{Astrom1970,
  author    = {Karl J. {\AA}str{\"o}m},
  title     = {Introduction to Stochastic Control Theory},
  year      = {1970},
  publisher = {Dover},
}

@Article{Athans1971,
  author    = {M. Athans},
  title     = {The role and use of the stochastic linear-quadratic-{Gaussian} problem in control system design},
  journal   = {{IEEE} Transactions on Automatic Control},
  volume    = {16},
  number    = {6},
  pages     = {529--552},
  month     = dec,
  year      = {1971},
  publisher = {Institute of Electrical and Electronics Engineers ({IEEE})},
  doi       = {10.1109/tac.1971.1099818},
}

@Article{Aumann1974,
  author    = {Robert J. Aumann},
  title     = {Subjectivity and correlation in randomized strategies},
  journal   = {Journal of Mathematical Economics},
  volume    = {1},
  number    = {1},
  pages     = {67--96},
  month     = mar,
  year      = {1974},
  publisher = {Elsevier {BV}},
  doi       = {10.1016/0304-4068(74)90037-8},
}

@Article{Aumann1976,
  author    = {Robert J. Aumann},
  title     = {Agreeing to Disagree},
  journal   = {The Annals of Statistics},
  volume    = {4},
  number    = {6},
  pages     = {1236--1239},
  month     = nov,
  year      = {1976},
  publisher = {Institute of Mathematical Statistics},
  doi       = {10.1214/aos/1176343654},
}

@Article{Aumann1987,
  author    = {Robert J. Aumann},
  title     = {Correlated Equilibrium as an Expression of {Bayesian} Rationality},
  journal   = {Econometrica},
  volume    = {55},
  number    = {1},
  pages     = {1--18},
  month     = jan,
  year      = {1987},
  publisher = {{JSTOR}},
  doi       = {10.2307/1911154},
}

@Article{Avrachenkov2022,
  author    = {Avrachenkov, Konstantin E. and Borkar, Vivek S.},
  title     = {{Whittle} index based {Q}-learning for restless bandits with average reward},
  journal   = {Automatica},
  volume    = {139},
  pages     = {110186},
  month     = may,
  year      = {2022},
  publisher = {Elsevier BV},
  issn      = {0005-1098},
  doi       = {10.1016/j.automatica.2022.110186},
}

@InProceedings{Bagnell2003,
  author    = {Bagnell, J. Andrew and Kakade, Sham and Ng, Andrew Y. and Schneider, Jeff},
  title     = {Policy search by dynamic programming},
  booktitle = {Neural Information Processing Systems},
  series    = {NIPS'03},
  pages     = {831--838},
  year      = {2003},
  publisher = {MIT Press},
  address   = {Cambridge, MA, USA},
  location  = {Whistler, British Columbia, Canada},
  numpages  = {8},
}

@Article{Bai2015,
  author    = {Bai, Cheng-Zong and Katewa, Vaibhav and Gupta, Vijay and Huang, Yih-Fang},
  title     = {A stochastic sensor selection scheme for sequential hypothesis testing with multiple sensors},
  journal   = {{IEEE} Transactions on Signal Processing},
  volume    = {63},
  number    = {14},
  pages     = {3687--3699},
  year      = {2015},
  publisher = {IEEE},
}

@Article{Bander1999,
  author    = {Bander, J. L. and White, C. C.},
  title     = {{Markov} decision processes with noise-corrupted and delayed state observations},
  journal   = {Journal of the Operational Research Society},
  volume    = {50},
  number    = {6},
  pages     = {660--668},
  month     = jun,
  year      = {1999},
  publisher = {Informa UK Limited},
  issn      = {1476-9360},
  doi       = {10.1057/palgrave.jors.2600745},
  url       = {http://dx.doi.org/10.1057/palgrave.jors.2600745},
}

@Article{Baras1984,
  author    = {Baras, J. S. and Dorsey, A. J. and Makowski, A. M.},
  title     = {Two competing queues with linear costs: the μc-rule is often optimal},
  journal   = {Advances in Applied Probability},
  volume    = {16},
  number    = {1},
  pages     = {8},
  month     = mar,
  year      = {1984},
  publisher = {Cambridge University Press (CUP)},
  issn      = {1475-6064},
  doi       = {10.1017/s000186780002187x},
}

@Article{Bellman1955,
  author    = {Bellman, Richard and Glicksberg, Irving and Gross, Oliver},
  title     = {On the optimal inventory equation},
  journal   = {Management Science},
  volume    = {2},
  number    = {1},
  pages     = {83--104},
  month     = oct,
  year      = {1955},
  publisher = {Institute for Operations Research and the Management Sciences ({INFORMS})},
  doi       = {10.1287/mnsc.2.1.83},
}

@Book{Bellman1957,
  author    = {Richard Bellman},
  title     = {Dynamic Programming},
  year      = {1957},
  publisher = {Princeton University Press},
}

@Article{Bellman1957a,
  author   = {Richard Bellman},
  title    = {A {Markovian} Decision Process},
  journal  = {Indiana Univ. Math. J.},
  volume   = {6},
  number   = {4},
  pages    = {679--684},
  year     = {1957},
  issn     = {0022-2518},
  coden    = {IUMJAB},
  fjournal = {Indiana University Mathematics Journal},
}

@PhdThesis{Berry2000,
  author = {Berry, Randall Alexander},
  title  = {Power and delay trade-offs in fading channels},
  year   = {2000},
  school = {Massachusetts Institute of Technology},
  url    = {https://dspace.mit.edu/handle/1721.1/9290},
}

@Article{Berry2002,
  author    = {Berry, Randall A. and Gallager, Robert G.},
  title     = {Communication over fading channels with delay constraints},
  journal   = {{IEEE} Transactions on Information Theory},
  volume    = {48},
  number    = {5},
  pages     = {1135--1149},
  month     = may,
  year      = {2002},
  publisher = {Institute of Electrical and Electronics Engineers ({IEEE})},
  doi       = {10.1109/18.995554},
}

@Article{Berry2012,
  author  = {Berry, Randall and Modiano, Eytan and Zafer, Murtaza},
  title   = {Energy-Efficient Scheduling under Delay Constraints for Wireless Networks},
  journal = {Synthesis Lectures on Communication Networks},
  volume  = {5},
  number  = {2},
  pages   = {1--96},
  year    = {2012},
  doi     = {10.2200/S00443ED1V01Y201208CNT011},
  url     = {https://doi.org/10.2200/S00443ED1V01Y201208CNT011},
}

@Article{Berry2013,
  author    = {Berry, Randall A.},
  title     = {Optimal power-delay tradeoffs in fading channels---Small-delay asymptotics},
  journal   = {{IEEE} Transactions on Information Theory},
  volume    = {59},
  number    = {6},
  pages     = {3939--3952},
  month     = jun,
  year      = {2013},
  publisher = {IEEE},
  doi       = {10.1109/TIT.2013.2253194},
}

@Book{Bertsekas:book,
  author    = {Bertsekas, Dimitri P.},
  title     = {Dynamic programming and optimal control},
  volume    = {{I} and {II}},
  year      = {2011},
  edition   = {Third},
  publisher = {Athena Scientific},
  url       = {http://www.athenasc.com/dpbook.html},
}

@Article{Bertsekas1975,
  author   = {Dimitri P. Bertsekas},
  title    = {Convergence of discretization procedures in dynamic programming},
  journal  = {{IEEE} Transactions on Automatic Control},
  volume   = {20},
  number   = {3},
  pages    = {415--419},
  month    = jun,
  year     = {1975},
  issn     = {2334-3303},
  doi      = {10.1109/TAC.1975.1100984},
  groups   = {[adityam:]},
  keywords = {Dynamic programming, Nonlinear systems, stochastic discrete-time, Optimal stochastic control, Stochastic optimal control, Convergence, Stochastic processes, Optimal control, Cost function, Heuristic algorithms, Grid computing, Concrete, Veins, Probability distribution},
}

@Article{Bertsekas2000,
  author    = {Dimitri P. Bertsekas and John N. Tsitsiklis},
  title     = {Gradient Convergence in Gradient Methods with Errors},
  journal   = {{SIAM} Journal on Optimization},
  volume    = {10},
  number    = {3},
  pages     = {627--642},
  month     = jan,
  year      = {2000},
  publisher = {Society for Industrial {\&} Applied Mathematics ({SIAM})},
  doi       = {10.1137/s1052623497331063},
}

@Book{Bertsekas2013,
  author    = {Bertsekas, Dimitri P.},
  title     = {Abstract dynamic programming},
  year      = {2013},
  publisher = {Athena Scientific},
  address   = {Belmont, MA},
  url       = {https://web.mit.edu/dimitrib/www/abstractdp_MIT.html},
}

@Book{BertsekasTsitsiklis1996,
  author    = {Bertsekas, Dimitri P. and Tsitsiklis, John N.},
  title     = {Neuro-dynamic Programming},
  series    = {Optimization and Neural Computation Series},
  year      = {1996},
  publisher = {Athena Scientific},
  isbn      = {9781886529106},
  lccn      = {lc96085338},
}

@InProceedings{Bitar2012,
  author    = {Bitar, Eilyan and Poolla, Kameshwar and Khargonekar, Pramod and Rajagopal, Ram and Varaiya, Pravin and Wu, Felix},
  title     = {Selling random wind},
  booktitle = {Hawaii International Conference on System Sciences},
  pages     = {1931--1937},
  year      = {2012},
  publisher = {IEEE},
}

@Article{Blackwell1964,
  author    = {Blackwell, David},
  title     = {Memoryless strategies in finite-stage dynamic programming},
  journal   = {The Annals of Mathematical Statistics},
  volume    = {35},
  number    = {2},
  pages     = {863--865},
  month     = jun,
  year      = {1964},
  publisher = {Institute of Mathematical Statistics},
  doi       = {10.1214/aoms/1177703586},
}

@Article{Blackwell1965,
  author    = {David Blackwell},
  title     = {Discounted Dynamic Programming},
  journal   = {The Annals of Mathematical Statistics},
  volume    = {36},
  number    = {1},
  pages     = {226--235},
  month     = feb,
  year      = {1965},
  publisher = {Institute of Mathematical Statistics},
  doi       = {10.1214/aoms/1177700285},
}

@Article{Blackwell1970,
  author    = {David Blackwell},
  title     = {On Stationary Policies},
  journal   = {Journal of the Royal Statistical Society. Series A (General)},
  volume    = {133},
  number    = {1},
  pages     = {33--37},
  year      = {1970},
  publisher = {{JSTOR}},
  doi       = {10.2307/2343810},
}

@Article{Blum1954,
  author    = {Blum, Julius R.},
  title     = {Multidimensional Stochastic Approximation Methods},
  journal   = {The Annals of Mathematical Statistics},
  volume    = {25},
  number    = {4},
  pages     = {737--744},
  month     = dec,
  year      = {1954},
  publisher = {Institute of Mathematical Statistics},
  issn      = {0003-4851},
  doi       = {10.1214/aoms/1177728659},
}

@Misc{Bogdan2022,
  author = {Krzysztof Bogdan and Mateusz Więcek},
  title  = {{Burkholder} inequality by {Bregman} divergence},
  month  = apr,
  year   = {2022},
  note   = {arXiv:2103.06358v3},
  url    = {http://arxiv.org/pdf/2103.06358v3},
  file   = {online:http\://arxiv.org/pdf/2103.06358v3:PDF},
}

@Article{Bohlin1970,
  author    = {T. Bohlin},
  title     = {Information pattern for linear discrete-time models with stochastic coefficients},
  journal   = {{IEEE} Transactions on Automatic Control},
  volume    = {15},
  number    = {1},
  pages     = {104--106},
  month     = feb,
  year      = {1970},
}

@Article{Borkar1988,
  author    = {Vivek S. Borkar},
  title     = {A convex analytic approach to {Markov} decision processes},
  journal   = {Probability Theory and Related Fields},
  volume    = {78},
  number    = {4},
  pages     = {583--602},
  month     = aug,
  year      = {1988},
  publisher = {Springer Science and Business Media {LLC}},
  doi       = {10.1007/bf00353877},
}

@Article{Borkar1997,
  author    = {V. S. Borkar and K. Soumyanath},
  title     = {An analog scheme for fixed point computation. I. Theory},
  journal   = {{IEEE} Transactions on Circuits and Systems I: Fundamental Theory and Applications},
  volume    = {44},
  number    = {4},
  pages     = {351--355},
  month     = apr,
  year      = {1997},
  publisher = {Institute of Electrical and Electronics Engineers ({IEEE})},
  doi       = {10.1109/81.563625},
}

@Article{Borkar2000,
  author    = {V. S. Borkar and S. P. Meyn},
  title     = {The {O.D.E.} Method for Convergence of Stochastic Approximation and Reinforcement Learning},
  journal   = {{SIAM} Journal on Control and Optimization},
  volume    = {38},
  number    = {2},
  pages     = {447--469},
  month     = jan,
  year      = {2000},
  publisher = {Society for Industrial {\&} Applied Mathematics ({SIAM})},
  doi       = {10.1137/s0363012997331639},
}

@InCollection{Borkar2002,
  author    = {Vivek S. Borkar},
  title     = {Convex Analytic Methods in {Markov} Decision Processes},
  booktitle = {Handbook of {Markov} Decision Processes: Methods and Applications},
  editor    = {Feinberg, Eugene A. and Shwartz, Adam},
  series    = {International Series in Operations Research {\&} Management Science},
  volume    = {40},
  pages     = {347--375},
  year      = {2002},
  publisher = {Springer {US}},
  doi       = {10.1007/978-1-4615-0805-2_11},
}

@Book{Borkar2008,
  author    = {Vivek S. Borkar},
  title     = {Stochastic Approximation},
  year      = {2008},
  publisher = {Hindustan Book Agency},
  doi       = {10.1007/978-93-86279-38-5},
}

@InProceedings{Bozkurt2023,
  author    = {Berk Bozkurt and Aditya Mahajan and Ashutosh Nayyar and Yi Ouyang},
  title     = {Weighted norm bounds in {MDPs} with unbounded per-step cost},
  booktitle = {IEEE Conference on Decision and Control},
  month     = dec,
  year      = {2023},
  publisher = {IEEE},
  address   = {Singapore},
}

@InProceedings{Burda2019,
  author    = {Burda, Yuri and Edwards, Harrison and Storkey, Amos and Klimov, Oleg},
  title     = {Exploration by random network distillation},
  booktitle = {International Conference on Learning Representations},
  year      = {2019},
  url       = {https://openreview.net/forum?id=H1lJJnR5Ym},
}

@Article{Burkholder1966,
  author    = {D. L. Burkholder},
  title     = {Martingale Transforms},
  journal   = {The Annals of Mathematical Statistics},
  volume    = {37},
  number    = {6},
  pages     = {1494--1504},
  month     = dec,
  year      = {1966},
  publisher = {Institute of Mathematical Statistics},
  doi       = {10.1214/aoms/1177699141},
}

@Article{Burkholder1973,
  author    = {D. L. Burkholder},
  title     = {Distribution Function Inequalities for Martingales},
  journal   = {The Annals of Probability},
  volume    = {1},
  number    = {1},
  pages     = {19--42},
  month     = feb,
  year      = {1973},
  publisher = {Institute of Mathematical Statistics},
  doi       = {10.1214/aop/1176997023},
}

@Article{Buyukkoc1985,
  author    = {Buyukkoc, C. and Varaiya, P. and Walrand, J.},
  title     = {The cμ rule revisited},
  journal   = {Advances in Applied Probability},
  volume    = {17},
  number    = {1},
  pages     = {237--238},
  month     = mar,
  year      = {1985},
  publisher = {Cambridge University Press (CUP)},
  issn      = {1475-6064},
  doi       = {10.2307/1427064},
}

@Article{Cao2000,
  author    = {Cao, Xi-Ren},
  title     = {A unified approach to {Markov} decision problems and performance sensitivity analysis},
  journal   = {Automatica},
  volume    = {36},
  number    = {5},
  pages     = {771--774},
  month     = may,
  year      = {2000},
  publisher = {Elsevier BV},
  issn      = {0005-1098},
  doi       = {10.1016/s0005-1098(99)00207-1},
}

@Article{Cao2003,
  author    = {Cao, Xi-Ren},
  title     = {From Perturbation Analysis to {Markov} Decision Processes and Reinforcement Learning},
  journal   = {Discrete Event Dynamic Systems},
  volume    = {13},
  number    = {1/2},
  pages     = {9--39},
  year      = {2003},
  publisher = {Springer Science and Business Media LLC},
  issn      = {0924-6703},
  doi       = {10.1023/a:1022188803039},
}

@Article{Cao2005,
  author    = {Xi-Ren Cao},
  title     = {Basic Ideas for Event-Based Optimization of {Markov} Systems},
  journal   = {Discrete Event Dynamic Systems},
  volume    = {15},
  number    = {2},
  pages     = {169--197},
  month     = jun,
  year      = {2005},
  publisher = {Springer Science and Business Media {LLC}},
  doi       = {10.1007/s10626-004-6211-4},
}

@Book{Cao2007,
  author    = {Xi-Ren Cao},
  title     = {Stochastic Learning and Optimization},
  year      = {2007},
  publisher = {Springer},
  doi       = {10.1007/978-0-387-69082-7},
}

@InProceedings{Cassandra1994,
  author    = {Cassandra, Anthony R. and Kaelbling, Leslie Pack and Littman, Michael L.},
  title     = {Acting optimally in partially observable stochastic domains},
  booktitle = {Proceedings of the Twelfth National Conference on Artificial Intelligence ({AAAI}-94)},
  pages     = {1023--1028},
  year      = {1994},
}

@InProceedings{Cassandra1997,
  author    = {Anthony Cassandra and Michael L. Littman and Nevin L. Zhang},
  title     = {Incremental pruning: A simple, fast, exact method for partially observable {Markov} decision processes},
  booktitle = {Proceedings of the Thirteenth Conference on Uncertainty in Artificial Intelligence},
  year      = {1997},
}

  abstract  = {The economics literature is replete with examples of monotone comparative statics; that is, scenarios where optimal decisions or equilibria in a parameterized collection of models vary monotonically with the parameter. Most of these examples are manifestations of complementarity, with a common explicit or implicit theoretical basis in properties of a supermodular function on a lattice. Supermodular functions yield a characterization for complementarity and extend the notion of complementarity to a general setting that is a natural mathematical context for studying complementarity and monotone comparative statics. Concepts and results related to supermodularity and monotone comparative statics constitute a new and important formal step in the long line of economics literature on complementarity. This monograph links complementarity to powerful concepts and results involving supermodular functions on lattices and focuses on analyses and issues related to monotone comparative statics. Don Topkis, who is known for his seminal contributions to this area, here presents a self-contained and up-to-date view of this field, including many new results, to scholars interested in economic theory and its applications as well as to those in related disciplines. The emphasis is on methodology. The book systematically develops a comprehensive, integrated theory pertaining to supermodularity, complementarity, and monotone comparative statics. It then applies that theory in the analysis of many diverse economic models formulated as decision problems, noncooperative games, and cooperative games.},
  doi       = {10.2307/j.ctt7s83q},
}

@Article{Chakravorty2018,
  author  = {Jhelum Chakravorty and Aditya Mahajan},
  title   = {Sufficient Conditions for the Value Function and Optimal Strategy to be Even and Quasi-Convex},
  journal = {{IEEE} Transactions on Automatic Control},
  volume  = {63},
  number  = {11},
  pages   = {3858--3864},
  month   = nov,
  year    = {2018},
  issn    = {2334-3303},
  doi     = {10.1109/TAC.2018.2800796},
}

@Article{Chakravorty2020,
  author    = {Jhelum Chakravorty and Aditya Mahajan},
  title     = {Remote Estimation Over a Packet-Drop Channel With {Markovian} State},
  journal   = {{IEEE} Transactions on Automatic Control},
  volume    = {65},
  number    = {5},
  pages     = {2016--2031},
  month     = may,
  year      = {2020},
  publisher = {Institute of Electrical and Electronics Engineers ({IEEE})},
  doi       = {10.1109/tac.2019.2926160},
}

@Unpublished{Chang2007,
  author = {Joseph T. Chang},
  title  = {Stochastic Processes},
  month  = feb,
  year   = {2007},
  note   = {Lecture notes},
  url    = {http://www.stat.yale.edu/~pollard/Courses/251.spring2013/Handouts/Chang-notes.pdf},
}

@Book{Chen1991,
  author    = {Han-Fu Chen and Lei Guo},
  title     = {Identification and Stochastic Adaptive Control},
  year      = {1991},
  publisher = {Birkhäuser Boston},
  doi       = {10.1007/978-1-4612-0429-9},
}

@Article{Chen2017,
  author    = {Chen, Xin},
  title     = {{$L^{\#}$}-convexity and its applications in operations},
  journal   = {Frontiers of Engineering Management},
  volume    = {4},
  number    = {3},
  pages     = {283},
  year      = {2017},
  publisher = {Engineering Sciences Press},
  issn      = {2095-7513},
  doi       = {10.15302/j-fem-2017057},
}

@PhdThesis{Cheng1988,
  author  = {Cheng, Hsien-Te},
  title   = {Algorithms for Partially Observable {Markov} Decision Processes},
  school  = {University of British Columbia},
  year    = {1988},
  address = {Vancouver, BC},
}

@Article{Chow1991,
  author    = {Chow, C.-S. and Tsitsiklis, J. N.},
  title     = {An optimal one-way multigrid algorithm for discrete-time stochastic control},
  journal   = {{IEEE} Transactions on Automatic Control},
  volume    = {36},
  number    = {8},
  pages     = {898--914},
  year      = {1991},
  publisher = {Institute of Electrical and Electronics Engineers (IEEE)},
  issn      = {0018-9286},
  doi       = {10.1109/9.133184},
}

@Article{Chung2006,
  author    = {Chung, Fan and Lu, Linyuan},
  title     = {Concentration inequalities and martingale inequalities: a survey},
  journal   = {Internet Mathematics},
  volume    = {3},
  number    = {1},
  pages     = {79--127},
  year      = {2006},
  publisher = {A K Peters, Ltd.},
  url       = {https://projecteuclid.org/euclid.im/1175266369},
  fjournal  = {Internet Mathematics},
}

@Article{Cowan2015,
  author    = {Wesley Cowan and Michael N. Katehakis},
  title     = {Multi-armed bandits under general depreciation and commitment},
  journal   = {Probability in the Engineering and Informational Sciences},
  volume    = {29},
  number    = {1},
  pages     = {51--76},
  month     = oct,
  year      = {2015},
  publisher = {Cambridge University Press ({CUP})},
  doi       = {10.1017/s0269964814000217},
}

@Article{Daley1968,
  author    = {Daley, D. J.},
  title     = {Stochastically monotone {Markov} chains},
  journal   = {Zeitschrift f{\"u}r Wahrscheinlichkeitstheorie und verwandte Gebiete},
  volume    = {10},
  number    = {4},
  pages     = {305--317},
  year      = {1968},
  publisher = {Springer},
  doi       = {10.1007/BF00531852},
}

@InCollection{Davis1979,
  author    = {M. H. A. Davis},
  title     = {Martingale methods in stochastic control},
  booktitle = {Stochastic Control Theory and Stochastic Differential Systems},
  pages     = {85--117},
  year      = {1979},
  publisher = {Springer-Verlag},
  doi       = {10.1007/bfb0009377},
}

@Article{DavisVaraiya1972,
  author    = {M. H. A. Davis and P. P. Varaiya},
  title     = {Information states for linear stochastic systems},
  journal   = {Journal of Mathematical Analysis and Applications},
  volume    = {37},
  number    = {2},
  pages     = {384--402},
  month     = feb,
  year      = {1972},
}

@Book{Dawkins1976,
  author    = {Dawkins, Richard},
  title     = {The selfish gene},
  pages     = {224},
  year      = {1976},
  publisher = {Oxford University Press},
  isbn      = {019857519X},
}

@Book{DeGroot1970,
  author    = {DeGroot, Morris},
  title     = {Optimal statistical decisions},
  year      = {1970},
  publisher = {Wiley-Interscience},
  address   = {Hoboken, N.J},
  isbn      = {047168029X},
}

@Book{Dellacherie1982b,
  title     = {Probabilities and Potential {B}: Theory of Martingales},
  publisher = {North-Holland},
  series    = {North-Holland Mathematics Studies},
  volume    = {72},
  year      = {1982},
  author    = {Claude Dellacherie and Paul-André Meyer},
}

@InProceedings{Devlin2012,
  author    = {Devlin, Sam and Kudenko, Daniel},
  booktitle = {Proceedings of the 11th International Conference on Autonomous Agents and Multiagent Systems},
  title     = {Dynamic Potential-Based Reward Shaping},
  year      = {2012},
  address   = {Richland, SC},
  pages     = {433--440},
  publisher = {International Foundation for Autonomous Agents and Multiagent Systems},
  series    = {AAMAS '12},
  abstract  = {Potential-based reward shaping can significantly improve the time needed to learn an optimal policy and, in multi-agent systems, the performance of the final joint-policy. It has been proven to not alter the optimal policy of an agent learning alone or the Nash equilibria of multiple agents learning together. However, a limitation of existing proofs is the assumption that the potential of a state does not change dynamically during the learning. This assumption often is broken, especially if the reward-shaping function is generated automatically. In this paper we prove and demonstrate a method of extending potential-based reward shaping to allow dynamic shaping and maintain the guarantees of policy invariance in the single-agent case and consistent Nash equilibria in the multi-agent case.},
  isbn      = {0981738117},
  keywords  = {reward shaping, reinforcement learning},
  location  = {Valencia, Spain},
  numpages  = {8},
}

@Unpublished{Devlin2014,
  author = {Sam Devlin},
  title  = {Potential Based Reward Shaping Tutorial},
  year   = {2014},
  note   = {ALA 2014},
  url    = {http://www-users.cs.york.ac.uk/~devlin/presentations/pbrs-tut.pdf},
}

@Article{Dibangoye2016,
  author    = {Dibangoye, Jilles Steeve and Amato, Christopher and Buffet, Olivier and Charpillet, François},
  title     = {Optimally Solving {Dec-POMDPs} as Continuous-State {MDPs}},
  journal   = {Journal of Artificial Intelligence Research},
  year      = {2016},
  volume    = {55},
  pages     = {443--497},
  month     = feb,
  issn      = {1076-9757},
  doi       = {10.1613/jair.4623},
  publisher = {AI Access Foundation},
}

@Article{Ding2016,
  author  = {Ding, N. and Sadeghi, P. and Kennedy, R. A.},
  title   = {On Monotonicity of the Optimal Transmission Policy in Cross-Layer Adaptive {$m$-QAM} Modulation},
  journal = {IEEE Transactions on Communications},
  year    = {2016},
  volume  = {64},
  number  = {9},
  pages   = {3771--3785},
  month   = sep,
  issn    = {1558-0857},
  doi     = {10.1109/TCOMM.2016.2590427},
}

@Article{Doob1971,
  author    = {Doob, J. L.},
  title     = {What is a Martingale?},
  journal   = {The American Mathematical Monthly},
  year      = {1971},
  volume    = {78},
  number    = {5},
  pages     = {451--463},
  month     = may,
  issn      = {0002-9890},
  doi       = {10.2307/2317751},
  publisher = {JSTOR},
}

@Article{Dorato1971,
  author    = {P. Dorato and A. Levis},
  title     = {Optimal linear regulators: The discrete-time case},
  journal   = {{IEEE} Transactions on Automatic Control},
  year      = {1971},
  volume    = {16},
  number    = {6},
  pages     = {613--620},
  month     = dec,
  doi       = {10.1109/tac.1971.1099832},
  publisher = {Institute of Electrical and Electronics Engineers ({IEEE})},
}

@Book{Dubins2014,
  title     = {How to Gamble If You Must: Inequalities for Stochastic Processes},
  publisher = {Dover Publications},
  year      = {2014},
  author    = {Dubins, Lester E. and Savage, Leonard J.},
  editor    = {Sudderth, William and Gilat, David},
}

@Book{Durrett2019,
  author    = {Durrett, Rick},
  title     = {Probability: Theory and Examples},
  publisher = {Cambridge University Press},
  year      = {2019},
  month     = apr,
  isbn      = {9781108473682},
  doi       = {10.1017/9781108591034},
}

@Misc{Dutta2024,
  author      = {Manali Dutta and Rahul Singh},
  title       = {Optimal Risk-Sensitive Scheduling Policies for Remote Estimation of Autoregressive Markov Processes},
  year        = {2024},
  date        = {2024-03-20},
  eprint      = {2403.13898v1},
  eprinttype  = {arXiv},
  eprintclass = {math.OC},
  url         = {http://arxiv.org/pdf/2403.13898v1},
  file        = {online:http\://arxiv.org/pdf/2403.13898v1:PDF},
  keywords    = {math.OC, math.PR},
  abstract    = {We design scheduling policies that minimize a risk-sensitive cost criterion for a remote estimation setup. Since risk-sensitive cost objective takes into account not just the mean value of the cost, but also higher order moments of its probability distribution, the resulting policy is robust to changes in the underlying system's parameters. The setup consists of a sensor that observes a discrete-time autoregressive Markov process, and at each time $t$ decides whether or not to transmit its observations to a remote estimator using an unreliable wireless communication channel after encoding these observations into data packets. We model the communication channel as a Gilbert-Elliott channel \cite{10384144}. Sensor probes the channel \cite{laourine2010betting} and hence knows the channel state at each time $t$ before making scheduling decision. The scheduler has to minimize the expected value of the exponential of the finite horizon cumulative cost that is sum of the following two quantities (i) the cumulative transmission power consumed, (ii) the cumulative squared estimator error. We pose this dynamic optimization problem as a Markov decision process (MDP), in which the system state at time $t$ is composed of (i) the instantaneous error $\Delta(t):= x(t)-a\hat{x}(t-1)$, where $x(t),\hat{x}(t-1)$ are the system state and the estimate at time $t,t-1$ respectively, and (ii) the channel state $c(t)$. We show that there exists an optimal policy that has a threshold structure, i.e., at each time $t$, for each possible channel state $c$, there is a threshold $\D\ust(c)$ such that if the current channel state is $c$, then it transmits only when the error $\D(t)$ exceeds $\D\ust(c)$.},
}

@Article{Dvoretzky1953,
  author    = {Dvoretzky, A. and Kiefer, J. and Wolfowitz, J.},
  title     = {On the Optimal Character of the {(s, S)} Policy in Inventory Theory},
  journal   = {Econometrica},
  year      = {1953},
  volume    = {21},
  number    = {4},
  pages     = {586--596},
  month     = oct,
  issn      = {0012-9682},
  doi       = {10.2307/1907924},
  publisher = {JSTOR},
}

@Article{Edgeworth1888,
  author    = {Edgeworth, Francis Y.},
  title     = {The mathematical theory of banking},
  journal   = {Journal of the Royal Statistical Society},
  year      = {1888},
  volume    = {51},
  number    = {1},
  pages     = {113--127},
  publisher = {JSTOR},
  url       = {https://www.jstor.org/stable/2979084},
}

@Article{Elliott2013,
  author    = {Robert Elliott and Xun Li and Yuan-Hua Ni},
  title     = {Discrete time mean-field stochastic linear-quadratic optimal control problems},
  journal   = {Automatica},
  year      = {2013},
  volume    = {49},
  number    = {11},
  pages     = {3222--3233},
  month     = nov,
  doi       = {10.1016/j.automatica.2013.08.017},
  publisher = {Elsevier {BV}},
}

@Book{Ellis1985,
  author    = {Richard S. Ellis},
  title     = {Entropy, Large Deviations, and Statistical Mechanics},
  publisher = {Springer New York},
  year      = {1985},
  doi       = {10.1007/978-1-4613-8533-2},
}

@InCollection{Fasoulakis2019,
  author    = {Michail Fasoulakis and Apostolos Traganitis and Anthony Ephremides},
  booktitle = {Game Theory for Networks},
  series    = {Lecture Notes of the Institute for Computer Sciences, Social Informatics and Telecommunications Engineering},
  publisher = {Springer International Publishing},
  title     = {Jamming in Multiple Independent {Gaussian} Channels as a Game},
  year      = {2019},
  pages     = {3--8},
  doi       = {10.1007/978-3-030-16989-3_1},
}

@Article{Feinberg2005,
  author    = {Feinberg, Eugene A.},
  title     = {On essential information in sequential decision processes},
  journal   = {Mathematical Methods of Operations Research},
  year      = {2005},
  volume    = {62},
  number    = {3},
  pages     = {399--410},
  month     = nov,
  issn      = {1432-5217},
  doi       = {10.1007/s00186-005-0035-3},
  publisher = {Springer Science and Business Media LLC},
  url       = {http://dx.doi.org/10.1007/s00186-005-0035-3},
}

@InBook{Feinberg2016,
  author    = {Feinberg, Eugene A.},
  title     = {Optimality Conditions for Inventory Control},
  booktitle = {Optimization Challenges in Complex, Networked and Risky Systems},
  publisher = {INFORMS},
  year      = {2016},
  month     = oct,
  pages     = {14--45},
  doi       = {10.1287/educ.2016.0145},
}

@Article{Feinberg2020,
  author    = {Eugene A. Feinberg and Gaojin He},
  journal   = {Operations Research Letters},
  title     = {Complexity bounds for approximately solving discounted {MDPs} by value iterations},
  year      = {2020},
  month     = jul,
  doi       = {10.1016/j.orl.2020.07.001},
  publisher = {Elsevier {BV}},
}

@Unpublished{Fergurson2004,
  author = {Ferguson, Thomas S. and Gilstein, C. Zachary},
  title  = {Optimal Investment Policies for the Horse Race Model},
  note   = {Unpublished manuscript},
  year   = {2004},
  url    = {https://www.math.ucla.edu/~tom/papers/unpublished/Zach2.pdf},
}

@Unpublished{Ferguson:book,
  author = {Thomas S. Ferguson},
  title  = {Optimal Stopping and Applications},
  year   = {2008},
  note   = {http://www.math.ucla.edu/~tom/Stopping/Contents.html},
  url    = {http://www.math.ucla.edu/~tom/Stopping/Contents.html},
}

@Article{Ferguson1989,
  author    = {Ferguson, Thomas S.},
  title     = {Who solved the secretary problem?},
  journal   = {Statistical Science},
  year      = {1989},
  volume    = {4},
  number    = {3},
  pages     = {282--289},
  publisher = {JSTOR},
}

@InBook{Follmer2010,
  author    = {Föllmer, Hans and Schied, Alexander},
  publisher = {John Wiley \& Sons, Ltd},
  title     = {Convex Risk Measures},
  year      = {2010},
  isbn      = {9780470061602},
  booktitle = {Encyclopedia of Quantitative Finance},
  doi       = {10.1002/9780470061602.eqf15003},
  url       = {https://onlinelibrary.wiley.com/doi/abs/10.1002/9780470061602.eqf15003},
}

@Article{Freeman1983,
  author    = {Freeman, P. R.},
  title     = {The Secretary Problem and Its Extensions: A Review},
  journal   = {International Statistical Review / Revue Internationale de Statistique},
  year      = {1983},
  volume    = {51},
  number    = {2},
  pages     = {189--206},
  month     = aug,
  issn      = {0306-7734},
  doi       = {10.2307/1402748},
  publisher = {JSTOR},
}

@Article{Frostig2016,
  author   = {Frostig, Esther and Weiss, Gideon},
  title    = {Four proofs of {Gittins'} multiarmed bandit theorem},
  journal  = {Annals of Operations Research},
  year     = {2016},
  volume   = {241},
  number   = {1},
  pages    = {127--165},
  issn     = {1572-9338},
  doi      = {10.1007/s10479-013-1523-0},
  url      = {http://dx.doi.org/10.1007/s10479-013-1523-0},
  abstract = {We study four proofs that the Gittins index priority rule is optimal for alternative bandit processes. These include Gittins' original exchange argument, Weber's prevailing charge argument, Whittle's Lagrangian dual approach, and Bertsimas and Ni{\~{n}}o-Mora's proof based on the achievable region approach and generalized conservation laws. We extend the achievable region proof to infinite countable state spaces, by using infinite dimensional linear programming theory.},
}

@Article{Fu2012,
  author    = {Fu, Fangwen and van der Schaar, Mihaela},
  title     = {Structure-Aware Stochastic Control for Transmission Scheduling},
  journal   = {IEEE Transactions on Vehicular Technology},
  year      = {2012},
  volume    = {61},
  number    = {9},
  pages     = {3931--3945},
  month     = nov,
  issn      = {1939-9359},
  doi       = {10.1109/tvt.2012.2213850},
  publisher = {Institute of Electrical and Electronics Engineers (IEEE)},
}

@InProceedings{Fu2018,
  author    = {Fu, Michael C.},
  title     = {{Monte Carlo} Tree Search: A tutorial},
  booktitle = {2018 Winter Simulation Conference (WSC)},
  year      = {2018},
  month     = dec,
  publisher = {IEEE},
  doi       = {10.1109/wsc.2018.8632344},
}

@Article{Gao2022,
  author    = {Shuang Gao and Aditya Mahajan},
  title     = {Optimal Control of Network-Coupled Subsystems: Spectral Decomposition and Low-Dimensional Solutions},
  journal   = {{IEEE} Transactions on Control of Network Systems},
  year      = {2022},
  volume    = {9},
  number    = {2},
  pages     = {657--669},
  month     = jun,
  doi       = {10.1109/tcns.2021.3124259},
  publisher = {Institute of Electrical and Electronics Engineers ({IEEE})},
}

@Article{Gast2022,
  author    = {Gast, Nicolas and Gaujal, Bruno and Khun, Kimang},
  title     = {Learning algorithms for {Markovian} Bandits: Is Posterior Sampling more Scalable than Optimism?},
  journal   = {Transactions on Machine Learning Research},
  year      = {2022},
  month     = nov,
  file      = {tmlr_main.pdf:https\://inria.hal.science/hal-03262006v3/file/tmlr_main.pdf:PDF},
  publisher = {OpenReview.net},
  url       = {https://inria.hal.science/hal-03262006},
}

@Article{Gast2023,
  author    = {Gast, Nicolas and Gaujal, Bruno and Yan, Chen},
  title     = {Exponential asymptotic optimality of {Whittle} index policy},
  journal   = {Queueing Systems},
  year      = {2023},
  volume    = {104},
  number    = {1--2},
  pages     = {107--150},
  month     = may,
  issn      = {1572-9443},
  doi       = {10.1007/s11134-023-09875-x},
  publisher = {Springer Science and Business Media LLC},
}

@Article{Geiss2021,
  author    = {Sarah Geiss and Michael Scheutzow},
  title     = {Sharpness of {Lenglart’s} domination inequality and a sharp monotone version},
  journal   = {Electronic Communications in Probability},
  year      = {2021},
  volume    = {26},
  pages     = {1--8},
  doi       = {10.1214/21-ECP413},
  keywords  = {BDG inequality, Garsia’s lemma, Lenglart’s domination inequality, monotone Lenglart’s inequality, sharpness},
  publisher = {Institute of Mathematical Statistics and Bernoulli Society},
  url       = {https://doi.org/10.1214/21-ECP413},
}

@InProceedings{Geist2019,
  author    = {Geist, Matthieu and Scherrer, Bruno and Pietquin, Olivier},
  title     = {A Theory of Regularized {Markov} Decision Processes},
  booktitle = {Proceedings of the 36th International Conference on Machine Learning},
  year      = {2019},
  editor    = {Chaudhuri, Kamalika and Salakhutdinov, Ruslan},
  volume    = {97},
  series    = {Proceedings of Machine Learning Research},
  pages     = {2160--2169},
  month     = jun,
  publisher = {PMLR},
  abstract  = {Many recent successful (deep) reinforcement learning algorithms make use of regularization, generally based on entropy or Kullback-Leibler divergence. We propose a general theory of regularized Markov Decision Processes that generalizes these approaches in two directions: we consider a larger class of regularizers, and we consider the general modified policy iteration approach, encompassing both policy iteration and value iteration. The core building blocks of this theory are a notion of regularized Bellman operator and the Legendre-Fenchel transform, a classical tool of convex optimization. This approach allows for error propagation analyses of general algorithmic schemes of which (possibly variants of) classical algorithms such as Trust Region Policy Optimization, Soft Q-learning, Stochastic Actor Critic or Dynamic Policy Programming are special cases. This also draws connections to proximal convex optimization, especially to Mirror Descent.},
  file      = {geist19a.pdf:http\://proceedings.mlr.press/v97/geist19a/geist19a.pdf:PDF},
  url       = {https://proceedings.mlr.press/v97/geist19a.html},
}

@InProceedings{Gelada2019,
  author    = {Gelada, Carles and Kumar, Saurabh and Buckman, Jacob and Nachum, Ofir and Bellemare, Marc G.},
  title     = {{DeepMDP}: Learning Continuous Latent Space Models for Representation Learning},
  booktitle = {Proceedings of the 36th International Conference on Machine Learning},
  year      = {2019},
  editor    = {Chaudhuri, Kamalika and Salakhutdinov, Ruslan},
  volume    = {97},
  series    = {Proceedings of Machine Learning Research},
  pages     = {2170--2179},
  address   = {Long Beach, California, USA},
  month     = jun,
  publisher = {PMLR},
  file      = {gelada19a.pdf:http\://proceedings.mlr.press/v97/gelada19a/gelada19a.pdf:PDF},
  url       = {http://proceedings.mlr.press/v97/gelada19a.html},
}

@Book{Gibbons1992,
  author    = {Gibbons, Robert},
  publisher = {Princeton University Press},
  title     = {Game theory for applied economists},
  year      = {1992},
  isbn      = {0691043086},
  pagetotal = {267},
}

@InCollection{Gittins1974,
  author    = {Gittins, J. C. and Jones, D. M.},
  booktitle = {Progress in Statistics},
  publisher = {North-Holland},
  title     = {A Dynamic Allocation Index for the Discounted Multiarmed Bandit Problem},
  year      = {1974},
  address   = {Amsterdam, Netherlands},
  pages     = {241--266},
  volume    = {9},
  language  = {English},
}

@Article{Gittins1979,
  author    = {Gittins, John C.},
  journal   = {Journal of the Royal Statistical Society. Series B (Methodological)},
  title     = {Bandit processes and dynamic allocation indices},
  year      = {1979},
  number    = {2},
  pages     = {148--177},
  volume    = {41},
  owner     = {adityam},
  publisher = {JSTOR},
  timestamp = {2013.07.22},
}

@Article{Gladyshev1965,
  author    = {E. G. Gladyshev},
  title     = {On Stochastic Approximation},
  journal   = {Theory of Probability and Its Applications},
  year      = {1965},
  volume    = {10},
  number    = {2},
  pages     = {275--278},
  month     = jan,
  doi       = {10.1137/1110031},
  publisher = {Society for Industrial {\&} Applied Mathematics ({SIAM})},
}

@Article{Glicksberg1952,
  author    = {I. L. Glicksberg},
  journal   = {Proceedings of the American Mathematical Society},
  title     = {A Further Generalization of the {Kakutani} Fixed Point Theorem, with Application to {Nash} Equilibrium Points},
  year      = {1952},
  month     = feb,
  number    = {1},
  pages     = {170--174},
  volume    = {3},
  doi       = {10.2307/2032478},
  publisher = {{JSTOR}},
}

@InProceedings{Grzes2009,
  author    = {Grzes, M. and Kudenko, D.},
  title     = {Theoretical and Empirical Analysis of Reward Shaping in Reinforcement Learning},
  booktitle = {International Conference on Machine Learning and Applications},
  year      = {2009},
  pages     = {337--344},
  month     = dec,
  doi       = {10.1109/ICMLA.2009.33},
}

@Article{Hager1989,
  author    = {William W. Hager},
  journal   = {{SIAM} Review},
  title     = {Updating the Inverse of a Matrix},
  year      = {1989},
  month     = jun,
  number    = {2},
  pages     = {221--239},
  volume    = {31},
  doi       = {10.1137/1031049},
  publisher = {Society for Industrial {\&} Applied Mathematics ({SIAM})},
}

@Article{Hardy1931,
  author    = {G. H. Hardy and J. E. Littlewood},
  journal   = {Journal of the London Mathematical Society},
  title     = {Notes on the Theory of Series ({XVI}): Two {Tauberian} Theorems},
  year      = {1931},
  month     = oct,
  number    = {4},
  pages     = {281--286},
  volume    = {s1-6},
  doi       = {10.1112/jlms/s1-6.4.281},
  publisher = {Wiley},
}

@Book{Hardy1952,
  author    = {Hardy, G. H. and Littlewood, J. E. and Pólya, G.},
  title     = {Inequalities},
  publisher = {Cambridge University Press},
  year      = {1952},
}

@Article{Harris1913,
  author    = {Harris, Ford W.},
  title     = {How many parts to make at once},
  year      = {1913},
  journal   = {Factory, The Magazine of Management},
  month     = feb,
  volume    = 10,
  number    = 2,
  pages     = {135--136, 152},
  doi       = {10.1287/opre.38.6.947},
  note      = {Reprinted in Operations Research 38(6), 1990; the DOI refers to the reprint},
}

@Article{Harsanyi1967,
  author    = {John C. Harsanyi},
  journal   = {Management Science},
  title     = {Games with Incomplete Information Played by {``Bayesian''} Players, {I}{\textendash}{III} Part {I}. The Basic Model},
  year      = {1967},
  month     = nov,
  number    = {3},
  pages     = {159--182},
  volume    = {14},
  doi       = {10.1287/mnsc.14.3.159},
  publisher = {Institute for Operations Research and the Management Sciences ({INFORMS})},
}

@Article{Harsanyi1968a,
  author    = {John C. Harsanyi},
  journal   = {Management Science},
  title     = {Games with Incomplete Information Played by {``Bayesian''} Players Part {II}. {Bayesian} Equilibrium Points},
  year      = {1968},
  month     = jan,
  number    = {5},
  pages     = {320--334},
  volume    = {14},
  doi       = {10.1287/mnsc.14.5.320},
  publisher = {Institute for Operations Research and the Management Sciences ({INFORMS})},
}

@Article{Harsanyi1968b,
  author    = {John C. Harsanyi},
  journal   = {Management Science},
  title     = {Games with Incomplete Information Played by {``Bayesian''} Players, Part {III}. The Basic Probability Distribution of the Game},
  year      = {1968},
  month     = mar,
  number    = {7},
  pages     = {486--502},
  volume    = {14},
  doi       = {10.1287/mnsc.14.7.486},
  publisher = {Institute for Operations Research and the Management Sciences ({INFORMS})},
}

@InProceedings{Hay2012,
  author    = {Nicholas Hay and S. Russell and David Tolpin and S. E. Shimony},
  booktitle = {Proceedings of the Twenty-Eighth Conference on Uncertainty in Artificial Intelligence ({UAI})},
  title     = {Selecting Computations: Theory and Applications},
  year      = {2012},
  url       = {http://www.auai.org/uai2012/papers/123.pdf},
}

@Article{HernandezHernandez1996,
  author    = {Daniel Hern{\'{a}}ndez-Hern{\'{a}}ndez and Steven I. Marcus},
  journal   = {Systems {\&} Control Letters},
  title     = {Risk sensitive control of {Markov} processes in countable state space},
  year      = {1996},
  month     = nov,
  number    = {3},
  pages     = {147--155},
  volume    = {29},
  doi       = {10.1016/s0167-6911(96)00051-5},
  publisher = {Elsevier {BV}},
}

@Article{HernandezHernandez1999,
  author    = {D. Hern{\'{a}}ndez-Hern{\'{a}}ndez and S. I. Marcus},
  journal   = {Applied Mathematics and Optimization},
  title     = {Existence of Risk-Sensitive Optimal Stationary Policies for Controlled {Markov} Processes},
  year      = {1999},
  month     = nov,
  number    = {3},
  pages     = {273--285},
  volume    = {40},
  doi       = {10.1007/s002459900126},
  publisher = {Springer Science and Business Media {LLC}},
}

@Book{HernandezLerma1996,
  author    = {On{\'{e}}simo Hern{\'{a}}ndez-Lerma and Jean Bernard Lasserre},
  title     = {Discrete-Time Markov Control Processes},
  publisher = {Springer New York},
  year      = {1996},
  doi       = {10.1007/978-1-4612-0729-0},
}

@Book{HernandezLerma1999,
  author    = {On{\'{e}}simo Hern{\'{a}}ndez-Lerma and Jean Bernard Lasserre},
  title     = {Further Topics on Discrete-Time Markov Control Processes},
  publisher = {Springer New York},
  year      = {1999},
  doi       = {10.1007/978-1-4612-0561-6},
}

@Article{Hinderer2005,
  author    = {Hinderer, Karl},
  title     = {{Lipschitz} continuity of value functions in {Markovian} decision processes},
  journal   = {Mathematical Methods of Operations Research},
  year      = {2005},
  volume    = {62},
  number    = {1},
  pages     = {3--22},
  doi       = {10.1007/s00186-005-0438-1},
  publisher = {Springer},
}

@Article{Hofbauer1979,
  author  = {Hofbauer, Josef and Schuster, Peter and Sigmund, Karl},
  title   = {A note on evolutionary stable strategies and game dynamics},
  journal = {Journal of Theoretical Biology},
  year    = {1979},
  volume  = {81},
  number  = {3},
  pages   = {609--612},
}

@Unpublished{Hopcroft2012,
  author = {John Hopcroft and Ravi Kannan},
  title  = {Computer Science Theory for the Information Age},
  note   = {Unpublished manuscript},
  month  = jan,
  year   = {2012},
  url    = {https://www.cs.cmu.edu/~venkatg/teaching/CStheory-infoage/hopcroft-kannan-feb2012.pdf},
}

@Book{Howard1960,
  author    = {Ronald A. Howard},
  title     = {Dynamic Programming and Markov Processes},
  publisher = {The M.I.T. Press},
  year      = {1960},
}

@Article{Howard1972,
  author    = {Ronald A. Howard and James E. Matheson},
  journal   = {Management Science},
  title     = {Risk-Sensitive {Markov} Decision Processes},
  year      = {1972},
  month     = mar,
  number    = {7},
  pages     = {356--369},
  volume    = {18},
  doi       = {10.1287/mnsc.18.7.356},
  publisher = {Institute for Operations Research and the Management Sciences ({INFORMS})},
}

@Article{Jaakkola1994,
  author    = {Tommi Jaakkola and Michael I. Jordan and Satinder P. Singh},
  journal   = {Neural Computation},
  title     = {On the Convergence of Stochastic Iterative Dynamic Programming Algorithms},
  year      = {1994},
  month     = nov,
  number    = {6},
  pages     = {1185--1201},
  volume    = {6},
  doi       = {10.1162/neco.1994.6.6.1185},
  publisher = {{MIT} Press - Journals},
}

@Article{Janakiraman2004,
  author    = {Janakiraman, Ganesh and Roundy, Robin O.},
  title     = {Lost-Sales Problems with Stochastic Lead Times: Convexity Results for Base-Stock Policies},
  journal   = {Operations Research},
  publisher = {Institute for Operations Research and the Management Sciences (INFORMS)},
  year      = {2004},
  month     = oct,
  volume    = {52},
  number    = {5},
  pages     = {795--803},
  doi       = {10.1287/opre.1040.0130},
  issn      = {1526-5463},
}

@Misc{Jenner2022,
  author      = {Erik Jenner and Herke van Hoof and Adam Gleave},
  title       = {Calculus on {MDPs}: Potential Shaping as a Gradient},
  year        = {2022},
  abstract    = {In reinforcement learning, different reward functions can be equivalent in terms of the optimal policies they induce. A particularly well-known and important example is potential shaping, a class of functions that can be added to any reward function without changing the optimal policy set under arbitrary transition dynamics. Potential shaping is conceptually similar to potentials, conservative vector fields and gauge transformations in math and physics, but this connection has not previously been formally explored. We develop a formalism for discrete calculus on graphs that abstract a Markov Decision Process, and show how potential shaping can be formally interpreted as a gradient within this framework. This allows us to strengthen results from Ng et al. (1999) describing conditions under which potential shaping is the only additive reward transformation to always preserve optimal policies. As an additional application of our formalism, we define a rule for picking a single unique reward function from each potential shaping equivalence class.},
  date        = {2022-08-20},
  eprint      = {2208.09570v1},
  eprintclass = {cs.LG},
  eprinttype  = {arXiv},
  file        = {:http\://arxiv.org/pdf/2208.09570v1:PDF},
  keywords    = {cs.LG},
}

@Article{Joseph1961,
  author    = {Joseph, Peter D. and Tou, Julius T.},
  journal   = {Transactions of the American Institute of Electrical Engineers, Part {II}: Applications and Industry},
  title     = {On linear control theory},
  year      = {1961},
  number    = {4},
  pages     = {193--196},
  volume    = {80},
  doi       = {10.1109/tai.1961.6371743},
  publisher = {Institute of Electrical and Electronics Engineers ({IEEE})},
}

@Article{Kalman1960b,
  author  = {Kalman, Rudolf Emil},
  title   = {Contributions to the theory of optimal control},
  journal = {Bolet{\'{\i}}n de la Sociedad Matem{\'{a}}tica Mexicana},
  year    = {1960},
  volume  = {5},
  pages   = {102--119},
}

@Article{Kara2022,
  author    = {Kara, Ali Devran and Yüksel, Serdar},
  title     = {Convergence of Finite Memory {Q} Learning for {POMDPs} and Near Optimality of Learned Policies Under Filter Stability},
  journal   = {Mathematics of Operations Research},
  publisher = {INFORMS},
  year      = {2022},
  month     = nov,
  doi       = {10.1287/moor.2022.1331},
  issn      = {1526-5471},
}

@Article{Kara2023,
  author  = {Ali Kara and Naci Saldi and Serdar Yüksel},
  title   = {{Q-Learning} for {MDPs} with General Spaces: Convergence and Near Optimality via Quantization under Weak Continuity},
  journal = {Journal of Machine Learning Research},
  year    = {2023},
  volume  = {24},
  number  = {199},
  pages   = {1--34},
  url     = {http://jmlr.org/papers/v24/21-1457.html},
}

@Article{Karatzas2010,
  author    = {Ioannis Karatzas and William D. Sudderth},
  journal   = {Applied Mathematics and Optimization},
  title     = {Two Characterizations of Optimality in Dynamic Programming},
  year      = {2010},
  month     = nov,
  number    = {3},
  pages     = {421--434},
  volume    = {61},
  doi       = {10.1007/s00245-009-9093-x},
  publisher = {Springer Science and Business Media {LLC}},
}

@Article{Katehakis1987,
  author    = {Katehakis, Michael N. and Veinott, Arthur F.},
  journal   = {Mathematics of Operations Research},
  title     = {The multi-armed bandit problem: decomposition and computation},
  year      = {1987},
  number    = {2},
  pages     = {262--268},
  volume    = {12},
  owner     = {adityam},
  publisher = {INFORMS},
  timestamp = {2013.07.18},
}

@Article{Keilson1977,
  author    = {Keilson, Julian and Kester, Adri},
  title     = {Monotone matrices and monotone {Markov} processes},
  journal   = {Stochastic Processes and their Applications},
  year      = {1977},
  volume    = {5},
  number    = {3},
  pages     = {231--241},
  publisher = {Elsevier},
}

@Article{Kelly1956,
  author    = {Kelly, Jr., John L.},
  journal   = BSTJ,
  title     = {A New Interpretation of Information Rate},
  year      = {1956},
  month     = jul,
  number    = {4},
  pages     = {917--926},
  volume    = {35},
  doi       = {10.1002/j.1538-7305.1956.tb03809.x},
  publisher = {Institute of Electrical and Electronics Engineers ({IEEE})},
}

@Unpublished{Kennerly2011,
  author = {Kennerly, Sam},
  title  = {A graphical derivation of the {Legendre} transform},
  note   = {Unpublished manuscript},
  month  = apr,
  year   = {2011},
  url    = {http://einstein.drexel.edu/~skennerly/maths/Legendre.pdf},
}

@PhdThesis{Khun2023,
  author = {Kimang Khun},
  title  = {Indexability and Learning Algorithms for {Markovian} bandits},
  school = {University of Grenoble Alpes},
  year   = {2023},
}

@Article{Kjeldsen2001,
  author    = {Tinne Hoff Kjeldsen},
  journal   = {Archive for History of Exact Sciences},
  title     = {{John von Neumann's} Conception of the Minimax Theorem: A Journey Through Different Mathematical Contexts},
  year      = {2001},
  issn      = {0003-9519, 1432-0657},
  number    = {1},
  pages     = {39--68},
  volume    = {56},
  publisher = {Springer},
  url       = {http://www.jstor.org/stable/41134130},
}

@InProceedings{Konda1999,
  author    = {Konda, Vijay and Tsitsiklis, John},
  title     = {Actor-critic algorithms},
  year      = {1999},
  booktitle = {Advances in Neural Information Processing Systems},
  volume    = {12},
}

@Article{Konda2003,
  author    = {Konda, Vijay R. and Tsitsiklis, John N.},
  title     = {On actor-critic algorithms},
  journal   = {SIAM Journal on Control and Optimization},
  year      = {2003},
  volume    = {42},
  number    = {4},
  pages     = {1143--1166},
  publisher = {SIAM},
}

@Article{Koole2006,
  author    = {Ger Koole},
  journal   = {Foundations and Trends{\textregistered} in Stochastic Systems},
  title     = {Monotonicity in {Markov} Reward and Decision Chains: Theory and Applications},
  year      = {2006},
  number    = {1},
  pages     = {1--76},
  volume    = {1},
  doi       = {10.1561/0900000002},
  publisher = {Now Publishers},
}

@InCollection{Korevaar2004,
  author    = {Jacob Korevaar},
  booktitle = {Tauberian Theory: A Century of Developments},
  publisher = {Springer Berlin Heidelberg},
  title     = {The {Hardy-Littlewood} Theorems},
  year      = {2004},
  pages     = {1--64},
  doi       = {10.1007/978-3-662-10225-1_1},
}

@Article{Kuhn1950,
  author    = {Kuhn, H. W.},
  title     = {Extensive Games},
  journal   = {Proceedings of the National Academy of Sciences},
  volume    = {36},
  number    = {10},
  pages     = {570--576},
  publisher = {Proceedings of the National Academy of Sciences},
  year      = {1950},
  month     = oct,
  issn      = {1091-6490},
  doi       = {10.1073/pnas.36.10.570},
}

@InCollection{Kuhn1953,
  author    = {Kuhn, H. W.},
  title     = {Extensive Games and the Problem of Information},
  booktitle = {Contributions to the Theory of Games},
  publisher = {Princeton University Press},
  year      = {1953},
  editor    = {Kuhn, H. W. and Tucker, A. W.},
  volume    = {II},
  pages     = {193--216},
  langid    = {english},
  urldate   = {2022-07-26},
}

@Article{Kumar1983,
  author    = {P. R. Kumar},
  journal   = {{SIAM} Journal on Control and Optimization},
  title     = {Optimal Adaptive Control of Linear-Quadratic-{Gaussian} Systems},
  year      = {1983},
  month     = mar,
  number    = {2},
  pages     = {163--178},
  volume    = {21},
  doi       = {10.1137/0321009},
  publisher = {Society for Industrial {\&} Applied Mathematics ({SIAM})},
}

@Book{KumarVaraiya1986,
  author    = {P. R. Kumar and Pravin Varaiya},
  publisher = {Prentice Hall},
  title     = {Stochastic Systems: Estimation, Identification, and Adaptive Control},
  year      = {1986},
}

@Article{Kunnumkal2008,
  author    = {Kunnumkal, Sumit and Topaloglu, Huseyin},
  title     = {Exploiting the Structural Properties of the Underlying {Markov} Decision Problem in the {Q}-Learning Algorithm},
  journal   = {INFORMS Journal on Computing},
  year      = {2008},
  volume    = {20},
  number    = {2},
  pages     = {288--301},
  month     = may,
  issn      = {1526-5528},
  doi       = {10.1287/ijoc.1070.0240},
  publisher = {Institute for Operations Research and the Management Sciences (INFORMS)},
}

@Book{Kushner1997,
  author    = {Harold J. Kushner and G. George Yin},
  title     = {Stochastic Approximation Algorithms and Applications},
  publisher = {Springer New York},
  year      = {1997},
  doi       = {10.1007/978-1-4899-2696-8},
}

@InBook{Kwakernaak1965,
  chapter   = {Admissible Adaptive Control},
  pages     = {14--18},
  title     = {Theory of Self-Adaptive Control Systems},
  publisher = {Springer},
  year      = {1965},
  author    = {H. Kwakernaak},
}

@Article{Lai2003,
  author    = {Tze Leung Lai},
  title     = {Stochastic approximation: invited paper},
  journal   = {The Annals of Statistics},
  year      = {2003},
  volume    = {31},
  number    = {2},
  month     = apr,
  doi       = {10.1214/aos/1051027873},
  publisher = {Institute of Mathematical Statistics},
}

@Article{LaiRobbins1985,
  author  = {Lai, T. L. and Robbins, Herbert},
  journal = {Advances in Applied Mathematics},
  title   = {Asymptotically efficient adaptive allocation rules},
  year    = {1985},
  issn    = {0196-8858},
  number  = {1},
  pages   = {4--22},
  volume  = {6},
  doi     = {10.1016/0196-8858(85)90002-8},
}

@InCollection{Lamond2002,
  author    = {Bernard F. Lamond and Abdeslem Boukhtouta},
  booktitle = {Handbook of {Markov} Decision Processes: Methods and Applications},
  publisher = {Springer {US}},
  series    = {International Series in Operations Research {\&} Management Science},
  title     = {Water Reservoir Applications of {Markov} Decision Processes},
  year      = {2002},
  pages     = {537--558},
  doi       = {10.1007/978-1-4615-0805-2_17},
}

@Article{Lasry2018,
  author    = {Jean-Michel Lasry and Pierre-Louis Lions},
  journal   = {Comptes Rendus Mathematique},
  title     = {Mean-field games with a major player},
  year      = {2018},
  month     = aug,
  number    = {8},
  pages     = {886--890},
  volume    = {356},
  doi       = {10.1016/j.crma.2018.06.001},
  publisher = {Elsevier {BV}},
}

@Article{Lenglart1977,
  author  = {Lenglart, {\'E}rik},
  title   = {Relation de domination entre deux processus},
  journal = {Annales de l'institut Henri Poincar{\'e}. Section B. Calcul des probabilit{\'e}s et statistiques},
  year    = {1977},
  volume  = {13},
  number  = {2},
  pages   = {171--179},
}

@Article{Levy1992,
  author    = {Levy, Haim},
  title     = {Stochastic dominance and expected utility: survey and analysis},
  journal   = {Management Science},
  volume    = {38},
  number    = {4},
  pages     = {555--593},
  publisher = {INFORMS},
  year      = {1992},
  doi       = {10.1287/mnsc.38.4.555},
}

@Book{Levy2015,
  author    = {Levy, Haim},
  title     = {Stochastic dominance: Investment decision making under uncertainty},
  publisher = {Springer},
  year      = {2015},
  doi       = {10.1007/978-3-319-21708-6},
}

@Book{Lewis2012,
  author    = {Lewis, Frank L and Vrabie, Draguna and Syrmos, Vassilis L},
  title     = {Optimal control},
  publisher = {John Wiley \& Sons},
  year      = {2012},
}

@InProceedings{Li2006,
  author    = {Li, Lihong and Walsh, Thomas J and Littman, Michael L},
  booktitle = {International Symposium on Artificial Intelligence and Mathematics (ISAIM)},
  title     = {Towards a Unified Theory of State Abstraction for {MDPs}},
  year      = {2006},
  groups    = {adityam:6},
  url       = {http://anytime.cs.umass.edu/aimath06/proceedings/P21.pdf},
}

@Article{Lindley1961,
  author    = {Lindley, D. V.},
  title     = {Dynamic Programming and Decision Theory},
  journal   = {Applied Statistics},
  volume    = {10},
  number    = {1},
  pages     = {39},
  publisher = {JSTOR},
  year      = {1961},
  month     = mar,
  issn      = {0035-9254},
  doi       = {10.2307/2985407},
}

@InProceedings{Lipsa2009,
  author    = {Gabriel M. Lipsa and Nuno C. Martins},
  booktitle = {Annual Allerton Conference on Communication, Control, and Computing (Allerton)},
  title     = {Optimal state estimation in the presence of communication costs and packet drops},
  year      = {2009},
  month     = sep,
  publisher = {{IEEE}},
  doi       = {10.1109/allerton.2009.5394899},
}

@Article{Lipsa2011,
  author    = {G. M. Lipsa and N. C. Martins},
  journal   = {{IEEE} Transactions on Automatic Control},
  title     = {Remote State Estimation With Communication Costs for First-Order {LTI} Systems},
  year      = {2011},
  month     = sep,
  number    = {9},
  pages     = {2013--2025},
  volume    = {56},
  doi       = {10.1109/tac.2011.2139370},
  publisher = {Institute of Electrical and Electronics Engineers ({IEEE})},
}

@Article{Lu2023,
  author    = {Xiuyuan Lu and Benjamin Van Roy and Vikranth Dwaracherla and Morteza Ibrahimi and Ian Osband and Zheng Wen},
  title     = {Reinforcement Learning, Bit by Bit},
  journal   = {Foundations and Trends{\textregistered} in Machine Learning},
  volume    = {16},
  number    = {6},
  pages     = {733--865},
  publisher = {Now Publishers},
  year      = {2023},
  doi       = {10.1561/2200000097},
}

@InProceedings{Mahajan:cdc2024a,
  author    = {Aditya Mahajan and Silviu-Iulian Niculescu and Mathukumalli Vidyasagar},
  title     = {A vector almost-sure supermartingale theorem and its applications},
  booktitle = {IEEE Conference on Decision and Control},
  publisher = {IEEE},
  year      = {2024},
  month     = dec,
}

@InBook{Mahajan2008,
  author    = {Mahajan, A. and Teneketzis, D.},
  chapter   = {Multi-armed bandits},
  pages     = {121--151},
  publisher = {Springer-Verlag},
  title     = {Foundations and Applications of Sensor Management},
  year      = {2008},
  file      = {:/home/adityam/Papers/My Publications/bandits/book/2007-mab.pdf:PDF},
  owner     = {adityam},
  timestamp = {2012.03.14},
}

@InProceedings{Mahajan2012,
  author    = {Mahajan, Aditya and Martins, Nuno C. and Rotkowitz, Michael C. and Yuksel, Serdar},
  title     = {Information structures in optimal decentralized control},
  booktitle = {IEEE Conference on Decision and Control (CDC)},
  publisher = {IEEE},
  year      = {2012},
  month     = dec,
  doi       = {10.1109/cdc.2012.6425819},
}

@PhdThesis{MahajanPhD,
  author  = {Aditya Mahajan},
  title   = {Sequential decomposition of sequential dynamic teams: applications to real-time communication and networked control systems},
  school  = {University of Michigan},
  year    = {2008},
  address = {Ann Arbor, MI},
}

@Book{Marshall2011,
  author    = {Albert W. Marshall and Ingram Olkin and Barry C. Arnold},
  title     = {Inequalities: Theory of Majorization and Its Applications},
  publisher = {Springer New York},
  year      = {2011},
  doi       = {10.1007/978-0-387-68276-1},
}

@Article{MaskinTirole1988a,
  author  = {Maskin, Eric and Tirole, Jean},
  title   = {A theory of dynamic oligopoly, {I}: Overview and quantity competition with large fixed costs},
  journal = {Econometrica},
  year    = {1988},
  volume  = {56},
  number  = {3},
  pages   = {549--569},
}

@Article{MaskinTirole1988b,
  author  = {Maskin, Eric and Tirole, Jean},
  title   = {A theory of dynamic oligopoly, {II}: Price competition, kinked demand curves, and {Edgeworth} cycles},
  journal = {Econometrica},
  year    = {1988},
  volume  = {56},
  number  = {3},
  pages   = {571--599},
}

@Article{MaynardSmith1973,
  author    = {Maynard Smith, J. and Price, G. R.},
  journal   = {Nature},
  title     = {The Logic of Animal Conflict},
  year      = {1973},
  month     = nov,
  number    = {5427},
  pages     = {15--18},
  volume    = {246},
  doi       = {10.1038/246015a0},
  publisher = {Springer Science and Business Media {LLC}},
}

@Book{MaynardSmith1982,
  author    = {Maynard Smith, John},
  title     = {Evolution and the theory of games},
  pages     = {224},
  publisher = {Cambridge University Press},
  year      = {1982},
  isbn      = {0521246733},
}

@Book{Mazliak2022,
  title     = {The Splendors and Miseries of Martingales: Their History from the Casino to Mathematics},
  publisher = {Springer International Publishing},
  year      = {2022},
  editor    = {Laurent Mazliak and Glenn Shafer},
  isbn      = {9783031059889},
  doi       = {10.1007/978-3-031-05988-9},
  issn      = {2297-296X},
  series    = {Trends in the History of Science},
}

@TechReport{Mcgee1985,
  author      = {McGee, Leonard A. and Schmidt, Stanley F.},
  institution = {National Aeronautics and Space Administration},
  title       = {Discovery of the {Kalman} Filter as a Practical Tool for Aerospace and Industry},
  year        = {1985},
  url         = {https://ntrs.nasa.gov/archive/nasa/casi.ntrs.nasa.gov/19860003843.pdf},
}

@Unpublished{MIT18.S997,
  author = {Philippe Rigollet},
  note   = {Course Notes for 18.S997},
  title  = {High-Dimensional Statistics},
  month  = jul,
  year   = {2015},
  url    = {https://ocw.mit.edu/courses/mathematics/18-s997-high-dimensional-statistics-spring-2015/lecture-notes/},
}

@Book{Morse1951,
  author    = {Morse, P. and Kimball, G.},
  title     = {Methods of Operations Research},
  publisher = {Technology Press of MIT},
  year      = {1951},
}

@Article{Muller1997a,
  author    = {Alfred Müller},
  title     = {Integral Probability Metrics and Their Generating Classes of Functions},
  journal   = {Advances in Applied Probability},
  year      = {1997},
  volume    = {29},
  number    = {2},
  pages     = {429--443},
  month     = jun,
  doi       = {10.2307/1428011},
  publisher = {Cambridge University Press ({CUP})},
}

@Article{Muller1997b,
  author    = {Alfred M{\"u}ller},
  title     = {How Does the Value Function of a {Markov} Decision Process Depend on the Transition Probabilities?},
  journal   = {Mathematics of Operations Research},
  year      = {1997},
  volume    = {22},
  number    = {4},
  pages     = {872--885},
  month     = nov,
  doi       = {10.1287/moor.22.4.872},
  publisher = {Institute for Operations Research and the Management Sciences ({INFORMS})},
}

@Article{Murota1998,
  author    = {Murota, Kazuo},
  title     = {Discrete convex analysis},
  journal   = {Mathematical Programming},
  year      = {1998},
  volume    = {83},
  number    = {1--3},
  pages     = {313--371},
  month     = jan,
  issn      = {1436-4646},
  doi       = {10.1007/bf02680565},
  publisher = {Springer Science and Business Media LLC},
}

@Article{Nain1989,
  author    = {Nain, Philippe and Tsoucas, Pantelis and Walrand, Jean},
  title     = {Interchange arguments in stochastic scheduling},
  journal   = {Journal of Applied Probability},
  volume    = {26},
  number    = {4},
  pages     = {815--826},
  publisher = {Cambridge University Press (CUP)},
  year      = {1989},
  month     = dec,
  issn      = {1475-6072},
  doi       = {10.2307/3214386},
}

@Article{Nash1950,
  author    = {John F. Nash},
  journal   = {Proceedings of the National Academy of Sciences},
  title     = {Equilibrium points in {$n$}-person games},
  year      = {1950},
  month     = jan,
  number    = {1},
  pages     = {48--49},
  volume    = {36},
  doi       = {10.1073/pnas.36.1.48},
  publisher = {Proceedings of the National Academy of Sciences},
}

@Article{Nayyar2013,
  author    = {Nayyar, Ashutosh and Mahajan, Aditya and Teneketzis, Demosthenis},
  title     = {Decentralized Stochastic Control with Partial History Sharing: A Common Information Approach},
  journal   = {IEEE Transactions on Automatic Control},
  volume    = {58},
  number    = {7},
  pages     = {1644--1658},
  publisher = {Institute of Electrical and Electronics Engineers (IEEE)},
  year      = {2013},
  month     = jul,
  issn      = {1558-2523},
  doi       = {10.1109/tac.2013.2239000},
}

@Article{Nayyar2014,
  author    = {Nayyar, Ashutosh and Gupta, Abhishek and Langbort, Cedric and Basar, Tamer},
  title     = {Common Information Based {Markov} Perfect Equilibria for Stochastic Games With Asymmetric Information: Finite Games},
  journal   = {IEEE Transactions on Automatic Control},
  year      = {2014},
  volume    = {59},
  number    = {3},
  pages     = {555--570},
  month     = mar,
  issn      = {1558-2523},
  doi       = {10.1109/tac.2013.2283743},
  publisher = {Institute of Electrical and Electronics Engineers (IEEE)},
}

@Article{Nerode1958,
  author  = {Nerode, A.},
  title   = {Linear Automaton Transformations},
  journal = {Proceedings of the American Mathematical Society},
  year    = {1958},
  volume  = {9},
  pages   = {541--544},
}

@Book{Neveu1975,
  author    = {J. Neveu},
  title     = {Discrete Parameter Martingales},
  publisher = {North Holland},
  year      = {1975},
}

@InProceedings{Ng1999,
  author    = {Ng, Andrew Y and Harada, Daishi and Russell, Stuart},
  booktitle = {Proceedings of the Sixteenth International Conference on Machine Learning (ICML)},
  title     = {Policy invariance under reward transformations: Theory and application to reward shaping},
  year      = {1999},
  pages     = {278--287},
  url       = {http://aima.eecs.berkeley.edu/~russell/papers/icml99-shaping.pdf},
}

@Book{Norris1998,
  title     = {Markov chains},
  publisher = {Cambridge University Press},
  year      = {1998},
  author    = {Norris, James R},
  series    = {Cambridge Series in Statistical and Probabilistic Mathematics},
  number    = {2},
}

@Article{Oh2016,
  author    = {Oh, Sechan and {\"O}zer, {\"O}zalp},
  journal   = {Production and Operations Management},
  title     = {Characterizing the Structure of Optimal Stopping Policies},
  year      = {2016},
  month     = jul,
  number    = {11},
  pages     = {1820--1838},
  volume    = {25},
  doi       = {10.1111/poms.12579},
  publisher = {Wiley},
}

@InProceedings{Ouyang2017,
  author    = {Yi Ouyang and Mukul Gagrani and Ashutosh Nayyar and Rahul Jain},
  title     = {Learning Unknown {Markov} Decision Processes: A {Thompson} Sampling Approach},
  booktitle = {Advances in Neural Information Processing Systems},
  year      = {2017},
  volume    = {30},
}

@Article{Papadaki2007,
  author    = {Katerina Papadaki and Warren B. Powell},
  journal   = {Operations Research Letters},
  title     = {Monotonicity in multidimensional {Markov} decision processes for the batch dispatch problem},
  year      = {2007},
  month     = mar,
  number    = {2},
  pages     = {267--272},
  volume    = {35},
  doi       = {10.1016/j.orl.2006.03.013},
  publisher = {Elsevier {BV}},
}

@Article{Papadimitriou2008,
  author    = {Christos H. Papadimitriou and Tim Roughgarden},
  journal   = {Journal of the {ACM}},
  title     = {Computing correlated equilibria in multi-player games},
  year      = {2008},
  month     = jul,
  number    = {3},
  pages     = {1--29},
  volume    = {55},
  doi       = {10.1145/1379759.1379762},
  publisher = {Association for Computing Machinery ({ACM})},
}

@Article{Pereira1991,
  author    = {M. V. F. Pereira and L. M. V. G. Pinto},
  journal   = {Mathematical Programming},
  title     = {Multi-stage stochastic optimization applied to energy planning},
  year      = {1991},
  month     = may,
  number    = {1--3},
  pages     = {359--375},
  volume    = {52},
  doi       = {10.1007/bf01582895},
  publisher = {Springer Science and Business Media {LLC}},
}

@Book{Picard2007,
  author    = {Massart, Pascal},
  publisher = {Springer Berlin Heidelberg},
  title     = {Concentration Inequalities and Model Selection},
  year      = {2007},
  doi       = {10.1007/978-3-540-48503-2},
  note      = {Edited by Jean Picard},
}

@Book{Piunovskiy2011,
  title     = {Examples in Markov Decision Processes},
  publisher = {Imperial College Press},
  year      = {2011},
  author    = {A B Piunovskiy},
  month     = nov,
  doi       = {10.1142/p809},
}

@Book{Pollard2002,
  author    = {David Pollard},
  title     = {A User's Guide to Measure Theoretic Probability},
  publisher = {Cambridge University Press},
  year      = {2002},
}

@Article{Pomatto2020,
  author    = {Pomatto, Luciano and Strack, Philipp and Tamuz, Omer},
  title     = {Stochastic Dominance under Independent Noise},
  journal   = {Journal of Political Economy},
  volume    = {128},
  number    = {5},
  pages     = {1877--1900},
  publisher = {University of Chicago Press},
  year      = {2020},
  month     = may,
  issn      = {1537-534X},
  doi       = {10.1086/705555},
}

@Article{Porteus1975,
  author    = {Evan L. Porteus},
  journal   = {Operations Research},
  title     = {Bounds and Transformations for Discounted Finite {Markov} Decision Chains},
  year      = {1975},
  month     = aug,
  number    = {4},
  pages     = {761--784},
  volume    = {23},
  doi       = {10.1287/opre.23.4.761},
  publisher = {Institute for Operations Research and the Management Sciences ({INFORMS})},
}

@InBook{Porteus2008,
  author    = {Porteus, Evan L.},
  chapter   = {The Newsvendor Problem},
  title     = {Building Intuition: Insights from Basic Operations Management Models and Principles},
  editor    = {Chhajed, D. and Lowe, T.J.},
  pages     = {115--134},
  publisher = {Springer},
  year      = {2008},
  doi       = {10.1007/978-0-387-73699-0},
}

@Book{Puterman2014,
  author    = {Puterman, Martin L},
  title     = {Markov decision processes: discrete stochastic dynamic programming},
  publisher = {John Wiley \& Sons},
  year      = {2014},
  doi       = {10.1002/9780470316887},
}

@Article{Qin2020,
  author    = {Yuzhen Qin and Ming Cao and Brian D. O. Anderson},
  title     = {{Lyapunov} Criterion for Stochastic Systems and Its Applications in Distributed Computation},
  journal   = {{IEEE} Transactions on Automatic Control},
  year      = {2020},
  volume    = {65},
  number    = {2},
  pages     = {546--560},
  month     = feb,
  doi       = {10.1109/tac.2019.2910948},
  publisher = {Institute of Electrical and Electronics Engineers ({IEEE})},
}

@InProceedings{Rachelson2010,
  author    = {Rachelson, Emmanuel and Lagoudakis, Michail G},
  title     = {On the locality of action domination in sequential decision making},
  booktitle = {Proceedings of 11th International Symposium on Artificial Intelligence and Mathematics},
  address   = {Fort Lauderdale},
  year      = {2010},
  month     = jan,
  url       = {https://oatao.univ-toulouse.fr/17977/},
}

@Book{Rachev1991,
  author    = {Svetlozar T. Rachev},
  title     = {Probability Metrics and the Stability of Stochastic Models},
  publisher = {Wiley},
  address   = {New York},
  year      = {1991},
}

@Article{Riis1965,
  author    = {Riis, Jens Ove},
  title     = {Discounted {Markov} Programming in a Periodic Process},
  journal   = {Operations Research},
  year      = {1965},
  volume    = {13},
  number    = {6},
  pages     = {920--929},
  month     = dec,
  issn      = {1526-5463},
  doi       = {10.1287/opre.13.6.920},
  publisher = {Institute for Operations Research and the Management Sciences (INFORMS)},
}

@Unpublished{Rivasplata2012,
  author = {Omar Rivasplata},
  title  = {Subgaussian random variables: An expository note},
  month  = nov,
  year   = {2012},
  note   = {Unpublished manuscript},
  url    = {http://stat.cmu.edu/~arinaldo/36788/subgaussians.pdf},
}

@Article{Robbins1951,
  author    = {Herbert Robbins and Sutton Monro},
  journal   = {The Annals of Mathematical Statistics},
  title     = {A Stochastic Approximation Method},
  year      = {1951},
  month     = sep,
  number    = {3},
  pages     = {400--407},
  volume    = {22},
  doi       = {10.1214/aoms/1177729586},
  publisher = {Institute of Mathematical Statistics},
}

@InCollection{Robbins1971,
  author    = {H. Robbins and D. Siegmund},
  title     = {A convergence theorem for non-negative almost supermartingales and some applications},
  booktitle = {Optimizing Methods in Statistics},
  pages     = {233--257},
  publisher = {Elsevier},
  year      = {1971},
  doi       = {10.1016/b978-0-12-604550-5.50015-8},
}

@Book{Rockafellar2009,
  author    = {Rockafellar, R Tyrrell and Wets, Roger J-B},
  title     = {Variational analysis},
  volume    = {317},
  publisher = {Springer Science \& Business Media},
  year      = {2009},
}

@Article{Ross1974,
  author    = {Sheldon M. Ross},
  journal   = {Advances in Applied Probability},
  title     = {Dynamic programming and gambling models},
  year      = {1974},
  month     = sep,
  number    = {3},
  pages     = {593--606},
  volume    = {6},
  doi       = {10.2307/1426236},
  publisher = {Applied Probability Trust},
}

@Article{Roy2022,
  author    = {Roy, Arghyadip and Borkar, Vivek and Karandikar, Abhay and Chaporkar, Prasanna},
  title     = {Online Reinforcement Learning of Optimal Threshold Policies for {Markov} Decision Processes},
  journal   = {IEEE Transactions on Automatic Control},
  year      = {2022},
  volume    = {67},
  number    = {7},
  pages     = {3722--3729},
  month     = jul,
  issn      = {2334-3303},
  doi       = {10.1109/tac.2021.3108121},
  publisher = {Institute of Electrical and Electronics Engineers (IEEE)},
}

@Book{Saldi2018,
  title     = {Finite Approximations in Discrete-Time Stochastic Control},
  publisher = {Springer International Publishing},
  year      = {2018},
  author    = {Saldi, Naci and Linder, Tamás and Yüksel, Serdar},
  isbn      = {9783319790336},
  doi       = {10.1007/978-3-319-79033-6},
  issn      = {2324-9757},
  series    = {Systems \& Control: Foundations \& Applications},
}

@PhdThesis{Sandell1974,
  author  = {Sandell, Nils R., Jr},
  title   = {Control of Finite-State, Finite-Memory Stochastic Systems},
  school  = {Massachusetts Institute of Technology},
  year    = {1974},
  address = {Cambridge, MA},
}

@Book{Sandholm2010,
  author    = {Sandholm, William H.},
  title     = {Population Games and Evolutionary Dynamics},
  publisher = {MIT Press},
  year      = {2010},
  isbn      = {9780262195874},
}

@Article{Sanjari2023,
  author    = {Sanjari, Sina and Saldi, Naci and Yüksel, Serdar},
  title     = {Optimality of Independently Randomized Symmetric Policies for Exchangeable Stochastic Teams with Infinitely Many Decision Makers},
  journal   = {Mathematics of Operations Research},
  volume    = {48},
  number    = {3},
  pages     = {1254--1285},
  publisher = {Institute for Operations Research and the Management Sciences (INFORMS)},
  year      = {2023},
  month     = aug,
  issn      = {1526-5471},
  doi       = {10.1287/moor.2022.1296},
}

@InProceedings{Sayedana2020,
  author    = {Sayedana, Borna and Mahajan, Aditya and Yeh, Edmund},
  booktitle = {International Symposium on Modeling and Optimization in Mobile, Ad Hoc, and Wireless Networks (WiOPT)},
  title     = {Cross-layer communication over fading channels with adaptive decision feedback},
  year      = {2020},
  pages     = {1--8},
  url       = {https://ieeexplore.ieee.org/abstract/document/9155327},
}

@Article{Sayedana2020a,
  author    = {Borna Sayedana and Aditya Mahajan},
  title     = {Counterexamples on the monotonicity of delay optimal strategies for energy harvesting transmitters},
  journal   = {{IEEE} Wireless Communications Letters},
  pages     = {1--1},
  publisher = {Institute of Electrical and Electronics Engineers ({IEEE})},
  year      = {2020},
  doi       = {10.1109/lwc.2020.2981066},
}

@InBook{Scarf1960,
  chapter   = {The optimality of (S, s) policies in the dynamic inventory problem},
  pages     = {49--56},
  title     = {Mathematical Methods in Social Sciences},
  publisher = {Stanford University Press},
  address   = {Stanford, CA},
  year      = {1960},
  author    = {Herbert Scarf},
  editor    = {Arrow, K. J. and Karlin, S. and Suppes, P.},
  url       = {http://dido.wss.yale.edu/~hes/pub/ss-policies.pdf},
}

@InProceedings{Scherrer2012,
  author    = {Scherrer, Bruno and Lesner, Boris},
  title     = {On the Use of Non-Stationary Policies for Stationary Infinite-Horizon {Markov} Decision Processes},
  booktitle = {Advances in Neural Information Processing Systems},
  year      = {2012},
  editor    = {F. Pereira and C.J. Burges and L. Bottou and K.Q. Weinberger},
  volume    = {25},
  publisher = {Curran Associates, Inc.},
  url       = {https://proceedings.neurips.cc/paper_files/paper/2012/file/8b6dd7db9af49e67306feb59a8bdc52c-Paper.pdf},
}

@Misc{Scherrer2016,
  author       = {Bruno Scherrer},
  title        = {On Periodic {Markov} Decision Processes},
  howpublished = {EWRL},
  month        = dec,
  year         = {2016},
  url          = {https://ewrl.files.wordpress.com/2016/12/scherrer.pdf},
}


@Misc{Sejdinovic2014,
  author = {Dino Sejdinovic and Arthur Gretton},
  title  = {What is {RKHS}?},
  year   = {2014},
  url    = {http://www.stats.ox.ac.uk/~sejdinov/teaching/atml14/Theory_2014.pdf},
}

@Book{Sennott:book,
  author    = {Linn I. Sennott},
  title     = {Stochastic dynamic programming and the control of queueing systems},
  publisher = {Wiley},
  address   = {New York, NY, USA},
  year      = {1999},
  isbn      = {0-471-16120-9},
}

@InCollection{Serfozo1976,
  author    = {Richard F. Serfozo},
  booktitle = {Mathematical Programming Studies},
  publisher = {Springer Berlin Heidelberg},
  title     = {Monotone optimal policies for {Markov} decision processes},
  year      = {1976},
  pages     = {202--215},
  doi       = {10.1007/bfb0120752},
}

@Article{Shapiro2011,
  author    = {Alexander Shapiro},
  journal   = {European Journal of Operational Research},
  title     = {Analysis of stochastic dual dynamic programming method},
  year      = {2011},
  month     = feb,
  number    = {1},
  pages     = {63--72},
  volume    = {209},
  doi       = {10.1016/j.ejor.2010.08.007},
  publisher = {Elsevier {BV}},
}

@Article{Shebrawai2012,
  author    = {Shebrawi, Khalid and Albadawi, Hussien},
  title     = {Trace inequalities for matrices},
  journal   = {Bulletin of the Australian Mathematical Society},
  year      = {2012},
  volume    = {87},
  number    = {1},
  pages     = {139--148},
  month     = aug,
  issn      = {1755-1633},
  doi       = {10.1017/s0004972712000627},
  publisher = {Cambridge University Press (CUP)},
}

@Article{Shwartz2001,
  author    = {A. Shwartz},
  title     = {Death and discounting},
  journal   = {{IEEE} Transactions on Automatic Control},
  volume    = {46},
  number    = {4},
  pages     = {644--647},
  publisher = {Institute of Electrical and Electronics Engineers ({IEEE})},
  year      = {2001},
  month     = apr,
  doi       = {10.1109/9.917668},
  url       = {https://doi.org/10.1109/9.917668},
}

@Article{Simon1956,
  author    = {Simon, Herbert A},
  journal   = {Econometrica},
  title     = {Dynamic programming under uncertainty with a quadratic criterion function},
  year      = {1956},
  month     = jan,
  number    = {1},
  pages     = {74--81},
  volume    = {24},
  doi       = {10.2307/1905261},
  publisher = {JSTOR},
}

@Article{Singh1994,
  author    = {Satinder P. Singh and Richard C. Yee},
  title     = {An upper bound on the loss from approximate optimal-value functions},
  journal   = {Machine Learning},
  year      = {1994},
  volume    = {16},
  number    = {3},
  pages     = {227--233},
  month     = sep,
  doi       = {10.1007/bf00993308},
  publisher = {Springer Science and Business Media {LLC}},
}

@Unpublished{Sinha2024,
  author = {Amit Sinha and Aditya Mahajan},
  title  = {On the sensitivity of restless bandit solutions to uncertainty in the model of the arms},
  year   = {2024},
  note   = {Unpublished manuscript},
}

@Book{Skinner1938,
  author    = {Skinner, B. F.},
  title     = {Behavior of Organisms},
  pages     = {472},
  publisher = {Appleton-Century},
  year      = {1938},
  isbn      = {9781583900079},
}

@Article{Smallwood1973,
  author    = {Richard D. Smallwood and Edward J. Sondik},
  journal   = {Operations Research},
  title     = {The Optimal Control of Partially Observable {Markov} Processes over a Finite Horizon},
  year      = {1973},
  month     = oct,
  number    = {5},
  pages     = {1071--1088},
  volume    = {21},
  doi       = {10.1287/opre.21.5.1071},
  publisher = {Institute for Operations Research and the Management Sciences ({INFORMS})},
}

@Article{Smith2002,
  author    = {James E. Smith and Kevin F. McCardle},
  journal   = {Operations Research},
  title     = {Structural Properties of Stochastic Dynamic Programs},
  year      = {2002},
  month     = oct,
  number    = {5},
  pages     = {796--809},
  volume    = {50},
  doi       = {10.1287/opre.50.5.796.365},
  publisher = {Institute for Operations Research and the Management Sciences ({INFORMS})},
}

@Book{Stout1974,
  author    = {W.F. Stout},
  title     = {Almost sure convergence},
  publisher = {Academic Press},
  year      = {1974},
}

@Article{Striebel1965,
  author  = {Charlotte Striebel},
  title   = {Sufficient Statistics in the Optimal Control of Stochastic Systems},
  journal = {Journal of Mathematical Analysis and Applications},
  year    = {1965},
  volume  = {12},
  pages   = {576--592},
}

@Book{Strusevich_2017,
  title     = {Scheduling with Time-Changing Effects and Rate-Modifying Activities},
  publisher = {Springer International Publishing},
  year      = {2017},
  author    = {Strusevich, Vitaly A. and Rustogi, Kabir},
  isbn      = {9783319395746},
  doi       = {10.1007/978-3-319-39574-6},
  issn      = {2214-7934},
  series    = {International Series in Operations Research \& Management Science},
}

@InBook{Strusevich2016,
  author    = {Strusevich, Vitaly A. and Rustogi, Kabir},
  title     = {Pairwise Interchange Argument and Priority Rules},
  booktitle = {Scheduling with Time-Changing Effects and Rate-Modifying Activities},
  pages     = {19--36},
  publisher = {Springer International Publishing},
  year      = {2016},
  month     = oct,
  isbn      = {9783319395746},
  issn      = {2214-7934},
  doi       = {10.1007/978-3-319-39574-6_2},
}

@InProceedings{Subramanian2019,
  author    = {Jayakumar Subramanian and Aditya Mahajan},
  booktitle = {2019 {IEEE} 58th Conference on Decision and Control ({CDC})},
  title     = {Approximate information state for partially observed systems},
  year      = {2019},
  month     = dec,
  publisher = {{IEEE}},
  doi       = {10.1109/cdc40024.2019.9029898},
}

@Article{Subramanian2022,
  author  = {Jayakumar Subramanian and Amit Sinha and Raihan Seraj and Aditya Mahajan},
  title   = {Approximate Information State for Approximate Planning and Reinforcement Learning in Partially Observed Systems},
  journal = {Journal of Machine Learning Research},
  year    = {2022},
  volume  = {23},
  number  = {12},
  pages   = {1--83},
  url     = {http://jmlr.org/papers/v23/20-1165.html},
}

@InProceedings{Sutton1999,
  author    = {Sutton, Richard S. and McAllester, David and Singh, Satinder and Mansour, Yishay},
  title     = {Policy gradient methods for reinforcement learning with function approximation},
  booktitle = {Advances in Neural Information Processing Systems},
  year      = {1999},
  volume    = {12},
}

@Book{SuttonBarto2018,
  title     = {Reinforcement Learning: An Introduction},
  publisher = {MIT Press},
  year      = {2018},
  author    = {Sutton, Richard S. and Barto, Andrew G.},
  edition   = {Second},
}

@Article{Sznajder1992,
  author    = {R. Sznajder and J. A. Filar},
  journal   = {Journal of Optimization Theory and Applications},
  title     = {Some comments on a theorem of {Hardy} and {Littlewood}},
  year      = {1992},
  month     = oct,
  number    = {1},
  pages     = {201--208},
  volume    = {75},
  doi       = {10.1007/bf00939913},
  publisher = {Springer Science and Business Media {LLC}},
}

@Article{Tang2022,
  author  = {Tang, Dengwang and Tavafoghi, Hamidreza and Subramanian, Vijay and Nayyar, Ashutosh and Teneketzis, Demosthenis},
  title   = {Dynamic Games Among Teams with Delayed Intra-Team Information Sharing},
  journal = {Dynamic Games and Applications},
  year    = {2022},
  month   = feb,
}

@Article{Taylor1967,
  author    = {Howard M. Taylor},
  title     = {Evaluating a Call Option and Optimal Timing Strategy in the Stock Market},
  journal   = {Management Science},
  year      = {1967},
  volume    = {14},
  number    = {1},
  pages     = {111--120},
  issn      = {00251909, 15265501},
  publisher = {INFORMS},
  url       = {http://www.jstor.org/stable/2628546},
}

@Article{Taylor1978,
  author    = {Peter D. Taylor and Leo B. Jonker},
  journal   = {Mathematical Biosciences},
  title     = {Evolutionary stable strategies and game dynamics},
  year      = {1978},
  month     = jul,
  number    = {1--2},
  pages     = {145--156},
  volume    = {40},
  doi       = {10.1016/0025-5564(78)90077-9},
  publisher = {Elsevier {BV}},
}

@Article{Theil1954,
  author  = {Theil, Henri},
  title   = {Econometric Models and Welfare Maximization},
  journal = {Weltwirtschaftliches Archiv},
  year    = {1954},
  volume  = {72},
  pages   = {60--83},
  doi     = {10.1007/978-94-011-2410-2_1},
}

@Article{Theil1957,
  author    = {Theil, Henri},
  title     = {A note on certainty equivalence in dynamic planning},
  journal   = {Econometrica},
  year      = {1957},
  volume    = {25},
  number    = {2},
  pages     = {346--349},
  doi       = {10.1007/978-94-011-2410-2_3},
  publisher = {JSTOR},
}

@Book{Topkis1998,
  author    = {Donald M. Topkis},
  title     = {Supermodularity and Complementarity},
  year      = {1998},
  publisher = {Princeton University Press},
  isbn      = {9780691032443},
}

@Article{Trench1999,
  author    = {Trench, William F.},
  title     = {Invertibly convergent infinite products of matrices},
  journal   = {Journal of Computational and Applied Mathematics},
  year      = {1999},
  volume    = {101},
  number    = {1--2},
  pages     = {255--263},
  month     = jan,
  issn      = {0377-0427},
  doi       = {10.1016/s0377-0427(98)00191-5},
  publisher = {Elsevier BV},
}


@Article{Tsitsiklis1984,
  author    = {John N. Tsitsiklis},
  journal   = {Management Science},
  title     = {Periodic Review Inventory Systems with Continuous Demand and Discrete Order Sizes},
  year      = {1984},
  month     = oct,
  number    = {10},
  pages     = {1250--1254},
  volume    = {30},
  doi       = {10.1287/mnsc.30.10.1250},
  publisher = {Institute for Operations Research and the Management Sciences ({INFORMS})},
}

@Article{Tsitsiklis1994,
  author    = {John N. Tsitsiklis},
  journal   = {Machine Learning},
  title     = {Asynchronous stochastic approximation and {Q}-learning},
  year      = {1994},
  month     = sep,
  number    = {3},
  pages     = {185--202},
  volume    = {16},
  doi       = {10.1007/bf00993306},
  publisher = {Springer Science and Business Media {LLC}},
}

@Article{Tsitsiklis1996,
  author    = {Tsitsiklis, John N. and {Van Roy}, Benjamin},
  title     = {Feature-based methods for large scale dynamic programming},
  journal   = {Machine Learning},
  year      = {1996},
  volume    = {22},
  number    = {1--3},
  pages     = {59--94},
  month     = mar,
  doi       = {10.1007/bf00114724},
  publisher = {Springer Science and Business Media {LLC}},
}

@Article{Urgaonkar2015,
  author    = {Rahul Urgaonkar and Shiqiang Wang and Ting He and Murtaza Zafer and Kevin Chan and Kin K. Leung},
  journal   = {Performance Evaluation},
  title     = {Dynamic service migration and workload scheduling in edge-clouds},
  year      = {2015},
  month     = sep,
  pages     = {205--228},
  volume    = {91},
  doi       = {10.1016/j.peva.2015.06.013},
  publisher = {Elsevier {BV}},
}

@Article{Veinott1965,
  author    = {Arthur F. Veinott},
  journal   = {Operations Research},
  title     = {The Optimal Inventory Policy for Batch Ordering},
  year      = {1965},
  month     = jun,
  number    = {3},
  pages     = {424--432},
  volume    = {13},
  doi       = {10.1287/opre.13.3.424},
  publisher = {Institute for Operations Research and the Management Sciences ({INFORMS})},
}

@Article{Veinott1966,
  author   = {Veinott, Jr., Arthur F.},
  title    = {On the Optimality of {$(s,S)$} Inventory Policies: New Conditions and a New Proof},
  journal  = {SIAM Journal on Applied Mathematics},
  year     = {1966},
  volume   = {14},
  number   = {5},
  pages    = {1067--1083},
  abstract = {Scarf [6] has shown that the $(s,S)$ policy is optimal for a class of discrete review dynamic nonstationary inventory models. In this paper a new proof of this result is found under new conditions which do not imply and are not implied by Scarf’s hypotheses. We replace Scarf’s hypothesis that the one period expected costs are convex by the weaker assumption that the negatives of these expected costs are unimodal. On the other hand we impose the additional assumption not made by Scarf that the absolute minima of the one period expected costs are (nearly) rising over time. For the infinite period stationary model, this last hypothesis is automatically satisfied. Thus in this case our hypotheses are weaker than Scarf’s. The bounds on the optimal parameter values given by Veinott and Wagner [12] are established for the present case. The bounds in a period are easily computed, and depend only upon the expected costs for that period. Moreover, simple conditions are given which ensure that the optimal parameter values in a given period equal their lower bounds. When there is no fixed charge for ordering, this reduces to earlier results of Karlin [5] and Veinott [9], [10], [11] for the nonstationary case. The above result is exploited to extend the planning horizon theorem of Veinott [9] to the case where there is a fixed charge for ordering.},
  doi      = {10.1137/0114086},
  url      = {https://doi.org/10.1137/0114086},
}

@Book{Venttsel1980,
  author    = {Ye S. Venttsel},
  title     = {Elements of Game Theory},
  publisher = {Mir Publishers},
  year      = {1980},
  url       = {https://archive.org/details/ElementsOfGameTheorylittleMathematicsLibrary/},
}

@Article{Vidyasagar2023,
  author    = {M. Vidyasagar},
  title     = {Convergence of stochastic approximation via martingale and converse {Lyapunov} methods},
  journal   = {Mathematics of Control, Signals, and Systems},
  year      = {2023},
  volume    = {35},
  number    = {2},
  pages     = {351--374},
  month     = jan,
  doi       = {10.1007/s00498-023-00342-9},
  publisher = {Springer Science and Business Media {LLC}},
}

@Book{Villani2008,
  title     = {Optimal Transport: Old and New},
  publisher = {Springer},
  year      = {2008},
  author    = {Villani, C{\'e}dric},
  series    = {Grundlehren der mathematischen Wissenschaften},
  volume    = {338},
}

@Book{Wainwright2019,
  author    = {Martin J. Wainwright},
  publisher = {Cambridge University Press},
  title     = {High-Dimensional Statistics: A Non-Asymptotic Viewpoint},
  year      = {2019},
  month     = feb,
  doi       = {10.1017/9781108627771},
}

@Article{Wald1940,
  author    = {A. Wald and J. Wolfowitz},
  journal   = {The Annals of Mathematical Statistics},
  title     = {On a Test Whether Two Samples are from the Same Population},
  year      = {1940},
  month     = jun,
  number    = {2},
  pages     = {147--162},
  volume    = {11},
  doi       = {10.1214/aoms/1177731909},
  publisher = {Institute of Mathematical Statistics},
}

@Article{Wald1945,
  author    = {A. Wald},
  journal   = {The Annals of Mathematical Statistics},
  title     = {Sequential Tests of Statistical Hypotheses},
  year      = {1945},
  month     = jun,
  number    = {2},
  pages     = {117--186},
  volume    = {16},
  doi       = {10.1214/aoms/1177731118},
  publisher = {Institute of Mathematical Statistics},
}

@Article{Wald1948,
  author    = {A. Wald and J. Wolfowitz},
  journal   = {The Annals of Mathematical Statistics},
  title     = {Optimum Character of the Sequential Probability Ratio Test},
  year      = {1948},
  month     = sep,
  number    = {3},
  pages     = {326--339},
  volume    = {19},
  doi       = {10.1214/aoms/1177730197},
  publisher = {Institute of Mathematical Statistics},
}

@Book{Walrand1988,
  author    = {Jean Walrand},
  title     = {An introduction to queueing networks},
  year      = {1988},
  publisher = {Prentice Hall},
}

@Article{Wang2019,
  author    = {Shiqiang Wang and Rahul Urgaonkar and Murtaza Zafer and Ting He and Kevin Chan and Kin K. Leung},
  journal   = {{IEEE}/{ACM} Transactions on Networking},
  title     = {Dynamic Service Migration in Mobile Edge Computing Based on {Markov} Decision Process},
  year      = {2019},
  month     = jun,
  number    = {3},
  pages     = {1272--1288},
  volume    = {27},
  doi       = {10.1109/tnet.2019.2916577},
  publisher = {Institute of Electrical and Electronics Engineers ({IEEE})},
}

@Article{Watkins1992,
  author    = {Christopher J. C. H. Watkins and Peter Dayan},
  journal   = {Machine Learning},
  title     = {{Q}-learning},
  year      = {1992},
  month     = may,
  number    = {3--4},
  pages     = {279--292},
  volume    = {8},
  doi       = {10.1007/bf00992698},
  publisher = {Springer Science and Business Media {LLC}},
}

@Misc{weng2018PG,
  title  = {Policy Gradient Algorithms},
  author = {Weng, Lilian},
  url    = {https://lilianweng.github.io/posts/2018-04-08-policy-gradient/},
  year   = {2018},
}

@Book{Whitin1953,
  title     = {The Theory of Inventory Management},
  publisher = {Princeton University Press},
  year      = {1953},
  author    = {Whitin, Thomson M.},
}

@Article{Whitt1978,
  author    = {Whitt, Ward},
  title     = {Approximations of Dynamic Programs, {I}},
  journal   = {Mathematics of Operations Research},
  year      = {1978},
  volume    = {3},
  number    = {3},
  pages     = {231--243},
  month     = aug,
  issn      = {1526-5471},
  doi       = {10.1287/moor.3.3.231},
  publisher = {Institute for Operations Research and the Management Sciences (INFORMS)},
}

@Article{Whitt1979,
  author    = {Whitt, Ward},
  title     = {Approximations of Dynamic Programs, {II}},
  journal   = {Mathematics of Operations Research},
  volume    = {4},
  number    = {2},
  pages     = {179--185},
  month     = may,
  year      = {1979},
  publisher = {Institute for Operations Research and the Management Sciences (INFORMS)},
  issn      = {1526-5471},
  doi       = {10.1287/moor.4.2.179},
}

@Article{Whittle1980,
  author    = {P. Whittle},
  journal   = {Journal of the Royal Statistical Society. Series B (Methodological)},
  title     = {Multi-Armed Bandits and the {Gittins} Index},
  year      = {1980},
  issn      = {00359246},
  number    = {2},
  pages     = {143--149},
  volume    = {42},
  abstract  = {A plausible conjecture (C) has the implication that a relationship (12) holds between the maximal expected rewards for a multi-project process and for a one-project process (F and $\phi_i$ respectively), if the option of retirement with reward M is available. The validity of this relation and optimality of Gittins' index rule are verified simultaneously by dynamic programming methods. These results are partially extended to the case of so-called "bandit superprocesses".},
  publisher = {[Royal Statistical Society, Wiley]},
}

@Book{Whittle1982,
  author    = {Whittle, Peter},
  title     = {Optimization over time: dynamic programming and stochastic control. Vol. 1 and 2},
  year      = {1982},
  publisher = {Wiley},
}

@Article{Whittle1988,
  author    = {Whittle, Peter},
  journal   = {Journal of Applied Probability},
  title     = {Restless bandits: Activity allocation in a changing world},
  year      = {1988},
  number    = {A},
  pages     = {287--298},
  volume    = {25},
  publisher = {Cambridge Univ Press},
}

@Article{Whittle1988a,
  author    = {P. Whittle and N. Komarova},
  journal   = {Probability in the Engineering and Informational Sciences},
  title     = {Policy Improvement and the {Newton-Raphson} Algorithm},
  year      = {1988},
  month     = apr,
  number    = {2},
  pages     = {249--255},
  volume    = {2},
  doi       = {10.1017/s0269964800000760},
  publisher = {Cambridge University Press ({CUP})},
}

@Book{Whittle1996,
  author    = {Whittle, Peter},
  title     = {Optimal control: Basics and Beyond},
  year      = {1996},
  publisher = {Wiley},
}

@Article{Whittle2002,
  author    = {Peter Whittle},
  journal   = {Macroeconomic Dynamics},
  title     = {Risk Sensitivity, {A} strangely pervasive concept},
  year      = {2002},
  month     = feb,
  number    = {1},
  pages     = {5--18},
  volume    = {6},
  doi       = {10.1017/s1365100502027025},
  publisher = {Cambridge University Press ({CUP})},
}

@Article{Wiewiora2003,
  author     = {Wiewiora, Eric},
  journal    = {Journal of Artificial Intelligence Research},
  title      = {Potential-Based Shaping and {Q}-Value Initialization Are Equivalent},
  year       = {2003},
  issn       = {1076-9757},
  month      = sep,
  number     = {1},
  pages      = {205--208},
  volume     = {19},
  abstract   = {Shaping has proven to be a powerful but precarious means of improving reinforcement learning performance. Ng, Harada, and Russell (1999) proposed the potential-based shaping algorithm for adding shaping rewards in a way that guarantees the learner will learn optimal behavior. In this note, we prove certain similarities between this shaping algorithm and the initialization step required for several reinforcement learning algorithms. More specifically, we prove that a reinforcement learner with initial Q-values based on the shaping algorithm's potential function make the same updates throughout learning as a learner receiving potential-based shaping rewards. We further prove that under a broad category of policies, the behavior of these two learners are indistinguishable. The comparison provides intuition on the theoretical properties of the shaping algorithm as well as a suggestion for a simpler method for capturing the algorithm's benefit. In addition, the equivalence raises previously unaddressed issues concerning the efficiency of learning with potential-based shaping.},
  address    = {El Segundo, CA, USA},
  issue_date = {July 2003},
  numpages   = {4},
  publisher  = {AI Access Foundation},
}

@Article{Witsenhausen1969,
  author    = {Witsenhausen, H. S.},
  title     = {Inequalities for the performance of suboptimal uncertain systems},
  journal   = {Automatica},
  year      = {1969},
  volume    = {5},
  number    = {4},
  pages     = {507--512},
  month     = jul,
  issn      = {0005-1098},
  doi       = {10.1016/0005-1098(69)90112-5},
  publisher = {Elsevier BV},
}

@Article{Witsenhausen1970,
  author    = {Witsenhausen, H. S.},
  title     = {On Performance Bounds for Uncertain Systems},
  journal   = {SIAM Journal on Control},
  volume    = {8},
  number    = {1},
  pages     = {55--89},
  month     = feb,
  year      = {1970},
  publisher = {Society for Industrial \& Applied Mathematics (SIAM)},
  issn      = {0036-1402},
  doi       = {10.1137/0308004},
}

@Article{Witsenhausen1973,
  author    = {Witsenhausen, H. S.},
  title     = {A standard form for sequential stochastic control},
  journal   = {Mathematical Systems Theory},
  volume    = {7},
  number    = {1},
  pages     = {5--11},
  month     = mar,
  year      = {1973},
  publisher = {Springer Science and Business Media LLC},
  issn      = {1433-0490},
  doi       = {10.1007/bf01824800},
}


@Article{Witsenhausen1975,
  author  = {Hans S. Witsenhausen},
  journal = {Information and Control},
  title   = {On policy independence of conditional expectation},
  year    = {1975},
  pages   = {65--75},
  volume  = {28},
}

@InCollection{Witsenhausen1976,
  author    = {Hans S. Witsenhausen},
  title     = {Some Remarks on the Concept of State},
  booktitle = {Directions in Large-Scale Systems},
  publisher = {Plenum},
  year      = {1976},
  editor    = {Y. C. Ho and S. K. Mitter},
  pages     = {69--75},
}

@Article{Witsenhausen1979,
  author  = {Hans S. Witsenhausen},
  title   = {On the structure of Real-time Source Coders},
  journal = BSTJ,
  year    = {1979},
  volume  = {58},
  number  = {6},
  pages   = {1437--1451},
  month   = jul # "--" # aug,
}

@InCollection{Wittenmark2002,
  author    = {Wittenmark, Bj{\"o}rn and {\AA}str{\"o}m, Karl Johan and {\AA}rz{\'e}n, Karl-Erik},
  title     = {Computer control: An overview},
  year      = {2002},
  booktitle = {IFAC Professional Brief},
  publisher = {IFAC},
  url       = {https://www.ifac-control.org/publications/list-of-professional-briefs/pb_wittenmark_etal_final.pdf},
}

@Article{Wonham1968,
  author    = {W. M. Wonham},
  journal   = {{SIAM} Journal on Control},
  title     = {On the Separation Theorem of Stochastic Control},
  year      = {1968},
  month     = may,
  number    = {2},
  pages     = {312--326},
  volume    = {6},
  doi       = {10.1137/0306023},
  publisher = {Society for Industrial {\&} Applied Mathematics ({SIAM})},
}

@Article{Wonham1968a,
  author    = {Wonham, W. M.},
  title     = {On a Matrix Riccati Equation of Stochastic Control},
  journal   = {SIAM Journal on Control},
  volume    = {6},
  number    = {4},
  pages     = {681--697},
  month     = nov,
  year      = {1968},
  publisher = {Society for Industrial \& Applied Mathematics (SIAM)},
  issn      = {0036-1402},
  doi       = {10.1137/0306044},
}

@Article{Woodall1983,
  author    = {William H. Woodall and Marion R. Reynolds},
  journal   = {Communications in Statistics. Part C: Sequential Analysis},
  title     = {A Discrete {Markov} Chain Representation of the Sequential Probability Ratio Test},
  year      = {1983},
  month     = jan,
  number    = {1},
  pages     = {27--44},
  volume    = {2},
  doi       = {10.1080/07474948308836025},
  publisher = {Informa {UK} Limited},
}

@Article{Yang2002,
  author    = {Yang, Zhong Peng and Feng, Xiao Xia},
  title     = {A note on the trace inequality for products of {Hermitian} matrix power},
  journal   = {JIPAM. Journal of Inequalities in Pure \& Applied Mathematics},
  year      = {2002},
  volume    = {3},
  number    = {5},
  pages     = {Paper No. 78, 12 p.},
  keywords  = {trace inequalities; Hermitean matrix pair; skew-Hermitean matrix pairs},
  language  = {eng},
  publisher = {Victoria University, School of Communications and Informatics},
  url       = {http://eudml.org/doc/123245},
}

@Article{Yeh2012,
  author  = {Edmund M. Yeh},
  journal = {Foundations and Trends in Communications and Information Theory},
  title   = {Fundamental Performance Limits in Cross-layer Wireless Optimization: Throughput, Delay, and Energy},
  year    = {2012},
  issn    = {1567-2190},
  number  = {1},
  pages   = {1--112},
  volume  = {9},
  doi     = {10.1561/0100000014},
}

@TechReport{Zhang1996,
  author      = {Zhang, Nevin L. and Liu, Wenju},
  institution = {Hong Kong University of Science and Technology},
  title       = {Planning in stochastic domains: Problem characteristics and approximation},
  year        = {1996},
  number      = {HKUST-CS96-31},
}

@Article{Zhang2009,
  author  = {H. Zhang},
  title   = {Partially Observable {Markov} Decision Processes: A Geometric Technique and Analysis},
  year    = {2009},
  journal = {Operations Research},
}

@Article{Zheng1991,
  author    = {Zheng, Yu-Sheng and Federgruen, A.},
  title     = {Finding Optimal {$(s, S)$} Policies Is About As Simple As Evaluating a Single Policy},
  journal   = {Operations Research},
  year      = {1991},
  volume    = {39},
  number    = {4},
  pages     = {654--665},
  month     = aug,
  issn      = {1526-5463},
  doi       = {10.1287/opre.39.4.654},
  publisher = {Institute for Operations Research and the Management Sciences (INFORMS)},
}

@Misc{Zhou2018,
  author      = {Xingyu Zhou},
  title       = {On the {Fenchel} Duality between Strong Convexity and {Lipschitz} Continuous Gradient},
  year        = {2018},
  abstract    = {We provide a simple proof for the Fenchel duality between strong convexity and Lipschitz continuous gradient. To this end, we first establish equivalent conditions of convexity for a general function that may not be differentiable. By utilizing these equivalent conditions, we can directly obtain equivalent conditions for strong convexity and Lipschitz continuous gradient. Based on these results, we can easily prove Fenchel duality. Beside this main result, we also identify several conditions that are implied by strong convexity or Lipschitz continuous gradient, but are not necessarily equivalent to them. This means that these conditions are more general than strong convexity or Lipschitz continuous gradient themselves.},
  date        = {2018-03-17},
  eprint      = {1803.06573v1},
  eprintclass = {math.OC},
  eprinttype  = {arXiv},
  file        = {:http\://arxiv.org/pdf/1803.06573v1:PDF},
  keywords    = {math.OC},
  url         = {http://arxiv.org/pdf/1803.06573v1},
}

@Book{Zipkin2000,
  title     = {Foundations of Inventory Management},
  publisher = {McGraw-Hill},
  year      = {2000},
  author    = {Paul H. Zipkin},
}

@Article{Zolotarev1984,
  author    = {V. M. Zolotarev},
  title     = {Probability Metrics},
  journal   = {Theory of Probability \& Its Applications},
  year      = {1984},
  volume    = {28},
  number    = {2},
  pages     = {278--302},
  month     = jan,
  doi       = {10.1137/1128025},
  publisher = {Society for Industrial {\&} Applied Mathematics ({SIAM})},
}

@Comment{jabref-meta: databaseType:bibtex;}

@Comment{jabref-meta: saveOrderConfig:specified;citationkey;false;author;false;year;false;}