From 654d4207dce218c5aa2d555e190f28603de9bceb Mon Sep 17 00:00:00 2001
From: Aditya Mahajan
Date: Fri, 9 Feb 2024 15:43:36 +0100
Subject: [PATCH] Expanding on the notes

---
 mdps/monotone-mdps.qmd |  8 +++++++-
 references.bib         | 42 ++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 49 insertions(+), 1 deletion(-)

diff --git a/mdps/monotone-mdps.qmd b/mdps/monotone-mdps.qmd
index d4194e12..01a42605 100644
--- a/mdps/monotone-mdps.qmd
+++ b/mdps/monotone-mdps.qmd
@@ -769,4 +769,10 @@ d. Now suppose the distribution of $W_t$ is not Gaussian but is some
 Stochastic dominance has been employed in various areas of economics, finance, and statistics since the 1930s. See @Levy1992 and @Levy2015 for detailed overviews. The notion of stochastic monotonicity for Markov chains is due to @Daley1968. For a generalization of stochastic monotonicity to continuous state spaces, see @Serfozo1976. The characterization of stochastic monotonicity in @exr-sd-1--@exr-sd-4 are due to @Keilson1977.
 
-@Ross1974 has an early treatment of monotonicity of optimal policies. The general theory was developed by @Topkis1998. The presentation here follows @Puterman2014. @exr-folded-monotonicity is from @Chakravorty2018.
+@Ross1974 provides an early treatment of the monotonicity of optimal policies. The general theory was developed by @Topkis1998. An alternative treatment for queueing models is presented in @Koole2006. The presentation here follows @Puterman2014.
+
+The properties here are derived for finite-horizon models. General conditions under which such properties extend to infinite-horizon models are presented in @Smith2002.
+
+Many recent papers leverage the structural properties of value functions and optimal policies in reinforcement learning. For example, @Kunnumkal2008 and @Fu2012 present variants of Q-learning that exploit structural properties of the value function, and @Roy2022 presents a variant of policy learning that exploits the threshold structure of the optimal policy.
+
+@exr-folded-monotonicity is from @Chakravorty2018.
 
diff --git a/references.bib b/references.bib
index 91d610b4..4e929904 100644
--- a/references.bib
+++ b/references.bib
@@ -996,6 +996,20 @@ @Article{Frostig2016
   url = {http://dx.doi.org/10.1007/s10479-013-1523-0},
 }
 
+@Article{Fu2012,
+  author = {Fu, Fangwen and van der Schaar, M.},
+  title = {Structure-Aware Stochastic Control for Transmission Scheduling},
+  journal = {IEEE Transactions on Vehicular Technology},
+  year = {2012},
+  volume = {61},
+  number = {9},
+  pages = {3931--3945},
+  month = nov,
+  issn = {1939-9359},
+  doi = {10.1109/tvt.2012.2213850},
+  publisher = {Institute of Electrical and Electronics Engineers (IEEE)},
+}
+
 @InProceedings{Fu2018,
   author = {Fu, Michael C.},
   title = {Monte Carlo Tree Search: A tutorial},
@@ -1565,6 +1579,20 @@ @Book{KumarVaraiya1986
   year = {1986},
 }
 
+@Article{Kunnumkal2008,
+  author = {Kunnumkal, Sumit and Topaloglu, Huseyin},
+  title = {Exploiting the Structural Properties of the Underlying {M}arkov Decision Problem in the {Q}-Learning Algorithm},
+  journal = {INFORMS Journal on Computing},
+  year = {2008},
+  volume = {20},
+  number = {2},
+  pages = {288--301},
+  month = may,
+  issn = {1526-5528},
+  doi = {10.1287/ijoc.1070.0240},
+  publisher = {Institute for Operations Research and the Management Sciences (INFORMS)},
+}
+
 @Book{Kushner1997,
   title = {Stochastic Approximation Algorithms and Applications},
   publisher = {Springer New York},
@@ -2156,6 +2184,20 @@ @Article{Ross1974
   publisher = {Applied Probability Trust},
 }
 
+@Article{Roy2022,
+  author = {Roy, Arghyadip and Borkar, Vivek and Karandikar, Abhay and Chaporkar, Prasanna},
+  title = {Online Reinforcement Learning of Optimal Threshold Policies for {M}arkov Decision Processes},
+  journal = {IEEE Transactions on Automatic Control},
+  year = {2022},
+  volume = {67},
+  number = {7},
+  pages = {3722--3729},
+  month = jul,
+  issn = {2334-3303},
+  doi = {10.1109/tac.2021.3108121},
+  publisher = {Institute of Electrical and Electronics Engineers (IEEE)},
+}
+
 @PhdThesis{Sandell1974,
   author = {Sandell, Nils R., Jr},
   title = {Control of Finite-State, Finite-Memory Stochastic Systems},