From 654d4207dce218c5aa2d555e190f28603de9bceb Mon Sep 17 00:00:00 2001
From: Aditya Mahajan
Date: Fri, 9 Feb 2024 15:43:36 +0100
Subject: [PATCH] Expanding on the notes

---
 mdps/monotone-mdps.qmd |  8 +++++++-
 references.bib         | 42 ++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 49 insertions(+), 1 deletion(-)

diff --git a/mdps/monotone-mdps.qmd b/mdps/monotone-mdps.qmd
index d4194e12..01a42605 100644
--- a/mdps/monotone-mdps.qmd
+++ b/mdps/monotone-mdps.qmd
@@ -769,4 +769,10 @@ d. Now suppose the distribution of $W_t$ is not Gaussian but is some
 Stochastic dominance has been employed in various areas of economics, finance, and statistics since the 1930s. See @Levy1992 and @Levy2015 for detailed overviews. The notion of stochastic monotonicity for Markov chains is due to @Daley1968. For a generalization of stochastic monotonicity to continuous state spaces, see @Serfozo1976. The characterization of stochastic monotonicity in @exr-sd-1--@exr-sd-4 are due to @Keilson1977.
 
-@Ross1974 has an early treatment of monotonicity of optimal policies. The general theory was developed by @Topkis1998. The presentation here follows @Puterman2014. @exr-folded-monotonicity is from @Chakravorty2018.
+@Ross1974 provides an early treatment of the monotonicity of optimal policies. The general theory was developed by @Topkis1998. An alternative treatment for queueing models is presented in @Koole2006. The presentation here follows @Puterman2014.
+
+The properties here are derived for finite-horizon models. General conditions under which such properties extend to infinite-horizon models are presented in @Smith2002.
+
+Many recent papers leverage the structural properties of value functions and optimal policies in reinforcement learning. For example, @Kunnumkal2008 and @Fu2012 present variants of Q-learning that exploit structural properties of the value function, and @Roy2022 presents a variant of policy learning that exploits the threshold structure of the optimal policy.
+
+@exr-folded-monotonicity is from @Chakravorty2018.
 
diff --git a/references.bib b/references.bib
index 91d610b4..4e929904 100644
--- a/references.bib
+++ b/references.bib
@@ -996,6 +996,20 @@ @Article{Frostig2016
   url = {http://dx.doi.org/10.1007/s10479-013-1523-0},
 }
 
+@Article{Fu2012,
+  author = {Fu, Fangwen and van der Schaar, M.},
+  title = {Structure-Aware Stochastic Control for Transmission Scheduling},
+  journal = {IEEE Transactions on Vehicular Technology},
+  year = {2012},
+  volume = {61},
+  number = {9},
+  pages = {3931--3945},
+  month = nov,
+  issn = {1939-9359},
+  doi = {10.1109/tvt.2012.2213850},
+  publisher = {Institute of Electrical and Electronics Engineers (IEEE)},
+}
+
 @InProceedings{Fu2018,
   author = {Fu, Michael C.},
   title = {Monte Carlo Tree Search: A tutorial},
@@ -1565,6 +1579,20 @@ @Book{KumarVaraiya1986
   year = {1986},
 }
 
+@Article{Kunnumkal2008,
+  author = {Kunnumkal, Sumit and Topaloglu, Huseyin},
+  title = {Exploiting the Structural Properties of the Underlying {M}arkov Decision Problem in the {Q}-Learning Algorithm},
+  journal = {INFORMS Journal on Computing},
+  year = {2008},
+  volume = {20},
+  number = {2},
+  pages = {288--301},
+  month = may,
+  issn = {1526-5528},
+  doi = {10.1287/ijoc.1070.0240},
+  publisher = {Institute for Operations Research and the Management Sciences (INFORMS)},
+}
+
 @Book{Kushner1997,
   title = {Stochastic Approximation Algorithms and Applications},
   publisher = {Springer New York},
@@ -2156,6 +2184,20 @@ @Article{Ross1974
   publisher = {Applied Probability Trust},
 }
 
+@Article{Roy2022,
+  author = {Roy, Arghyadip and Borkar, Vivek and Karandikar, Abhay and Chaporkar, Prasanna},
+  title = {Online Reinforcement Learning of Optimal Threshold Policies for {M}arkov Decision Processes},
+  journal = {IEEE Transactions on Automatic Control},
+  year = {2022},
+  volume = {67},
+  number = {7},
+  pages = {3722--3729},
+  month = jul,
+  issn = {2334-3303},
+  doi = {10.1109/tac.2021.3108121},
+  publisher = {Institute of Electrical and Electronics Engineers (IEEE)},
+}
+
 @PhdThesis{Sandell1974,
   author = {Sandell, Nils R., Jr},
   title = {Control of Finite-State, Finite-Memory Stochastic Systems},