Sha-Lab
diff --git a/.config b/.config
Lines changed: 1 addition & 1 deletion
diff --git a/Makefile b/Makefile
Lines changed: 0 additions & 3 deletions
diff --git a/arqmc.py b/arqmc.py
Lines changed: 4 additions & 3 deletions
diff --git a/environment.yml b/environment.yml
Lines changed: 9 additions & 8 deletions
diff --git a/envs/gridworld/maps/four_ends.txt b/envs/gridworld/maps/four_ends.txt
Lines changed: 31 additions & 0 deletions
diff --git a/envs/gridworld/pointmass.py b/envs/gridworld/pointmass.py
Lines changed: 80 additions & 19 deletions
diff --git a/experiments/cost_lqr.run b/experiments/cost_lqr.run
Lines changed: 0 additions & 12 deletions
diff --git a/exps_utils/__init__.py b/exps_utils/__init__.py
@@ -1 +1 @@
-{"exp_path": "pj/qmc"}
+{"exp_path": "pj/qmc/qmc"}
@@ -7,7 +7,7 @@
 from pathlib import Path
 from ipdb import launch_ipdb_on_exception
 
-import exp_utils.run
+import exps.utils.run
 from envs import Brownian, LQR
 #from rqmc_distributions.dist_rqmc import Uniform_RQMC, Normal_RQMC
 from rqmc_distributions import Normal_RQMC, Uniform_RQMC
@@ -31,7 +31,7 @@ def parse_args(args=None):
     parser.add_argument('--algos', type=str, nargs='+', default=['mc', 'rqmc', 'arqmc'])
     parser.add_argument('--exp_name', type=str, default=None)
     parser.add_argument('--seed', type=int, default=None)
-    return exp_utils.run.parse_args(parser, args, exp_name_attr='exp_name')
+    return exps.utils.run.parse_args(parser, args, exp_name_attr='exp_name')
 
 ### tasks ### (estimate cost, learn)
 
@@ -88,7 +88,8 @@ def brownian(args):
             states = [env.reset() for env in envs]
             dones = [False for _ in range(args.n_trajs)]
             uniform_noises = ssj_uniform(args.n_trajs, 1) # n_trajs , action_dim
-            noises = uniform2normal(random_shift(np.expand_dims(uniform_noises, 1).repeat(args.horizon, 1), 0))
+            noises = uniform2normal(random_shift(np.expand_dims(uniform_noises, 1).repeat(args.horizon, 1), 0)) # n_trajs, horizon, action_dim
+            import ipdb; ipdb.set_trace()
             for j in range(args.horizon):
                 if np.all(dones): break
                 envs, states, dones, returns = zip(*sorted(zip(envs, states, dones, returns), key=lambda x: np.inf if x[2] else x[1]))
 
@@ -4,7 +4,7 @@ channels:
   - https://repo.continuum.io/pkgs/free
   - defaults
 dependencies:
-  - blas=1.0=mkl
+  #- blas=1.0=mkl
   - ca-certificates=2019.1.23
   - certifi=2018.11.29
   - cffi=1.12.1
@@ -19,9 +19,9 @@ dependencies:
   - libpng=1.6.36
   - libstdcxx-ng=8.2.0
   - libtiff=4.0.10
-  - mkl=2019.1
-  - mkl_fft=1.0.10
-  - mkl_random=1.0.2
+  #- mkl=2019.1
+  #- mkl_fft=1.0.10
+  #- mkl_random=1.0.2
   - ncurses=6.1
   - ninja=1.8.2
   - numpy=1.16.2
@@ -59,7 +59,7 @@ dependencies:
     - filelock==3.0.10
     - future==0.17.1
     - glfw==1.8.1
-    - gym==0.12.0
+    - gym==0.15.3
     - idna==2.8
     - imageio==2.5.0
     - ipdb==0.11
@@ -80,7 +80,7 @@ dependencies:
     - markupsafe==1.1.1
     - matplotlib==3.0.3
     - mistune==0.8.4
-    - mujoco-py==2.0.2.2
+    #- mujoco-py==2.0.2.2
     - nbconvert==5.4.1
     - nbformat==4.4.0
     - notebook==5.7.6
@@ -89,7 +89,7 @@ dependencies:
     - pandas==0.24.1
     - pandocfilters==1.4.2
     - parso==0.3.4
-    - particles==0.1
+    #- particles==0.1
     - pexpect==4.6.0
     - pickleshare==0.7.5
     - prometheus-client==0.6.0
@@ -118,4 +118,5 @@ dependencies:
     - wcwidth==0.1.7
     - webencodings==0.5.1
     - widgetsnbextension==3.4.2
-
+    - py4j
+    - randopt
@@ -0,0 +1,31 @@
+###############################
+###############################
+############### ###############
+############### ###############
+############### ###############
+############### ###############
+############### ###############
+############### ###############
+############### ###############
+############### ###############
+############### ###############
+############### ###############
+############### ###############
+############### ###############
+############### ###############
+#                             #
+############### ###############
+############### ###############
+############### ###############
+############### ###############
+############### ###############
+############### ###############
+############### ###############
+############### ###############
+############### ###############
+############### ###############
+############### ###############
+############### ###############
+############### ###############
+############### ###############
+###############################
@@ -5,83 +5,87 @@
 import matplotlib.pyplot as plt
 from .utils import Render, color_interpolate
 
+
 CUR_DIR = os.path.dirname(__file__)
 
+
 def read_map(filename):
     m = []
     with open(filename) as f:
         for row in f:
             m.append(list(row.rstrip()))
     return m
 
+
 def get_grid_position(x, y):
     return int(x), int(y)
 
+
 def sample_pos(m, exclude=set(), rng=np.random):
     while True:
         x = rng.uniform(len(m))
         y = rng.uniform(len(m[0]))
         x, y = get_grid_position(x, y)
         if (m[x][y] != '#') and ((x, y) not in exclude): return np.array([x, y])
 
+
 colormap = {
     ' ': color_interpolate(0.0, plt.cm.Greys(0.02), plt.cm.Greys(0.2)),
     '@': color_interpolate(0.0, plt.cm.Greys(0.02), plt.cm.Greys(0.2)),
     '#': color_interpolate(0.0, plt.cm.Greys(0.12), plt.cm.Greys(0.3)), 
 }
 
+
 # push everything into wrapper (sample init position, sample goal, change map etc)
 # for MDP, state should contains all information, which means that to simulate in parallel n rollouts, you only need one environment and n states
 class PointMass(gym.Env):
     def __init__(
         self,
         map_name,
-        goal=None,
         init_pos=None,
         n_sub_steps=10,
-        done_threshold=0.8,
         seed=0,
     ):
         self.map = read_map(os.path.join(CUR_DIR, 'maps', '{}.txt'.format(map_name)))
         self.row, self.col = len(self.map), len(self.map[0])
         self.seed(seed)
-        if goal is None:
-            goal = sample_pos(self.map, rng=self.rng)
-        self.goal = goal
-        if init_pos is None:
-            init_pos = sample_pos(self.map, {tuple(goal)})
-        assert init_pos[0] > 0 and init_pos[0] < self.row and init_pos[1] > 0 and init_pos[1] < self.col
-        self.init_pos = np.asarray(init_pos)
+        if init_pos is not None:
+            self.load_params({'init_pos': init_pos})
+        else:
+            self._init_pos = None
         self.n_sub_steps = n_sub_steps
         self.done_threshold = done_threshold
 
         self.observation_space = gym.spaces.Box(np.array([0.0, 0.0]), np.array([self.row, self.col]))
         self.action_space = gym.spaces.Box(np.array([-1.0, -1.0]), np.array([1.0, 1.0]))
         self._render = None
 
+    def load_params(self, params):  # params: dict; an 'init_pos' entry, if present, becomes the reset position
+        if 'init_pos' in params:
+            assert 0 < params['init_pos'][0] < self.row and 0 < params['init_pos'][1] < self.col  # was a bare `init_pos`, undefined in this scope
+            self._init_pos = np.array(params['init_pos'])
+
     def seed(self, seed=None):
         self.rng, _ = seeding.np_random(seed)
 
     def reset(self):
-        self.pos = self.init_pos
+        assert self._init_pos is not None
+        self.pos = self._init_pos
         return self.pos
 
     def _is_blocked(self, pos):
         x, y = get_grid_position(*pos)
         return self.map[x][y] == '#'
 
-    def step(self, action):
-        assert not self._is_blocked(self.pos), 'start position in the wall'
-        action = np.clip(action, self.action_space.low, self.action_space.high)
+    def transition(self, pos, action):  # moves `pos` by `action` and returns the result; does not assign self.pos
+        assert not self._is_blocked(pos), 'start position in the wall'
+        action = np.clip(action, self.action_space.low, self.action_space.high) # might cause problem
         dpos = 1.0 / self.n_sub_steps
         for _ in range(self.n_sub_steps):
-            next_pos = self.pos + action * dpos
+            next_pos = pos + action * dpos
             if self._is_blocked(next_pos): break
-            self.pos = next_pos
-        dist = np.linalg.norm(self.goal - self.pos)
-        r = -dist
-        done = dist < self.done_threshold
-        return self.pos, r, done, {}
+            pos = next_pos  # keep the last unblocked sub-step
+        return pos
 
     def render(self, repeat=32):    
         self.init_render(repeat)
@@ -101,6 +105,63 @@ def init_render(self, repeat):
           self._render = Render(size=(self.col * repeat, self.row * repeat))
       return self
 
+
+class ReachPointMass(PointMass):  # PointMass task whose reward is the negative distance to a goal position
+    def __init__(
+        self,
+        map_name,
+        goal=None,
+        init_pos=None,
+        n_sub_steps=10,
+        done_threshold=0.8,
+        seed=0,
+    ):
+        # NOTE(review): PointMass.__init__ still assigns done_threshold, which is no longer one of its parameters
+        super().__init__(map_name, n_sub_steps=n_sub_steps, seed=seed)
+        self.done_threshold = done_threshold  # step() reports done once the goal is closer than this
+        self._goal = None  # must exist before load_params() runs its init_pos/goal check
+        if init_pos is not None:
+            self.load_params({'init_pos': init_pos})
+        else:
+            self._init_pos = None
+        if goal is not None:
+            self.load_params({'goal': goal})
+
+    def load_params(self, params):  # reads 'goal' in addition to whatever PointMass.load_params reads
+        super().load_params(params)
+        if 'goal' in params:
+            self._goal = np.asarray(params['goal'])  # was a bare `goal`, undefined in this scope
+        if self._init_pos is not None and self._goal is not None:
+            assert get_grid_position(*self._init_pos) != get_grid_position(*self._goal)  # start and goal must be in different cells
+
+    def step(self, action):  # returns (position, reward, done, info) with reward = -distance to the goal
+        self.pos = self.transition(self.pos, action)
+        dist = np.linalg.norm(self._goal - self.pos)  # attribute is `_goal`; `self.goal` is never assigned
+        r = -dist
+        done = dist < self.done_threshold
+        return self.pos, r, done, {}
+
+
+class GaussianMixtureRewardPointMass(PointMass):  # PointMass task whose reward is a weighted sum of Gaussian bumps
+    def __init__(
+        self,
+        map_name,
+        gaussians=(), # iterable of (c, mean, sigma); immutable default so instances never share one list
+        init_pos=None,
+        n_sub_steps=10,
+        seed=0,
+    ):
+        super().__init__(map_name, init_pos=init_pos, n_sub_steps=n_sub_steps, seed=seed)
+        self.gaussians = list(gaussians)  # private copy, still a list as before
+
+    def step(self, action):  # returns (position, reward, done, info); done is always False
+        self.pos = self.transition(self.pos, action)
+        r = 0.0
+        for c, mean, sigma in self.gaussians:
+            r += c * np.exp(-np.square(self.pos - mean).sum() / sigma)  # NumPy arrays have no .square() method
+        return self.pos, r, False, {}
+
+
 class GaussianActionNoiseWrapper(gym.Wrapper):
     def __init__(self, env, scale, seed=None):
         super().__init__(self, env)
 
@@ -1,12 +0,0 @@
- --env lqr --exp_name cost_lqr/H_[horizon]-T[n_trajs] --n_runs 30 --seed 0 --sorter value norm group permute --horizon 20 --n_trajs 32
- --env lqr --exp_name cost_lqr/H_[horizon]-T[n_trajs] --n_runs 30 --seed 0 --sorter value norm group permute --horizon 20 --n_trajs 64
- --env lqr --exp_name cost_lqr/H_[horizon]-T[n_trajs] --n_runs 30 --seed 0 --sorter value norm group permute --horizon 20 --n_trajs 128
- --env lqr --exp_name cost_lqr/H_[horizon]-T[n_trajs] --n_runs 30 --seed 0 --sorter value norm group permute --horizon 20 --n_trajs 256
- --env lqr --exp_name cost_lqr/H_[horizon]-T[n_trajs] --n_runs 30 --seed 0 --sorter value norm group permute --horizon 20 --n_trajs 512
- --env lqr --exp_name cost_lqr/H_[horizon]-T[n_trajs] --n_runs 30 --seed 0 --sorter value norm group permute --horizon 20 --n_trajs 1024
- --env lqr --exp_name cost_lqr/H_[horizon]-T[n_trajs] --n_runs 30 --seed 0 --sorter value norm group permute --horizon 40 --n_trajs 32
- --env lqr --exp_name cost_lqr/H_[horizon]-T[n_trajs] --n_runs 30 --seed 0 --sorter value norm group permute --horizon 40 --n_trajs 64
- --env lqr --exp_name cost_lqr/H_[horizon]-T[n_trajs] --n_runs 30 --seed 0 --sorter value norm group permute --horizon 40 --n_trajs 128
- --env lqr --exp_name cost_lqr/H_[horizon]-T[n_trajs] --n_runs 30 --seed 0 --sorter value norm group permute --horizon 40 --n_trajs 256
- --env lqr --exp_name cost_lqr/H_[horizon]-T[n_trajs] --n_runs 30 --seed 0 --sorter value norm group permute --horizon 40 --n_trajs 512
- --env lqr --exp_name cost_lqr/H_[horizon]-T[n_trajs] --n_runs 30 --seed 0 --sorter value norm group permute --horizon 40 --n_trajs 1024
Original file line number	Diff line number	Diff line change
`@@ -1 +1 @@`
`1`		`-{"exp_path": "pj/qmc"}`
	`1`	`+{"exp_path": "pj/qmc/qmc"}`