Merge branch 'master' of github.com:lchenat/qmc

liychen · liychen · commit 778d5a252289 · 2019-06-16T11:31:24.000-07:00
diff --git a/gen_exps.py b/gen_exps.py
@@ -53,7 +53,7 @@ def post_variant(variant):
     generate_args('exps/search_learn_{}'.format(kwargs['--env']), args, kwargs, variants, post_variant=post_variant, shuffle=shuffle)
 
 @cmd()
-def search_network(touch: int=1, shuffle: int=0):
+def search_network_std(touch: int=1, shuffle: int=0):
     variants = {
         '--n_trajs': [60, 100, 150, 200, 300],
         '-lr': [0.0001, 0.0005, 0.001],
@@ -73,7 +73,7 @@ def search_network(touch: int=1, shuffle: int=0):
     def post_variant(variant):
         variant['--save_fn'] = 'data/search_network/{}-{}-{}-{}'.format(*[variant[k] for k in ['--n_trajs', '-lr', '-H', '--init_scale']])
         return variant
-    generate_args('exps/search_network', args, kwargs, variants, post_variant=post_variant, shuffle=shuffle)
+    generate_args('exps/search_network_std', args, kwargs, variants, post_variant=post_variant, shuffle=shuffle)
 
 
 if __name__ == "__main__":
diff --git a/models.py b/models.py
@@ -33,28 +33,28 @@ def __init__(
         state_dim,
         action_dim,
         mean_network,
-        learn_std=False,
+        learn_std=True,
     ):
         super().__init__()
         self.mean = mean_network
         self.std = torch.zeros(action_dim)
         if learn_std: self.std = nn.Parameter(self.std)
+        self.learn_std = learn_std
         self.to(Config.DEVICE)
 
     def distribution(self, obs):
         obs = tensor(obs)
-        mean = self.mean(obs)
-        dist = torch.distributions.Normal(mean, tensor(torch.ones_like(self.std)))
-        #mean = torch.tanh(self.mean(obs))
-        #dist = torch.distributions.Normal(mean, F.softplus(self.std))
-        #log_prob = dist.log_prob(action).sum(-1).unsqueeze(-1)
+        if self.learn_std:
+            dist = torch.distributions.Normal(torch.tanh(self.mean(obs)), F.softplus(self.std))
+        else:
+            dist = torch.distributions.Normal(self.mean(obs), self.std)
         return dist 
 
     def forward(self, obs, noise):
         #:: there is an issue with gpu of multiprocessing, unless you want to have one GPU each process, it is not worth it.
         obs = tensor(obs)
-        #mean = torch.tanh(self.mean(obs)) # bounded action!!!
-        mean = self.mean(obs)
-        #action = mean + tensor(noise) * F.softplus(self.std)
-        action = mean + tensor(noise)
+        if self.learn_std:
+            action = torch.tanh(self.mean(obs)) + tensor(noise) * F.softplus(self.std)
+        else:
+            action = self.mean(obs) + tensor(noise)
         return action.cpu().detach().numpy()
diff --git a/qmc.ipynb b/qmc.ipynb