remove previous temporary solution for stopping simulation when vehicle offroad
huawei-noah · Apr 29, 2021 · e663062 · e663062
1 parent 1d2caa2
commit e663062
Show file tree

Hide file tree

Showing 3 changed files with 13 additions and 19 deletions.
diff --git a/examples/game_of_tag/run_checkpoint.py b/examples/game_of_tag/run_checkpoint.py
@@ -166,12 +166,6 @@ def main(scenario, headless, checkpoint_path, seed, num_episodes):
 
             observations, rewards, dones, infos = env.step(actions)
             episode.record_step(observations, rewards, dones, infos)
-            # TODO temporary solution for game of tag: stop the episode when 1 vehicle is done
-            # so that the other vehicle does not train when the opponent is not present, which
-            # causes noisy in training
-            for key in dones:
-                if dones[key]:
-                    dones["__all__"] = True
 
     env.close()
 

diff --git a/examples/game_of_tag/tag_adapters.py b/examples/game_of_tag/tag_adapters.py
@@ -181,9 +181,12 @@ def predator_reward_adapter(observations, env_reward_signal):
 
     # if no prey vehicle avaliable, have 0 reward instead
     # TODO: Test to see if this is neccessary
-    prey_vehicles = list(filter(
-        lambda v: _is_vehicle_wanted(v.id, PREY_IDS), observations.neighborhood_vehicle_states,
-    ))
+    prey_vehicles = list(
+        filter(
+            lambda v: _is_vehicle_wanted(v.id, PREY_IDS),
+            observations.neighborhood_vehicle_states,
+        )
+    )
     return rew if len(prey_vehicles) > 0 else 0
 
 
@@ -218,7 +221,10 @@ def prey_reward_adapter(observations, env_reward_signal):
 
     # if no predator vehicle avaliable, have 0 reward instead
     # TODO: Test to see if this is neccessary
-    predator_vehicles = list(filter(
-        lambda v: _is_vehicle_wanted(v.id, PREDATOR_IDS), observations.neighborhood_vehicle_states,
-    ))
+    predator_vehicles = list(
+        filter(
+            lambda v: _is_vehicle_wanted(v.id, PREDATOR_IDS),
+            observations.neighborhood_vehicle_states,
+        )
+    )
     return rew if len(predator_vehicles) > 0 else 0
diff --git a/smarts/env/rllib_hiway_env.py b/smarts/env/rllib_hiway_env.py
@@ -149,13 +149,7 @@ def step(self, agent_actions):
 
         for done in dones.values():
             self._dones_registered += 1 if done else 0
-        # TODO temporary solution for game of tag: stop the episode when 1 vehicle is done
-        # so that the other vehicle does not train when the opponent is not present, which
-        # causes noisy in training
-        dones["__all__"] = self._dones_registered > 0
-        if dones["__all__"]:
-            for id in dones:
-                dones[id] = True
+        dones["__all__"] = self._dones_registered == len(self._agent_specs)
 
         return observations, rewards, dones, infos