From e6630629ef1fdd63e19b01e3c7620afe251b432a Mon Sep 17 00:00:00 2001 From: Jeffer Peng Date: Thu, 29 Apr 2021 17:08:29 -0400 Subject: [PATCH] remove previous temporary solution for stopping simulation when vehicle offroad --- examples/game_of_tag/run_checkpoint.py | 6 ------ examples/game_of_tag/tag_adapters.py | 18 ++++++++++++------ smarts/env/rllib_hiway_env.py | 8 +------- 3 files changed, 13 insertions(+), 19 deletions(-) diff --git a/examples/game_of_tag/run_checkpoint.py b/examples/game_of_tag/run_checkpoint.py index fcd6b62b8f..53c7a767fc 100644 --- a/examples/game_of_tag/run_checkpoint.py +++ b/examples/game_of_tag/run_checkpoint.py @@ -166,12 +166,6 @@ def main(scenario, headless, checkpoint_path, seed, num_episodes): observations, rewards, dones, infos = env.step(actions) episode.record_step(observations, rewards, dones, infos) - # TODO temporary solution for game of tag: stop the episode when 1 vehicle is done - # so that the other vehicle does not train when the opponent is not present, which - # causes noisy in training - for key in dones: - if dones[key]: - dones["__all__"] = True env.close() diff --git a/examples/game_of_tag/tag_adapters.py b/examples/game_of_tag/tag_adapters.py index 5ddb0ed997..85b7e34af5 100644 --- a/examples/game_of_tag/tag_adapters.py +++ b/examples/game_of_tag/tag_adapters.py @@ -181,9 +181,12 @@ def predator_reward_adapter(observations, env_reward_signal): # if no prey vehicle avaliable, have 0 reward instead # TODO: Test to see if this is neccessary - prey_vehicles = list(filter( - lambda v: _is_vehicle_wanted(v.id, PREY_IDS), observations.neighborhood_vehicle_states, - )) + prey_vehicles = list( + filter( + lambda v: _is_vehicle_wanted(v.id, PREY_IDS), + observations.neighborhood_vehicle_states, + ) + ) return rew if len(prey_vehicles) > 0 else 0 @@ -218,7 +221,10 @@ def prey_reward_adapter(observations, env_reward_signal): # if no predator vehicle avaliable, have 0 reward instead # TODO: Test to see if this is neccessary - predator_vehicles = list(filter( - lambda v: _is_vehicle_wanted(v.id, PREDATOR_IDS), observations.neighborhood_vehicle_states, - )) + predator_vehicles = list( + filter( + lambda v: _is_vehicle_wanted(v.id, PREDATOR_IDS), + observations.neighborhood_vehicle_states, + ) + ) return rew if len(predator_vehicles) > 0 else 0 diff --git a/smarts/env/rllib_hiway_env.py b/smarts/env/rllib_hiway_env.py index 9ab781b32d..dd791ddffc 100644 --- a/smarts/env/rllib_hiway_env.py +++ b/smarts/env/rllib_hiway_env.py @@ -149,13 +149,7 @@ def step(self, agent_actions): for done in dones.values(): self._dones_registered += 1 if done else 0 - # TODO temporary solution for game of tag: stop the episode when 1 vehicle is done - # so that the other vehicle does not train when the opponent is not present, which - # causes noisy in training - dones["__all__"] = self._dones_registered > 0 - if dones["__all__"]: - for id in dones: - dones[id] = True + dones["__all__"] = self._dones_registered == len(self._agent_specs) return observations, rewards, dones, infos