diff --git a/examples/game_of_tag/README.md b/examples/game_of_tag/README.md
new file mode 100644
index 0000000000..03cd28d98e
--- /dev/null
+++ b/examples/game_of_tag/README.md
@@ -0,0 +1,49 @@
+# Game of Tag
+This directory contains a multi-agent adversarial training demo. In the demo, there is a predator vehicle and a prey vehicle.
+The predator vehicle's goal is to catch the prey, and the prey vehicle's goal is to avoid getting caught.
+
+## Run training
+```bash
+python examples/game_of_tag/game_of_tag.py examples/game_of_tag/scenarios/game_of_tag_demo_map/
+```
+
+## Run checkpoint
+```bash
+python examples/game_of_tag/run_checkpoint.py examples/game_of_tag/scenarios/game_of_tag_demo_map/
+```
+
+## Setup:
+### Rewards
+The distance-based reward is 0.5/(distance - COLLIDE_DISTANCE)^2, capped at 10.
+
+- COLLIDE_DISTANCE is the observed distance between the two vehicles when they collide. Since each vehicle's position is measured at its center, the distance at the moment of collision is not exactly 0.
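+
+This is implemented by `dominant_reward` in `tag_adapters.py`; the predator adds this value to its reward and the prey subtracts it:
+```python
+def dominant_reward(distance):
+    # At the collision distance the reward saturates at its cap.
+    if distance == COLLIDE_DISTANCE:
+        return 10
+    # Inverse-square reward that grows as the vehicles close in, capped at 10.
+    return min(0.5 / ((distance - COLLIDE_DISTANCE) ** 2), 10)
+```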
+
+### Common Reward:
+- Off road: -10
+
+#### Prey:
+- Collision with predator: -10
+- Distance to predator (d): -0.5/(d - COLLIDE_DISTANCE)^2
+
+#### Predator:
+- Collision with prey: +10
+- Distance to prey (d): 0.5/(d - COLLIDE_DISTANCE)^2
+
+### Action:
+Speed selection in m/s: [0, 3, 6, 9]
+
+Lane change selection relative to current lane: [-1, 0, 1]
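+
+These discrete selections are converted into the `LaneWithContinuousSpeed` action that SMARTS expects by `action_adapter` in `tag_adapters.py`:
+```python
+def action_adapter(model_action):
+    speed, laneChange = model_action
+    speeds = [0, 3, 6, 9]
+    # Index into the speed table (m/s) and shift the lane-change index to -1/0/1.
+    adapted_action = [speeds[speed], laneChange - 1]
+    return adapted_action
+```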
+
+## Output a model:
+Currently, RLlib does not provide an implementation for exporting a PyTorch model.
+
+Replace `export_model`'s implementation in `ray/rllib/policy/torch_policy.py` with the following:
+```python
+torch.save(self.model.state_dict(), f"{export_dir}/model.pt")
+```
+Then follow the commented-out steps in `game_of_tag.py` to export the model.
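+
+To sanity-check an exported model, the saved state dict can be loaded back and inspected. This is a minimal sketch; the path assumes the predator policy was exported to `models/predator_model/`, matching the commented-out export code in `game_of_tag.py`:
+```python
+import torch
+
+# Load the exported weights and print each parameter's name and shape.
+state_dict = torch.load("examples/game_of_tag/models/predator_model/model.pt")
+for name, tensor in state_dict.items():
+    print(name, tuple(tensor.shape))
+
+# To reuse the weights, rebuild CustomFCModel with the same observation/action
+# spaces and model config used during training, then call load_state_dict(state_dict).
+```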
+
+## Possible next steps
+- Increase the number of agents to 2 predators and 2 prey.
+This requires remodelling the reward so that the game stays zero-sum. The complication is easiest to see with
+the distance reward between 2 predators and 1 prey: if each agent is only rewarded against its nearest rival, both
+predators collect the full distance reward from the single prey, while the prey is only penalised against its
+nearest predator, so the rewards no longer sum to 0. Keeping the game zero-sum requires the predators to know about
+each other, the prey to know about the other prey, and the prey to know about multiple predators.
+- Add an attribute to the observations indicating whether the ego vehicle is in front of or behind the target vehicle;
+this may help the ego vehicle decide whether it should slow down or speed up.
\ No newline at end of file
diff --git a/examples/game_of_tag/game_of_tag.py b/examples/game_of_tag/game_of_tag.py
new file mode 100644
index 0000000000..fafd3620cb
--- /dev/null
+++ b/examples/game_of_tag/game_of_tag.py
@@ -0,0 +1,270 @@
+"""Let's play tag!
+
+A predator-prey multi-agent example built on top of RLlib to facilitate further
+developments on multi-agent support for HiWay (including design, performance,
+research, and scaling).
+
+The predator and prey use separate policies. A predator "catches" its prey when
+it collides into the other vehicle. There can be multiple predators and
+multiple prey in a map. Social vehicles act as obstacles where both the
+predator and prey must avoid them.
+"""
+import argparse
+import os
+import random
+import multiprocessing
+import time
+import ray
+
+
+import numpy as np
+from typing import List
+from ray import tune
+from ray.rllib.utils import try_import_tf
+from ray.rllib.models import ModelCatalog
+from ray.tune import Stopper
+from ray.rllib.models.tf.fcnet import FullyConnectedNetwork
+from ray.tune.schedulers import PopulationBasedTraining
+from ray.rllib.agents.ppo import PPOTrainer
+from pathlib import Path
+
+from smarts.env.rllib_hiway_env import RLlibHiWayEnv
+from smarts.core.agent import AgentSpec, Agent
+from smarts.core.controllers import ActionSpaceType
+from smarts.core.agent_interface import AgentInterface, AgentType, DoneCriteria
+from smarts.core.utils.file import copy_tree
+
+
+from examples.game_of_tag.tag_adapters import *
+from examples.game_of_tag.model import CustomFCModel
+
+
+# Add custom metrics to your tensorboard using these callbacks
+# see: https://ray.readthedocs.io/en/latest/rllib-training.html#callbacks-and-custom-metrics
+def on_episode_start(info):
+ episode = info["episode"]
+ print("episode {} started".format(episode.episode_id))
+
+
+def on_episode_step(info):
+ episode = info["episode"]
+ single_agent_id = list(episode._agent_to_last_obs)[0]
+ obs = episode.last_raw_obs_for(single_agent_id)
+
+
+def on_episode_end(info):
+ episode = info["episode"]
+
+
+def explore(config):
+ # ensure we collect enough timesteps to do sgd
+ if config["train_batch_size"] < config["sgd_minibatch_size"] * 2:
+ config["train_batch_size"] = config["sgd_minibatch_size"] * 2
+ # ensure we run at least one sgd iter
+ if config["num_sgd_iter"] < 1:
+ config["num_sgd_iter"] = 1
+ return config
+
+
+PREDATOR_POLICY = "predator_policy"
+PREY_POLICY = "prey_policy"
+
+
+def policy_mapper(agent_id):
+ if agent_id in PREDATOR_IDS:
+ return PREDATOR_POLICY
+ elif agent_id in PREY_IDS:
+ return PREY_POLICY
+
+
+class TimeStopper(Stopper):
+ def __init__(self):
+ self._start = time.time()
+        # Obvious tag behaviour typically emerges within about 6 hours of training.
+ self._deadline = 48 * 60 * 60 # train for 48 hours
+
+ def __call__(self, trial_id, result):
+ return False
+
+ def stop_all(self):
+ return time.time() - self._start > self._deadline
+
+
+tf = try_import_tf()
+
+ModelCatalog.register_custom_model("CustomFCModel", CustomFCModel)
+
+rllib_agents = {}
+
+shared_interface = AgentInterface(
+ max_episode_steps=1500,
+ neighborhood_vehicles=True,
+ waypoints=True,
+ action=ActionSpaceType.LaneWithContinuousSpeed,
+)
+shared_interface.done_criteria = DoneCriteria(
+ off_route=False,
+ wrong_way=False,
+ collision=True,
+)
+
+for agent_id in PREDATOR_IDS:
+ rllib_agents[agent_id] = {
+ "agent_spec": AgentSpec(
+ interface=shared_interface,
+ agent_builder=lambda: TagModelAgent(
+ os.path.join(os.path.dirname(os.path.realpath(__file__)), "model"),
+ OBSERVATION_SPACE,
+ ),
+ observation_adapter=observation_adapter,
+ reward_adapter=predator_reward_adapter,
+ action_adapter=action_adapter,
+ ),
+ "observation_space": OBSERVATION_SPACE,
+ "action_space": ACTION_SPACE,
+ }
+
+for agent_id in PREY_IDS:
+ rllib_agents[agent_id] = {
+ "agent_spec": AgentSpec(
+ interface=shared_interface,
+ agent_builder=lambda: TagModelAgent(
+ os.path.join(os.path.dirname(os.path.realpath(__file__)), "model"),
+ OBSERVATION_SPACE,
+ ),
+ observation_adapter=observation_adapter,
+ reward_adapter=prey_reward_adapter,
+ action_adapter=action_adapter,
+ ),
+ "observation_space": OBSERVATION_SPACE,
+ "action_space": ACTION_SPACE,
+ }
+
+
+def build_tune_config(scenario, headless=True, sumo_headless=False):
+ rllib_policies = {
+ policy_mapper(agent_id): (
+ None,
+ rllib_agent["observation_space"],
+ rllib_agent["action_space"],
+ {"model": {"custom_model": "CustomFCModel"}},
+ )
+ for agent_id, rllib_agent in rllib_agents.items()
+ }
+
+ tune_config = {
+ "env": RLlibHiWayEnv,
+ "framework": "torch",
+ "log_level": "WARN",
+ "num_workers": 3,
+ "explore": True,
+ "horizon": 10000,
+ "env_config": {
+ "seed": 42,
+ "sim_name": "game_of_tag_works?",
+ "scenarios": [os.path.abspath(scenario)],
+ "headless": headless,
+ "sumo_headless": sumo_headless,
+ "agent_specs": {
+ agent_id: rllib_agent["agent_spec"]
+ for agent_id, rllib_agent in rllib_agents.items()
+ },
+ },
+ "multiagent": {
+ "policies": rllib_policies,
+ "policies_to_train": [PREDATOR_POLICY, PREY_POLICY],
+ "policy_mapping_fn": policy_mapper,
+ },
+ "callbacks": {
+ "on_episode_start": on_episode_start,
+ "on_episode_step": on_episode_step,
+ "on_episode_end": on_episode_end,
+ },
+ }
+ return tune_config
+
+
+def main(args):
+ pbt = PopulationBasedTraining(
+ time_attr="time_total_s",
+ metric="episode_reward_mean",
+ mode="max",
+ perturbation_interval=300,
+ resample_probability=0.25,
+ # Specifies the mutations of these hyperparams
+ hyperparam_mutations={
+ "lambda": lambda: random.uniform(0.9, 1.0),
+ "clip_param": lambda: random.uniform(0.01, 0.5),
+ "kl_coeff": lambda: 0.3,
+ "lr": [1e-3],
+ "sgd_minibatch_size": lambda: 128,
+ "train_batch_size": lambda: 4000,
+ "num_sgd_iter": lambda: 30,
+ },
+ custom_explore_fn=explore,
+ )
+ local_dir = os.path.expanduser(args.result_dir)
+
+ tune_config = build_tune_config(args.scenario)
+
+ tune.run(
+        PPOTrainer,  # RLlib's PPO supports the multi-agent setting
+ name="lets_play_tag",
+ stop=TimeStopper(),
+ # XXX: Every X iterations perform a _ray actor_ checkpoint (this is
+ # different than _exporting_ a TF/PT checkpoint).
+ checkpoint_freq=5,
+ checkpoint_at_end=True,
+ # XXX: Beware, resuming after changing tune params will not pick up
+ # the new arguments as they are stored alongside the checkpoint.
+ resume=args.resume_training,
+ # restore="path_to_training_checkpoint/checkpoint_x/checkpoint-x",
+ local_dir=local_dir,
+ reuse_actors=True,
+ max_failures=0,
+ export_formats=["model", "checkpoint"],
+ config=tune_config,
+ scheduler=pbt,
+ )
+
+    # # To output a model:
+    # # 1: comment out tune.run above and uncomment the following code
+    # # 2: replace the checkpoint path with your training checkpoint path
+    # # 3: inject the code into RLlib as described in README.md, then run
+ # checkpoint_path = os.path.join(
+ # os.path.dirname(os.path.realpath(__file__)), "models/checkpoint_360/checkpoint-360"
+ # )
+ # ray.init(num_cpus=2)
+ # training_agent = PPOTrainer(env=RLlibHiWayEnv,config=tune_config)
+ # training_agent.restore(checkpoint_path)
+ # prefix = "model.ckpt"
+ # model_dir = os.path.join(
+ # os.path.dirname(os.path.realpath(__file__)), "models/predator_model"
+ # )
+ # training_agent.export_policy_model(model_dir, PREDATOR_POLICY)
+ # model_dir = os.path.join(
+ # os.path.dirname(os.path.realpath(__file__)), "models/prey_model"
+ # )
+ # training_agent.export_policy_model(model_dir, PREY_POLICY)
+
+
+if __name__ == "__main__":
+ parser = argparse.ArgumentParser("rllib-example")
+ parser.add_argument(
+ "scenario",
+ type=str,
+ help="Scenario to run (see scenarios/ for some samples you can use)",
+ )
+ parser.add_argument(
+ "--resume_training",
+ default=False,
+ action="store_true",
+ help="Resume the last trained example",
+ )
+ parser.add_argument(
+ "--result_dir",
+ type=str,
+ default="~/ray_results",
+ help="Directory containing results (and checkpointing)",
+ )
+ args = parser.parse_args()
+ main(args)
diff --git a/examples/game_of_tag/model.py b/examples/game_of_tag/model.py
new file mode 100644
index 0000000000..17d0f595bb
--- /dev/null
+++ b/examples/game_of_tag/model.py
@@ -0,0 +1,37 @@
+import torch, gym
+from torch import nn
+from torch.distributions.normal import Normal
+from ray.rllib.models.torch.torch_modelv2 import TorchModelV2
+from ray.rllib.models.torch.fcnet import FullyConnectedNetwork as TorchFCNet
+
+
+class CustomFCModel(TorchModelV2, nn.Module):
+ """Example of interpreting repeated observations."""
+
+ def __init__(
+ self,
+ obs_space: gym.spaces.Space,
+ action_space: gym.spaces.Space,
+ num_outputs: int,
+ model_config,
+ name: str,
+ ):
+ super(CustomFCModel, self).__init__(
+ obs_space=obs_space,
+ action_space=action_space,
+ num_outputs=num_outputs,
+ model_config=model_config,
+ name=name,
+ )
+ nn.Module.__init__(self)
+
+ self.model = TorchFCNet(
+ obs_space, action_space, num_outputs, model_config, name
+ )
+
+ def forward(self, input_dict, state, seq_lens):
+
+ return self.model.forward(input_dict, state, seq_lens)
+
+ def value_function(self):
+ return self.model.value_function()
diff --git a/examples/game_of_tag/models/checkpoint_360/.is_checkpoint b/examples/game_of_tag/models/checkpoint_360/.is_checkpoint
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/examples/game_of_tag/models/checkpoint_360/checkpoint-360 b/examples/game_of_tag/models/checkpoint_360/checkpoint-360
new file mode 100644
index 0000000000..5be92c78cf
Binary files /dev/null and b/examples/game_of_tag/models/checkpoint_360/checkpoint-360 differ
diff --git a/examples/game_of_tag/models/checkpoint_360/checkpoint-360.tune_metadata b/examples/game_of_tag/models/checkpoint_360/checkpoint-360.tune_metadata
new file mode 100644
index 0000000000..29566817d4
Binary files /dev/null and b/examples/game_of_tag/models/checkpoint_360/checkpoint-360.tune_metadata differ
diff --git a/examples/game_of_tag/models/predator_model/model.pt b/examples/game_of_tag/models/predator_model/model.pt
new file mode 100644
index 0000000000..54671a2bff
Binary files /dev/null and b/examples/game_of_tag/models/predator_model/model.pt differ
diff --git a/examples/game_of_tag/models/prey_model/model.pt b/examples/game_of_tag/models/prey_model/model.pt
new file mode 100644
index 0000000000..54671a2bff
Binary files /dev/null and b/examples/game_of_tag/models/prey_model/model.pt differ
diff --git a/examples/game_of_tag/run_checkpoint.py b/examples/game_of_tag/run_checkpoint.py
new file mode 100644
index 0000000000..fcd6b62b8f
--- /dev/null
+++ b/examples/game_of_tag/run_checkpoint.py
@@ -0,0 +1,213 @@
+"""Let's play tag!
+
+A predator-prey multi-agent example built on top of RLlib to facilitate further
+developments on multi-agent support for HiWay (including design, performance,
+research, and scaling).
+
+The predator and prey use separate policies. A predator "catches" its prey when
+it collides into the other vehicle. There can be multiple predators and
+multiple prey in a map. Social vehicles act as obstacles where both the
+predator and prey must avoid them.
+"""
+import argparse
+import os
+import random
+import multiprocessing
+
+import gym
+import numpy as np
+import ray
+from ray import tune
+from ray.rllib.models import ModelCatalog
+from ray.rllib.utils import try_import_tf
+from ray.tune.schedulers import PopulationBasedTraining
+from ray.rllib.models.tf.fcnet import FullyConnectedNetwork
+from ray.rllib.agents.ppo import PPOTrainer
+
+from examples.game_of_tag.game_of_tag import shared_interface, build_tune_config
+from examples.game_of_tag.model import CustomFCModel
+from examples.game_of_tag.tag_adapters import (
+ OBSERVATION_SPACE,
+ PREDATOR_IDS,
+ PREY_IDS,
+ observation_adapter,
+ predator_reward_adapter,
+ prey_reward_adapter,
+)
+
+from smarts.env.rllib_hiway_env import RLlibHiWayEnv
+from smarts.core.agent import AgentSpec, Agent
+from smarts.core.agent_interface import AgentInterface, AgentType, DoneCriteria
+from smarts.core.utils.episodes import episodes
+from smarts.core.controllers import ActionSpaceType
+
+tf = try_import_tf()[1]
+
+# must use >3 cpus since training used 3 workers
+ray.init(num_cpus=4)
+
+
+ModelCatalog.register_custom_model("CustomFCModel", CustomFCModel)
+
+
+def action_adapter(model_action):
+ """Take in the action calculated by the model, and transform it to something that
+ SMARTS can understand.
+
+ The model returns a batched action (since it received a batched input). That is, the
+ action consists of actions for however many observations were passed to it in the
+ batch of observations it was given. We only gave it a batch of 1 observation in the
+ act(...) method of TagModelAgent.
+
+ The model outputs an action in the form of:
+ (
+ (
+ array([...]), # The speed.
+ array([...]), # The lane change.
+ ),
+ [],
+ {
+ '...': array([...]),
+ '...': array([[...]]),
+ '...': array([...]),
+ '...': array([...])
+ }
+ )
+
+ The action we care about is the first element of this tuple, get it with
+ model_action[0], so that speed = array([...]) and laneChange = array([...]). Convert
+    these arrays to scalars so they can be used to index into speeds or have 1 subtracted.
+ """
+ speed, laneChange = model_action[0]
+ speeds = [0, 3, 6, 9]
+ adapted_action = [speeds[speed.item()], laneChange.item() - 1]
+ return adapted_action
+
+
+class TagModelAgent(Agent):
+ def __init__(self, checkpoint_path, scenario, headless, policy_name):
+ assert os.path.isfile(checkpoint_path)
+ tune_config = build_tune_config(scenario, headless=headless)
+ self.agent = PPOTrainer(env=RLlibHiWayEnv, config=tune_config)
+ self.agent.restore(checkpoint_path)
+ self._policy_name = policy_name
+ self._prep = ModelCatalog.get_preprocessor_for_space(OBSERVATION_SPACE)
+
+ def act(self, observations):
+ """Receive an observation from the environment, and compute the agent's action.
+
+        The observation is a single agent's observation (a dictionary). However,
+ the model expects a batched observation, that is, a list of observations. To fix
+ this, expand the dimensions of the observation from (n,) to (1, n) so that the
+ observation fits into the model's expected input size.
+ """
+ obs = self._prep.transform(observations)
+ obs = np.expand_dims(obs, 0)
+ action = self.agent.get_policy(self._policy_name).compute_actions(obs)
+ return action
+
+
+def main(scenario, headless, checkpoint_path, seed, num_episodes):
+ agent_specs = {}
+
+ for agent_id in PREDATOR_IDS:
+ agent_specs[agent_id] = AgentSpec(
+ interface=shared_interface,
+ agent_builder=lambda: TagModelAgent(
+ checkpoint_path, # assumes checkpoint exists
+ scenario,
+ headless,
+ "predator_policy",
+ ),
+ observation_adapter=observation_adapter,
+ reward_adapter=predator_reward_adapter,
+ action_adapter=action_adapter,
+ )
+
+ for agent_id in PREY_IDS:
+ agent_specs[agent_id] = AgentSpec(
+ interface=shared_interface,
+ agent_builder=lambda: TagModelAgent(
+ checkpoint_path, # assumes checkpoint exists
+ scenario,
+ headless,
+ "prey_policy",
+ ),
+ observation_adapter=observation_adapter,
+ reward_adapter=prey_reward_adapter,
+ action_adapter=action_adapter,
+ )
+
+ env = gym.make(
+ "smarts.env:hiway-v0",
+ scenarios=[scenario],
+ agent_specs=agent_specs,
+ sim_name="test_game_of_tag",
+ headless=True,
+ sumo_headless=False,
+ seed=seed,
+ )
+
+ agents = {
+ agent_id: agent_spec.build_agent()
+ for agent_id, agent_spec in agent_specs.items()
+ }
+
+ for episode in episodes(n=num_episodes):
+ observations = env.reset()
+ episode.record_scenario(env.scenario_log)
+
+ dones = {"__all__": False}
+ while not dones["__all__"]:
+ actions = {
+ agent_id: agents[agent_id].act(agent_obs)
+ for agent_id, agent_obs in observations.items()
+ }
+
+ observations, rewards, dones, infos = env.step(actions)
+ episode.record_step(observations, rewards, dones, infos)
+ # TODO temporary solution for game of tag: stop the episode when 1 vehicle is done
+            # so that the other vehicle does not train when its opponent is not present, which
+            # causes noise in training
+ for key in dones:
+ if dones[key]:
+ dones["__all__"] = True
+
+ env.close()
+
+
+if __name__ == "__main__":
+ parser = argparse.ArgumentParser("game-of-tag-example")
+ parser.add_argument(
+ "scenario",
+ type=str,
+ help="Scenario to run (see scenarios/ for some samples you can use)",
+ )
+ parser.add_argument(
+ "--headless", help="run simulation in headless mode", action="store_true"
+ )
+ parser.add_argument(
+ "--checkpoint_path",
+ help="run simulation in headless mode",
+ type=str,
+ default=os.path.join(
+ os.path.dirname(os.path.realpath(__file__)),
+ "models/checkpoint_360/checkpoint-360",
+ ),
+ )
+ parser.add_argument(
+ "--num_episodes",
+ help="number of episodes to show",
+ type=int,
+ default=10,
+ )
+ parser.add_argument("--seed", type=int, default=42)
+ args = parser.parse_args()
+
+ main(
+ scenario=args.scenario,
+ headless=args.headless,
+ checkpoint_path=args.checkpoint_path,
+ seed=args.seed,
+ num_episodes=args.num_episodes,
+ )
diff --git a/examples/game_of_tag/scenarios/game_of_tag_demo_map/map.net.xml b/examples/game_of_tag/scenarios/game_of_tag_demo_map/map.net.xml
new file mode 100644
index 0000000000..f4fb40dc79
--- /dev/null
+++ b/examples/game_of_tag/scenarios/game_of_tag_demo_map/map.net.xml
@@ -0,0 +1,267 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/examples/game_of_tag/scenarios/game_of_tag_demo_map/scenario.py b/examples/game_of_tag/scenarios/game_of_tag_demo_map/scenario.py
new file mode 100644
index 0000000000..e0487755fb
--- /dev/null
+++ b/examples/game_of_tag/scenarios/game_of_tag_demo_map/scenario.py
@@ -0,0 +1,42 @@
+import random
+from pathlib import Path
+
+from smarts.sstudio import gen_scenario
+from smarts.sstudio import types as t
+from smarts.core import seed
+
+seed(42)
+
+# traffic = t.Traffic(
+# flows=[
+# t.Flow(
+# route=t.Route(
+# begin=("-gneE69", 0, 10),
+# end=("gneE77", 0, 0),
+# ),
+# rate=60*60,
+# actors={
+# t.TrafficActor(
+# name="car",
+# vehicle_type=random.choice(
+# ["passenger", "bus", "coach", "truck", "trailer"]
+# ),
+# ): 1
+# },
+# )
+# ]
+# )
+
+# training missions
+ego_missions = [
+ t.EndlessMission(begin=("top", 2, 5)), # pred
+ t.EndlessMission(begin=("top", 2, 30)), # prey
+]
+
+
+scenario = t.Scenario(
+ # traffic={"all": traffic},
+ ego_missions=ego_missions,
+)
+
+gen_scenario(scenario, output_dir=str(Path(__file__).parent))
diff --git a/examples/game_of_tag/scenarios/game_of_tag_demo_map/shifted_map-AUTOGEN.net.xml b/examples/game_of_tag/scenarios/game_of_tag_demo_map/shifted_map-AUTOGEN.net.xml
new file mode 100644
index 0000000000..d7eade2f4c
--- /dev/null
+++ b/examples/game_of_tag/scenarios/game_of_tag_demo_map/shifted_map-AUTOGEN.net.xml
@@ -0,0 +1,267 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/examples/game_of_tag/tag_adapters.py b/examples/game_of_tag/tag_adapters.py
new file mode 100644
index 0000000000..5ddb0ed997
--- /dev/null
+++ b/examples/game_of_tag/tag_adapters.py
@@ -0,0 +1,224 @@
+import gym
+import numpy as np
+import random
+import math
+from typing import List
+import time
+from dataclasses import dataclass
+
+PREDATOR_IDS = ["PRED1"]
+PREY_IDS = ["PREY1"]
+
+
+@dataclass
+class Rewards:
+    collision_with_target: float = 10
+    offroad: float = 10
+    collision_with_other_deduction: float = -1.5
+
+
+global_rewards = Rewards()
+
+# Vehicles collide at a distance of around 3.8 when hit from behind,
+# and around 2.11 when hit from the side.
+COLLIDE_DISTANCE = 3.8
+
+ACTION_SPACE = gym.spaces.Tuple(
+ (
+ gym.spaces.Discrete(4), # 4 types of speed
+ gym.spaces.Discrete(3), # -1 0 or 1 for lane change
+ )
+)
+
+NEIGHBORHOOD_VEHICLE_STATES = gym.spaces.Dict(
+ {
+ "heading": gym.spaces.Box(low=-2 * np.pi, high=2 * np.pi, shape=(1,)),
+ "speed": gym.spaces.Box(low=-2e2, high=2e2, shape=(1,)),
+ "position": gym.spaces.Box(low=-1e4, high=1e4, shape=(2,)),
+ "distance": gym.spaces.Box(low=0, high=1e3, shape=(1,)),
+ "lane_index": gym.spaces.Discrete(5),
+ }
+)
+
+OBSERVATION_SPACE = gym.spaces.Dict(
+ {
+ "heading": gym.spaces.Box(low=-1 * np.pi, high=np.pi, shape=(1,)),
+ "speed": gym.spaces.Box(low=0, high=1e3, shape=(1,)),
+ "position": gym.spaces.Box(low=-1e3, high=1e3, shape=(2,)),
+ "lane_index": gym.spaces.Discrete(5),
+ "target_vehicles": gym.spaces.Tuple(
+ tuple([NEIGHBORHOOD_VEHICLE_STATES] * len(PREDATOR_IDS))
+ ),
+ }
+)
+
+
+def action_adapter(model_action):
+ speed, laneChange = model_action
+ speeds = [0, 3, 6, 9]
+ adapted_action = [speeds[speed], laneChange - 1]
+ return adapted_action
+
+
+def _is_vehicle_wanted(id, wanted_ids: List[str]):
+ """This function is needed since agent-id during training would be
+ 'PREY1-xxxxxxxx' instead of 'PREY1'
+ """
+ for wanted_id in wanted_ids:
+ if wanted_id in id:
+ return True
+ return False
+
+
+def get_specific_vehicle_states(nv_states, wanted_ids: List[str], ego_state):
+ """return vehicle states of vehicle that has id in wanted_ids"""
+ states = [
+ {
+ "heading": np.array([v.heading]),
+ "speed": np.array([v.speed]),
+ "position": np.array(v.position[:2]),
+ "lane_index": v.lane_index,
+ "distance": np.array(
+ [np.linalg.norm(v.position[:2] - ego_state.position[:2])]
+ ),
+ }
+ for v in nv_states
+ if _is_vehicle_wanted(v.id, wanted_ids)
+ ]
+ # ego is predator, prey went off road
+ if wanted_ids == PREY_IDS:
+ # make the last observation bad for prey to discourage off road
+ states += [
+ {
+ "heading": np.array([0]),
+ "speed": np.array([0]),
+ "position": ego_state.position[:2],
+ "lane_index": ego_state.lane_index,
+ "distance": np.array([COLLIDE_DISTANCE]), # give max reward to predator
+ }
+ ] * (len(wanted_ids) - len(states))
+ elif wanted_ids == PREDATOR_IDS:
+ # ego is prey, predator went off road
+ # make the last observation bad for predator
+ states += [
+ {
+ "heading": np.array([0]),
+ "speed": np.array([0]),
+ "position": np.array([1000, 1000]),
+ "lane_index": ego_state.lane_index,
+ "distance": np.array([1e3 - 1]), # makes position far from predator
+ }
+ ] * (len(wanted_ids) - len(states))
+
+ return states
+
+
+def min_distance_to_rival(ego_position, rival_ids, neighbour_states):
+ rival_vehicles = filter(
+ lambda v: _is_vehicle_wanted(v.id, rival_ids), neighbour_states
+ )
+ rival_positions = [p.position for p in rival_vehicles]
+
+ return min(
+ [np.linalg.norm(ego_position - prey_pos) for prey_pos in rival_positions],
+ default=0,
+ )
+
+
+def observation_adapter(observations):
+ nv_states = observations.neighborhood_vehicle_states
+ ego = observations.ego_vehicle_state
+
+ target_vehicles = None
+ if _is_vehicle_wanted(ego.id, PREY_IDS):
+ target_vehicles = get_specific_vehicle_states(nv_states, PREDATOR_IDS, ego)
+ elif _is_vehicle_wanted(ego.id, PREDATOR_IDS):
+ target_vehicles = get_specific_vehicle_states(nv_states, PREY_IDS, ego)
+
+ return {
+ "heading": np.array([ego.heading]),
+ "speed": np.array([ego.speed]),
+ "position": np.array(ego.position[:2]),
+ "lane_index": ego.lane_index,
+ "target_vehicles": tuple(target_vehicles),
+ }
+
+
+def dominant_reward(distance):
+ if distance == COLLIDE_DISTANCE:
+ return 10
+ return min(0.5 / ((distance - COLLIDE_DISTANCE) ** 2), 10)
+
+
+def predator_reward_adapter(observations, env_reward_signal):
+ rew = 0
+ ego = observations.ego_vehicle_state
+
+ # Primary reward
+ distance_to_target = min_distance_to_rival(
+ ego.position,
+ PREY_IDS,
+ observations.neighborhood_vehicle_states,
+ )
+
+ rew += dominant_reward(distance_to_target)
+
+ events = observations.events
+ for c in observations.events.collisions:
+ if _is_vehicle_wanted(c.collidee_id, PREY_IDS):
+            rew += global_rewards.collision_with_target
+ print(
+ f"predator {ego.id} collided with prey {c.collidee_id} distance {distance_to_target}"
+ )
+    # # keeping this commented code for expanding to multiple prey and predators in the future
+    # else:
+    #     # Collided with something other than the prey
+    #     rew += global_rewards.collision_with_other_deduction
+    #     print(f"predator {ego.id} collided with others {c.collidee_id}")
+
+ if events.off_road:
+ rew -= global_rewards.offroad
+
+    # if no prey vehicle is available, return 0 reward instead
+    # TODO: Test to see if this is necessary
+ prey_vehicles = list(filter(
+ lambda v: _is_vehicle_wanted(v.id, PREY_IDS), observations.neighborhood_vehicle_states,
+ ))
+ return rew if len(prey_vehicles) > 0 else 0
+
+
+def prey_reward_adapter(observations, env_reward_signal):
+
+ rew = 0
+ ego = observations.ego_vehicle_state
+
+ # Primary reward
+ distance_to_target = min_distance_to_rival(
+ ego.position,
+ PREDATOR_IDS,
+ observations.neighborhood_vehicle_states,
+ )
+ rew -= dominant_reward(distance_to_target)
+
+ events = observations.events
+ for c in events.collisions:
+ if _is_vehicle_wanted(c.collidee_id, PREDATOR_IDS):
+            rew -= global_rewards.collision_with_target
+ print(
+ f"prey {ego.id} collided with Predator {c.collidee_id} distance {distance_to_target}"
+ )
+    # # keeping this commented code for expanding to multiple prey and predators in the future
+    # else:
+    #     # Collided with something other than the predator
+    #     rew += global_rewards.collision_with_other_deduction
+    #     print(f"prey {ego.id} collided with other vehicle {c.collidee_id}")
+
+ if events.off_road:
+ rew -= global_rewards.offroad
+
+    # if no predator vehicle is available, return 0 reward instead
+    # TODO: Test to see if this is necessary
+ predator_vehicles = list(filter(
+ lambda v: _is_vehicle_wanted(v.id, PREDATOR_IDS), observations.neighborhood_vehicle_states,
+ ))
+ return rew if len(predator_vehicles) > 0 else 0
diff --git a/smarts/core/agent_interface.py b/smarts/core/agent_interface.py
index 35d16840f7..c75e353241 100644
--- a/smarts/core/agent_interface.py
+++ b/smarts/core/agent_interface.py
@@ -109,7 +109,7 @@ class AgentType(IntEnum):
"""All observations and continuous action space"""
Standard = 2
"""Minimal observations for dealing with waypoints and other vehicles and
- continuous action space.
+ ActuatorDynamic action space.
"""
Laner = 3
"""Agent sees waypoints and performs lane actions"""
diff --git a/smarts/core/controllers/lane_following_controller.py b/smarts/core/controllers/lane_following_controller.py
index 411effa4b0..24dd9d21a5 100644
--- a/smarts/core/controllers/lane_following_controller.py
+++ b/smarts/core/controllers/lane_following_controller.py
@@ -75,6 +75,15 @@ def perform_lane_following(
lane_change=0,
):
assert isinstance(vehicle.chassis, AckermannChassis)
+ assert isinstance(lane_change, int) or isinstance(
+ lane_change, np.integer
+ ), "lane_change action should be an integer"
+ assert (
+ lane_change == 1 or lane_change == 0 or lane_change == -1
+ ), """lane_change action should be any of the following:
+-1: change to right right
+0: stay on same lane,
+1: change to left lane"""
state = controller_state
# This lookahead value is coupled with a few calculations below, changing it
# may affect stability of the controller.
@@ -179,7 +188,7 @@ def perform_lane_following(
# directly related to the steering angle, this is added to further
# enhance the speed tracking performance. TODO: currently, the bullet
# does not provide the lateral acceleration which is needed for
- # calculating the front laterl force. we need to replace the coefficent
+    # calculating the front lateral force. We need to replace the coefficient
# with better approximation of the front lateral forces using explicit
# differention.
lateral_force_coefficient = 1.5
diff --git a/smarts/env/rllib_hiway_env.py b/smarts/env/rllib_hiway_env.py
index 0660e0c30d..9ab781b32d 100644
--- a/smarts/env/rllib_hiway_env.py
+++ b/smarts/env/rllib_hiway_env.py
@@ -80,9 +80,11 @@ def __init__(self, config):
)
self._sim_name = config.get("sim_name", None)
- self._headless = config.get("headless", False)
+        # Warning: running RLlib with Envision will cause memory to run out very quickly.
+        # It is recommended to set headless to True during training and use sumo-gui (sumo_headless=False).
+ self._headless = config.get("headless", True)
self._num_external_sumo_clients = config.get("num_external_sumo_clients", 0)
- self._sumo_headless = config.get("sumo_headless", True)
+ self._sumo_headless = config.get("sumo_headless", False)
self._sumo_port = config.get("sumo_port")
self._sumo_auto_start = config.get("sumo_auto_start", True)
self._endless_traffic = config.get("endless_traffic", True)
@@ -147,7 +149,13 @@ def step(self, agent_actions):
for done in dones.values():
self._dones_registered += 1 if done else 0
- dones["__all__"] = self._dones_registered == len(self._agent_specs)
+        # TODO temporary solution for game of tag: stop the episode when 1 vehicle is done
+        # so that the other vehicle does not train when its opponent is not present, which
+        # causes noise in training
+ dones["__all__"] = self._dones_registered > 0
+ if dones["__all__"]:
+ for id in dones:
+ dones[id] = True
return observations, rewards, dones, infos