From ef50b5b5e018e7c6e751ce1a4b89f60fbf651a47 Mon Sep 17 00:00:00 2001
From: Pengyuan Zhou
Date: Thu, 11 Jun 2020 22:04:02 +0300
Subject: [PATCH 01/44] DQN for flow

---
 .../multiagent_traffic_light_grid.py        | 10 ++---
 examples/train.py                           | 40 ++++++++++---------
 flow/envs/multiagent/traffic_light_grid.py  |  4 +-
 3 files changed, 28 insertions(+), 26 deletions(-)

diff --git a/examples/exp_configs/rl/multiagent/multiagent_traffic_light_grid.py b/examples/exp_configs/rl/multiagent/multiagent_traffic_light_grid.py
index b8293f638..5cfea98b2 100644
--- a/examples/exp_configs/rl/multiagent/multiagent_traffic_light_grid.py
+++ b/examples/exp_configs/rl/multiagent/multiagent_traffic_light_grid.py
@@ -1,6 +1,6 @@
 """Multi-agent traffic light example (single shared policy)."""
 
-from ray.rllib.agents.ppo.ppo_policy import PPOTFPolicy
+from ray.rllib.agents.ppo.ppo_policy import DQNTFPolicy
 from flow.envs.multiagent import MultiTrafficLightGridPOEnv
 from flow.networks import TrafficLightGridNetwork
 from flow.core.params import SumoParams, EnvParams, InitialConfig, NetParams
@@ -88,7 +88,7 @@
         "target_velocity": 50,
         "switch_time": 3,
         "num_observed": 2,
-        "discrete": False,
+        "discrete": True,
         "tl_type": "actuated",
         "num_local_edges": 4,
         "num_local_lights": 4,
@@ -140,8 +140,8 @@
 def gen_policy():
-    """Generate a policy in RLlib."""
-    return PPOTFPolicy, obs_space, act_space, {}
+    """Generate a policy in DQN."""
+    return DQNTFPolicy, obs_space, act_space, {}
 
 
 # Setup PG with a single policy graph for all agents
@@ -149,7 +149,7 @@ def gen_policy():
 def policy_mapping_fn(_):
-    """Map a policy in RLlib."""
+    """Map a policy in DQN."""
     return 'av'

diff --git a/examples/train.py b/examples/train.py
index 1b2f22476..47f2fd93f 100644
--- a/examples/train.py
+++ b/examples/train.py
@@ -40,8 +40,8 @@ def parse_args(args):
     # optional input parameters
     parser.add_argument(
-        '--rl_trainer', type=str, default="rllib",
-        help='the RL trainer to use. either rllib or Stable-Baselines')
+        '--rl_trainer', type=str, default="DQN",
+        help='the RL trainer to use. DQN')
     parser.add_argument(
         '--num_cpus', type=int, default=1,
@@ -98,13 +98,13 @@ def run_model_stablebaseline(flow_params,
     return train_model
 
 
-def setup_exps_rllib(flow_params,
+def setup_exps_dqn(flow_params,
                      n_cpus,
                      n_rollouts,
                      policy_graphs=None,
                      policy_mapping_fn=None,
                      policies_to_train=None):
-    """Return the relevant components of an RLlib experiment.
+    """Return the relevant components of an DQN experiment.
 
     Parameters
     ----------
@@ -139,20 +139,22 @@ def setup_exps_rllib(flow_params,
     horizon = flow_params['env'].horizon
 
-    alg_run = "PPO"
+    alg_run = "DQN"
 
     agent_cls = get_agent_class(alg_run)
     config = deepcopy(agent_cls._default_config)
 
     config["num_workers"] = n_cpus
     config["train_batch_size"] = horizon * n_rollouts
-    config["gamma"] = 0.999 # discount rate
-    config["model"].update({"fcnet_hiddens": [32, 32, 32]})
-    config["use_gae"] = True
-    config["lambda"] = 0.97
-    config["kl_target"] = 0.02
-    config["num_sgd_iter"] = 10
+    config['clip_actions'] = False
     config["horizon"] = horizon
+    config["timesteps_per_iteration"] = horizon * n_rollouts
+    config["hiddens"] = [512]
+    config["lr"] = 0.0000625 # TODO: hp tune
+    config["grad_norm_clipping"] = 40 # TODO: hp tune
+    config["schedule_max_timesteps"] = 2000000 # TODO: maybe try 5e5, 1e6
+    config["buffer_size"] = 1000000 # TODO: maybe try 1e5, 5e5
+    config["target_network_update_freq"] = 8000 # TODO: this is too small
 
     # save the flow params for replay
     flow_json = json.dumps(
@@ -177,8 +179,8 @@ def setup_exps_rllib(flow_params,
     return alg_run, gym_name, config
 
 
-def train_rllib(submodule, flags):
-    """Train policies using the PPO algorithm in RLlib."""
+def train_DQN(submodule, flags):
+    """Train policies using the DQN algorithm in DQN."""
     import ray
     from ray.tune import run_experiments
@@ -189,7 +191,7 @@ def train_rllib(submodule, flags):
     policy_mapping_fn = getattr(submodule, "policy_mapping_fn", None)
     policies_to_train = getattr(submodule, "policies_to_train", None)
 
-    alg_run, gym_name, config = setup_exps_rllib(
+    alg_run, gym_name, config = setup_exps_dqn(
         flow_params, n_cpus, n_rollouts,
         policy_graphs, policy_mapping_fn, policies_to_train)
@@ -379,24 +381,24 @@ def main(args):
         multiagent = False
     elif hasattr(module_ma, flags.exp_config):
         submodule = getattr(module_ma, flags.exp_config)
-        assert flags.rl_trainer.lower() in ["rllib", "h-baselines"], \
+        assert flags.rl_trainer.lower() in ["dqn", "h-baselines"], \
             "Currently, multiagent experiments are only supported through "\
-            "RLlib. Try running this experiment using RLlib: " \
+            "DQN. Try running this experiment using DQN: " \
             "'python train.py EXP_CONFIG'"
         multiagent = True
     else:
         raise ValueError("Unable to find experiment config.")
 
     # Perform the training operation.
-    if flags.rl_trainer.lower() == "rllib":
-        train_rllib(submodule, flags)
+    if flags.rl_trainer.lower() == "dqn":
+        train_dqn(submodule, flags)
     elif flags.rl_trainer.lower() == "stable-baselines":
         train_stable_baselines(submodule, flags)
     elif flags.rl_trainer.lower() == "h-baselines":
         flow_params = submodule.flow_params
         train_h_baselines(flow_params, args, multiagent)
     else:
-        raise ValueError("rl_trainer should be either 'rllib', 'h-baselines', "
+        raise ValueError("rl_trainer should be either 'dqn', 'h-baselines', "
                          "or 'stable-baselines'.")

diff --git a/flow/envs/multiagent/traffic_light_grid.py b/flow/envs/multiagent/traffic_light_grid.py
index a0438f828..0f4ad7bb3 100644
--- a/flow/envs/multiagent/traffic_light_grid.py
+++ b/flow/envs/multiagent/traffic_light_grid.py
@@ -79,7 +79,7 @@ def observation_space(self):
     def action_space(self):
         """See class definition."""
         if self.discrete:
-            return Discrete(2)
+            return Discrete(3)
         else:
             return Box(
                 low=-1,
@@ -208,7 +208,7 @@ def _apply_rl_actions(self, rl_actions):
         for rl_id, rl_action in rl_actions.items():
             i = int(rl_id.split("center")[ID_IDX])
             if self.discrete:
-                raise NotImplementedError
+                action = rl_action
             else:
                 # convert values less than 0.0 to zero and above to 1. 0's
                 # indicate that we should not switch the direction
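DQN only handles discrete action spaces, which is why this first patch switches the grid environment to discrete mode and fills in the previously unimplemented discrete branch of `_apply_rl_actions`. A minimal sketch of the resulting control flow for one traffic-light agent (the thresholding line in the continuous branch is taken from the surrounding Flow code rather than from this hunk, and the action values shown assume the binary per-agent space that later commits in this series settle on):

    # Illustrative sketch, not part of the patch: how one agent's action is applied.
    if self.discrete:
        action = rl_action        # DQN picks an integer directly, e.g. 0 = keep phase, 1 = switch
    else:
        action = rl_action > 0.0  # continuous policies (e.g. PPO) are thresholded at zero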
From c5c5ed251eaac8294859e2ba5f4f11c971fcdbd3 Mon Sep 17 00:00:00 2001
From: Pengyuan Zhou
Date: Thu, 11 Jun 2020 22:21:10 +0300
Subject: [PATCH 02/44] correct typo

---
 examples/train.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/examples/train.py b/examples/train.py
index 47f2fd93f..632c6c70a 100644
--- a/examples/train.py
+++ b/examples/train.py
@@ -179,7 +179,7 @@ def setup_exps_dqn(flow_params,
     return alg_run, gym_name, config
 
 
-def train_DQN(submodule, flags):
+def train_dqn(submodule, flags):
     """Train policies using the DQN algorithm in DQN."""
     import ray
     from ray.tune import run_experiments

From 74cbd10aa513f959a7fc281c09ddd4de39aac215 Mon Sep 17 00:00:00 2001
From: Pengyuan Zhou
Date: Thu, 11 Jun 2020 22:25:21 +0300
Subject: [PATCH 03/44] rm trailing space

---
 examples/train.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/examples/train.py b/examples/train.py
index 632c6c70a..5ba36f4ff 100644
--- a/examples/train.py
+++ b/examples/train.py
@@ -146,7 +146,7 @@ def setup_exps_dqn(flow_params,
     config["num_workers"] = n_cpus
     config["train_batch_size"] = horizon * n_rollouts
-    config['clip_actions'] = False 
+    config['clip_actions'] = False
     config["horizon"] = horizon
     config["timesteps_per_iteration"] = horizon * n_rollouts

From 0829e9e59222efe7d6071343c80c39a07f00bc6a Mon Sep 17 00:00:00 2001
From: Pengyuan Zhou
Date: Thu, 11 Jun 2020 22:31:48 +0300
Subject: [PATCH 04/44] add reference for dqn parameters setup

---
 examples/train.py | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/examples/train.py b/examples/train.py
index 5ba36f4ff..27266b65a 100644
--- a/examples/train.py
+++ b/examples/train.py
@@ -149,12 +149,12 @@ def setup_exps_dqn(flow_params,
     config['clip_actions'] = False
     config["horizon"] = horizon
     config["timesteps_per_iteration"] = horizon * n_rollouts
-    config["hiddens"] = [512]
-    config["lr"] = 0.0000625 # TODO: hp tune
-    config["grad_norm_clipping"] = 40 # TODO: hp tune
-    config["schedule_max_timesteps"] = 2000000 # TODO: maybe try 5e5, 1e6
-    config["buffer_size"] = 1000000 # TODO: maybe try 1e5, 5e5
-    config["target_network_update_freq"] = 8000 # TODO: this is too small
+    #https://github.com/ray-project/ray/blob/master/rllib/tuned_examples/dqn/atari-dist-dqn.yaml
+    config["hiddens"] = [512] 
+    config["lr"] = 0.0000625
+    config["schedule_max_timesteps"] = 2000000
+    config["buffer_size"] = 1000000
+    config["target_network_update_freq"] = 8000
 
     # save the flow params for replay
     flow_json = json.dumps(
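The retained values follow the tuned Atari distributional-DQN example linked in the new comment. As a rough, self-contained sketch of what this configuration amounts to if handed directly to a DQN trainer (assuming an RLlib release of this era in which `DQNTrainer` and these config keys exist; the horizon, rollout count and environment id below are illustrative placeholders for the values computed inside `setup_exps_dqn`):

    from ray.rllib.agents.dqn import DQNTrainer  # assumed import path for older RLlib

    horizon, n_rollouts = 400, 63       # placeholders; train.py derives these from flow_params
    config = {
        "hiddens": [512],                    # fully connected head on top of the model
        "lr": 0.0000625,
        "schedule_max_timesteps": 2000000,   # exploration annealing horizon
        "buffer_size": 1000000,              # replay buffer capacity
        "target_network_update_freq": 8000,
        "timesteps_per_iteration": horizon * n_rollouts,
        "horizon": horizon,
        "clip_actions": False,
    }
    trainer = DQNTrainer(env="flow_traffic_light_grid-v0", config=config)  # placeholder env id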
From b2628e24cc9a28883f5d03886aade645e2c1c619 Mon Sep 17 00:00:00 2001
From: Pengyuan Zhou
Date: Thu, 11 Jun 2020 22:35:02 +0300
Subject: [PATCH 05/44] Update train.py

---
 examples/train.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/examples/train.py b/examples/train.py
index 27266b65a..307ff2960 100644
--- a/examples/train.py
+++ b/examples/train.py
@@ -99,11 +99,11 @@ def run_model_stablebaseline(flow_params,
 def setup_exps_dqn(flow_params,
-                     n_cpus,
-                     n_rollouts,
-                     policy_graphs=None,
-                     policy_mapping_fn=None,
-                     policies_to_train=None):
+                   n_cpus,
+                   n_rollouts,
+                   policy_graphs=None,
+                   policy_mapping_fn=None,
+                   policies_to_train=None):
     """Return the relevant components of an DQN experiment.
 
     Parameters

From b59728ee84926e79cf2b3410d326a39f6d2a61cd Mon Sep 17 00:00:00 2001
From: Pengyuan Zhou
Date: Thu, 11 Jun 2020 22:35:50 +0300
Subject: [PATCH 06/44] Update train.py

---
 examples/train.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/examples/train.py b/examples/train.py
index 307ff2960..cae58d562 100644
--- a/examples/train.py
+++ b/examples/train.py
@@ -150,7 +150,7 @@ def setup_exps_dqn(flow_params,
     config["horizon"] = horizon
     config["timesteps_per_iteration"] = horizon * n_rollouts
     #https://github.com/ray-project/ray/blob/master/rllib/tuned_examples/dqn/atari-dist-dqn.yaml
-    config["hiddens"] = [512] 
+    config["hiddens"] = [512]
     config["lr"] = 0.0000625
     config["schedule_max_timesteps"] = 2000000
     config["buffer_size"] = 1000000
     config["target_network_update_freq"] = 8000

From d66f1ce149477d6a45e1e80518b7ef64ee5eecb2 Mon Sep 17 00:00:00 2001
From: Pengyuan Zhou
Date: Thu, 11 Jun 2020 22:40:00 +0300
Subject: [PATCH 07/44] Update train.py

---
 examples/train.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/examples/train.py b/examples/train.py
index cae58d562..1f0a1d9f3 100644
--- a/examples/train.py
+++ b/examples/train.py
@@ -149,7 +149,7 @@ def setup_exps_dqn(flow_params,
     config['clip_actions'] = False
     config["horizon"] = horizon
     config["timesteps_per_iteration"] = horizon * n_rollouts
-    #https://github.com/ray-project/ray/blob/master/rllib/tuned_examples/dqn/atari-dist-dqn.yaml
+    # https://github.com/ray-project/ray/blob/master/rllib/tuned_examples/dqn/atari-dist-dqn.yaml
     config["hiddens"] = [512]
     config["lr"] = 0.0000625
     config["schedule_max_timesteps"] = 2000000
     config["buffer_size"] = 1000000
     config["target_network_update_freq"] = 8000

From 6751b56f07a33d51b8a9aeae8ecaf596228d7384 Mon Sep 17 00:00:00 2001
From: Pengyuan Zhou
Date: Thu, 11 Jun 2020 23:03:58 +0300
Subject: [PATCH 08/44] fix import

---
 .../exp_configs/rl/multiagent/multiagent_traffic_light_grid.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/examples/exp_configs/rl/multiagent/multiagent_traffic_light_grid.py b/examples/exp_configs/rl/multiagent/multiagent_traffic_light_grid.py
index 5cfea98b2..a35041bdd 100644
--- a/examples/exp_configs/rl/multiagent/multiagent_traffic_light_grid.py
+++ b/examples/exp_configs/rl/multiagent/multiagent_traffic_light_grid.py
@@ -1,6 +1,6 @@
 """Multi-agent traffic light example (single shared policy)."""
 
-from ray.rllib.agents.ppo.ppo_policy import DQNTFPolicy
+from ray.rllib.agents.dqn.dqn_policy import DQNTFPolicy
 from flow.envs.multiagent import MultiTrafficLightGridPOEnv
 from flow.networks import TrafficLightGridNetwork
 from flow.core.params import SumoParams, EnvParams, InitialConfig, NetParams

From 72f9fca127eb921128aefd26938030e6f942a78a Mon Sep 17 00:00:00 2001
From: Pengyuan Zhou
Date: Thu, 11 Jun 2020 23:31:13 +0300
Subject: [PATCH 09/44] add rllib back to avoid test error

---
 examples/train.py | 129 +++++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 123 insertions(+), 6 deletions(-)

diff --git a/examples/train.py b/examples/train.py
index 1f0a1d9f3..82a53f05a 100644
--- a/examples/train.py
+++ b/examples/train.py
@@ -1,6 +1,6 @@
 """Runner script for single and multi-agent reinforcement learning experiments.
 
-This script performs an RL experiment using the PPO algorithm. Choice of
+This script performs an RL experiment using the DQN algorithm. Choice of
 hyperparameters can be seen and adjusted from the code below.
 
 Usage
@@ -40,8 +40,8 @@ def parse_args(args):
     # optional input parameters
     parser.add_argument(
-        '--rl_trainer', type=str, default="DQN",
-        help='the RL trainer to use. DQN')
+        '--rl_trainer', type=str, default="dqn",
+        help='the RL trainer to use. either dqn or rllib or Stable-Baselines')
     parser.add_argument(
         '--num_cpus', type=int, default=1,
         help='How many CPUs to use')
@@ -98,6 +98,121 @@ def run_model_stablebaseline(flow_params,
     return train_model
 
 
+def setup_exps_rllib(flow_params,
+                     n_cpus,
+                     n_rollouts,
+                     policy_graphs=None,
+                     policy_mapping_fn=None,
+                     policies_to_train=None):
+    """Return the relevant components of an RLlib experiment.
+
+    Parameters
+    ----------
+    flow_params : dict
+        flow-specific parameters (see flow/utils/registry.py)
+    n_cpus : int
+        number of CPUs to run the experiment over
+    n_rollouts : int
+        number of rollouts per training iteration
+    policy_graphs : dict, optional
+        TODO
+    policy_mapping_fn : function, optional
+        TODO
+    policies_to_train : list of str, optional
+        TODO
+
+    Returns
+    -------
+    str
+        name of the training algorithm
+    str
+        name of the gym environment to be trained
+    dict
+        training configuration parameters
+    """
+    from ray import tune
+    from ray.tune.registry import register_env
+    try:
+        from ray.rllib.agents.agent import get_agent_class
+    except ImportError:
+        from ray.rllib.agents.registry import get_agent_class
+
+    horizon = flow_params['env'].horizon
+
+    alg_run = "PPO"
+
+    agent_cls = get_agent_class(alg_run)
+    config = deepcopy(agent_cls._default_config)
+
+    config["num_workers"] = n_cpus
+    config["train_batch_size"] = horizon * n_rollouts
+    config["gamma"] = 0.999 # discount rate
+    config["model"].update({"fcnet_hiddens": [32, 32, 32]})
+    config["use_gae"] = True
+    config["lambda"] = 0.97
+    config["kl_target"] = 0.02
+    config["num_sgd_iter"] = 10
+    config["horizon"] = horizon
+
+    # save the flow params for replay
+    flow_json = json.dumps(
+        flow_params, cls=FlowParamsEncoder, sort_keys=True, indent=4)
+    config['env_config']['flow_params'] = flow_json
+    config['env_config']['run'] = alg_run
+
+    # multiagent configuration
+    if policy_graphs is not None:
+        print("policy_graphs", policy_graphs)
+        config['multiagent'].update({'policies': policy_graphs})
+    if policy_mapping_fn is not None:
+        config['multiagent'].update(
+            {'policy_mapping_fn': tune.function(policy_mapping_fn)})
+    if policies_to_train is not None:
+        config['multiagent'].update({'policies_to_train': policies_to_train})
+
+    create_env, gym_name = make_create_env(params=flow_params)
+
+    # Register as rllib env
+    register_env(gym_name, create_env)
+    return alg_run, gym_name, config
+
+
+def train_rllib(submodule, flags):
+    """Train policies using the PPO algorithm in RLlib."""
+    import ray
+    from ray.tune import run_experiments
+
+    flow_params = submodule.flow_params
+    n_cpus = submodule.N_CPUS
+    n_rollouts = submodule.N_ROLLOUTS
+    policy_graphs = getattr(submodule, "POLICY_GRAPHS", None)
+    policy_mapping_fn = getattr(submodule, "policy_mapping_fn", None)
+    policies_to_train = getattr(submodule, "policies_to_train", None)
+
+    alg_run, gym_name, config = setup_exps_rllib(
+        flow_params, n_cpus, n_rollouts,
+        policy_graphs, policy_mapping_fn, policies_to_train)
+
+    ray.init(num_cpus=n_cpus + 1, object_store_memory=200 * 1024 * 1024)
+    exp_config = {
+        "run": alg_run,
+        "env": gym_name,
+        "config": {
+            **config
+        },
+        "checkpoint_freq": 20,
+        "checkpoint_at_end": True,
+        "max_failures": 999,
+        "stop": {
+            "training_iteration": flags.num_steps,
+        },
+    }
+
+    if flags.checkpoint_path is not None:
+        exp_config['restore'] = flags.checkpoint_path
+    run_experiments({flow_params["exp_tag"]: exp_config})
+
+
 def setup_exps_dqn(flow_params,
                    n_cpus,
                    n_rollouts,
@@ -379,9 +496,9 @@ def main(args):
         multiagent = False
     elif hasattr(module_ma, flags.exp_config):
         submodule = getattr(module_ma, flags.exp_config)
-        assert flags.rl_trainer.lower() in ["dqn", "h-baselines"], \
+        assert flags.rl_trainer.lower() in ["dqn", "rllib", "h-baselines"], \
             "Currently, multiagent experiments are only supported through "\
-            "DQN. Try running this experiment using DQN: " \
+            "DQN or RLlib. Try running this experiment using DQN or RLlib: " \
             "'python train.py EXP_CONFIG'"
         multiagent = True
     else:
         raise ValueError("Unable to find experiment config.")
@@ -392,13 +507,15 @@ def main(args):
     # Perform the training operation.
     if flags.rl_trainer.lower() == "dqn":
         train_dqn(submodule, flags)
+    elif flags.rl_trainer.lower() == "rllib":
+        train_rllib(submodule, flags)
     elif flags.rl_trainer.lower() == "stable-baselines":
         train_stable_baselines(submodule, flags)
     elif flags.rl_trainer.lower() == "h-baselines":
         flow_params = submodule.flow_params
         train_h_baselines(flow_params, args, multiagent)
     else:
-        raise ValueError("rl_trainer should be either 'dqn', 'h-baselines', "
+        raise ValueError("rl_trainer should be either 'dqn', 'rllib', 'h-baselines', "
                          "or 'stable-baselines'.")

From 6751b56f07a33d51b8a9aeae8ecaf596228d7384 Mon Sep 17 00:00:00 2001
From: Pengyuan Zhou
Date: Fri, 12 Jun 2020 00:13:46 +0300
Subject: [PATCH 10/44] change default test to dqn

---
 tests/fast_tests/test_examples.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/tests/fast_tests/test_examples.py b/tests/fast_tests/test_examples.py
index 0b385f28a..bb5b999f4 100644
--- a/tests/fast_tests/test_examples.py
+++ b/tests/fast_tests/test_examples.py
@@ -30,6 +30,7 @@
 from examples.train import parse_args as parse_train_args
 from examples.train import run_model_stablebaseline as run_stable_baselines_model
 from examples.train import setup_exps_rllib as setup_rllib_exps
+from examples.train import setup_exps_dqn as setup_dqn_exps
 from examples.train import train_h_baselines
 from examples.exp_configs.non_rl.bay_bridge import flow_params as non_rl_bay_bridge
@@ -168,7 +169,7 @@ def test_parse_args(self):
         self.assertDictEqual(vars(args), {
             'exp_config': 'exp_config',
-            'rl_trainer': 'rllib',
+            'rl_trainer': 'dqn',
             'num_cpus': 1,
             'num_steps': 5000,
             'rollout_size': 1000,

From 4c3d9d4f345b5930bae9609dcfb6bccd862e0024 Mon Sep 17 00:00:00 2001
From: Pengyuan Zhou
Date: Fri, 12 Jun 2020 00:29:18 +0300
Subject: [PATCH 11/44] add TestDQNExamples for traffic light grid examples

---
 tests/fast_tests/test_examples.py | 57 +++++++++++++++++++++++++++++++
 1 file changed, 57 insertions(+)

diff --git a/tests/fast_tests/test_examples.py b/tests/fast_tests/test_examples.py
index bb5b999f4..444463299 100644
--- a/tests/fast_tests/test_examples.py
+++ b/tests/fast_tests/test_examples.py
@@ -435,6 +435,63 @@ def run_exp(flow_params, **kwargs):
         })
 
 
+class TestDQNExamples(unittest.TestCase):
+    """Tests the example traffic light scripts in examples/exp_configs/rl/singleagent and
+    examples/exp_configs/rl/multiagent for DQN.
+
+    This is done by running each experiment in that folder for five time-steps
+    and confirming that it completes one rollout with two workers.
+    # FIXME(ev) this test adds several minutes to the testing scheme
+    """
+    def setUp(self):
+        if not ray.is_initialized():
+            ray.init(num_cpus=1)
+
+    def test_singleagent_traffic_light_grid(self):
+        self.run_exp(singleagent_traffic_light_grid)
+
+    def test_multi_traffic_light_grid(self):
+        from examples.exp_configs.rl.multiagent.multiagent_traffic_light_grid import POLICY_GRAPHS as mtlpg
+        from examples.exp_configs.rl.multiagent.multiagent_traffic_light_grid import POLICIES_TO_TRAIN as mtlpt
+        from examples.exp_configs.rl.multiagent.multiagent_traffic_light_grid import policy_mapping_fn as mtlpmf
+
+        kwargs = {
+            "policy_graphs": mtlpg,
+            "policies_to_train": mtlpt,
+            "policy_mapping_fn": mtlpmf
+        }
+        self.run_exp(multiagent_traffic_light_grid, **kwargs)
+
+    @staticmethod
+    def run_exp(flow_params, **kwargs):
+        alg_run, env_name, config = setup_dqn_exps(flow_params, 1, 1, **kwargs)
+
+        try:
+            ray.init(num_cpus=1)
+        except Exception as e:
+            print("ERROR", e)
+        config['train_batch_size'] = 50
+        config['horizon'] = 50
+        config['sample_batch_size'] = 50
+        config['num_workers'] = 0
+        config['sgd_minibatch_size'] = 32
+
+        run_experiments({
+            'test': {
+                'run': alg_run,
+                'env': env_name,
+                'config': {
+                    **config
+                },
+
+                'checkpoint_freq': 1,
+                'stop': {
+                    'training_iteration': 1,
+                },
+            }
+        })
+
+
 if __name__ == '__main__':
     try:
         ray.init(num_cpus=1)

From 3bfb9e62d807fd13720dfcedaae69f00f3ac5b4b Mon Sep 17 00:00:00 2001
From: Pengyuan Zhou
Date: Fri, 12 Jun 2020 01:04:00 +0300
Subject: [PATCH 12/44] rm light grid test for PPO

---
 tests/fast_tests/test_examples.py | 26 +++++++++++++-------------
 1 file changed, 13 insertions(+), 13 deletions(-)

diff --git a/tests/fast_tests/test_examples.py b/tests/fast_tests/test_examples.py
index 444463299..ffa10c083 100644
--- a/tests/fast_tests/test_examples.py
+++ b/tests/fast_tests/test_examples.py
@@ -263,8 +263,8 @@ def setUp(self):
     def test_singleagent_figure_eight(self):
         self.run_exp(singleagent_figure_eight)
 
-    def test_singleagent_traffic_light_grid(self):
-        self.run_exp(singleagent_traffic_light_grid)
+    # def test_singleagent_traffic_light_grid(self):
+    #     self.run_exp(singleagent_traffic_light_grid)
 
     def test_singleagent_traffic_light_grid_inflows(self):
         pass  # FIXME
@@ -330,17 +330,17 @@ def test_multiagent_merge(self):
         }
         self.run_exp(multiagent_merge, **kwargs)
 
-    def test_multi_traffic_light_grid(self):
-        from examples.exp_configs.rl.multiagent.multiagent_traffic_light_grid import POLICY_GRAPHS as mtlpg
-        from examples.exp_configs.rl.multiagent.multiagent_traffic_light_grid import POLICIES_TO_TRAIN as mtlpt
-        from examples.exp_configs.rl.multiagent.multiagent_traffic_light_grid import policy_mapping_fn as mtlpmf
-
-        kwargs = {
-            "policy_graphs": mtlpg,
-            "policies_to_train": mtlpt,
-            "policy_mapping_fn": mtlpmf
-        }
-        self.run_exp(multiagent_traffic_light_grid, **kwargs)
+    # def test_multi_traffic_light_grid(self):
+    #     from examples.exp_configs.rl.multiagent.multiagent_traffic_light_grid import POLICY_GRAPHS as mtlpg
+    #     from examples.exp_configs.rl.multiagent.multiagent_traffic_light_grid import POLICIES_TO_TRAIN as mtlpt
+    #     from examples.exp_configs.rl.multiagent.multiagent_traffic_light_grid import policy_mapping_fn as mtlpmf
+    #     kwargs = {
+    #         "policy_graphs": mtlpg,
+    #         "policies_to_train": mtlpt,
+    #         "policy_mapping_fn": mtlpmf
+    #     }
+    #     self.run_exp(multiagent_traffic_light_grid, **kwargs)
 
     def test_multi_highway(self):
         from examples.exp_configs.rl.multiagent.multiagent_highway import POLICY_GRAPHS as mhpg

From c6b2fd1bd42a82c16fc6b30edb99d9e15368994f Mon Sep 17 00:00:00 2001
From: Pengyuan Zhou
Date: Fri, 12 Jun 2020 01:11:12 +0300
Subject: [PATCH 13/44] Update test_examples.py

---
 tests/fast_tests/test_examples.py | 15 ---------------
 1 file changed, 15 deletions(-)

diff --git a/tests/fast_tests/test_examples.py b/tests/fast_tests/test_examples.py
index ffa10c083..8f5df6a22 100644
--- a/tests/fast_tests/test_examples.py
+++ b/tests/fast_tests/test_examples.py
@@ -263,9 +263,6 @@ def setUp(self):
     def test_singleagent_figure_eight(self):
         self.run_exp(singleagent_figure_eight)
 
-    # def test_singleagent_traffic_light_grid(self):
-    #     self.run_exp(singleagent_traffic_light_grid)
-
     def test_singleagent_traffic_light_grid_inflows(self):
         pass  # FIXME
@@ -327,18 +327,6 @@ def test_multiagent_merge(self):
         }
         self.run_exp(multiagent_merge, **kwargs)
 
-    # def test_multi_traffic_light_grid(self):
-    #     from examples.exp_configs.rl.multiagent.multiagent_traffic_light_grid import POLICY_GRAPHS as mtlpg
-    #     from examples.exp_configs.rl.multiagent.multiagent_traffic_light_grid import POLICIES_TO_TRAIN as mtlpt
-    #     from examples.exp_configs.rl.multiagent.multiagent_traffic_light_grid import policy_mapping_fn as mtlpmf
-
-    #     kwargs = {
-    #         "policy_graphs": mtlpg,
-    #         "policies_to_train": mtlpt,
-    #         "policy_mapping_fn": mtlpmf
-    #     }
-    #     self.run_exp(multiagent_traffic_light_grid, **kwargs)
-
     def test_multi_highway(self):
         from examples.exp_configs.rl.multiagent.multiagent_highway import POLICY_GRAPHS as mhpg
         from examples.exp_configs.rl.multiagent.multiagent_highway import POLICIES_TO_TRAIN as mhpt

From 310084193cab43ce586940577f1e43767230fcd9 Mon Sep 17 00:00:00 2001
From: Pengyuan Zhou
Date: Fri, 12 Jun 2020 01:47:00 +0300
Subject: [PATCH 14/44] trial

---
 flow/envs/multiagent/traffic_light_grid.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/flow/envs/multiagent/traffic_light_grid.py b/flow/envs/multiagent/traffic_light_grid.py
index 0f4ad7bb3..9cf3161b9 100644
--- a/flow/envs/multiagent/traffic_light_grid.py
+++ b/flow/envs/multiagent/traffic_light_grid.py
@@ -79,7 +79,7 @@ def observation_space(self):
     def action_space(self):
         """See class definition."""
         if self.discrete:
-            return Discrete(3)
+            return Discrete(2)
         else:
             return Box(
                 low=-1,

From f74eb50f551d4f360e41de596068ac80d40e928d Mon Sep 17 00:00:00 2001
From: Pengyuan Zhou
Date: Fri, 12 Jun 2020 10:06:07 +0300
Subject: [PATCH 15/44] trial2

---
 flow/envs/multiagent/traffic_light_grid.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/flow/envs/multiagent/traffic_light_grid.py b/flow/envs/multiagent/traffic_light_grid.py
index 9cf3161b9..3731ab5d8 100644
--- a/flow/envs/multiagent/traffic_light_grid.py
+++ b/flow/envs/multiagent/traffic_light_grid.py
@@ -79,7 +79,7 @@ def observation_space(self):
     def action_space(self):
         """See class definition."""
         if self.discrete:
-            return Discrete(2)
+            return Discrete(2 ** self.num_traffic_lights)
         else:
             return Box(
                 low=-1,

From 1cd579da9389b9e2fa697152f89ddece75ecf618 Mon Sep 17 00:00:00 2001
From: Pengyuan Zhou
Date: Fri, 12 Jun 2020 10:28:01 +0300
Subject: [PATCH 16/44] pass ignore_reinit_error=True

---
 examples/train.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/examples/train.py b/examples/train.py
index 82a53f05a..c82c4b497 100644
--- a/examples/train.py
+++ b/examples/train.py
@@ -193,7 +193,7 @@ def train_rllib(submodule, flags):
         flow_params, n_cpus, n_rollouts,
         policy_graphs, policy_mapping_fn, policies_to_train)
 
-    ray.init(num_cpus=n_cpus + 1, object_store_memory=200 * 1024 * 1024)
+    ray.init(num_cpus=n_cpus + 1, ignore_reinit_error=True, object_store_memory=200 * 1024 * 1024)
     exp_config = {
         "run": alg_run,
         "env": gym_name,
@@ -310,7 +310,7 @@ def train_dqn(submodule, flags):
         flow_params, n_cpus, n_rollouts,
         policy_graphs, policy_mapping_fn, policies_to_train)
 
-    ray.init(num_cpus=n_cpus + 1, object_store_memory=200 * 1024 * 1024)
+    ray.init(num_cpus=n_cpus + 1, ignore_reinit_error=True, object_store_memory=200 * 1024 * 1024)
     exp_config = {
         "run": alg_run,
         "env": gym_name,

From 81a203efba7d4e0f0f0f56b99038562cddee6f39 Mon Sep 17 00:00:00 2001
From: Pengyuan Zhou
Date: Fri, 12 Jun 2020 10:52:23 +0300
Subject: [PATCH 17/44] pass ray.shutdown() before ray.init

---
 examples/train.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/examples/train.py b/examples/train.py
index c82c4b497..58cd240c1 100644
--- a/examples/train.py
+++ b/examples/train.py
@@ -193,6 +193,7 @@ def train_rllib(submodule, flags):
         flow_params, n_cpus, n_rollouts,
         policy_graphs, policy_mapping_fn, policies_to_train)
 
+    ray.shutdown()
     ray.init(num_cpus=n_cpus + 1, ignore_reinit_error=True, object_store_memory=200 * 1024 * 1024)
     exp_config = {
         "run": alg_run,
@@ -310,6 +311,7 @@ def train_dqn(submodule, flags):
         flow_params, n_cpus, n_rollouts,
         policy_graphs, policy_mapping_fn, policies_to_train)
 
+    ray.shutdown()
    ray.init(num_cpus=n_cpus + 1, ignore_reinit_error=True, object_store_memory=200 * 1024 * 1024)
     exp_config = {
         "run": alg_run,
         "env": gym_name,

From e77358247966f5718a659778abbc085ce820a7cf Mon Sep 17 00:00:00 2001
From: Pengyuan Zhou
Date: Fri, 12 Jun 2020 13:55:11 +0300
Subject: [PATCH 18/44] Update traffic_light_grid.py

---
 flow/envs/multiagent/traffic_light_grid.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/flow/envs/multiagent/traffic_light_grid.py b/flow/envs/multiagent/traffic_light_grid.py
index 3731ab5d8..9cf3161b9 100644
--- a/flow/envs/multiagent/traffic_light_grid.py
+++ b/flow/envs/multiagent/traffic_light_grid.py
@@ -79,7 +79,7 @@ def observation_space(self):
     def action_space(self):
         """See class definition."""
         if self.discrete:
-            return Discrete(2 ** self.num_traffic_lights)
+            return Discrete(2)
         else:
             return Box(
                 low=-1,
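The action-space experiments above end where they started: each traffic-light agent keeps a binary action. For contrast, a small self-contained sketch of the two encodings involved (the exponential form mirrors the centralized single-agent grid environment, where one integer encodes a joint decision for every light; the grid size below is illustrative only):

    from gym.spaces import Discrete

    num_traffic_lights = 9                            # e.g. a 3x3 grid, illustrative only
    joint_space = Discrete(2 ** num_traffic_lights)   # centralized: one action covers all lights
    per_agent_space = Discrete(2)                     # multi-agent: each light either switches or holds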
From eb12328004ec87bf26a702e55d6f741eb41cab53 Mon Sep 17 00:00:00 2001
From: Pengyuan Zhou
Date: Mon, 22 Jun 2020 12:20:38 +0300
Subject: [PATCH 19/44] update

---
 .../multiagent_traffic_light_grid.py                    | 2 +-
 .../rl/singleagent/singleagent_traffic_light_grid.py    | 3 ++-
 flow/envs/traffic_light_grid.py                         | 2 +-
 3 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/examples/exp_configs/rl/multiagent/multiagent_traffic_light_grid.py b/examples/exp_configs/rl/multiagent/multiagent_traffic_light_grid.py
index a35041bdd..256198227 100644
--- a/examples/exp_configs/rl/multiagent/multiagent_traffic_light_grid.py
+++ b/examples/exp_configs/rl/multiagent/multiagent_traffic_light_grid.py
@@ -8,7 +8,7 @@
 from flow.controllers import SimCarFollowingController, GridRouter
 from ray.tune.registry import register_env
 from flow.utils.registry import make_create_env
-
+from flow.core.params import TrafficLightParams
 # Experiment parameters
 N_ROLLOUTS = 63  # number of rollouts per training iteration
 N_CPUS = 63  # number of parallel workers
diff --git a/examples/exp_configs/rl/singleagent/singleagent_traffic_light_grid.py b/examples/exp_configs/rl/singleagent/singleagent_traffic_light_grid.py
index 085d26be9..70f340661 100644
--- a/examples/exp_configs/rl/singleagent/singleagent_traffic_light_grid.py
+++ b/examples/exp_configs/rl/singleagent/singleagent_traffic_light_grid.py
@@ -145,7 +145,8 @@ def get_non_flow_params(enter_speed, add_net_params):
     'switch_time': 3.0,
     'num_observed': 2,
     'discrete': False,
-    'tl_type': 'controlled'
+    #'tl_type': 'controlled'
+    'tl_type': 'actuated'
 }
 
 additional_net_params = {
diff --git a/flow/envs/traffic_light_grid.py b/flow/envs/traffic_light_grid.py
index 8be0cb8a5..24f813ea7 100644
--- a/flow/envs/traffic_light_grid.py
+++ b/flow/envs/traffic_light_grid.py
@@ -19,7 +19,7 @@
     "switch_time": 2.0,
     # whether the traffic lights should be actuated by sumo or RL
     # options are "controlled" and "actuated"
-    "tl_type": "controlled",
+    "tl_type": "actuated",
     # determines whether the action space is meant to be discrete or continuous
     "discrete": False,
 }

From 97f75bba5894f8d575e960eebe8d8c0975965d52 Mon Sep 17 00:00:00 2001
From: Pengyuan Zhou
Date: Mon, 22 Jun 2020 13:00:26 +0300
Subject: [PATCH 20/44] update

---
 .../multiagent_traffic_light_grid.py        |   9 +-
 .../singleagent_traffic_light_grid.py       |   1 -
 examples/train.py                           | 174 ++++--------------
 3 files changed, 41 insertions(+), 143 deletions(-)

diff --git a/examples/exp_configs/rl/multiagent/multiagent_traffic_light_grid.py b/examples/exp_configs/rl/multiagent/multiagent_traffic_light_grid.py
index 256198227..636c7dfb1 100644
--- a/examples/exp_configs/rl/multiagent/multiagent_traffic_light_grid.py
+++ b/examples/exp_configs/rl/multiagent/multiagent_traffic_light_grid.py
@@ -1,5 +1,6 @@
 """Multi-agent traffic light example (single shared policy)."""
 
+from ray.rllib.agents.ppo.ppo_policy import PPOTFPolicy
 from ray.rllib.agents.dqn.dqn_policy import DQNTFPolicy
 from flow.envs.multiagent import MultiTrafficLightGridPOEnv
 from flow.networks import TrafficLightGridNetwork
 from flow.core.params import SumoParams, EnvParams, InitialConfig, NetParams
@@ -88,7 +89,7 @@
         "target_velocity": 50,
         "switch_time": 3,
         "num_observed": 2,
-        "discrete": True,
+        "discrete": True,  # set False for DQN
         "tl_type": "actuated",
         "num_local_edges": 4,
         "num_local_lights": 4,
@@ -140,8 +141,8 @@
 def gen_policy():
-    """Generate a policy in DQN."""
-    return DQNTFPolicy, obs_space, act_space, {}
+    """Generate a policy in RLlib."""
+    return PPOTFPolicy, obs_space, act_space, {}
 
 
 # Setup PG with a single policy graph for all agents
@@ -149,7 +150,7 @@ def gen_policy():
 def policy_mapping_fn(_):
-    """Map a policy in DQN."""
+    """Map a policy in RLlib."""
     return 'av'

diff --git a/examples/exp_configs/rl/singleagent/singleagent_traffic_light_grid.py b/examples/exp_configs/rl/singleagent/singleagent_traffic_light_grid.py
index 70f340661..aaf97b02e 100644
--- a/examples/exp_configs/rl/singleagent/singleagent_traffic_light_grid.py
+++ b/examples/exp_configs/rl/singleagent/singleagent_traffic_light_grid.py
@@ -145,7 +145,6 @@ def get_non_flow_params(enter_speed, add_net_params):
     'switch_time': 3.0,
     'num_observed': 2,
     'discrete': False,
-    #'tl_type': 'controlled'
     'tl_type': 'actuated'
 }

diff --git a/examples/train.py b/examples/train.py
index 58cd240c1..9f5211d5c 100644
--- a/examples/train.py
+++ b/examples/train.py
@@ -1,6 +1,6 @@
 """Runner script for single and multi-agent reinforcement learning experiments.
 
-This script performs an RL experiment using the DQN algorithm. Choice of
+This script performs an RL experiment using the PPO algorithm. Choice of
 hyperparameters can be seen and adjusted from the code below.
 
 Usage
@@ -40,9 +40,13 @@ def parse_args(args):
     # optional input parameters
     parser.add_argument(
-        '--rl_trainer', type=str, default="dqn",
-        help='the RL trainer to use. either dqn or rllib or Stable-Baselines')
-
+        '--rl_trainer', type=str, default="rllib",
+        help='the RL trainer to use. either rllib or Stable-Baselines')
+    parser.add_argument(
+        '--algorithm', type=str, default="PPO",
+        help='RL algorithm to use. Options are PPO and DQN '
+             ' right now.'
+    )
     parser.add_argument(
         '--num_cpus', type=int, default=1,
         help='How many CPUs to use')
@@ -101,9 +105,11 @@ def run_model_stablebaseline(flow_params,
 def setup_exps_rllib(flow_params,
                      n_cpus,
                      n_rollouts,
+                     flags,
                      policy_graphs=None,
                      policy_mapping_fn=None,
-                     policies_to_train=None):
+                     policies_to_train=None,
+                     ):
     """Return the relevant components of an RLlib experiment.
 
     Parameters
@@ -114,6 +120,8 @@ def setup_exps_rllib(flow_params,
         number of CPUs to run the experiment over
     n_rollouts : int
         number of rollouts per training iteration
+    flags:
+        custom arguments
     policy_graphs : dict, optional
         TODO
     policy_mapping_fn : function, optional
@@ -139,20 +147,32 @@ def setup_exps_rllib(flow_params,
     horizon = flow_params['env'].horizon
 
-    alg_run = "PPO"
+    alg_run = flags.algorithm.upper()
 
-    agent_cls = get_agent_class(alg_run)
-    config = deepcopy(agent_cls._default_config)
-
-    config["num_workers"] = n_cpus
-    config["train_batch_size"] = horizon * n_rollouts
-    config["gamma"] = 0.999  # discount rate
-    config["model"].update({"fcnet_hiddens": [32, 32, 32]})
-    config["use_gae"] = True
-    config["lambda"] = 0.97
-    config["kl_target"] = 0.02
-    config["num_sgd_iter"] = 10
-    config["horizon"] = horizon
+    if alg_run == "PPO":
+
+        config["gamma"] = 0.999  # discount rate
+        config["model"].update({"fcnet_hiddens": [32, 32, 32]})
+        config["use_gae"] = True
+        config["lambda"] = 0.97
+        config["kl_target"] = 0.02
+        config["num_sgd_iter"] = 10
+    elif alg_run == "DQN":
+        config['clip_actions'] = False
+        config["timesteps_per_iteration"] = horizon * n_rollouts
+        # https://github.com/ray-project/ray/blob/master/rllib/tuned_examples/dqn/atari-dist-dqn.yaml
+        config["hiddens"] = [512]
+        config["lr"] = 0.0000625
+        config["schedule_max_timesteps"] = 2000000
+        config["buffer_size"] = 1000000
+        config["target_network_update_freq"] = 8000
+
+    config["num_workers"] = n_cpus
+    config["train_batch_size"] = horizon * n_rollouts
+    config["horizon"] = horizon
 
     # save the flow params for replay
     flow_json = json.dumps(
@@ -177,8 +179,8 @@
-def train_rllib(submodule, flags):
-    """Train policies using the PPO algorithm in RLlib."""
+def train_rllib(submodule, flags):
+    """Train policies using the PPO algorithm in RLlib."""
     import ray
     from ray.tune import run_experiments
 
     flow_params = submodule.flow_params
     n_cpus = submodule.N_CPUS
     n_rollouts = submodule.N_ROLLOUTS
     policy_graphs = getattr(submodule, "POLICY_GRAPHS", None)
     policy_mapping_fn = getattr(submodule, "policy_mapping_fn", None)
     policies_to_train = getattr(submodule, "policies_to_train", None)
 
     alg_run, gym_name, config = setup_exps_rllib(
         flow_params, n_cpus, n_rollouts,
         policy_graphs, policy_mapping_fn, policies_to_train)
 
-    ray.shutdown()
-    ray.init(num_cpus=n_cpus + 1, ignore_reinit_error=True, object_store_memory=200 * 1024 * 1024)
-    exp_config = {
-        "run": alg_run,
-        "env": gym_name,
-        "config": {
-            **config
-        },
-        "checkpoint_freq": 20,
-        "checkpoint_at_end": True,
-        "max_failures": 999,
-        "stop": {
-            "training_iteration": flags.num_steps,
-        },
-    }
-
-    if flags.checkpoint_path is not None:
-        exp_config['restore'] = flags.checkpoint_path
-    run_experiments({flow_params["exp_tag"]: exp_config})
-
-
-def setup_exps_dqn(flow_params,
-                   n_cpus,
-                   n_rollouts,
-                   policy_graphs=None,
-                   policy_mapping_fn=None,
-                   policies_to_train=None):
-    """Return the relevant components of an DQN experiment.
-
-    Parameters
-    ----------
-    flow_params : dict
-        flow-specific parameters (see flow/utils/registry.py)
-    n_cpus : int
-        number of CPUs to run the experiment over
-    n_rollouts : int
-        number of rollouts per training iteration
-    policy_graphs : dict, optional
-        TODO
-    policy_mapping_fn : function, optional
-        TODO
-    policies_to_train : list of str, optional
-        TODO
-
-    Returns
-    -------
-    str
-        name of the training algorithm
-    str
-        name of the gym environment to be trained
-    dict
-        training configuration parameters
-    """
-    from ray import tune
-    from ray.tune.registry import register_env
-    try:
-        from ray.rllib.agents.agent import get_agent_class
-    except ImportError:
-        from ray.rllib.agents.registry import get_agent_class
-
-    horizon = flow_params['env'].horizon
-
-    alg_run = "DQN"
-
-    agent_cls = get_agent_class(alg_run)
-    config = deepcopy(agent_cls._default_config)
-
-    config["num_workers"] = n_cpus
-    config["train_batch_size"] = horizon * n_rollouts
-    config['clip_actions'] = False
-    config["horizon"] = horizon
-    config["timesteps_per_iteration"] = horizon * n_rollouts
-    # https://github.com/ray-project/ray/blob/master/rllib/tuned_examples/dqn/atari-dist-dqn.yaml
-    config["hiddens"] = [512]
-    config["lr"] = 0.0000625
-    config["schedule_max_timesteps"] = 2000000
-    config["buffer_size"] = 1000000
-    config["target_network_update_freq"] = 8000
-
-    # save the flow params for replay
-    flow_json = json.dumps(
-        flow_params, cls=FlowParamsEncoder, sort_keys=True, indent=4)
-    config['env_config']['flow_params'] = flow_json
-    config['env_config']['run'] = alg_run
-
-    # multiagent configuration
-    if policy_graphs is not None:
-        print("policy_graphs", policy_graphs)
-        config['multiagent'].update({'policies': policy_graphs})
-    if policy_mapping_fn is not None:
-        config['multiagent'].update(
-            {'policy_mapping_fn': tune.function(policy_mapping_fn)})
-    if policies_to_train is not None:
-        config['multiagent'].update({'policies_to_train': policies_to_train})
-
-    create_env, gym_name = make_create_env(params=flow_params)
-
-    # Register as rllib env
-    register_env(gym_name, create_env)
-    return alg_run, gym_name, config
-
-
-def train_dqn(submodule, flags):
-    """Train policies using the DQN algorithm in DQN."""
-    import ray
-    from ray.tune import run_experiments
-
-    flow_params = submodule.flow_params
-    n_cpus = submodule.N_CPUS
-    n_rollouts = submodule.N_ROLLOUTS
-    policy_graphs = getattr(submodule, "POLICY_GRAPHS", None)
-    policy_mapping_fn = getattr(submodule, "policy_mapping_fn", None)
-    policies_to_train = getattr(submodule, "policies_to_train", None)
-
-    alg_run, gym_name, config = setup_exps_dqn(
-        flow_params, n_cpus, n_rollouts,
-        policy_graphs, policy_mapping_fn, policies_to_train)
-
-    ray.shutdown()
     ray.init(num_cpus=n_cpus + 1, ignore_reinit_error=True, object_store_memory=200 * 1024 * 1024)
     exp_config = {
         "run": alg_run,
         "env": gym_name,
         "config": {
             **config
         },
         "checkpoint_freq": 20,
         "checkpoint_at_end": True,
         "max_failures": 999,
         "stop": {
             "training_iteration": flags.num_steps,
         },
     }
 
     if flags.checkpoint_path is not None:
         exp_config['restore'] = flags.checkpoint_path
     run_experiments({flow_params["exp_tag"]: exp_config})
@@ -398,16 +398,14 @@ def main(args):
         multiagent = False
     elif hasattr(module_ma, flags.exp_config):
         submodule = getattr(module_ma, flags.exp_config)
-        assert flags.rl_trainer.lower() in ["dqn", "rllib", "h-baselines"], \
+        assert flags.rl_trainer.lower() in ["rllib", "h-baselines"], \
             "Currently, multiagent experiments are only supported through "\
-            "DQN or RLlib. Try running this experiment using DQN or RLlib: " \
+            "RLlib. Try running this experiment using RLlib: " \
             "'python train.py EXP_CONFIG'"
         multiagent = True
     else:
         raise ValueError("Unable to find experiment config.")
 
     # Perform the training operation.
-    if flags.rl_trainer.lower() == "dqn":
-        train_dqn(submodule, flags)
-    elif flags.rl_trainer.lower() == "rllib":
+    if flags.rl_trainer.lower() == "rllib":
         train_rllib(submodule, flags)
     elif flags.rl_trainer.lower() == "stable-baselines":
         train_stable_baselines(submodule, flags)
     elif flags.rl_trainer.lower() == "h-baselines":
         flow_params = submodule.flow_params
         train_h_baselines(flow_params, args, multiagent)
     else:
-        raise ValueError("rl_trainer should be either 'dqn', 'rllib', 'h-baselines', "
+        raise ValueError("rl_trainer should be either 'rllib', 'h-baselines', "
                          "or 'stable-baselines'.")
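With the DQN settings folded into `setup_exps_rllib` behind the new `--algorithm` flag, a DQN run of the traffic-light-grid examples goes through the unchanged entry point. For instance (flag names as defined in `parse_args` above, and with `discrete` set to True in the chosen exp config, as its inline comment notes):

    python examples/train.py singleagent_traffic_light_grid --rl_trainer rllib --algorithm DQN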
From e7c7ea0e7848a884fa5f964d735aaf7d95671eab Mon Sep 17 00:00:00 2001
From: Pengyuan Zhou
Date: Mon, 22 Jun 2020 13:09:14 +0300
Subject: [PATCH 21/44] update

---
 examples/train.py                 |  4 +-
 tests/fast_tests/test_examples.py | 75 +++++++------------------------
 2 files changed, 17 insertions(+), 62 deletions(-)

diff --git a/examples/train.py b/examples/train.py
index 9f5211d5c..20dc00a82 100644
--- a/examples/train.py
+++ b/examples/train.py
@@ -44,9 +44,7 @@ def parse_args(args):
         help='the RL trainer to use. either rllib or Stable-Baselines')
     parser.add_argument(
         '--algorithm', type=str, default="PPO",
-        help='RL algorithm to use. Options are PPO and DQN '
-             ' right now.'
-    )
+        help='RL algorithm to use. Options are PPO and DQN right now.')
     parser.add_argument(
         '--num_cpus', type=int, default=1,
         help='How many CPUs to use')
diff --git a/tests/fast_tests/test_examples.py b/tests/fast_tests/test_examples.py
index 8f5df6a22..0b385f28a 100644
--- a/tests/fast_tests/test_examples.py
+++ b/tests/fast_tests/test_examples.py
@@ -30,7 +30,6 @@
 from examples.train import parse_args as parse_train_args
 from examples.train import run_model_stablebaseline as run_stable_baselines_model
 from examples.train import setup_exps_rllib as setup_rllib_exps
-from examples.train import setup_exps_dqn as setup_dqn_exps
 from examples.train import train_h_baselines
 from examples.exp_configs.non_rl.bay_bridge import flow_params as non_rl_bay_bridge
@@ -168,7 +168,7 @@ def test_parse_args(self):
         self.assertDictEqual(vars(args), {
             'exp_config': 'exp_config',
-            'rl_trainer': 'dqn',
+            'rl_trainer': 'rllib',
             'num_cpus': 1,
             'num_steps': 5000,
             'rollout_size': 1000,
@@ -262,6 +262,9 @@ def setUp(self):
     def test_singleagent_figure_eight(self):
         self.run_exp(singleagent_figure_eight)
 
+    def test_singleagent_traffic_light_grid(self):
+        self.run_exp(singleagent_traffic_light_grid)
+
     def test_singleagent_traffic_light_grid_inflows(self):
         pass  # FIXME
@@ -327,6 +330,18 @@ def test_multiagent_merge(self):
         }
         self.run_exp(multiagent_merge, **kwargs)
 
+    def test_multi_traffic_light_grid(self):
+        from examples.exp_configs.rl.multiagent.multiagent_traffic_light_grid import POLICY_GRAPHS as mtlpg
+        from examples.exp_configs.rl.multiagent.multiagent_traffic_light_grid import POLICIES_TO_TRAIN as mtlpt
+        from examples.exp_configs.rl.multiagent.multiagent_traffic_light_grid import policy_mapping_fn as mtlpmf
+
+        kwargs = {
+            "policy_graphs": mtlpg,
+            "policies_to_train": mtlpt,
+            "policy_mapping_fn": mtlpmf
+        }
+        self.run_exp(multiagent_traffic_light_grid, **kwargs)
+
     def test_multi_highway(self):
         from examples.exp_configs.rl.multiagent.multiagent_highway import POLICY_GRAPHS as mhpg
         from examples.exp_configs.rl.multiagent.multiagent_highway import POLICIES_TO_TRAIN as mhpt
@@ -420,63 +434,6 @@ def run_exp(flow_params, **kwargs):
         })
 
 
-class TestDQNExamples(unittest.TestCase):
-    """Tests the example traffic light scripts in examples/exp_configs/rl/singleagent and
-    examples/exp_configs/rl/multiagent for DQN.
-
-    This is done by running each experiment in that folder for five time-steps
-    and confirming that it completes one rollout with two workers.
-    # FIXME(ev) this test adds several minutes to the testing scheme
-    """
-    def setUp(self):
-        if not ray.is_initialized():
-            ray.init(num_cpus=1)
-
-    def test_singleagent_traffic_light_grid(self):
-        self.run_exp(singleagent_traffic_light_grid)
-
-    def test_multi_traffic_light_grid(self):
-        from examples.exp_configs.rl.multiagent.multiagent_traffic_light_grid import POLICY_GRAPHS as mtlpg
-        from examples.exp_configs.rl.multiagent.multiagent_traffic_light_grid import POLICIES_TO_TRAIN as mtlpt
-        from examples.exp_configs.rl.multiagent.multiagent_traffic_light_grid import policy_mapping_fn as mtlpmf
-
-        kwargs = {
-            "policy_graphs": mtlpg,
-            "policies_to_train": mtlpt,
-            "policy_mapping_fn": mtlpmf
-        }
-        self.run_exp(multiagent_traffic_light_grid, **kwargs)
-
-    @staticmethod
-    def run_exp(flow_params, **kwargs):
-        alg_run, env_name, config = setup_dqn_exps(flow_params, 1, 1, **kwargs)
-
-        try:
-            ray.init(num_cpus=1)
-        except Exception as e:
-            print("ERROR", e)
-        config['train_batch_size'] = 50
-        config['horizon'] = 50
-        config['sample_batch_size'] = 50
-        config['num_workers'] = 0
-        config['sgd_minibatch_size'] = 32
-
-        run_experiments({
-            'test': {
-                'run': alg_run,
-                'env': env_name,
-                'config': {
-                    **config
-                },
-
-                'checkpoint_freq': 1,
-                'stop': {
-                    'training_iteration': 1,
-                },
-            }
-        })
-
-
 if __name__ == '__main__':
     try:
         ray.init(num_cpus=1)

From bdb9ccc70be3399fca6577b4156133702ebd6ce6 Mon Sep 17 00:00:00 2001
From: Pengyuan Zhou
Date: Mon, 22 Jun 2020 13:11:05 +0300
Subject: [PATCH 22/44] update

---
 .../exp_configs/rl/multiagent/multiagent_traffic_light_grid.py | 2 +-
 .../rl/singleagent/singleagent_traffic_light_grid.py           | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/examples/exp_configs/rl/multiagent/multiagent_traffic_light_grid.py b/examples/exp_configs/rl/multiagent/multiagent_traffic_light_grid.py
index 636c7dfb1..7fbdaf703 100644
--- a/examples/exp_configs/rl/multiagent/multiagent_traffic_light_grid.py
+++ b/examples/exp_configs/rl/multiagent/multiagent_traffic_light_grid.py
@@ -9,7 +9,7 @@
 from flow.controllers import SimCarFollowingController, GridRouter
 from ray.tune.registry import register_env
 from flow.utils.registry import make_create_env
-from flow.core.params import TrafficLightParams
+
 # Experiment parameters
 N_ROLLOUTS = 63  # number of rollouts per training iteration
 N_CPUS = 63  # number of parallel workers
diff --git a/examples/exp_configs/rl/singleagent/singleagent_traffic_light_grid.py b/examples/exp_configs/rl/singleagent/singleagent_traffic_light_grid.py
index aaf97b02e..53a474452 100644
--- a/examples/exp_configs/rl/singleagent/singleagent_traffic_light_grid.py
+++ b/examples/exp_configs/rl/singleagent/singleagent_traffic_light_grid.py
@@ -144,7 +144,7 @@ def get_non_flow_params(enter_speed, add_net_params):
     'target_velocity': 50,
     'switch_time': 3.0,
     'num_observed': 2,
-    'discrete': False,
+    'discrete': False,  # set True for DQN
     'tl_type': 'actuated'
 }

From c36d2235d03d570f61687f42808256eb737d1382 Mon Sep 17 00:00:00 2001
From: Pengyuan Zhou
Date: Mon, 22 Jun 2020 13:13:31 +0300
Subject: [PATCH 23/44] typo

---
 .../exp_configs/rl/multiagent/multiagent_traffic_light_grid.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/examples/exp_configs/rl/multiagent/multiagent_traffic_light_grid.py b/examples/exp_configs/rl/multiagent/multiagent_traffic_light_grid.py
index 7fbdaf703..017ebd649 100644
--- a/examples/exp_configs/rl/multiagent/multiagent_traffic_light_grid.py
+++ b/examples/exp_configs/rl/multiagent/multiagent_traffic_light_grid.py
@@ -89,7 +89,7 @@
         "target_velocity": 50,
         "switch_time": 3,
         "num_observed": 2,
-        "discrete": True,  # set False for DQN
+        "discrete": False,  # set True for DQN
         "tl_type": "actuated",
         "num_local_edges": 4,
         "num_local_lights": 4,

From 720bcad4710e4b2947fc5ea0c5c7cbbb0c50feec Mon Sep 17 00:00:00 2001
From: Pengyuan Zhou
Date: Mon, 22 Jun 2020 13:31:58 +0300
Subject: [PATCH 24/44] update

---
 .../multiagent_traffic_light_grid.py |  5 +++--
 examples/train.py                    | 18 ++++++++++--------
 2 files changed, 13 insertions(+), 10 deletions(-)

diff --git a/examples/exp_configs/rl/multiagent/multiagent_traffic_light_grid.py b/examples/exp_configs/rl/multiagent/multiagent_traffic_light_grid.py
index 017ebd649..cf556c231 100644
--- a/examples/exp_configs/rl/multiagent/multiagent_traffic_light_grid.py
+++ b/examples/exp_configs/rl/multiagent/multiagent_traffic_light_grid.py
@@ -89,7 +89,7 @@
         "target_velocity": 50,
         "switch_time": 3,
         "num_observed": 2,
-        "discrete": False,  # set True for DQN
+        "discrete": True,  # set True for DQN
         "tl_type": "actuated",
         "num_local_edges": 4,
         "num_local_lights": 4,
@@ -142,7 +142,8 @@
 def gen_policy():
     """Generate a policy in RLlib."""
-    return PPOTFPolicy, obs_space, act_space, {}
+    #return PPOTFPolicy, obs_space, act_space, {}
+    return DQNTFPolicy, obs_space, act_space, {}
 
 
 # Setup PG with a single policy graph for all agents
diff --git a/examples/train.py b/examples/train.py
index 20dc00a82..d2d2ece50 100644
--- a/examples/train.py
+++ b/examples/train.py
@@ -138,6 +138,7 @@ def setup_exps_rllib(flow_params,
     """
     from ray import tune
     from ray.tune.registry import register_env
+    from ray.rllib.env.group_agents_wrapper import _GroupAgentsWrapper
     try:
         from ray.rllib.agents.agent import get_agent_class
     except ImportError:
@@ -147,14 +148,9 @@ def setup_exps_rllib(flow_params,
     alg_run = flags.algorithm.upper()
 
-    agent_cls = get_agent_class(alg_run)
-    config = deepcopy(agent_cls._default_config)
-    config["num_workers"] = n_cpus
-    config["train_batch_size"] = horizon * n_rollouts
-    config["horizon"] = horizon
-    if alg_run == "PPO":
-
+    if alg_run == "PPO":
+        agent_cls = get_agent_class(alg_run)
+        config = deepcopy(agent_cls._default_config)
         config["gamma"] = 0.999  # discount rate
         config["model"].update({"fcnet_hiddens": [32, 32, 32]})
         config["use_gae"] = True
@@ -158,6 +158,8 @@
         config["kl_target"] = 0.02
         config["num_sgd_iter"] = 10
     elif alg_run == "DQN":
+        agent_cls = get_agent_class(alg_run)
+        config = deepcopy(agent_cls._default_config)
         config['clip_actions'] = False
         config["timesteps_per_iteration"] = horizon * n_rollouts
         # https://github.com/ray-project/ray/blob/master/rllib/tuned_examples/dqn/atari-dist-dqn.yaml
         config["hiddens"] = [512]
         config["lr"] = 0.0000625
         config["schedule_max_timesteps"] = 2000000
         config["buffer_size"] = 1000000
         config["target_network_update_freq"] = 8000
 
+    config["num_workers"] = n_cpus
+    config["train_batch_size"] = horizon * n_rollouts
+    config["horizon"] = horizon
+
     # save the flow params for replay
     flow_json = json.dumps(
@@ -207,7 +209,7 @@
     policies_to_train = getattr(submodule, "policies_to_train", None)
 
     alg_run, gym_name, config = setup_exps_rllib(
-        flow_params, n_cpus, n_rollouts,
+        flow_params, n_cpus, n_rollouts, flags,
         policy_graphs, policy_mapping_fn, policies_to_train)
 
     ray.init(num_cpus=n_cpus + 1, ignore_reinit_error=True, object_store_memory=200 * 1024 * 1024)

From 91a0a0ba5af451496a7b35c6851e6a8842c895f3 Mon Sep 17 00:00:00 2001
From: Pengyuan Zhou
Date: Mon, 22 Jun 2020 13:36:25 +0300
Subject: [PATCH 25/44] Update singleagent_traffic_light_grid.py

---
 .../rl/singleagent/singleagent_traffic_light_grid.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/examples/exp_configs/rl/singleagent/singleagent_traffic_light_grid.py b/examples/exp_configs/rl/singleagent/singleagent_traffic_light_grid.py
index 53a474452..11f94023d 100644
--- a/examples/exp_configs/rl/singleagent/singleagent_traffic_light_grid.py
+++ b/examples/exp_configs/rl/singleagent/singleagent_traffic_light_grid.py
@@ -144,7 +144,7 @@ def get_non_flow_params(enter_speed, add_net_params):
     'target_velocity': 50,
     'switch_time': 3.0,
     'num_observed': 2,
-    'discrete': False, # set True for DQN
+    'discrete': False,  # set True for DQN
     'tl_type': 'actuated'
 }

From 50be0ae772acfd7730249def4afd40956e846ef0 Mon Sep 17 00:00:00 2001
From: Pengyuan Zhou
Date: Mon, 22 Jun 2020 13:45:38 +0300
Subject: [PATCH 26/44] Update multiagent_traffic_light_grid.py

---
 .../exp_configs/rl/multiagent/multiagent_traffic_light_grid.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/examples/exp_configs/rl/multiagent/multiagent_traffic_light_grid.py b/examples/exp_configs/rl/multiagent/multiagent_traffic_light_grid.py
index cf556c231..ae6a4040a 100644
--- a/examples/exp_configs/rl/multiagent/multiagent_traffic_light_grid.py
+++ b/examples/exp_configs/rl/multiagent/multiagent_traffic_light_grid.py
@@ -142,7 +142,7 @@
 def gen_policy():
     """Generate a policy in RLlib."""
-    #return PPOTFPolicy, obs_space, act_space, {}
+    # return PPOTFPolicy, obs_space, act_space, {}
     return DQNTFPolicy, obs_space, act_space, {}

From 6303be2bdeb735cb3a57e4fedc7a4ba64bb07b3b Mon Sep 17 00:00:00 2001
From: Pengyuan Zhou
Date: Tue, 23 Jun 2020 21:59:10 +0300
Subject: [PATCH 27/44] Update train.py

---
 examples/train.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/examples/train.py b/examples/train.py
index d2d2ece50..529875dc5 100644
--- a/examples/train.py
+++ b/examples/train.py
@@ -138,7 +138,6 @@ def setup_exps_rllib(flow_params,
     """
     from ray import tune
     from ray.tune.registry import register_env
-    from ray.rllib.env.group_agents_wrapper import _GroupAgentsWrapper
     try:
         from ray.rllib.agents.agent import get_agent_class
     except ImportError:

From 658b9cb735f5a0834eacc49cce6c8e0ef58ec9fc Mon Sep 17 00:00:00 2001
From: Pengyuan Zhou
Date: Tue, 23 Jun 2020 22:01:26 +0300
Subject: [PATCH 28/44] Update multiagent_traffic_light_grid.py

---
 .../rl/multiagent/multiagent_traffic_light_grid.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/examples/exp_configs/rl/multiagent/multiagent_traffic_light_grid.py b/examples/exp_configs/rl/multiagent/multiagent_traffic_light_grid.py
index ae6a4040a..589206319 100644
--- a/examples/exp_configs/rl/multiagent/multiagent_traffic_light_grid.py
+++ b/examples/exp_configs/rl/multiagent/multiagent_traffic_light_grid.py
@@ -1,7 +1,7 @@
 """Multi-agent traffic light example (single shared policy)."""
 
 from ray.rllib.agents.ppo.ppo_policy import PPOTFPolicy
-from ray.rllib.agents.dqn.dqn_policy import DQNTFPolicy
+# from ray.rllib.agents.dqn.dqn_policy import DQNTFPolicy
 from flow.envs.multiagent import MultiTrafficLightGridPOEnv
 from flow.networks import TrafficLightGridNetwork
 from flow.core.params import SumoParams, EnvParams, InitialConfig, NetParams
@@ -89,7 +89,7 @@
         "target_velocity": 50,
         "switch_time": 3,
         "num_observed": 2,
-        "discrete": True,  # set True for DQN
+        "discrete": False,  # set True for DQN
         "tl_type": "actuated",
         "num_local_edges": 4,
         "num_local_lights": 4,
@@ -142,8 +142,8 @@
 def gen_policy():
     """Generate a policy in RLlib."""
-    # return PPOTFPolicy, obs_space, act_space, {}
-    return DQNTFPolicy, obs_space, act_space, {}
+    return PPOTFPolicy, obs_space, act_space, {}
+    # return DQNTFPolicy, obs_space, act_space, {}
 
 
 # Setup PG with a single policy graph for all agents

From 4984ffc3623237be502dfbbb3a66f0ef09150480 Mon Sep 17 00:00:00 2001
From: Pengyuan Zhou
Date: Tue, 23 Jun 2020 22:38:49 +0300
Subject: [PATCH 29/44] rm flow-project from travis.yml

---
 .travis.yml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/.travis.yml b/.travis.yml
index 297281bc7..82c9a7283 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -41,13 +41,13 @@ before_install:
   - source activate flow
 
   # [sumo] dependencies and binaries
-  - pushd $HOME/build/flow-project
+  - pushd $HOME/build
   - ./flow/scripts/setup_sumo_ubuntu1604.sh
   - popd
   - source ~/.bashrc
 
   # [aimsun] install the conda env and update the path to the env
-  - pushd $HOME/build/flow-project
+  - pushd $HOME/build
   - ./flow/scripts/setup_aimsun.sh
   - popd
   - source ~/.bashrc

From 2535b0c7bac5a88fb652eaabb1bea1a875cf6ac4 Mon Sep 17 00:00:00 2001
From: Pengyuan Zhou
Date: Tue, 8 Sep 2020 13:06:36 +0300
Subject: [PATCH 30/44] Update .travis.yml

---
 .travis.yml | 2 --
 1 file changed, 2 deletions(-)

diff --git a/.travis.yml b/.travis.yml
index 82c9a7283..fd98761f7 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -41,13 +41,11 @@ before_install:
   - source activate flow
 
   # [sumo] dependencies and binaries
-  - pushd $HOME/build
   - ./flow/scripts/setup_sumo_ubuntu1604.sh
   - popd
   - source ~/.bashrc
 
   # [aimsun] install the conda env and update the path to the env
-  - pushd $HOME/build
   - ./flow/scripts/setup_aimsun.sh
   - popd
   - source ~/.bashrc

From 95d9182f503bf5c9034bd365066e49bd2605adea Mon Sep 17 00:00:00 2001
From: Pengyuan Zhou
Date: Tue, 8 Sep 2020 13:07:33 +0300
Subject: [PATCH 31/44] Update multiagent_traffic_light_grid.py

---
 .../exp_configs/rl/multiagent/multiagent_traffic_light_grid.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/examples/exp_configs/rl/multiagent/multiagent_traffic_light_grid.py b/examples/exp_configs/rl/multiagent/multiagent_traffic_light_grid.py
index 589206319..44dec14c1 100644
--- a/examples/exp_configs/rl/multiagent/multiagent_traffic_light_grid.py
+++ b/examples/exp_configs/rl/multiagent/multiagent_traffic_light_grid.py
@@ -143,7 +143,6 @@
 def gen_policy():
     """Generate a policy in RLlib."""
     return PPOTFPolicy, obs_space, act_space, {}
-    # return DQNTFPolicy, obs_space, act_space, {}
 
 
 # Setup PG with a single policy graph for all agents

From b3876467df47dfea5f4ca1dbfd6cefcbd5738d0b Mon Sep 17 00:00:00 2001
From: Pengyuan Zhou
Date: Tue, 8 Sep 2020 13:08:48 +0300
Subject: [PATCH 32/44] Update train.py

---
 examples/train.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/examples/train.py b/examples/train.py
index 0a20ff8e1..be72c34eb 100644
--- a/examples/train.py
+++ b/examples/train.py
@@ -106,8 +106,7 @@ def setup_exps_rllib(flow_params,
                      flags,
                      policy_graphs=None,
                      policy_mapping_fn=None,
-                     policies_to_train=None,
-                     ):
+                     policies_to_train=None):
     """Return the relevant components of an RLlib experiment.
 
     Parameters

From c0d41cf1ebfd7d0f8a66ea4c2c4aca058ab8c236 Mon Sep 17 00:00:00 2001
From: Pengyuan Zhou
Date: Fri, 18 Dec 2020 17:48:52 +0200
Subject: [PATCH 33/44] Update README.md

---
 README.md | 45 ++++++++++-----------------------------------
 1 file changed, 10 insertions(+), 35 deletions(-)

diff --git a/README.md b/README.md
index 7d37223c5..1adc1e0e6 100644
--- a/README.md
+++ b/README.md
@@ -1,45 +1,20 @@
-
+# Decentralized Reinforcement Learning Traffic Light Control
 
-[![Build Status](https://travis-ci.com/flow-project/flow.svg?branch=master)](https://travis-ci.com/flow-project/flow)
-[![Docs](https://readthedocs.org/projects/flow/badge)](http://flow.readthedocs.org/en/latest/)
-[![Coverage Status](https://coveralls.io/repos/github/flow-project/flow/badge.svg?branch=master)](https://coveralls.io/github/flow-project/flow?branch=master)
-[![Binder](https://mybinder.org/badge_logo.svg)](https://mybinder.org/v2/gh/flow-project/flow/binder)
-[![License](https://img.shields.io/badge/license-MIT-blue.svg)](https://github.com/flow-project/flow/blob/master/LICENSE.md)
+DQN branch contains the code for multi-agent DQN controlled intelligent traffic lights.
 
-# Flow
+Currently DQN branch has been approved by the original Flow community and waiting to be merged.
 
-[Flow](https://flow-project.github.io/) is a computational framework for deep RL and control experiments for traffic microsimulation.
+For detailed changes compared to original Flow code, please refer to the [PR](https://github.com/flow-project/flow/pull/964)
 
-See [our website](https://flow-project.github.io/) for more information on the application of Flow to several mixed-autonomy traffic scenarios. Other [results and videos](https://sites.google.com/view/ieee-tro-flow/home) are available as well.
+# Citing
 
-# More information
+For more theoretical details such as optima proof, system design, and performance comparison, please refer and cite our paper:
 
-- [Documentation](https://flow.readthedocs.org/en/latest/)
-- [Installation instructions](http://flow.readthedocs.io/en/latest/flow_setup.html)
-- [Tutorials](https://github.com/flow-project/flow/tree/master/tutorials)
-- [Binder Build (beta)](https://mybinder.org/v2/gh/flow-project/flow/binder)
+P. Zhou, X. Chen, Z. Liu, T. Braud, P. Hui and J. Kangasharju, "DRLE: Decentralized Reinforcement Learning at the Edge for Traffic Light Control in the IoV," in IEEE Transactions on Intelligent Transportation Systems, doi: 10.1109/TITS.2020.3035841.
 
-# Technical questions
+or
 
-If you have a bug, please report it. Otherwise, join the [Flow Users group](https://join.slack.com/t/flow-users/shared_invite/enQtODQ0NDYxMTQyNDY2LTY1ZDVjZTljM2U0ODIxNTY5NTQ2MmUxMzYzNzc5NzU4ZTlmNGI2ZjFmNGU4YjVhNzE3NjcwZTBjNzIxYTg5ZmY) on Slack!
+Zhou, P., Chen, X., Liu, Z., Braud, T., Hui, P. and Kangasharju, J., 2020. DRLE: Decentralized Reinforcement Learning at the Edge for Traffic Light Control. arXiv preprint arXiv:2009.01502.
 
-# Getting involved
 
-We welcome your contributions.
-
-- Please report bugs and improvements by submitting [GitHub issue](https://github.com/flow-project/flow/issues).
-- Submit your contributions using [pull requests](https://github.com/flow-project/flow/pulls). Please use [this template](https://github.com/flow-project/flow/blob/master/.github/PULL_REQUEST_TEMPLATE.md) for your pull requests.
-
-# Citing Flow
-
-If you use Flow for academic research, you are highly encouraged to cite our paper:
-
-C. Wu, A. Kreidieh, K. Parvate, E. Vinitsky, A. Bayen, "Flow: Architecture and Benchmarking for Reinforcement Learning in Traffic Control," CoRR, vol. abs/1710.05465, 2017. [Online]. Available: https://arxiv.org/abs/1710.05465
-
-If you use the benchmarks, you are highly encouraged to cite our paper:
-
-Vinitsky, E., Kreidieh, A., Le Flem, L., Kheterpal, N., Jang, K., Wu, F., ... & Bayen, A. M, Benchmarks for reinforcement learning in mixed-autonomy traffic. In Conference on Robot Learning (pp. 399-409). Available: http://proceedings.mlr.press/v87/vinitsky18a.html
-
-# Contributors
-
-Flow is supported by the [Mobile Sensing Lab](http://bayen.eecs.berkeley.edu/) at UC Berkeley and Amazon AWS Machine Learning research grants. The contributors are listed in [Flow Team Page](https://flow-project.github.io/team.html).
+Original instructions, please refer to [Flow](https://flow-project.github.io/)

From 00c5c6c3de8598feb89aabaa7699784c51575528 Mon Sep 17 00:00:00 2001
From: Pengyuan Zhou
Date: Fri, 18 Dec 2020 17:49:26 +0200
Subject: [PATCH 34/44] Update README.md

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 1adc1e0e6..2f66265b2 100644
--- a/README.md
+++ b/README.md
@@ -8,7 +8,7 @@
 # Citing
 
-For more theoretical details such as optima proof, system design, and performance comparison, please refer and cite our paper:
+For more theoretical details such as optima proof, system design, and performance comparison, please refer and cite our [paper](https://arxiv.org/pdf/2009.01502.pdf):
 
 P. Zhou, X. Chen, Z. Liu, T. Braud, P. Hui and J. Kangasharju, "DRLE: Decentralized Reinforcement Learning at the Edge for Traffic Light Control in the IoV," in IEEE Transactions on Intelligent Transportation Systems, doi: 10.1109/TITS.2020.3035841.
From 85a84e4d472af07d86378d143723100f6b91e7d2 Mon Sep 17 00:00:00 2001 From: Pengyuan Zhou Date: Sat, 19 Dec 2020 02:07:53 +0200 Subject: [PATCH 35/44] Set theme jekyll-theme-midnight --- _config.yml | 1 + 1 file changed, 1 insertion(+) create mode 100644 _config.yml diff --git a/_config.yml b/_config.yml new file mode 100644 index 000000000..18854876c --- /dev/null +++ b/_config.yml @@ -0,0 +1 @@ +theme: jekyll-theme-midnight \ No newline at end of file From ef777f737496bb4bcded1d4e457fd3508b4960a8 Mon Sep 17 00:00:00 2001 From: Pengyuan Zhou Date: Sat, 19 Dec 2020 02:08:27 +0200 Subject: [PATCH 36/44] Set theme jekyll-theme-midnight From 04631ad00cd2ea2389006f831b38f1ee7255828c Mon Sep 17 00:00:00 2001 From: Pengyuan Zhou Date: Sat, 19 Dec 2020 02:10:58 +0200 Subject: [PATCH 37/44] Update README.md --- README.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/README.md b/README.md index 2f66265b2..c17f4bf38 100644 --- a/README.md +++ b/README.md @@ -6,6 +6,8 @@ Currently DQN branch has been approved by the original Flow community and waitin For detailed changes compared to original Flow code, please refer to the [PR](https://github.com/flow-project/flow/pull/964) +A quick [Demo](https://youtu.be/p2sMtN_mW8s) + # Citing For more theoretical details such as optima proof, system design, and performance comparison, please refer and cite our [paper](https://arxiv.org/pdf/2009.01502.pdf): From bdf80f2496b81307a5d4f4a2077a862cd0ced757 Mon Sep 17 00:00:00 2001 From: Pengyuan Zhou Date: Sat, 19 Dec 2020 02:14:08 +0200 Subject: [PATCH 38/44] Set theme jekyll-theme-cayman --- _config.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/_config.yml b/_config.yml index 18854876c..c4192631f 100644 --- a/_config.yml +++ b/_config.yml @@ -1 +1 @@ -theme: jekyll-theme-midnight \ No newline at end of file +theme: jekyll-theme-cayman \ No newline at end of file From 3b998de416be3f5795fa09ab65524371cac976b0 Mon Sep 17 00:00:00 2001 From: Pengyuan Zhou Date: Sat, 19 Dec 2020 02:28:08 +0200 Subject: [PATCH 39/44] Update README.md --- README.md | 4 ---- 1 file changed, 4 deletions(-) diff --git a/README.md b/README.md index c17f4bf38..afd498947 100644 --- a/README.md +++ b/README.md @@ -14,9 +14,5 @@ For more theoretical details such as optima proof, system design, and performanc P. Zhou, X. Chen, Z. Liu, T. Braud, P. Hui and J. Kangasharju, "DRLE: Decentralized Reinforcement Learning at the Edge for Traffic Light Control in the IoV," in IEEE Transactions on Intelligent Transportation Systems, doi: 10.1109/TITS.2020.3035841. -or - -Zhou, P., Chen, X., Liu, Z., Braud, T., Hui, P. and Kangasharju, J., 2020. DRLE: Decentralized Reinforcement Learning at the Edge for Traffic Light Control. arXiv preprint arXiv:2009.01502. 
- Original instructions, please refer to [Flow](https://flow-project.github.io/) From c9a7377ea9f7fb6ebe481528e642701fd7ae092b Mon Sep 17 00:00:00 2001 From: Pengyuan Zhou Date: Sat, 19 Dec 2020 02:53:16 +0200 Subject: [PATCH 40/44] Update README.md --- README.md | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index afd498947..b4738081f 100644 --- a/README.md +++ b/README.md @@ -6,13 +6,20 @@ Currently DQN branch has been approved by the original Flow community and waitin For detailed changes compared to original Flow code, please refer to the [PR](https://github.com/flow-project/flow/pull/964) -A quick [Demo](https://youtu.be/p2sMtN_mW8s) - # Citing For more theoretical details such as optima proof, system design, and performance comparison, please refer and cite our [paper](https://arxiv.org/pdf/2009.01502.pdf): - -P. Zhou, X. Chen, Z. Liu, T. Braud, P. Hui and J. Kangasharju, "DRLE: Decentralized Reinforcement Learning at the Edge for Traffic Light Control in the IoV," in IEEE Transactions on Intelligent Transportation Systems, doi: 10.1109/TITS.2020.3035841. +``` +@ARTICLE{9275391, + author={P. {Zhou} and X. {Chen} and Z. {Liu} and T. {Braud} and P. {Hui} and J. {Kangasharju}}, + journal={IEEE Transactions on Intelligent Transportation Systems}, + title={DRLE: Decentralized Reinforcement Learning at the Edge for Traffic Light Control in the IoV}, + year={2020}, + doi={10.1109/TITS.2020.3035841}} +``` + +A quick Demo: +[![Video](https://i9.ytimg.com/vi/p2sMtN_mW8s/maxresdefault.jpg?time=1608339000000&sqp=CLic9f4F&rs=AOn4CLDlfAcq4ONYwic9lK3Bx7MDsLbq1A)](https://youtu.be/p2sMtN_mW8s) Original instructions, please refer to [Flow](https://flow-project.github.io/) From d0ab1b07fd23f29bae1f0f2e2db97a8355a0eb7c Mon Sep 17 00:00:00 2001 From: Pengyuan Zhou Date: Sat, 19 Dec 2020 06:46:48 +0200 Subject: [PATCH 41/44] Update README.md --- README.md | 54 +++++++++++++++++++++++++++++++++++++----------------- 1 file changed, 37 insertions(+), 17 deletions(-) diff --git a/README.md b/README.md index b4738081f..7d37223c5 100644 --- a/README.md +++ b/README.md @@ -1,25 +1,45 @@ -# Decentralized Reinforcement Learning Traffic Light Control + -DQN branch contains the code for multi-agent DQN controlled intelligent traffic lights. +[![Build Status](https://travis-ci.com/flow-project/flow.svg?branch=master)](https://travis-ci.com/flow-project/flow) +[![Docs](https://readthedocs.org/projects/flow/badge)](http://flow.readthedocs.org/en/latest/) +[![Coverage Status](https://coveralls.io/repos/github/flow-project/flow/badge.svg?branch=master)](https://coveralls.io/github/flow-project/flow?branch=master) +[![Binder](https://mybinder.org/badge_logo.svg)](https://mybinder.org/v2/gh/flow-project/flow/binder) +[![License](https://img.shields.io/badge/license-MIT-blue.svg)](https://github.com/flow-project/flow/blob/master/LICENSE.md) -Currently DQN branch has been approved by the original Flow community and waiting to be merged. +# Flow -For detailed changes compared to original Flow code, please refer to the [PR](https://github.com/flow-project/flow/pull/964) +[Flow](https://flow-project.github.io/) is a computational framework for deep RL and control experiments for traffic microsimulation. -# Citing +See [our website](https://flow-project.github.io/) for more information on the application of Flow to several mixed-autonomy traffic scenarios. Other [results and videos](https://sites.google.com/view/ieee-tro-flow/home) are available as well. 
-For more theoretical details such as optima proof, system design, and performance comparison, please refer and cite our [paper](https://arxiv.org/pdf/2009.01502.pdf): -``` -@ARTICLE{9275391, - author={P. {Zhou} and X. {Chen} and Z. {Liu} and T. {Braud} and P. {Hui} and J. {Kangasharju}}, - journal={IEEE Transactions on Intelligent Transportation Systems}, - title={DRLE: Decentralized Reinforcement Learning at the Edge for Traffic Light Control in the IoV}, - year={2020}, - doi={10.1109/TITS.2020.3035841}} -``` +# More information -A quick Demo: -[![Video](https://i9.ytimg.com/vi/p2sMtN_mW8s/maxresdefault.jpg?time=1608339000000&sqp=CLic9f4F&rs=AOn4CLDlfAcq4ONYwic9lK3Bx7MDsLbq1A)](https://youtu.be/p2sMtN_mW8s) +- [Documentation](https://flow.readthedocs.org/en/latest/) +- [Installation instructions](http://flow.readthedocs.io/en/latest/flow_setup.html) +- [Tutorials](https://github.com/flow-project/flow/tree/master/tutorials) +- [Binder Build (beta)](https://mybinder.org/v2/gh/flow-project/flow/binder) +# Technical questions -Original instructions, please refer to [Flow](https://flow-project.github.io/) +If you have a bug, please report it. Otherwise, join the [Flow Users group](https://join.slack.com/t/flow-users/shared_invite/enQtODQ0NDYxMTQyNDY2LTY1ZDVjZTljM2U0ODIxNTY5NTQ2MmUxMzYzNzc5NzU4ZTlmNGI2ZjFmNGU4YjVhNzE3NjcwZTBjNzIxYTg5ZmY) on Slack! + +# Getting involved + +We welcome your contributions. + +- Please report bugs and improvements by submitting [GitHub issue](https://github.com/flow-project/flow/issues). +- Submit your contributions using [pull requests](https://github.com/flow-project/flow/pulls). Please use [this template](https://github.com/flow-project/flow/blob/master/.github/PULL_REQUEST_TEMPLATE.md) for your pull requests. + +# Citing Flow + +If you use Flow for academic research, you are highly encouraged to cite our paper: + +C. Wu, A. Kreidieh, K. Parvate, E. Vinitsky, A. Bayen, "Flow: Architecture and Benchmarking for Reinforcement Learning in Traffic Control," CoRR, vol. abs/1710.05465, 2017. [Online]. Available: https://arxiv.org/abs/1710.05465 + +If you use the benchmarks, you are highly encouraged to cite our paper: + +Vinitsky, E., Kreidieh, A., Le Flem, L., Kheterpal, N., Jang, K., Wu, F., ... & Bayen, A. M, Benchmarks for reinforcement learning in mixed-autonomy traffic. In Conference on Robot Learning (pp. 399-409). Available: http://proceedings.mlr.press/v87/vinitsky18a.html + +# Contributors + +Flow is supported by the [Mobile Sensing Lab](http://bayen.eecs.berkeley.edu/) at UC Berkeley and Amazon AWS Machine Learning research grants. The contributors are listed in [Flow Team Page](https://flow-project.github.io/team.html). 
From 1172cb3440563b173f252a1d9298a0d0db32457e Mon Sep 17 00:00:00 2001 From: Pengyuan Zhou Date: Mon, 21 Dec 2020 01:48:55 +0200 Subject: [PATCH 42/44] Update .travis.yml --- .travis.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.travis.yml b/.travis.yml index fd98761f7..297281bc7 100644 --- a/.travis.yml +++ b/.travis.yml @@ -41,11 +41,13 @@ before_install: - source activate flow # [sumo] dependencies and binaries + - pushd $HOME/build/flow-project - ./flow/scripts/setup_sumo_ubuntu1604.sh - popd - source ~/.bashrc # [aimsun] install the conda env and update the path to the env + - pushd $HOME/build/flow-project - ./flow/scripts/setup_aimsun.sh - popd - source ~/.bashrc From c01258b20b9515689230f9ccc89874a3589d6360 Mon Sep 17 00:00:00 2001 From: Pengyuan Zhou Date: Mon, 21 Dec 2020 01:51:50 +0200 Subject: [PATCH 43/44] Delete _config.yml --- _config.yml | 1 - 1 file changed, 1 deletion(-) delete mode 100644 _config.yml diff --git a/_config.yml b/_config.yml deleted file mode 100644 index c4192631f..000000000 --- a/_config.yml +++ /dev/null @@ -1 +0,0 @@ -theme: jekyll-theme-cayman \ No newline at end of file From 1282c67a6d114f08aa38f44e12ac0465a70bc3eb Mon Sep 17 00:00:00 2001 From: Pengyuan Zhou Date: Mon, 21 Dec 2020 01:52:18 +0200 Subject: [PATCH 44/44] Update multiagent_traffic_light_grid.py --- .../exp_configs/rl/multiagent/multiagent_traffic_light_grid.py | 1 - 1 file changed, 1 deletion(-) diff --git a/examples/exp_configs/rl/multiagent/multiagent_traffic_light_grid.py b/examples/exp_configs/rl/multiagent/multiagent_traffic_light_grid.py index 44dec14c1..88c412946 100644 --- a/examples/exp_configs/rl/multiagent/multiagent_traffic_light_grid.py +++ b/examples/exp_configs/rl/multiagent/multiagent_traffic_light_grid.py @@ -1,7 +1,6 @@ """Multi-agent traffic light example (single shared policy).""" from ray.rllib.agents.ppo.ppo_policy import PPOTFPolicy -# from ray.rllib.agents.dqn.dqn_policy import DQNTFPolicy from flow.envs.multiagent import MultiTrafficLightGridPOEnv from flow.networks import TrafficLightGridNetwork from flow.core.params import SumoParams, EnvParams, InitialConfig, NetParams