Add an option for a local reward that just computes speed of the AV and its follower
eugenevinitsky committed Mar 29, 2020
1 parent 91144ca commit 942fe56
Showing 4 changed files with 46 additions and 33 deletions.
examples/exp_configs/non_rl/i210_subnetwork.py (2 changes: 1 addition & 1 deletion)
@@ -101,7 +101,7 @@
 edge_id = "119257908#1-AddedOnRampEdge"
 custom_callables = {
     "avg_merge_speed": lambda env: np.nan_to_num(np.mean(
-        env.k.vehicle.get_speed(env.k.vehicle.get_ids_by_edge(edge_id)))),
+        env.k.vehicle.get_speed(env.k.vehicle.get_ids()))),
     "avg_outflow": lambda env: np.nan_to_num(
         env.k.vehicle.get_outflow_rate(120)),
     # we multiply by 5 to account for the vehicle length and by 1000 to convert
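To make the effect of this one-line change concrete, here is a small stand-alone sketch. The vehicle kernel, ids, edges, and speeds below are invented stand-ins for env.k.vehicle rather than Flow's real API; only the two lambda bodies are taken from the file above.

import numpy as np
from types import SimpleNamespace

# Invented ids/speeds; two vehicles sit on the on-ramp edge, two on the mainline.
_speeds = {"ramp_0": 4.0, "ramp_1": 6.0, "main_0": 26.0, "main_1": 28.0}
_edges = {"ramp_0": "119257908#1-AddedOnRampEdge",
          "ramp_1": "119257908#1-AddedOnRampEdge",
          "main_0": "mainline", "main_1": "mainline"}

vehicle = SimpleNamespace(
    get_ids=lambda: list(_speeds),
    get_ids_by_edge=lambda edge: [v for v, e in _edges.items() if e == edge],
    get_speed=lambda ids: [_speeds[v] for v in ids],
)
env = SimpleNamespace(k=SimpleNamespace(vehicle=vehicle))

edge_id = "119257908#1-AddedOnRampEdge"
# Old callable: only vehicles on the on-ramp edge -> (4 + 6) / 2 = 5.0
old_metric = np.nan_to_num(np.mean(
    env.k.vehicle.get_speed(env.k.vehicle.get_ids_by_edge(edge_id))))
# New callable: every vehicle in the network -> (4 + 6 + 26 + 28) / 4 = 16.0
new_metric = np.nan_to_num(np.mean(
    env.k.vehicle.get_speed(env.k.vehicle.get_ids())))
print(old_metric, new_metric)

In either version, np.nan_to_num keeps the logged value finite when the id list is empty and np.mean returns NaN.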
examples/exp_configs/rl/multiagent/multiagent_i210.py (1 change: 0 additions & 1 deletion)
@@ -5,7 +5,6 @@
"""
import os

from ray.rllib.agents.ppo.ppo_policy import PPOTFPolicy
from ray.tune.registry import register_env

from flow.controllers import RLController
examples/train.py (4 changes: 2 additions & 2 deletions)
@@ -22,7 +22,6 @@

 import ray
 from ray import tune
-from ray.tune import run_experiments
 from ray.tune.registry import register_env
 try:
     from ray.rllib.agents.agent import get_agent_class
@@ -36,9 +35,9 @@
 from flow.utils.registry import make_create_env
 
 
 
 def parse_args(args):
     """Parse training options user can specify in command line.
     Returns
     -------
     argparse.Namespace
@@ -140,6 +139,7 @@ def setup_exps_rllib(flow_params,
                      policies_to_train=None,
                      ):
     """Return the relevant components of an RLlib experiment.
     Parameters
     ----------
     flow_params : dict
flow/envs/multiagent/i210.py (72 changes: 43 additions & 29 deletions)
@@ -16,6 +16,8 @@
"max_decel": 1,
# whether we use an obs space that contains adjacent lane info or just the lead obs
"lead_obs": True,
# whether the reward should come from local vehicles instead of global rewards
"local_reward": True
}
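For a sense of how the new flag would be consumed downstream, a minimal configuration sketch follows. It assumes the defaults dict above is the module-level ADDITIONAL_ENV_PARAMS of flow/envs/multiagent/i210.py and uses an illustrative horizon; none of these lines are part of this commit.

from copy import deepcopy

from flow.core.params import EnvParams
from flow.envs.multiagent.i210 import ADDITIONAL_ENV_PARAMS

additional_env_params = deepcopy(ADDITIONAL_ENV_PARAMS)
additional_env_params["local_reward"] = True   # per-AV reward (the new branch below)
# additional_env_params["local_reward"] = False  # keep the original global reward

env_params = EnvParams(
    horizon=4500,  # illustrative value, not taken from this commit
    additional_params=additional_env_params,
)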


@@ -137,35 +139,47 @@ def compute_reward(self, rl_actions, **kwargs):
             return {}
 
         rewards = {}
-        for rl_id in self.k.vehicle.get_rl_ids():
-            if self.env_params.evaluate:
-                # reward is speed of vehicle if we are in evaluation mode
-                reward = self.k.vehicle.get_speed(rl_id)
-            elif kwargs['fail']:
-                # reward is 0 if a collision occurred
-                reward = 0
-            else:
-                # reward high system-level velocities
-                cost1 = average_velocity(self, fail=kwargs['fail'])
-
-                # penalize small time headways
-                cost2 = 0
-                t_min = 1  # smallest acceptable time headway
-
-                lead_id = self.k.vehicle.get_leader(rl_id)
-                if lead_id not in ["", None] \
-                        and self.k.vehicle.get_speed(rl_id) > 0:
-                    t_headway = max(
-                        self.k.vehicle.get_headway(rl_id) /
-                        self.k.vehicle.get_speed(rl_id), 0)
-                    cost2 += min((t_headway - t_min) / t_min, 0)
-
-                # weights for cost1, cost2, and cost3, respectively
-                eta1, eta2 = 1.00, 0.10
-
-                reward = max(eta1 * cost1 + eta2 * cost2, 0)
-
-            rewards[rl_id] = reward
+        if self.env_params.additional_params["local_reward"]:
+            for rl_id in self.k.vehicle.get_rl_ids():
+                rewards[rl_id] = 0
+                speeds = []
+                follow_speed = self.k.vehicle.get_speed([self.k.vehicle.get_follower(rl_id)])
+                speeds.extend([speed for speed in follow_speed if speed >= 0])
+                if self.k.vehicle.get_speed(rl_id) >= 0:
+                    speeds.append(self.k.vehicle.get_speed(rl_id))
+                if len(speeds) > 0:
+                    # rescale so the q function can estimate it quickly
+                    rewards[rl_id] = np.mean(speeds) / 500.0
+        else:
+            for rl_id in self.k.vehicle.get_rl_ids():
+                if self.env_params.evaluate:
+                    # reward is speed of vehicle if we are in evaluation mode
+                    reward = self.k.vehicle.get_speed(rl_id)
+                elif kwargs['fail']:
+                    # reward is 0 if a collision occurred
+                    reward = 0
+                else:
+                    # reward high system-level velocities
+                    cost1 = average_velocity(self, fail=kwargs['fail'])
+
+                    # penalize small time headways
+                    cost2 = 0
+                    t_min = 1  # smallest acceptable time headway
+
+                    lead_id = self.k.vehicle.get_leader(rl_id)
+                    if lead_id not in ["", None] \
+                            and self.k.vehicle.get_speed(rl_id) > 0:
+                        t_headway = max(
+                            self.k.vehicle.get_headway(rl_id) /
+                            self.k.vehicle.get_speed(rl_id), 0)
+                        cost2 += min((t_headway - t_min) / t_min, 0)
+
+                    # weights for cost1, cost2, and cost3, respectively
+                    eta1, eta2 = 1.00, 0.10
+
+                    reward = max(eta1 * cost1 + eta2 * cost2, 0)
+
+                rewards[rl_id] = reward
         return rewards
 
     def additional_command(self):
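Pulled out of the environment for illustration, the new local branch reduces to the helper below: a sketch with plain floats standing in for the kernel's speed queries. local_av_reward is not a function in this commit, and the 500.0 divisor simply mirrors the rescaling comment in the diff.

import numpy as np

def local_av_reward(av_speed, follow_speed, scale=500.0):
    """Sketch of the per-AV local reward: the mean speed of the AV and its
    follower, rescaled so the value function only has to fit small targets.
    Negative speeds (Flow's error sentinel for a missing vehicle) are dropped;
    with no valid speeds the reward stays 0.
    """
    speeds = [s for s in (av_speed, follow_speed) if s >= 0]
    return float(np.mean(speeds)) / scale if speeds else 0.0

print(local_av_reward(24.0, 20.0))     # (24 + 20) / 2 / 500 = 0.044
print(local_av_reward(24.0, -1001.0))  # follower missing -> 24 / 500 = 0.048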
