ai4co
diff --git a/‎rl4co/envs/__init__.py‎
Lines changed: 7 additions & 3 deletions b/‎rl4co/envs/__init__.py‎
Lines changed: 7 additions & 3 deletions
diff --git a/‎rl4co/envs/routing/__init__.py‎
Lines changed: 3 additions & 0 deletions b/‎rl4co/envs/routing/__init__.py‎
Lines changed: 3 additions & 0 deletions
diff --git a/‎rl4co/envs/routing/cvrpmvc/env.py‎
Lines changed: 101 additions & 0 deletions b/‎rl4co/envs/routing/cvrpmvc/env.py‎
Lines changed: 101 additions & 0 deletions
diff --git a/‎rl4co/envs/routing/shpp/env.py‎
Lines changed: 188 additions & 0 deletions b/‎rl4co/envs/routing/shpp/env.py‎
Lines changed: 188 additions & 0 deletions
@@ -4,10 +4,14 @@
 # EDA
 from rl4co.envs.eda import DPPEnv, MDPPEnv
 
+# Graph
+from rl4co.envs.graph import FLPEnv, MCPEnv
+
 # Routing
 from rl4co.envs.routing import (
     ATSPEnv,
     CVRPEnv,
+    CVRPMVCEnv,
     CVRPTWEnv,
     DenseRewardTSPEnv,
     MDCPDPEnv,
@@ -18,6 +22,7 @@
     PDPEnv,
     PDPRuinRepairEnv,
     SDVRPEnv,
+    SHPPEnv,
     SPCTSPEnv,
     SVRPEnv,
     TSPEnv,
@@ -28,14 +33,12 @@
 from rl4co.envs.scheduling import FFSPEnv, FJSPEnv, SMTWTPEnv
 from rl4co.envs.scheduling.jssp.env import JSSPEnv
 
-# Graph
-from rl4co.envs.graph import MCPEnv, FLPEnv
-
 # Register environments
 ENV_REGISTRY = {
     "atsp": ATSPEnv,
     "cvrp": CVRPEnv,
     "cvrptw": CVRPTWEnv,
+    "cvrpmvc": CVRPMVCEnv,
     "dpp": DPPEnv,
     "ffsp": FFSPEnv,
     "jssp": JSSPEnv,
@@ -47,6 +50,7 @@
     "pdp": PDPEnv,
     "pdp_ruin_repair": PDPRuinRepairEnv,
     "sdvrp": SDVRPEnv,
+    "shpp": SHPPEnv,
     "svrp": SVRPEnv,
     "spctsp": SPCTSPEnv,
     "tsp": TSPEnv,
 
@@ -2,6 +2,7 @@
 from rl4co.envs.routing.atsp.generator import ATSPGenerator
 from rl4co.envs.routing.cvrp.env import CVRPEnv
 from rl4co.envs.routing.cvrp.generator import CVRPGenerator
+from rl4co.envs.routing.cvrpmvc.env import CVRPMVCEnv
 from rl4co.envs.routing.cvrptw.env import CVRPTWEnv
 from rl4co.envs.routing.cvrptw.generator import CVRPTWGenerator
 from rl4co.envs.routing.mdcpdp.env import MDCPDPEnv
@@ -17,6 +18,8 @@
 from rl4co.envs.routing.pdp.env import PDPEnv, PDPRuinRepairEnv
 from rl4co.envs.routing.pdp.generator import PDPGenerator
 from rl4co.envs.routing.sdvrp.env import SDVRPEnv
+from rl4co.envs.routing.shpp.env import SHPPEnv
+from rl4co.envs.routing.shpp.generator import SHPPGenerator
 from rl4co.envs.routing.spctsp.env import SPCTSPEnv
 from rl4co.envs.routing.svrp.env import SVRPEnv
 from rl4co.envs.routing.svrp.generator import SVRPGenerator
 
@@ -0,0 +1,101 @@
+import torch
+
+from tensordict.tensordict import TensorDict
+
+from rl4co.envs.routing.cvrp.env import CVRPEnv
+from rl4co.utils.ops import gather_by_index
+from rl4co.utils.pylogger import get_pylogger
+
+log = get_pylogger(__name__)
+
+
+class CVRPMVCEnv(CVRPEnv):
+    """Capacitated Vehicle Routing Problem (CVRP) with a maximum vehicle constraint.
+
+    Extends ``CVRPEnv`` by counting the vehicles (routes) started so far and by
+    masking the depot whenever returning to it would leave too little capacity in
+    the not-yet-used vehicles to serve the demand that is still unassigned.
+
+    Extra state written to the TensorDict on reset (trailing dimension of size 1):
+        - vehicles_used: number of vehicles started so far, initialised to 1
+        - demand_remaining: sum of the demand of the customers not yet served
+        - max_vehicle: ``ceil(total_demand / vehicle_capacity) + 1``
+    """
+
+    name = "cvrpmvc"
+
+    def _step(self, td: TensorDict) -> TensorDict:
+        """Apply ``td["action"]`` and update capacity, demand and vehicle bookkeeping.
+
+        Args:
+            td: current state; must contain ``action`` and the keys set by ``_reset``.
+
+        Returns:
+            The same TensorDict, updated in place with the new state, a zero
+            ``reward``, the ``done`` flag and the recomputed ``action_mask``.
+        """
+        # One more vehicle is counted whenever the depot (index 0) is selected while
+        # the previous node was a customer, i.e. a route has just been closed
+        vehicles_used = td["vehicles_used"] + (
+            (td["action"].unsqueeze(-1) == 0) & (td["current_node"] != 0)
+        )
+
+        current_node = td["action"][:, None]  # Add dimension for step
+        n_loc = td["demand"].size(-1)  # Excludes depot
+        visits_customer = current_node != 0
+
+        # Note: selected_demand is the demand of the first customer (by clamp) when the
+        # depot is selected, so it is incorrect for depot steps and must be masked there
+        selected_demand = gather_by_index(
+            td["demand"], torch.clamp(current_node - 1, 0, n_loc - 1), squeeze=False
+        )
+
+        # Increase capacity if depot is not visited, otherwise set to 0
+        used_capacity = (td["used_capacity"] + selected_demand) * visits_customer.float()
+
+        # Only customer visits serve demand: without the mask every return to the depot
+        # would wrongly subtract the first customer's demand (see note above) and make
+        # the depot mask in get_action_mask too permissive
+        demand_remaining = td["demand_remaining"] - selected_demand * visits_customer.to(
+            selected_demand.dtype
+        )
+
+        # Note: here we do not subtract one as we have to scatter so the first column allows scattering depot
+        # Add one dimension since we write a single value
+        visited = td["visited"].scatter(-1, current_node, 1)
+
+        # SECTION: get done
+        done = visited.sum(-1) == visited.size(-1)
+        reward = torch.zeros_like(done)
+
+        td.update(
+            {
+                "current_node": current_node,
+                "used_capacity": used_capacity,
+                "vehicles_used": vehicles_used,
+                "demand_remaining": demand_remaining,
+                "visited": visited,
+                "reward": reward,
+                "done": done,
+            }
+        )
+        # The mask depends on the values written above, so compute it after the update
+        td.set("action_mask", self.get_action_mask(td))
+        return td
+
+    def _reset(
+        self, td: TensorDict | None = None, batch_size: list | None = None
+    ) -> TensorDict:
+        """Reset via ``CVRPEnv._reset`` and add the vehicle-constraint state.
+
+        Args:
+            td: problem instance forwarded to the parent reset.
+            batch_size: batch size; falls back to the batch size of the reset state.
+
+        Returns:
+            The reset TensorDict with ``vehicles_used``, ``demand_remaining`` and
+            ``max_vehicle`` added.
+        """
+        td = super()._reset(td, batch_size)
+        batch_size = batch_size or list(td.batch_size)
+        # The first vehicle is considered in use from the start
+        td.set(
+            "vehicles_used",
+            torch.ones((*batch_size, 1), dtype=torch.int, device=td.device),
+        )
+        td.set("demand_remaining", td["demand"].sum(-1, keepdim=True))
+        # Fleet size: the capacity lower bound on the number of routes plus one spare
+        td.set(
+            "max_vehicle", torch.ceil(td["demand_remaining"] / td["vehicle_capacity"]) + 1
+        )
+        return td
+
+    @staticmethod
+    def get_action_mask(td: TensorDict) -> torch.Tensor:
+        """Compute the feasibility mask over ``[depot, customers...]``.
+
+        Args:
+            td: current state. When ``vehicles_used`` is absent, only the plain
+                "depot was just visited" rule is applied to the depot.
+
+        Returns:
+            Boolean tensor where ``True`` marks an action that may be selected.
+        """
+        # For demand steps_dim is inserted by indexing with id, for used_capacity insert node dim for broadcasting
+        exceeds_cap = td["demand"] + td["used_capacity"] > td["vehicle_capacity"]
+
+        # Nodes that cannot be visited are already visited or too much demand to be served now
+        mask_loc = td["visited"][..., 1:].to(exceeds_cap.dtype) | exceeds_cap
+
+        if "vehicles_used" in td.keys():
+            max_vehicle = td["max_vehicle"]
+            demand_remaining = td["demand_remaining"]
+            # Capacity offered by the vehicles that have not been started yet
+            capacity_remaining = (max_vehicle - td["vehicles_used"]) * td[
+                "vehicle_capacity"
+            ]
+            mask_depot = (  # mask the depot
+                (td["current_node"] == 0)  # if the depot is just visited
+                | (
+                    demand_remaining > capacity_remaining
+                )  # or the unassigned vehicles' capacity can't satisfy remaining demands
+            ) & ~torch.all(
+                mask_loc, dim=-1, keepdim=True
+            )  # unless there are no other choices
+        else:
+            # Cannot visit the depot if just visited and still unserved nodes
+            mask_depot = (td["current_node"] == 0) & ~torch.all(
+                mask_loc, dim=-1, keepdim=True
+            )
+        # Masks above flag infeasible actions; invert to return feasible ones
+        return ~torch.cat((mask_depot, mask_loc), -1)
@@ -0,0 +1,188 @@
+from typing import Optional
+
+import torch
+
+from tensordict.tensordict import TensorDict
+from torchrl.data import (
+    BoundedTensorSpec,
+    CompositeSpec,
+    UnboundedContinuousTensorSpec,
+    UnboundedDiscreteTensorSpec,
+)
+
+from rl4co.envs.common.base import RL4COEnvBase
+from rl4co.utils.ops import gather_by_index, get_tour_length
+from rl4co.utils.pylogger import get_pylogger
+
+from .generator import SHPPGenerator
+from .render import render
+
+log = get_pylogger(__name__)
+
+
+class SHPPEnv(RL4COEnvBase):
+    """
+    Shortest Hamiltonian Path Problem (SHPP)
+    SHPP is referred to the open-loop Traveling Salesman Problem (TSP) in the literature.
+    The goal of the SHPP is to find the shortest Hamiltonian path in a given graph with
+    given fixed starting/terminating nodes (they can be different nodes). A Hamiltonian
+    path visits all other nodes exactly once. At each step, the agent chooses a city to visit.
+    The reward is 0 unless the agent visits all the cities. In that case, the reward is
+    (-)length of the path: maximizing the reward is equivalent to minimizing the path length.
+
+    Observation:
+        - locations of each customer
+        - starting node and terminating node
+        - the current location of the vehicle
+
+    Constraints:
+        - the first node is the starting node
+        - the last node is the terminating node
+        - each node is visited exactly once
+
+    Finish condition:
+        - the agent has visited all the customers and reached the terminating node
+
+    Reward:
+        - (minus) the length of the path
+
+    Args:
+        generator: SHPPGenerator instance as the generator
+        generator_params: parameters for the generator (used only when ``generator``
+            is None); ``None`` means no extra parameters
+    """
+
+    name = "shpp"
+
+    def __init__(
+        self,
+        generator: SHPPGenerator = None,
+        generator_params: Optional[dict] = None,
+        **kwargs,
+    ):
+        super().__init__(**kwargs)
+        if generator is None:
+            # Avoid a shared mutable default: build a fresh dict when none is given
+            generator = SHPPGenerator(**(generator_params or {}))
+        self.generator = generator
+        self._make_spec(self.generator)
+
+    @staticmethod
+    def _step(td: TensorDict) -> TensorDict:
+        """Visit ``td["action"]`` and update availability, mask and done flag.
+
+        Args:
+            td: current state; must contain ``action`` and the keys set by ``_reset``.
+
+        Returns:
+            The same TensorDict updated in place.
+        """
+        current_node = td["action"]
+        # NOTE(review): ``td["i"].all() == 0`` is true as soon as any counter in the
+        # batch is 0; this matches "first step" only if the batch advances in lock-step
+        first_node = current_node if td["i"].all() == 0 else td["first_node"]
+
+        # Set not visited to 0 (i.e., we visited the node)
+        available = td["available"].scatter(
+            -1, current_node.unsqueeze(-1).expand_as(td["action_mask"]), 0
+        )
+
+        # If all other nodes are visited, the terminating node will be available
+        action_mask = available.clone()
+        action_mask[..., -1] = ~available[..., :-1].any(dim=-1)
+
+        # We are done when there are no unvisited locations
+        done = torch.sum(available, dim=-1) == 0
+
+        # The reward is calculated outside via get_reward for efficiency, so we set it to 0 here
+        reward = torch.zeros_like(done)
+
+        td.update(
+            {
+                "first_node": first_node,
+                "current_node": current_node,
+                "i": td["i"] + 1,
+                "available": available,
+                "action_mask": action_mask,
+                "reward": reward,
+                "done": done,
+            },
+        )
+        return td
+
+    def _reset(self, td: Optional[TensorDict] = None, batch_size=None) -> TensorDict:
+        """Build the initial state for the instances in ``td``.
+
+        Note: the first node is the starting node; the last node is the terminating node
+
+        Args:
+            td: TensorDict holding ``locs``; it is dereferenced directly, so it must
+                not be None when this method runs.
+            batch_size: batch size used for every state tensor.
+
+        Returns:
+            A new TensorDict with the initial state.
+        """
+        device = td.device
+        locs = td["locs"]
+
+        # We do not enforce loading from self for flexibility
+        num_loc = locs.shape[-2]
+
+        # Other variables
+        current_node = torch.zeros((batch_size), dtype=torch.int64, device=device)
+        last_node = torch.full(
+            (batch_size), num_loc - 1, dtype=torch.int64, device=device
+        )
+        available = torch.ones(
+            (*batch_size, num_loc), dtype=torch.bool, device=device
+        )  # 1 means not visited, i.e. action is allowed
+        action_mask = torch.zeros((*batch_size, num_loc), dtype=torch.bool, device=device)
+        action_mask[..., 0] = 1  # Only the start point is available at the beginning
+        i = torch.zeros((*batch_size, 1), dtype=torch.int64, device=device)
+
+        return TensorDict(
+            {
+                "locs": locs,
+                "first_node": current_node,
+                "last_node": last_node,
+                "current_node": current_node,
+                "i": i,
+                "available": available,
+                "action_mask": action_mask,
+                # Created on the same device as the rest of the state
+                "reward": torch.zeros(
+                    (*batch_size, 1), dtype=torch.float32, device=device
+                ),
+            },
+            batch_size=batch_size,
+        )
+
+    def _get_reward(self, td, actions) -> torch.Tensor:
+        """Return minus the length of the open path described by ``actions``.
+
+        Args:
+            td: state holding ``locs``.
+            actions: node indices in visiting order.
+
+        Returns:
+            Negative sum of the Euclidean distances between consecutive nodes.
+        """
+        # Gather locations in order of the path
+        locs_ordered = gather_by_index(td["locs"], actions)
+        # The path is open: sum only consecutive segments and do not add the edge from
+        # the terminating node back to the starting node
+        # NOTE(review): get_tour_length is presumed to measure the closed tour,
+        # which would add that extra edge - confirm against rl4co.utils.ops
+        segments = locs_ordered[..., 1:, :] - locs_ordered[..., :-1, :]
+        return -segments.norm(p=2, dim=-1).sum(-1)
+
+    @staticmethod
+    def check_solution_validity(td: TensorDict, actions: torch.Tensor):
+        """Check that solution is valid: nodes are visited exactly once
+
+        Only the permutation property is asserted; the positions of the starting and
+        terminating nodes are not verified here.
+        """
+        expected = torch.arange(
+            actions.size(1), dtype=actions.dtype, device=actions.device
+        )
+        assert (
+            expected.view(1, -1).expand_as(actions) == actions.sort(1)[0]
+        ).all(), "Invalid tour"
+
+    @staticmethod
+    def render(td: TensorDict, actions: torch.Tensor = None, ax=None):
+        """Delegate plotting to the module-level ``render`` helper."""
+        return render(td, actions, ax)
+
+    def _make_spec(self, generator):
+        """Make the observation and action specs from the parameters"""
+        self.observation_spec = CompositeSpec(
+            locs=BoundedTensorSpec(
+                low=generator.min_loc,
+                high=generator.max_loc,
+                shape=(generator.num_loc, 2),
+                dtype=torch.float32,
+            ),
+            first_node=UnboundedDiscreteTensorSpec(
+                shape=(1,),
+                dtype=torch.int64,
+            ),
+            current_node=UnboundedDiscreteTensorSpec(
+                shape=(1,),
+                dtype=torch.int64,
+            ),
+            i=UnboundedDiscreteTensorSpec(
+                shape=(1,),
+                dtype=torch.int64,
+            ),
+            action_mask=UnboundedDiscreteTensorSpec(
+                shape=(generator.num_loc,),
+                dtype=torch.bool,
+            ),
+            shape=(),
+        )
+        self.action_spec = BoundedTensorSpec(
+            shape=(1,),
+            dtype=torch.int64,
+            low=0,
+            high=generator.num_loc,
+        )
+        self.reward_spec = UnboundedContinuousTensorSpec(shape=(1,))
+        self.done_spec = UnboundedDiscreteTensorSpec(shape=(1,), dtype=torch.bool)