diff --git a/configs/experiment/routing/deepaco.yaml b/configs/experiment/routing/deepaco.yaml index 65cb9632..5a5d370e 100644 --- a/configs/experiment/routing/deepaco.yaml +++ b/configs/experiment/routing/deepaco.yaml @@ -19,37 +19,51 @@ logger: name: deepaco-${env.name}${env.generator_params.num_loc} model: - batch_size: 512 - val_batch_size: 1000 - test_batch_size: 1000 - train_data_size: 128_000 - val_data_size: 10_000 - test_data_size: 10_000 + batch_size: 20 + val_batch_size: 20 + test_batch_size: 20 + train_data_size: 400 + val_data_size: 20 + test_data_size: 100 + optimizer: "AdamW" optimizer_kwargs: - lr: 1e-4 + lr: 1e-3 weight_decay: 0 lr_scheduler: "MultiStepLR" lr_scheduler_kwargs: - milestones: [25, 35] + milestones: [15, 35] gamma: 0.1 + train_with_local_search: True + ls_reward_aug_W: 0.99 + policy_kwargs: n_ants: - train: 50 - val: 20 - test: 20 + train: 30 + val: 30 + test: 100 n_iterations: train: 1 # unused value - val: 20 - test: 20 + val: 5 + test: 10 + temperature: 1.0 + top_p: 0.0 + top_k: 0 aco_kwargs: alpha: 1.0 beta: 1.0 decay: 0.95 - + use_local_search: True + use_nls: True + n_perturbations: 5 + local_search_params: + max_iterations: 1000 + perturbation_params: + max_iterations: 20 + k_sparse: 5 # this should be adjusted based on the `num_loc` value trainer: - max_epochs: 40 + max_epochs: 50 -seed: 1234 \ No newline at end of file +seed: 1234 diff --git a/configs/experiment/routing/gfacs.yaml b/configs/experiment/routing/gfacs.yaml new file mode 100644 index 00000000..eea73b7f --- /dev/null +++ b/configs/experiment/routing/gfacs.yaml @@ -0,0 +1,73 @@ +# @package _global_ + +defaults: + - override /model: gfacs.yaml + - override /env: tsp.yaml + - override /callbacks: default.yaml + - override /trainer: default.yaml + - override /logger: wandb.yaml + +env: + generator_params: + num_loc: 50 + +logger: + wandb: + project: "rl4co" + tags: ["gfacs", "${env.name}"] + group: ${env.name}${env.generator_params.num_loc} + name: 
gfacs-${env.name}${env.generator_params.num_loc} + +model: + batch_size: 20 + val_batch_size: 20 + test_batch_size: 20 + train_data_size: 400 + val_data_size: 20 + test_data_size: 100 + optimizer: "AdamW" + optimizer_kwargs: + lr: 1e-3 + weight_decay: 0 + lr_scheduler: + "MultiStepLR" + lr_scheduler_kwargs: + milestones: [15, 35] + gamma: 0.1 + + train_with_local_search: True + ls_reward_aug_W: 0.99 + + policy_kwargs: + n_ants: + train: 30 + val: 30 + test: 100 + n_iterations: + train: 1 # unused value + val: 5 + test: 10 + temperature: 1.0 + top_p: 0.0 + top_k: 0 + aco_kwargs: + alpha: 1.0 + beta: 1.0 + decay: 0.95 + use_local_search: True + use_nls: True + n_perturbations: 5 + local_search_params: + max_iterations: 1000 + perturbation_params: + max_iterations: 20 + k_sparse: 5 # this should be adjusted based on the `num_loc` value + + beta_min: 100 + beta_max: 500 + beta_flat_epochs: 5 + +trainer: + max_epochs: 50 + +seed: 1234 diff --git a/configs/model/deepaco.yaml b/configs/model/deepaco.yaml index 5a49bcf4..85ece64f 100644 --- a/configs/model/deepaco.yaml +++ b/configs/model/deepaco.yaml @@ -1,3 +1,3 @@ _target_: rl4co.models.DeepACO -baseline: "exponential" +baseline: "no" diff --git a/configs/model/gfacs.yaml b/configs/model/gfacs.yaml new file mode 100644 index 00000000..87924bbf --- /dev/null +++ b/configs/model/gfacs.yaml @@ -0,0 +1,3 @@ +_target_: rl4co.models.GFACS + +baseline: "no" diff --git a/pyproject.toml b/pyproject.toml index c7f275cb..b819cd4b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -80,6 +80,7 @@ dev = [ "pytest", "pytest-cov", ] + graph = ["torch_geometric"] routing = [ "numba>=0.58.1", diff --git a/rl4co/envs/routing/cvrp/HGS-CVRP/.gitignore b/rl4co/envs/routing/cvrp/HGS-CVRP/.gitignore new file mode 100644 index 00000000..9be99bb7 --- /dev/null +++ b/rl4co/envs/routing/cvrp/HGS-CVRP/.gitignore @@ -0,0 +1,55 @@ + +# Test build files +Test/CMakeFiles/ +Test/CMakeCache.txt + +# build files +build/ + +# IDE files +.vscode +.idea 
+cmake-build-* +[Dd]ebug/ +[Dd]ebugPublic/ +[Rr]elease/ +[Rr]eleases/ +[Xx]64/ +[Xx]86/ +*.user +.vs/ + + +# Prerequisites +*.d + +# Compiled Object files +*.slo +*.lo +*.o +*.obj + +# Precompiled Headers +*.gch +*.pch + +# Compiled Dynamic libraries +*.so +*.dylib +*.dll + +# Fortran module files +*.mod +*.smod + +# Compiled Static libraries +*.lai +*.la +*.a +*.lib + +# Executables +*.exe +*.out +*.app + diff --git a/rl4co/envs/routing/cvrp/HGS-CVRP/CMakeLists.txt b/rl4co/envs/routing/cvrp/HGS-CVRP/CMakeLists.txt new file mode 100644 index 00000000..a83f6c6d --- /dev/null +++ b/rl4co/envs/routing/cvrp/HGS-CVRP/CMakeLists.txt @@ -0,0 +1,41 @@ +cmake_minimum_required(VERSION 3.15) +project(HGS_CVRP) +set(CMAKE_CXX_STANDARD 17) + +set( + src_files + Program/Genetic.cpp + Program/Individual.cpp + Program/LocalSearch.cpp + Program/Params.cpp + Program/Population.cpp + Program/Split.cpp + Program/InstanceCVRPLIB.cpp + Program/AlgorithmParameters.cpp + Program/C_Interface.cpp) + +if (MSVC) + set(CMAKE_WINDOWS_EXPORT_ALL_SYMBOLS ON) +endif (MSVC) + +include_directories(Program) + +# Build Executable +add_executable(bin + Program/main.cpp + ${src_files}) + +set_target_properties(bin PROPERTIES OUTPUT_NAME hgs) + +# Build Library +add_library(lib SHARED ${src_files}) +set_target_properties(lib PROPERTIES OUTPUT_NAME hgscvrp) + + +# Install +install(TARGETS lib + DESTINATION lib) +install(TARGETS bin + DESTINATION bin) +install(FILES Program/AlgorithmParameters.h Program/C_Interface.h + DESTINATION include) \ No newline at end of file diff --git a/rl4co/envs/routing/cvrp/HGS-CVRP/LICENSE b/rl4co/envs/routing/cvrp/HGS-CVRP/LICENSE new file mode 100644 index 00000000..23605460 --- /dev/null +++ b/rl4co/envs/routing/cvrp/HGS-CVRP/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2020 Thibaut Vidal + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software 
without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/rl4co/envs/routing/cvrp/HGS-CVRP/Program/AlgorithmParameters.cpp b/rl4co/envs/routing/cvrp/HGS-CVRP/Program/AlgorithmParameters.cpp new file mode 100644 index 00000000..9dc5a067 --- /dev/null +++ b/rl4co/envs/routing/cvrp/HGS-CVRP/Program/AlgorithmParameters.cpp @@ -0,0 +1,50 @@ +// +// Created by chkwon on 3/23/22. 
+// + +#include "AlgorithmParameters.h" +#include + +extern "C" +struct AlgorithmParameters default_algorithm_parameters() { + struct AlgorithmParameters ap{}; + + ap.nbGranular = 20; + ap.mu = 25; + ap.lambda = 40; + ap.nbElite = 4; + ap.nbClose = 5; + + ap.nbIterPenaltyManagement = 100; + ap.targetFeasible = 0.2; + ap.penaltyDecrease = 0.85; + ap.penaltyIncrease = 1.2; + + ap.seed = 0; + ap.nbIter = 20000; + ap.nbIterTraces = 500; + ap.timeLimit = 0; + ap.useSwapStar = 1; + + return ap; +} + +void print_algorithm_parameters(const AlgorithmParameters & ap) +{ + std::cout << "=========== Algorithm Parameters =================" << std::endl; + std::cout << "---- nbGranular is set to " << ap.nbGranular << std::endl; + std::cout << "---- mu is set to " << ap.mu << std::endl; + std::cout << "---- lambda is set to " << ap.lambda << std::endl; + std::cout << "---- nbElite is set to " << ap.nbElite << std::endl; + std::cout << "---- nbClose is set to " << ap.nbClose << std::endl; + std::cout << "---- nbIterPenaltyManagement is set to " << ap.nbIterPenaltyManagement << std::endl; + std::cout << "---- targetFeasible is set to " << ap.targetFeasible << std::endl; + std::cout << "---- penaltyDecrease is set to " << ap.penaltyDecrease << std::endl; + std::cout << "---- penaltyIncrease is set to " << ap.penaltyIncrease << std::endl; + std::cout << "---- seed is set to " << ap.seed << std::endl; + std::cout << "---- nbIter is set to " << ap.nbIter << std::endl; + std::cout << "---- nbIterTraces is set to " << ap.nbIterTraces << std::endl; + std::cout << "---- timeLimit is set to " << ap.timeLimit << std::endl; + std::cout << "---- useSwapStar is set to " << ap.useSwapStar << std::endl; + std::cout << "==================================================" << std::endl; +} diff --git a/rl4co/envs/routing/cvrp/HGS-CVRP/Program/AlgorithmParameters.h b/rl4co/envs/routing/cvrp/HGS-CVRP/Program/AlgorithmParameters.h new file mode 100644 index 00000000..707256a0 --- /dev/null +++ 
b/rl4co/envs/routing/cvrp/HGS-CVRP/Program/AlgorithmParameters.h @@ -0,0 +1,38 @@ +// +// Created by chkwon on 3/23/22. +// + +// This header file must be readable in C. + +#ifndef ALGORITHMPARAMETERS_H +#define ALGORITHMPARAMETERS_H + +struct AlgorithmParameters { + int nbGranular; // Granular search parameter, limits the number of moves in the RI local search + int mu; // Minimum population size + int lambda; // Number of solutions created before reaching the maximum population size (i.e., generation size) + int nbElite; // Number of elite individuals + int nbClose; // Number of closest solutions/individuals considered when calculating diversity contribution + + int nbIterPenaltyManagement; // Number of iterations between penalty updates + double targetFeasible; // Reference proportion for the number of feasible individuals, used for the adaptation of the penalty parameters + double penaltyDecrease; // Multiplier used to decrease penalty parameters if there are sufficient feasible individuals + double penaltyIncrease; // Multiplier used to increase penalty parameters if there are insufficient feasible individuals + + int seed; // Random seed. Default value: 0 + int nbIter; // Nb iterations without improvement until termination (or restart if a time limit is specified). Default value: 20,000 iterations + int nbIterTraces; // Number of iterations between traces display during HGS execution + double timeLimit; // CPU time limit until termination in seconds. Default value: 0 (i.e., inactive) + int useSwapStar; // Use SWAP* local search or not. Default value: 1. Only available when coordinates are provided. 
+}; + +#ifdef __cplusplus +extern "C" +#endif +struct AlgorithmParameters default_algorithm_parameters(); + +#ifdef __cplusplus +void print_algorithm_parameters(const AlgorithmParameters & ap); +#endif + +#endif //ALGORITHMPARAMETERS_H diff --git a/rl4co/envs/routing/cvrp/HGS-CVRP/Program/C_Interface.cpp b/rl4co/envs/routing/cvrp/HGS-CVRP/Program/C_Interface.cpp new file mode 100644 index 00000000..5afc7c46 --- /dev/null +++ b/rl4co/envs/routing/cvrp/HGS-CVRP/Program/C_Interface.cpp @@ -0,0 +1,181 @@ +// +// Created by chkwon on 3/23/22. +// + +#include "C_Interface.h" +#include "Population.h" +#include "Params.h" +#include "Genetic.h" +#include +#include +#include +#include + +Solution *prepare_solution(Population &population, Params ¶ms) +{ + // Preparing the best solution + Solution *sol = new Solution; + sol->time = (double)(clock() - params.startTime) / (double)CLOCKS_PER_SEC; + + if (population.getBestFound() != nullptr) { + // Best individual + auto best = population.getBestFound(); + + // setting the cost + sol->cost = best->eval.penalizedCost; + + // finding out the number of routes in the best individual + int n_routes = 0; + for (int k = 0; k < params.nbVehicles; k++) + if (!best->chromR[k].empty()) ++n_routes; + + // filling out the route information + sol->n_routes = n_routes; + sol->routes = new SolutionRoute[n_routes]; + for (int k = 0; k < n_routes; k++) { + sol->routes[k].length = (int)best->chromR[k].size(); + sol->routes[k].path = new int[sol->routes[k].length]; + std::copy(best->chromR[k].begin(), best->chromR[k].end(), sol->routes[k].path); + } + } + else { + sol->cost = 0.0; + sol->n_routes = 0; + sol->routes = nullptr; + } + return sol; +} + + +extern "C" Solution *solve_cvrp( + int n, double *x, double *y, double *serv_time, double *dem, + double vehicleCapacity, double durationLimit, char isRoundingInteger, char isDurationConstraint, + int max_nbVeh, const AlgorithmParameters *ap, char verbose) +{ + Solution *result; + + try { + std::vector 
x_coords(x, x + n); + std::vector y_coords(y, y + n); + std::vector service_time(serv_time, serv_time + n); + std::vector demands(dem, dem + n); + + std::vector > distance_matrix(n, std::vector(n)); + for (int i = 0; i < n; i++) + { + for (int j = 0; j < n; j++) + { + distance_matrix[i][j] = std::sqrt( + (x_coords[i] - x_coords[j])*(x_coords[i] - x_coords[j]) + + (y_coords[i] - y_coords[j])*(y_coords[i] - y_coords[j]) + ); + if (isRoundingInteger) + distance_matrix[i][j] = std::round(distance_matrix[i][j]); + } + } + + Params params(x_coords,y_coords,distance_matrix,service_time,demands,vehicleCapacity,durationLimit,max_nbVeh,isDurationConstraint,verbose,*ap); + + // Running HGS and returning the result + Genetic solver(params); + solver.run(); + result = prepare_solution(solver.population, params); + } + catch (const std::string &e) { std::cout << "EXCEPTION | " << e << std::endl; } + catch (const std::exception &e) { std::cout << "EXCEPTION | " << e.what() << std::endl; } + + return result; +} + +extern "C" Solution *solve_cvrp_dist_mtx( + int n, double *x, double *y, double *dist_mtx, double *serv_time, double *dem, + double vehicleCapacity, double durationLimit, char isDurationConstraint, + int max_nbVeh, const AlgorithmParameters *ap, char verbose) +{ + Solution *result; + std::vector x_coords; + std::vector y_coords; + + try { + if (x != nullptr && y != nullptr) { + x_coords = {x, x + n}; + y_coords = {y, y + n}; + } + + std::vector service_time(serv_time, serv_time + n); + std::vector demands(dem, dem + n); + + std::vector > distance_matrix(n, std::vector(n)); + for (int i = 0; i < n; i++) { // row + for (int j = 0; j < n; j++) { // column + distance_matrix[i][j] = dist_mtx[n * i + j]; + } + } + + Params params(x_coords,y_coords,distance_matrix,service_time,demands,vehicleCapacity,durationLimit,max_nbVeh,isDurationConstraint,verbose,*ap); + + // Running HGS and returning the result + Genetic solver(params); + solver.run(); + result = 
prepare_solution(solver.population, params); + } + catch (const std::string &e) { std::cout << "EXCEPTION | " << e << std::endl; } + catch (const std::exception &e) { std::cout << "EXCEPTION | " << e.what() << std::endl; } + + return result; +} + +extern "C" int local_search( + int n, double *x, double *y, double *dist_mtx, double *serv_time, double *dem, + double vehicleCapacity, double durationLimit, char isDurationConstraint, + int max_nbVeh, const AlgorithmParameters *ap, char verbose, int callid, int count) +{ + Solution *result; + std::vector x_coords; + std::vector y_coords; + + try { + if (x != nullptr && y != nullptr) { + x_coords = {x, x + n}; + y_coords = {y, y + n}; + } + + std::vector service_time(serv_time, serv_time + n); + std::vector demands(dem, dem + n); + + std::vector > distance_matrix(n, std::vector(n)); + for (int i = 0; i < n; i++) { // row + for (int j = 0; j < n; j++) { // column + distance_matrix[i][j] = dist_mtx[n * i + j]; + } + } + + Params params(x_coords,y_coords,distance_matrix,service_time,demands,vehicleCapacity,durationLimit,max_nbVeh,isDurationConstraint,verbose,*ap); + + char buff[100] = {}; + snprintf(buff, sizeof(buff), "/tmp/route-%i", callid); + std::string path = buff; + + Individual individual(params, path); + LocalSearch solver(params); + solver.run(individual, params.penaltyCapacity*10., params.penaltyDuration*10., count); + + char buff2[100] = {}; + snprintf(buff2, sizeof(buff2), "/tmp/swapstar-result-%i", callid); + std::string returnpath = buff2; + individual.exportCVRPLibFormat(returnpath); + } + catch (const std::string &e) { std::cout << "EXCEPTION | " << e << std::endl; } + catch (const std::exception &e) { std::cout << "EXCEPTION | " << e.what() << std::endl; } + + return 1; +} + +extern "C" void delete_solution(Solution *sol) +{ + for (int i = 0; i < sol->n_routes; ++i) + delete[] sol->routes[i].path; + + delete[] sol->routes; + delete sol; +} \ No newline at end of file diff --git 
a/rl4co/envs/routing/cvrp/HGS-CVRP/Program/C_Interface.h b/rl4co/envs/routing/cvrp/HGS-CVRP/Program/C_Interface.h new file mode 100644 index 00000000..ac26cb10 --- /dev/null +++ b/rl4co/envs/routing/cvrp/HGS-CVRP/Program/C_Interface.h @@ -0,0 +1,53 @@ +// +// Created by chkwon on 3/23/22. +// + +#ifndef C_INTERFACE_H +#define C_INTERFACE_H +#include "AlgorithmParameters.h" + +struct SolutionRoute +{ + int length; + int * path; +}; + +struct Solution +{ + double cost; + double time; + int n_routes; + struct SolutionRoute * routes; +}; + +#ifdef __cplusplus +extern "C" +#endif +struct Solution * solve_cvrp( + int n, double* x, double* y, double* serv_time, double* dem, + double vehicleCapacity, double durationLimit, char isRoundingInteger, char isDurationConstraint, + int max_nbVeh, const struct AlgorithmParameters* ap, char verbose); + +#ifdef __cplusplus +extern "C" +#endif +struct Solution *solve_cvrp_dist_mtx( + int n, double* x, double* y, double *dist_mtx, double *serv_time, double *dem, + double vehicleCapacity, double durationLimit, char isDurationConstraint, + int max_nbVeh, const struct AlgorithmParameters *ap, char verbose); + +#ifdef __cplusplus +extern "C" +#endif +int local_search( + int n, double* x, double* y, double *dist_mtx, double *serv_time, double *dem, + double vehicleCapacity, double durationLimit, char isDurationConstraint, + int max_nbVeh, const struct AlgorithmParameters *ap, char verbose, int callid, int count); + +#ifdef __cplusplus +extern "C" +#endif +void delete_solution(struct Solution * sol); + + +#endif //C_INTERFACE_H diff --git a/rl4co/envs/routing/cvrp/HGS-CVRP/Program/CircleSector.h b/rl4co/envs/routing/cvrp/HGS-CVRP/Program/CircleSector.h new file mode 100644 index 00000000..11853426 --- /dev/null +++ b/rl4co/envs/routing/cvrp/HGS-CVRP/Program/CircleSector.h @@ -0,0 +1,54 @@ +#ifndef CIRCLESECTOR_H +#define CIRCLESECTOR_H + +// Simple data structure to represent circle sectors +// Angles are measured in [0,65535] instead of 
[0,359], in such a way that modulo operations are much faster (since 2^16 = 65536) +// Credit to Fabian Giesen at "https://web.archive.org/web/20200912191950/https://fgiesen.wordpress.com/2015/09/24/intervals-in-modular-arithmetic/" for useful implementation tips regarding interval overlaps in modular arithmetics +struct CircleSector +{ + int start; + int end; + + // Positive modulo 65536 + static int positive_mod(int i) + { + // 1) Using the formula positive_mod(n,x) = (n % x + x) % x + // 2) Moreover, remark that "n % 65536" should be automatically compiled in an optimized form as "n & 0xffff" for faster calculations + return (i % 65536 + 65536) % 65536; + } + + // Initialize a circle sector from a single point + void initialize(int point) + { + start = point; + end = point; + } + + // Tests if a point is enclosed in the circle sector + bool isEnclosed(int point) + { + return (positive_mod(point - start) <= positive_mod(end - start)); + } + + // Tests overlap of two circle sectors + static bool overlap(const CircleSector & sector1, const CircleSector & sector2) + { + return ((positive_mod(sector2.start - sector1.start) <= positive_mod(sector1.end - sector1.start)) + || (positive_mod(sector1.start - sector2.start) <= positive_mod(sector2.end - sector2.start))); + } + + // Extends the circle sector to include an additional point + // Done in a "greedy" way, such that the resulting circle sector is the smallest + void extend(int point) + { + if (!isEnclosed(point)) + { + if (positive_mod(point - end) <= positive_mod(start - point)) + end = point; + else + start = point; + } + } +}; + +#endif diff --git a/rl4co/envs/routing/cvrp/HGS-CVRP/Program/Genetic.cpp b/rl4co/envs/routing/cvrp/HGS-CVRP/Program/Genetic.cpp new file mode 100644 index 00000000..60352231 --- /dev/null +++ b/rl4co/envs/routing/cvrp/HGS-CVRP/Program/Genetic.cpp @@ -0,0 +1,86 @@ +#include "Genetic.h" + +void Genetic::run() +{ + /* INITIAL POPULATION */ + population.generatePopulation(); + + int 
nbIter; + int nbIterNonProd = 1; + if (params.verbose) std::cout << "----- STARTING GENETIC ALGORITHM" << std::endl; + for (nbIter = 0 ; nbIterNonProd <= params.ap.nbIter && (params.ap.timeLimit == 0 || (double)(clock()-params.startTime)/(double)CLOCKS_PER_SEC < params.ap.timeLimit) ; nbIter++) + { + /* SELECTION AND CROSSOVER */ + crossoverOX(offspring, population.getBinaryTournament(),population.getBinaryTournament()); + + /* LOCAL SEARCH */ + localSearch.run(offspring, params.penaltyCapacity, params.penaltyDuration); + bool isNewBest = population.addIndividual(offspring,true); + if (!offspring.eval.isFeasible && params.ran()%2 == 0) // Repair half of the solutions in case of infeasibility + { + localSearch.run(offspring, params.penaltyCapacity*10., params.penaltyDuration*10.); + if (offspring.eval.isFeasible) isNewBest = (population.addIndividual(offspring,false) || isNewBest); + } + + /* TRACKING THE NUMBER OF ITERATIONS SINCE LAST SOLUTION IMPROVEMENT */ + if (isNewBest) nbIterNonProd = 1; + else nbIterNonProd ++ ; + + /* DIVERSIFICATION, PENALTY MANAGEMENT AND TRACES */ + if (nbIter % params.ap.nbIterPenaltyManagement == 0) population.managePenalties(); + if (nbIter % params.ap.nbIterTraces == 0) population.printState(nbIter, nbIterNonProd); + + /* FOR TESTS INVOLVING SUCCESSIVE RUNS UNTIL A TIME LIMIT: WE RESET THE ALGORITHM/POPULATION EACH TIME maxIterNonProd IS ATTAINED*/ + if (params.ap.timeLimit != 0 && nbIterNonProd == params.ap.nbIter) + { + population.restart(); + nbIterNonProd = 1; + } + } + if (params.verbose) std::cout << "----- GENETIC ALGORITHM FINISHED AFTER " << nbIter << " ITERATIONS. 
TIME SPENT: " << (double)(clock() - params.startTime) / (double)CLOCKS_PER_SEC << std::endl; +} + +void Genetic::crossoverOX(Individual & result, const Individual & parent1, const Individual & parent2) +{ + // Frequency table to track the customers which have been already inserted + std::vector freqClient = std::vector (params.nbClients + 1, false); + + // Picking the beginning and end of the crossover zone + std::uniform_int_distribution<> distr(0, params.nbClients-1); + int start = distr(params.ran); + int end = distr(params.ran); + + // Avoid that start and end coincide by accident + while (end == start) end = distr(params.ran); + + // Copy from start to end + int j = start; + while (j % params.nbClients != (end + 1) % params.nbClients) + { + result.chromT[j % params.nbClients] = parent1.chromT[j % params.nbClients]; + freqClient[result.chromT[j % params.nbClients]] = true; + j++; + } + + // Fill the remaining elements in the order given by the second parent + for (int i = 1; i <= params.nbClients; i++) + { + int temp = parent2.chromT[(end + i) % params.nbClients]; + if (freqClient[temp] == false) + { + result.chromT[j % params.nbClients] = temp; + j++; + } + } + + // Complete the individual with the Split algorithm + split.generalSplit(result, parent1.eval.nbRoutes); +} + +Genetic::Genetic(Params & params) : + params(params), + split(params), + localSearch(params), + population(params,this->split,this->localSearch), + offspring(params){} + diff --git a/rl4co/envs/routing/cvrp/HGS-CVRP/Program/Genetic.h b/rl4co/envs/routing/cvrp/HGS-CVRP/Program/Genetic.h new file mode 100644 index 00000000..1e6353b4 --- /dev/null +++ b/rl4co/envs/routing/cvrp/HGS-CVRP/Program/Genetic.h @@ -0,0 +1,49 @@ +/*MIT License + +Copyright(c) 2020 Thibaut Vidal + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files(the "Software"), to deal +in the Software without restriction, including without limitation the 
rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions : + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE.*/ + +#ifndef GENETIC_H +#define GENETIC_H + +#include "Population.h" +#include "Individual.h" + +class Genetic +{ +public: + + Params & params; // Problem parameters + Split split; // Split algorithm + LocalSearch localSearch; // Local Search structure + Population population; // Population (public for now to give access to the solutions, but should be be improved later on) + Individual offspring; // First individual to be used as input for the crossover + + // OX Crossover + void crossoverOX(Individual & result, const Individual & parent1, const Individual & parent2); + + // Running the genetic algorithm until maxIterNonProd consecutive iterations or a time limit + void run() ; + + // Constructor + Genetic(Params & params); +}; + +#endif diff --git a/rl4co/envs/routing/cvrp/HGS-CVRP/Program/Individual.cpp b/rl4co/envs/routing/cvrp/HGS-CVRP/Program/Individual.cpp new file mode 100644 index 00000000..ff4d1fb5 --- /dev/null +++ b/rl4co/envs/routing/cvrp/HGS-CVRP/Program/Individual.cpp @@ -0,0 +1,102 @@ +#include "Individual.h" + +void Individual::evaluateCompleteCost(const Params & params) +{ + eval = EvalIndiv(); + for (int r = 0; r < params.nbVehicles; r++) + { + if 
(!chromR[r].empty()) + { + double distance = params.timeCost[0][chromR[r][0]]; + double load = params.cli[chromR[r][0]].demand; + double service = params.cli[chromR[r][0]].serviceDuration; + predecessors[chromR[r][0]] = 0; + for (int i = 1; i < (int)chromR[r].size(); i++) + { + distance += params.timeCost[chromR[r][i-1]][chromR[r][i]]; + load += params.cli[chromR[r][i]].demand; + service += params.cli[chromR[r][i]].serviceDuration; + predecessors[chromR[r][i]] = chromR[r][i-1]; + successors[chromR[r][i-1]] = chromR[r][i]; + } + successors[chromR[r][chromR[r].size()-1]] = 0; + distance += params.timeCost[chromR[r][chromR[r].size()-1]][0]; + eval.distance += distance; + eval.nbRoutes++; + if (load > params.vehicleCapacity) eval.capacityExcess += load - params.vehicleCapacity; + if (distance + service > params.durationLimit) eval.durationExcess += distance + service - params.durationLimit; + } + } + + eval.penalizedCost = eval.distance + eval.capacityExcess*params.penaltyCapacity + eval.durationExcess*params.penaltyDuration; + eval.isFeasible = (eval.capacityExcess < MY_EPSILON && eval.durationExcess < MY_EPSILON); +} + +Individual::Individual(Params & params) +{ + successors = std::vector (params.nbClients + 1); + predecessors = std::vector (params.nbClients + 1); + chromR = std::vector < std::vector >(params.nbVehicles); + chromT = std::vector (params.nbClients); + for (int i = 0; i < params.nbClients; i++) chromT[i] = i + 1; + std::shuffle(chromT.begin(), chromT.end(), params.ran); + eval.penalizedCost = 1.e30; +} + +Individual::Individual(Params & params, std::string fileName) : Individual(params) +{ + double readCost; + chromT.clear(); + std::ifstream inputFile(fileName); + if (inputFile.is_open()) + { + std::string inputString; + inputFile >> inputString; + // Loops in the input file as long as the first line keyword is "Route" + for (int r = 0; inputString == "Route"; r++) + { + inputFile >> inputString; + getline(inputFile, inputString); + std::stringstream 
ss(inputString); + int inputCustomer; + while (ss >> inputCustomer) // Loops as long as there is an integer to read in this route + { + chromT.push_back(inputCustomer); + chromR[r].push_back(inputCustomer); + } + inputFile >> inputString; + } + // if (inputString == "Cost") inputFile >> readCost; + // else throw std::string("Unexpected token in input solution"); + + // Some safety checks and printouts + evaluateCompleteCost(params); + if ((int)chromT.size() != params.nbClients) throw std::string("Input solution does not contain the correct number of clients"); + if (!eval.isFeasible) throw std::string("Input solution is infeasible"); + // if (eval.penalizedCost != readCost)throw std::string("Input solution has a different cost than announced in the file"); + if (params.verbose) std::cout << "----- INPUT SOLUTION HAS BEEN SUCCESSFULLY READ WITH COST " // << eval.penalizedCost + << std::endl; + } + else + throw std::string("Impossible to open solution file provided in input in : " + fileName); +} + + +void Individual::exportCVRPLibFormat(std::string fileName) +{ + std::ofstream myfile(fileName); + if (myfile.is_open()) + { + for (int k = 0; k < (int)chromR.size(); k++) + { + if (!chromR[k].empty()) + { + myfile << "Route #" << k + 1 << ":"; // Route IDs start at 1 in the file format + for (int i : chromR[k]) myfile << " " << i; + myfile << std::endl; + } + } + myfile << "Cost " << eval.penalizedCost << std::endl; + } + else std::cout << "----- IMPOSSIBLE TO OPEN: " << fileName << std::endl; +} diff --git a/rl4co/envs/routing/cvrp/HGS-CVRP/Program/Individual.h b/rl4co/envs/routing/cvrp/HGS-CVRP/Program/Individual.h new file mode 100644 index 00000000..aa0c32df --- /dev/null +++ b/rl4co/envs/routing/cvrp/HGS-CVRP/Program/Individual.h @@ -0,0 +1,61 @@ +/*MIT License + +Copyright(c) 2020 Thibaut Vidal + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files(the "Software"), to deal +in the 
Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions : + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE.*/ + +#ifndef INDIVIDUAL_H +#define INDIVIDUAL_H + +#include "Params.h" + +struct EvalIndiv +{ + double penalizedCost = 0.; // Penalized cost of the solution + int nbRoutes = 0; // Number of routes + double distance = 0.; // Total distance + double capacityExcess = 0.; // Sum of excess load in all routes + double durationExcess = 0.; // Sum of excess duration in all routes + bool isFeasible = false; // Feasibility status of the individual +}; + +class Individual +{ +public: + + EvalIndiv eval; // Solution cost parameters + std::vector < int > chromT ; // Giant tour representing the individual + std::vector < std::vector > chromR ; // For each vehicle, the associated sequence of deliveries (complete solution) + std::vector < int > successors ; // For each node, the successor in the solution (can be the depot 0) + std::vector < int > predecessors ; // For each node, the predecessor in the solution (can be the depot 0) + std::multiset < std::pair < double, Individual* > > indivsPerProximity ; // The other individuals in the population, ordered by increasing proximity (the set container follows a natural 
ordering based on the first value of the pair) + double biasedFitness; // Biased fitness of the solution + + // Measuring cost and feasibility of an Individual from the information of chromR (needs chromR filled and access to Params) + void evaluateCompleteCost(const Params & params); + + void exportCVRPLibFormat(std::string fileName); + + // Constructor of a random individual containing only a giant tour with a shuffled visit order + Individual(Params & params); + + // Constructor of an individual from a file in CVRPLib solution format as produced by the algorithm (useful if a user wishes to input an initial solution) + Individual(Params & params, std::string fileName); +}; +#endif diff --git a/rl4co/envs/routing/cvrp/HGS-CVRP/Program/InstanceCVRPLIB.cpp b/rl4co/envs/routing/cvrp/HGS-CVRP/Program/InstanceCVRPLIB.cpp new file mode 100644 index 00000000..e1cd3ca0 --- /dev/null +++ b/rl4co/envs/routing/cvrp/HGS-CVRP/Program/InstanceCVRPLIB.cpp @@ -0,0 +1,84 @@ +// +// Created by chkwon on 3/22/22. 
+// + +#include +#include +#include "InstanceCVRPLIB.h" + +InstanceCVRPLIB::InstanceCVRPLIB(std::string pathToInstance, bool isRoundingInteger = true) +{ + std::string content, content2, content3; + double serviceTimeData = 0.; + + // Read INPUT dataset + std::ifstream inputFile(pathToInstance); + if (inputFile.is_open()) + { + getline(inputFile, content); + getline(inputFile, content); + getline(inputFile, content); + for (inputFile >> content ; content != "NODE_COORD_SECTION" ; inputFile >> content) + { + if (content == "DIMENSION") { inputFile >> content2 >> nbClients; nbClients--; } // Need to substract the depot from the number of nodes + else if (content == "EDGE_WEIGHT_TYPE") inputFile >> content2 >> content3; + else if (content == "CAPACITY") inputFile >> content2 >> vehicleCapacity; + else if (content == "DISTANCE") { inputFile >> content2 >> durationLimit; isDurationConstraint = true; } + else if (content == "SERVICE_TIME") inputFile >> content2 >> serviceTimeData; + else throw std::string("Unexpected data in input file: " + content); + } + if (nbClients <= 0) throw std::string("Number of nodes is undefined"); + if (vehicleCapacity == 1.e30) throw std::string("Vehicle capacity is undefined"); + + x_coords = std::vector(nbClients + 1); + y_coords = std::vector(nbClients + 1); + demands = std::vector(nbClients + 1); + service_time = std::vector(nbClients + 1); + + // Reading node coordinates + // depot must be the first element + // - i = 0 in the for-loop below, or + // - node_number = 1 in the .vrp file + // customers are + // - i = 1, 2, ..., nbClients in the for-loop below, or + // - node_number = 2, 3, ..., nb_Clients in the .vrp file + int node_number; + for (int i = 0; i <= nbClients; i++) + { + inputFile >> node_number >> x_coords[i] >> y_coords[i]; + if (node_number != i + 1) throw std::string("The node numbering is not in order."); + } + + // Reading demand information + inputFile >> content; + if (content != "DEMAND_SECTION") throw 
std::string("Unexpected data in input file: " + content); + for (int i = 0; i <= nbClients; i++) + { + inputFile >> content >> demands[i]; + service_time[i] = (i == 0) ? 0. : serviceTimeData ; + } + + // Calculating 2D Euclidean Distance + dist_mtx = std::vector < std::vector< double > >(nbClients + 1, std::vector (nbClients + 1)); + for (int i = 0; i <= nbClients; i++) + { + for (int j = 0; j <= nbClients; j++) + { + dist_mtx[i][j] = std::sqrt( + (x_coords[i] - x_coords[j]) * (x_coords[i] - x_coords[j]) + + (y_coords[i] - y_coords[j]) * (y_coords[i] - y_coords[j]) + ); + + if (isRoundingInteger) dist_mtx[i][j] = round(dist_mtx[i][j]); + } + } + + // Reading depot information (in all current instances the depot is represented as node 1, the program will return an error otherwise) + inputFile >> content >> content2 >> content3 >> content3; + if (content != "DEPOT_SECTION") throw std::string("Unexpected data in input file: " + content); + if (content2 != "1") throw std::string("Expected depot index 1 instead of " + content2); + if (content3 != "EOF") throw std::string("Unexpected data in input file: " + content3); + } + else + throw std::string("Impossible to open instance file: " + pathToInstance); +} diff --git a/rl4co/envs/routing/cvrp/HGS-CVRP/Program/InstanceCVRPLIB.h b/rl4co/envs/routing/cvrp/HGS-CVRP/Program/InstanceCVRPLIB.h new file mode 100644 index 00000000..509aa8bb --- /dev/null +++ b/rl4co/envs/routing/cvrp/HGS-CVRP/Program/InstanceCVRPLIB.h @@ -0,0 +1,27 @@ +// +// Created by chkwon on 3/22/22. 
+// + +#ifndef INSTANCECVRPLIB_H +#define INSTANCECVRPLIB_H +#include +#include + +class InstanceCVRPLIB +{ +public: + std::vector x_coords; + std::vector y_coords; + std::vector< std::vector > dist_mtx; + std::vector service_time; + std::vector demands; + double durationLimit = 1.e30; // Route duration limit + double vehicleCapacity = 1.e30; // Capacity limit + bool isDurationConstraint = false; // Indicates if the problem includes duration constraints + int nbClients ; // Number of clients (excluding the depot) + + InstanceCVRPLIB(std::string pathToInstance, bool isRoundingInteger); +}; + + +#endif //INSTANCECVRPLIB_H diff --git a/rl4co/envs/routing/cvrp/HGS-CVRP/Program/LocalSearch.cpp b/rl4co/envs/routing/cvrp/HGS-CVRP/Program/LocalSearch.cpp new file mode 100644 index 00000000..0fb666a3 --- /dev/null +++ b/rl4co/envs/routing/cvrp/HGS-CVRP/Program/LocalSearch.cpp @@ -0,0 +1,813 @@ +#include "LocalSearch.h" + +void LocalSearch::run(Individual & indiv, double penaltyCapacityLS, double penaltyDurationLS, int count) +{ + this->penaltyCapacityLS = penaltyCapacityLS; + this->penaltyDurationLS = penaltyDurationLS; + loadIndividual(indiv); + + // Shuffling the order of the nodes explored by the LS to allow for more diversity in the search + std::shuffle(orderNodes.begin(), orderNodes.end(), params.ran); + std::shuffle(orderRoutes.begin(), orderRoutes.end(), params.ran); + for (int i = 1; i <= params.nbClients; i++) + if (params.ran() % params.ap.nbGranular == 0) // O(n/nbGranular) calls to the inner function on average, to achieve linear-time complexity overall + std::shuffle(params.correlatedVertices[i].begin(), params.correlatedVertices[i].end(), params.ran); + + searchCompleted = false; + for (loopID = 0; !searchCompleted && loopID<=count; loopID++) + { + if (loopID > 1) // Allows at least two loops since some moves involving empty routes are not checked at the first loop + searchCompleted = true; + + /* CLASSICAL ROUTE IMPROVEMENT (RI) MOVES SUBJECT TO A PROXIMITY 
RESTRICTION */ + for (int posU = 0; posU < params.nbClients; posU++) + { + nodeU = &clients[orderNodes[posU]]; + int lastTestRINodeU = nodeU->whenLastTestedRI; + nodeU->whenLastTestedRI = nbMoves; + for (int posV = 0; posV < (int)params.correlatedVertices[nodeU->cour].size(); posV++) + { + nodeV = &clients[params.correlatedVertices[nodeU->cour][posV]]; + if (loopID == 0 || std::max(nodeU->route->whenLastModified, nodeV->route->whenLastModified) > lastTestRINodeU) // only evaluate moves involving routes that have been modified since last move evaluations for nodeU + { + // Randomizing the order of the neighborhoods within this loop does not matter much as we are already randomizing the order of the node pairs (and it's not very common to find improving moves of different types for the same node pair) + setLocalVariablesRouteU(); + setLocalVariablesRouteV(); + if (move1()) continue; // RELOCATE + if (move2()) continue; // RELOCATE + if (move3()) continue; // RELOCATE + if (nodeUIndex <= nodeVIndex && move4()) continue; // SWAP + if (move5()) continue; // SWAP + if (nodeUIndex <= nodeVIndex && move6()) continue; // SWAP + if (intraRouteMove && move7()) continue; // 2-OPT + if (!intraRouteMove && move8()) continue; // 2-OPT* + if (!intraRouteMove && move9()) continue; // 2-OPT* + + // Trying moves that insert nodeU directly after the depot + if (nodeV->prev->isDepot) + { + nodeV = nodeV->prev; + setLocalVariablesRouteV(); + if (move1()) continue; // RELOCATE + if (move2()) continue; // RELOCATE + if (move3()) continue; // RELOCATE + if (!intraRouteMove && move8()) continue; // 2-OPT* + if (!intraRouteMove && move9()) continue; // 2-OPT* + } + } + } + + /* MOVES INVOLVING AN EMPTY ROUTE -- NOT TESTED IN THE FIRST LOOP TO AVOID INCREASING TOO MUCH THE FLEET SIZE */ + if (loopID > 0 && !emptyRoutes.empty()) + { + nodeV = routes[*emptyRoutes.begin()].depot; + setLocalVariablesRouteU(); + setLocalVariablesRouteV(); + if (move1()) continue; // RELOCATE + if (move2()) 
continue; // RELOCATE + if (move3()) continue; // RELOCATE + if (move9()) continue; // 2-OPT* + } + } + + if (params.ap.useSwapStar == 1 && params.areCoordinatesProvided) + { + /* (SWAP*) MOVES LIMITED TO ROUTE PAIRS WHOSE CIRCLE SECTORS OVERLAP */ + for (int rU = 0; rU < params.nbVehicles; rU++) + { + routeU = &routes[orderRoutes[rU]]; + int lastTestSWAPStarRouteU = routeU->whenLastTestedSWAPStar; + routeU->whenLastTestedSWAPStar = nbMoves; + for (int rV = 0; rV < params.nbVehicles; rV++) + { + routeV = &routes[orderRoutes[rV]]; + if (routeU->nbCustomers > 0 && routeV->nbCustomers > 0 && routeU->cour < routeV->cour + && (loopID == 0 || std::max(routeU->whenLastModified, routeV->whenLastModified) + > lastTestSWAPStarRouteU)) + if (CircleSector::overlap(routeU->sector, routeV->sector)) + swapStar(); + } + } + } + } + + // Register the solution produced by the LS in the individual + exportIndividual(indiv); +} + +void LocalSearch::setLocalVariablesRouteU() +{ + routeU = nodeU->route; + nodeX = nodeU->next; + nodeXNextIndex = nodeX->next->cour; + nodeUIndex = nodeU->cour; + nodeUPrevIndex = nodeU->prev->cour; + nodeXIndex = nodeX->cour; + loadU = params.cli[nodeUIndex].demand; + serviceU = params.cli[nodeUIndex].serviceDuration; + loadX = params.cli[nodeXIndex].demand; + serviceX = params.cli[nodeXIndex].serviceDuration; +} + +void LocalSearch::setLocalVariablesRouteV() +{ + routeV = nodeV->route; + nodeY = nodeV->next; + nodeYNextIndex = nodeY->next->cour; + nodeVIndex = nodeV->cour; + nodeVPrevIndex = nodeV->prev->cour; + nodeYIndex = nodeY->cour; + loadV = params.cli[nodeVIndex].demand; + serviceV = params.cli[nodeVIndex].serviceDuration; + loadY = params.cli[nodeYIndex].demand; + serviceY = params.cli[nodeYIndex].serviceDuration; + intraRouteMove = (routeU == routeV); +} + +bool LocalSearch::move1() +{ + double costSuppU = params.timeCost[nodeUPrevIndex][nodeXIndex] - params.timeCost[nodeUPrevIndex][nodeUIndex] - params.timeCost[nodeUIndex][nodeXIndex]; + double 
costSuppV = params.timeCost[nodeVIndex][nodeUIndex] + params.timeCost[nodeUIndex][nodeYIndex] - params.timeCost[nodeVIndex][nodeYIndex]; + + if (!intraRouteMove) + { + // Early move pruning to save CPU time. Guarantees that this move cannot improve without checking additional (load, duration...) constraints + if (costSuppU + costSuppV >= routeU->penalty + routeV->penalty) return false; + + costSuppU += penaltyExcessDuration(routeU->duration + costSuppU - serviceU) + + penaltyExcessLoad(routeU->load - loadU) + - routeU->penalty; + + costSuppV += penaltyExcessDuration(routeV->duration + costSuppV + serviceU) + + penaltyExcessLoad(routeV->load + loadU) + - routeV->penalty; + } + + if (costSuppU + costSuppV > -MY_EPSILON) return false; + if (nodeUIndex == nodeYIndex) return false; + + insertNode(nodeU, nodeV); + nbMoves++; // Increment move counter before updating route data + searchCompleted = false; + updateRouteData(routeU); + if (!intraRouteMove) updateRouteData(routeV); + return true; +} + +bool LocalSearch::move2() +{ + double costSuppU = params.timeCost[nodeUPrevIndex][nodeXNextIndex] - params.timeCost[nodeUPrevIndex][nodeUIndex] - params.timeCost[nodeXIndex][nodeXNextIndex]; + double costSuppV = params.timeCost[nodeVIndex][nodeUIndex] + params.timeCost[nodeXIndex][nodeYIndex] - params.timeCost[nodeVIndex][nodeYIndex]; + + if (!intraRouteMove) + { + // Early move pruning to save CPU time. Guarantees that this move cannot improve without checking additional (load, duration...) 
constraints + if (costSuppU + costSuppV >= routeU->penalty + routeV->penalty) return false; + + costSuppU += penaltyExcessDuration(routeU->duration + costSuppU - params.timeCost[nodeUIndex][nodeXIndex] - serviceU - serviceX) + + penaltyExcessLoad(routeU->load - loadU - loadX) + - routeU->penalty; + + costSuppV += penaltyExcessDuration(routeV->duration + costSuppV + params.timeCost[nodeUIndex][nodeXIndex] + serviceU + serviceX) + + penaltyExcessLoad(routeV->load + loadU + loadX) + - routeV->penalty; + } + + if (costSuppU + costSuppV > -MY_EPSILON) return false; + if (nodeU == nodeY || nodeV == nodeX || nodeX->isDepot) return false; + + insertNode(nodeU, nodeV); + insertNode(nodeX, nodeU); + nbMoves++; // Increment move counter before updating route data + searchCompleted = false; + updateRouteData(routeU); + if (!intraRouteMove) updateRouteData(routeV); + return true; +} + +bool LocalSearch::move3() +{ + double costSuppU = params.timeCost[nodeUPrevIndex][nodeXNextIndex] - params.timeCost[nodeUPrevIndex][nodeUIndex] - params.timeCost[nodeUIndex][nodeXIndex] - params.timeCost[nodeXIndex][nodeXNextIndex]; + double costSuppV = params.timeCost[nodeVIndex][nodeXIndex] + params.timeCost[nodeXIndex][nodeUIndex] + params.timeCost[nodeUIndex][nodeYIndex] - params.timeCost[nodeVIndex][nodeYIndex]; + + if (!intraRouteMove) + { + // Early move pruning to save CPU time. Guarantees that this move cannot improve without checking additional (load, duration...) 
constraints + if (costSuppU + costSuppV >= routeU->penalty + routeV->penalty) return false; + + costSuppU += penaltyExcessDuration(routeU->duration + costSuppU - serviceU - serviceX) + + penaltyExcessLoad(routeU->load - loadU - loadX) + - routeU->penalty; + + costSuppV += penaltyExcessDuration(routeV->duration + costSuppV + serviceU + serviceX) + + penaltyExcessLoad(routeV->load + loadU + loadX) + - routeV->penalty; + } + + if (costSuppU + costSuppV > -MY_EPSILON) return false; + if (nodeU == nodeY || nodeX == nodeV || nodeX->isDepot) return false; + + insertNode(nodeX, nodeV); + insertNode(nodeU, nodeX); + nbMoves++; // Increment move counter before updating route data + searchCompleted = false; + updateRouteData(routeU); + if (!intraRouteMove) updateRouteData(routeV); + return true; +} + +bool LocalSearch::move4() +{ + double costSuppU = params.timeCost[nodeUPrevIndex][nodeVIndex] + params.timeCost[nodeVIndex][nodeXIndex] - params.timeCost[nodeUPrevIndex][nodeUIndex] - params.timeCost[nodeUIndex][nodeXIndex]; + double costSuppV = params.timeCost[nodeVPrevIndex][nodeUIndex] + params.timeCost[nodeUIndex][nodeYIndex] - params.timeCost[nodeVPrevIndex][nodeVIndex] - params.timeCost[nodeVIndex][nodeYIndex]; + + if (!intraRouteMove) + { + // Early move pruning to save CPU time. Guarantees that this move cannot improve without checking additional (load, duration...) 
constraints + if (costSuppU + costSuppV >= routeU->penalty + routeV->penalty) return false; + + costSuppU += penaltyExcessDuration(routeU->duration + costSuppU + serviceV - serviceU) + + penaltyExcessLoad(routeU->load + loadV - loadU) + - routeU->penalty; + + costSuppV += penaltyExcessDuration(routeV->duration + costSuppV - serviceV + serviceU) + + penaltyExcessLoad(routeV->load + loadU - loadV) + - routeV->penalty; + } + + if (costSuppU + costSuppV > -MY_EPSILON) return false; + if (nodeUIndex == nodeVPrevIndex || nodeUIndex == nodeYIndex) return false; + + swapNode(nodeU, nodeV); + nbMoves++; // Increment move counter before updating route data + searchCompleted = false; + updateRouteData(routeU); + if (!intraRouteMove) updateRouteData(routeV); + return true; +} + +bool LocalSearch::move5() +{ + double costSuppU = params.timeCost[nodeUPrevIndex][nodeVIndex] + params.timeCost[nodeVIndex][nodeXNextIndex] - params.timeCost[nodeUPrevIndex][nodeUIndex] - params.timeCost[nodeXIndex][nodeXNextIndex]; + double costSuppV = params.timeCost[nodeVPrevIndex][nodeUIndex] + params.timeCost[nodeXIndex][nodeYIndex] - params.timeCost[nodeVPrevIndex][nodeVIndex] - params.timeCost[nodeVIndex][nodeYIndex]; + + if (!intraRouteMove) + { + // Early move pruning to save CPU time. Guarantees that this move cannot improve without checking additional (load, duration...) 
constraints + if (costSuppU + costSuppV >= routeU->penalty + routeV->penalty) return false; + + costSuppU += penaltyExcessDuration(routeU->duration + costSuppU - params.timeCost[nodeUIndex][nodeXIndex] + serviceV - serviceU - serviceX) + + penaltyExcessLoad(routeU->load + loadV - loadU - loadX) + - routeU->penalty; + + costSuppV += penaltyExcessDuration(routeV->duration + costSuppV + params.timeCost[nodeUIndex][nodeXIndex] - serviceV + serviceU + serviceX) + + penaltyExcessLoad(routeV->load + loadU + loadX - loadV) + - routeV->penalty; + } + + if (costSuppU + costSuppV > -MY_EPSILON) return false; + if (nodeU == nodeV->prev || nodeX == nodeV->prev || nodeU == nodeY || nodeX->isDepot) return false; + + swapNode(nodeU, nodeV); + insertNode(nodeX, nodeU); + nbMoves++; // Increment move counter before updating route data + searchCompleted = false; + updateRouteData(routeU); + if (!intraRouteMove) updateRouteData(routeV); + return true; +} + +bool LocalSearch::move6() +{ + double costSuppU = params.timeCost[nodeUPrevIndex][nodeVIndex] + params.timeCost[nodeYIndex][nodeXNextIndex] - params.timeCost[nodeUPrevIndex][nodeUIndex] - params.timeCost[nodeXIndex][nodeXNextIndex]; + double costSuppV = params.timeCost[nodeVPrevIndex][nodeUIndex] + params.timeCost[nodeXIndex][nodeYNextIndex] - params.timeCost[nodeVPrevIndex][nodeVIndex] - params.timeCost[nodeYIndex][nodeYNextIndex]; + + if (!intraRouteMove) + { + // Early move pruning to save CPU time. Guarantees that this move cannot improve without checking additional (load, duration...) 
constraints + if (costSuppU + costSuppV >= routeU->penalty + routeV->penalty) return false; + + costSuppU += penaltyExcessDuration(routeU->duration + costSuppU - params.timeCost[nodeUIndex][nodeXIndex] + params.timeCost[nodeVIndex][nodeYIndex] + serviceV + serviceY - serviceU - serviceX) + + penaltyExcessLoad(routeU->load + loadV + loadY - loadU - loadX) + - routeU->penalty; + + costSuppV += penaltyExcessDuration(routeV->duration + costSuppV + params.timeCost[nodeUIndex][nodeXIndex] - params.timeCost[nodeVIndex][nodeYIndex] - serviceV - serviceY + serviceU + serviceX) + + penaltyExcessLoad(routeV->load + loadU + loadX - loadV - loadY) + - routeV->penalty; + } + + if (costSuppU + costSuppV > -MY_EPSILON) return false; + if (nodeX->isDepot || nodeY->isDepot || nodeY == nodeU->prev || nodeU == nodeY || nodeX == nodeV || nodeV == nodeX->next) return false; + + swapNode(nodeU, nodeV); + swapNode(nodeX, nodeY); + nbMoves++; // Increment move counter before updating route data + searchCompleted = false; + updateRouteData(routeU); + if (!intraRouteMove) updateRouteData(routeV); + return true; +} + +bool LocalSearch::move7() +{ + if (nodeU->position > nodeV->position) return false; + + double cost = params.timeCost[nodeUIndex][nodeVIndex] + params.timeCost[nodeXIndex][nodeYIndex] - params.timeCost[nodeUIndex][nodeXIndex] - params.timeCost[nodeVIndex][nodeYIndex] + nodeV->cumulatedReversalDistance - nodeX->cumulatedReversalDistance; + + if (cost > -MY_EPSILON) return false; + if (nodeU->next == nodeV) return false; + + Node * nodeNum = nodeX->next; + nodeX->prev = nodeNum; + nodeX->next = nodeY; + + while (nodeNum != nodeV) + { + Node * temp = nodeNum->next; + nodeNum->next = nodeNum->prev; + nodeNum->prev = temp; + nodeNum = temp; + } + + nodeV->next = nodeV->prev; + nodeV->prev = nodeU; + nodeU->next = nodeV; + nodeY->prev = nodeX; + + nbMoves++; // Increment move counter before updating route data + searchCompleted = false; + updateRouteData(routeU); + return true; +} + 
+bool LocalSearch::move8() +{ + double cost = params.timeCost[nodeUIndex][nodeVIndex] + params.timeCost[nodeXIndex][nodeYIndex] - params.timeCost[nodeUIndex][nodeXIndex] - params.timeCost[nodeVIndex][nodeYIndex] + + nodeV->cumulatedReversalDistance + routeU->reversalDistance - nodeX->cumulatedReversalDistance + - routeU->penalty - routeV->penalty; + + // Early move pruning to save CPU time. Guarantees that this move cannot improve without checking additional (load, duration...) constraints + if (cost >= 0) return false; + + cost += penaltyExcessDuration(nodeU->cumulatedTime + nodeV->cumulatedTime + nodeV->cumulatedReversalDistance + params.timeCost[nodeUIndex][nodeVIndex]) + + penaltyExcessDuration(routeU->duration - nodeU->cumulatedTime - params.timeCost[nodeUIndex][nodeXIndex] + routeU->reversalDistance - nodeX->cumulatedReversalDistance + routeV->duration - nodeV->cumulatedTime - params.timeCost[nodeVIndex][nodeYIndex] + params.timeCost[nodeXIndex][nodeYIndex]) + + penaltyExcessLoad(nodeU->cumulatedLoad + nodeV->cumulatedLoad) + + penaltyExcessLoad(routeU->load + routeV->load - nodeU->cumulatedLoad - nodeV->cumulatedLoad); + + if (cost > -MY_EPSILON) return false; + + Node * depotU = routeU->depot; + Node * depotV = routeV->depot; + Node * depotUFin = routeU->depot->prev; + Node * depotVFin = routeV->depot->prev; + Node * depotVSuiv = depotV->next; + + Node * temp; + Node * xx = nodeX; + Node * vv = nodeV; + + while (!xx->isDepot) + { + temp = xx->next; + xx->next = xx->prev; + xx->prev = temp; + xx->route = routeV; + xx = temp; + } + + while (!vv->isDepot) + { + temp = vv->prev; + vv->prev = vv->next; + vv->next = temp; + vv->route = routeU; + vv = temp; + } + + nodeU->next = nodeV; + nodeV->prev = nodeU; + nodeX->next = nodeY; + nodeY->prev = nodeX; + + if (nodeX->isDepot) + { + depotUFin->next = depotU; + depotUFin->prev = depotVSuiv; + depotUFin->prev->next = depotUFin; + depotV->next = nodeY; + nodeY->prev = depotV; + } + else if (nodeV->isDepot) + { + 
depotV->next = depotUFin->prev; + depotV->next->prev = depotV; + depotV->prev = depotVFin; + depotUFin->prev = nodeU; + nodeU->next = depotUFin; + } + else + { + depotV->next = depotUFin->prev; + depotV->next->prev = depotV; + depotUFin->prev = depotVSuiv; + depotUFin->prev->next = depotUFin; + } + + nbMoves++; // Increment move counter before updating route data + searchCompleted = false; + updateRouteData(routeU); + updateRouteData(routeV); + return true; +} + +bool LocalSearch::move9() +{ + double cost = params.timeCost[nodeUIndex][nodeYIndex] + params.timeCost[nodeVIndex][nodeXIndex] - params.timeCost[nodeUIndex][nodeXIndex] - params.timeCost[nodeVIndex][nodeYIndex] + - routeU->penalty - routeV->penalty; + + // Early move pruning to save CPU time. Guarantees that this move cannot improve without checking additional (load, duration...) constraints + if (cost >= 0) return false; + + cost += penaltyExcessDuration(nodeU->cumulatedTime + routeV->duration - nodeV->cumulatedTime - params.timeCost[nodeVIndex][nodeYIndex] + params.timeCost[nodeUIndex][nodeYIndex]) + + penaltyExcessDuration(routeU->duration - nodeU->cumulatedTime - params.timeCost[nodeUIndex][nodeXIndex] + nodeV->cumulatedTime + params.timeCost[nodeVIndex][nodeXIndex]) + + penaltyExcessLoad(nodeU->cumulatedLoad + routeV->load - nodeV->cumulatedLoad) + + penaltyExcessLoad(nodeV->cumulatedLoad + routeU->load - nodeU->cumulatedLoad); + + if (cost > -MY_EPSILON) return false; + + Node * depotU = routeU->depot; + Node * depotV = routeV->depot; + Node * depotUFin = depotU->prev; + Node * depotVFin = depotV->prev; + Node * depotUpred = depotUFin->prev; + + Node * count = nodeY; + while (!count->isDepot) + { + count->route = routeU; + count = count->next; + } + + count = nodeX; + while (!count->isDepot) + { + count->route = routeV; + count = count->next; + } + + nodeU->next = nodeY; + nodeY->prev = nodeU; + nodeV->next = nodeX; + nodeX->prev = nodeV; + + if (nodeX->isDepot) + { + depotUFin->prev = 
depotVFin->prev; + depotUFin->prev->next = depotUFin; + nodeV->next = depotVFin; + depotVFin->prev = nodeV; + } + else + { + depotUFin->prev = depotVFin->prev; + depotUFin->prev->next = depotUFin; + depotVFin->prev = depotUpred; + depotVFin->prev->next = depotVFin; + } + + nbMoves++; // Increment move counter before updating route data + searchCompleted = false; + updateRouteData(routeU); + updateRouteData(routeV); + return true; +} + +bool LocalSearch::swapStar() +{ + SwapStarElement myBestSwapStar; + + // Preprocessing insertion costs + preprocessInsertions(routeU, routeV); + preprocessInsertions(routeV, routeU); + + // Evaluating the moves + for (nodeU = routeU->depot->next; !nodeU->isDepot; nodeU = nodeU->next) + { + for (nodeV = routeV->depot->next; !nodeV->isDepot; nodeV = nodeV->next) + { + double deltaPenRouteU = penaltyExcessLoad(routeU->load + params.cli[nodeV->cour].demand - params.cli[nodeU->cour].demand) - routeU->penalty; + double deltaPenRouteV = penaltyExcessLoad(routeV->load + params.cli[nodeU->cour].demand - params.cli[nodeV->cour].demand) - routeV->penalty; + + // Quick filter: possibly early elimination of many SWAP* due to the capacity constraints/penalties and bounds on insertion costs + if (deltaPenRouteU + nodeU->deltaRemoval + deltaPenRouteV + nodeV->deltaRemoval <= 0) + { + SwapStarElement mySwapStar; + mySwapStar.U = nodeU; + mySwapStar.V = nodeV; + + // Evaluate best reinsertion cost of U in the route of V where V has been removed + double extraV = getCheapestInsertSimultRemoval(nodeU, nodeV, mySwapStar.bestPositionU); + + // Evaluate best reinsertion cost of V in the route of U where U has been removed + double extraU = getCheapestInsertSimultRemoval(nodeV, nodeU, mySwapStar.bestPositionV); + + // Evaluating final cost + mySwapStar.moveCost = deltaPenRouteU + nodeU->deltaRemoval + extraU + deltaPenRouteV + nodeV->deltaRemoval + extraV + + penaltyExcessDuration(routeU->duration + nodeU->deltaRemoval + extraU + 
params.cli[nodeV->cour].serviceDuration - params.cli[nodeU->cour].serviceDuration) + + penaltyExcessDuration(routeV->duration + nodeV->deltaRemoval + extraV - params.cli[nodeV->cour].serviceDuration + params.cli[nodeU->cour].serviceDuration); + + if (mySwapStar.moveCost < myBestSwapStar.moveCost) + myBestSwapStar = mySwapStar; + } + } + } + + // Including RELOCATE from nodeU towards routeV (costs nothing to include in the evaluation at this step since we already have the best insertion location) + // Moreover, since the granularity criterion is different, this can lead to different improving moves + for (nodeU = routeU->depot->next; !nodeU->isDepot; nodeU = nodeU->next) + { + SwapStarElement mySwapStar; + mySwapStar.U = nodeU; + mySwapStar.bestPositionU = bestInsertClient[routeV->cour][nodeU->cour].bestLocation[0]; + double deltaDistRouteU = params.timeCost[nodeU->prev->cour][nodeU->next->cour] - params.timeCost[nodeU->prev->cour][nodeU->cour] - params.timeCost[nodeU->cour][nodeU->next->cour]; + double deltaDistRouteV = bestInsertClient[routeV->cour][nodeU->cour].bestCost[0]; + mySwapStar.moveCost = deltaDistRouteU + deltaDistRouteV + + penaltyExcessLoad(routeU->load - params.cli[nodeU->cour].demand) - routeU->penalty + + penaltyExcessLoad(routeV->load + params.cli[nodeU->cour].demand) - routeV->penalty + + penaltyExcessDuration(routeU->duration + deltaDistRouteU - params.cli[nodeU->cour].serviceDuration) + + penaltyExcessDuration(routeV->duration + deltaDistRouteV + params.cli[nodeU->cour].serviceDuration); + + if (mySwapStar.moveCost < myBestSwapStar.moveCost) + myBestSwapStar = mySwapStar; + } + + // Including RELOCATE from nodeV towards routeU + for (nodeV = routeV->depot->next; !nodeV->isDepot; nodeV = nodeV->next) + { + SwapStarElement mySwapStar; + mySwapStar.V = nodeV; + mySwapStar.bestPositionV = bestInsertClient[routeU->cour][nodeV->cour].bestLocation[0]; + double deltaDistRouteU = bestInsertClient[routeU->cour][nodeV->cour].bestCost[0]; + double 
deltaDistRouteV = params.timeCost[nodeV->prev->cour][nodeV->next->cour] - params.timeCost[nodeV->prev->cour][nodeV->cour] - params.timeCost[nodeV->cour][nodeV->next->cour]; + mySwapStar.moveCost = deltaDistRouteU + deltaDistRouteV + + penaltyExcessLoad(routeU->load + params.cli[nodeV->cour].demand) - routeU->penalty + + penaltyExcessLoad(routeV->load - params.cli[nodeV->cour].demand) - routeV->penalty + + penaltyExcessDuration(routeU->duration + deltaDistRouteU + params.cli[nodeV->cour].serviceDuration) + + penaltyExcessDuration(routeV->duration + deltaDistRouteV - params.cli[nodeV->cour].serviceDuration); + + if (mySwapStar.moveCost < myBestSwapStar.moveCost) + myBestSwapStar = mySwapStar; + } + + if (myBestSwapStar.moveCost > -MY_EPSILON) return false; + + // Applying the best move in case of improvement + if (myBestSwapStar.bestPositionU != NULL) insertNode(myBestSwapStar.U, myBestSwapStar.bestPositionU); + if (myBestSwapStar.bestPositionV != NULL) insertNode(myBestSwapStar.V, myBestSwapStar.bestPositionV); + nbMoves++; // Increment move counter before updating route data + searchCompleted = false; + updateRouteData(routeU); + updateRouteData(routeV); + return true; +} + +double LocalSearch::getCheapestInsertSimultRemoval(Node * U, Node * V, Node *& bestPosition) +{ + ThreeBestInsert * myBestInsert = &bestInsertClient[V->route->cour][U->cour]; + bool found = false; + + // Find best insertion in the route such that V is not next or pred (can only belong to the top three locations) + bestPosition = myBestInsert->bestLocation[0]; + double bestCost = myBestInsert->bestCost[0]; + found = (bestPosition != V && bestPosition->next != V); + if (!found && myBestInsert->bestLocation[1] != NULL) + { + bestPosition = myBestInsert->bestLocation[1]; + bestCost = myBestInsert->bestCost[1]; + found = (bestPosition != V && bestPosition->next != V); + if (!found && myBestInsert->bestLocation[2] != NULL) + { + bestPosition = myBestInsert->bestLocation[2]; + bestCost = 
myBestInsert->bestCost[2]; + found = true; + } + } + + // Compute insertion in the place of V + double deltaCost = params.timeCost[V->prev->cour][U->cour] + params.timeCost[U->cour][V->next->cour] - params.timeCost[V->prev->cour][V->next->cour]; + if (!found || deltaCost < bestCost) + { + bestPosition = V->prev; + bestCost = deltaCost; + } + + return bestCost; +} + +void LocalSearch::preprocessInsertions(Route * R1, Route * R2) +{ + for (Node * U = R1->depot->next; !U->isDepot; U = U->next) + { + // Performs the preprocessing + U->deltaRemoval = params.timeCost[U->prev->cour][U->next->cour] - params.timeCost[U->prev->cour][U->cour] - params.timeCost[U->cour][U->next->cour]; + if (R2->whenLastModified > bestInsertClient[R2->cour][U->cour].whenLastCalculated) + { + bestInsertClient[R2->cour][U->cour].reset(); + bestInsertClient[R2->cour][U->cour].whenLastCalculated = nbMoves; + bestInsertClient[R2->cour][U->cour].bestCost[0] = params.timeCost[0][U->cour] + params.timeCost[U->cour][R2->depot->next->cour] - params.timeCost[0][R2->depot->next->cour]; + bestInsertClient[R2->cour][U->cour].bestLocation[0] = R2->depot; + for (Node * V = R2->depot->next; !V->isDepot; V = V->next) + { + double deltaCost = params.timeCost[V->cour][U->cour] + params.timeCost[U->cour][V->next->cour] - params.timeCost[V->cour][V->next->cour]; + bestInsertClient[R2->cour][U->cour].compareAndAdd(deltaCost, V); + } + } + } +} + +void LocalSearch::insertNode(Node * U, Node * V) +{ + U->prev->next = U->next; + U->next->prev = U->prev; + V->next->prev = U; + U->prev = V; + U->next = V->next; + V->next = U; + U->route = V->route; +} + +void LocalSearch::swapNode(Node * U, Node * V) +{ + Node * myVPred = V->prev; + Node * myVSuiv = V->next; + Node * myUPred = U->prev; + Node * myUSuiv = U->next; + Route * myRouteU = U->route; + Route * myRouteV = V->route; + + myUPred->next = V; + myUSuiv->prev = V; + myVPred->next = U; + myVSuiv->prev = U; + + U->prev = myVPred; + U->next = myVSuiv; + V->prev = 
myUPred; + V->next = myUSuiv; + + U->route = myRouteV; + V->route = myRouteU; +} + +void LocalSearch::updateRouteData(Route * myRoute) +{ + int myplace = 0; + double myload = 0.; + double mytime = 0.; + double myReversalDistance = 0.; + double cumulatedX = 0.; + double cumulatedY = 0.; + + Node * mynode = myRoute->depot; + mynode->position = 0; + mynode->cumulatedLoad = 0.; + mynode->cumulatedTime = 0.; + mynode->cumulatedReversalDistance = 0.; + + bool firstIt = true; + while (!mynode->isDepot || firstIt) + { + mynode = mynode->next; + myplace++; + mynode->position = myplace; + myload += params.cli[mynode->cour].demand; + mytime += params.timeCost[mynode->prev->cour][mynode->cour] + params.cli[mynode->cour].serviceDuration; + myReversalDistance += params.timeCost[mynode->cour][mynode->prev->cour] - params.timeCost[mynode->prev->cour][mynode->cour] ; + mynode->cumulatedLoad = myload; + mynode->cumulatedTime = mytime; + mynode->cumulatedReversalDistance = myReversalDistance; + if (!mynode->isDepot) + { + cumulatedX += params.cli[mynode->cour].coordX; + cumulatedY += params.cli[mynode->cour].coordY; + if (firstIt) myRoute->sector.initialize(params.cli[mynode->cour].polarAngle); + else myRoute->sector.extend(params.cli[mynode->cour].polarAngle); + } + firstIt = false; + } + + myRoute->duration = mytime; + myRoute->load = myload; + myRoute->penalty = penaltyExcessDuration(mytime) + penaltyExcessLoad(myload); + myRoute->nbCustomers = myplace-1; + myRoute->reversalDistance = myReversalDistance; + // Remember "when" this route has been last modified (will be used to filter unnecessary move evaluations) + myRoute->whenLastModified = nbMoves ; + + if (myRoute->nbCustomers == 0) + { + myRoute->polarAngleBarycenter = 1.e30; + emptyRoutes.insert(myRoute->cour); + } + else + { + myRoute->polarAngleBarycenter = atan2(cumulatedY/(double)myRoute->nbCustomers - params.cli[0].coordY, cumulatedX/(double)myRoute->nbCustomers - params.cli[0].coordX); + emptyRoutes.erase(myRoute->cour); 
+ } +} + +void LocalSearch::loadIndividual(const Individual & indiv) +{ + emptyRoutes.clear(); + nbMoves = 0; + for (int r = 0; r < params.nbVehicles; r++) + { + Node * myDepot = &depots[r]; + Node * myDepotFin = &depotsEnd[r]; + Route * myRoute = &routes[r]; + myDepot->prev = myDepotFin; + myDepotFin->next = myDepot; + if (!indiv.chromR[r].empty()) + { + Node * myClient = &clients[indiv.chromR[r][0]]; + myClient->route = myRoute; + myClient->prev = myDepot; + myDepot->next = myClient; + for (int i = 1; i < (int)indiv.chromR[r].size(); i++) + { + Node * myClientPred = myClient; + myClient = &clients[indiv.chromR[r][i]]; + myClient->prev = myClientPred; + myClientPred->next = myClient; + myClient->route = myRoute; + } + myClient->next = myDepotFin; + myDepotFin->prev = myClient; + } + else + { + myDepot->next = myDepotFin; + myDepotFin->prev = myDepot; + } + updateRouteData(&routes[r]); + routes[r].whenLastTestedSWAPStar = -1; + for (int i = 1; i <= params.nbClients; i++) // Initializing memory structures + bestInsertClient[r][i].whenLastCalculated = -1; + } + + for (int i = 1; i <= params.nbClients; i++) // Initializing memory structures + clients[i].whenLastTestedRI = -1; +} + +void LocalSearch::exportIndividual(Individual & indiv) +{ + std::vector < std::pair > routePolarAngles ; + for (int r = 0; r < params.nbVehicles; r++) + routePolarAngles.push_back(std::pair (routes[r].polarAngleBarycenter, r)); + std::sort(routePolarAngles.begin(), routePolarAngles.end()); // empty routes have a polar angle of 1.e30, and therefore will always appear at the end + + int pos = 0; + for (int r = 0; r < params.nbVehicles; r++) + { + indiv.chromR[r].clear(); + Node * node = depots[routePolarAngles[r].second].next; + while (!node->isDepot) + { + indiv.chromT[pos] = node->cour; + indiv.chromR[r].push_back(node->cour); + node = node->next; + pos++; + } + } + + indiv.evaluateCompleteCost(params); +} + +LocalSearch::LocalSearch(Params & params) : params (params) +{ + clients = 
std::vector < Node >(params.nbClients + 1); + routes = std::vector < Route >(params.nbVehicles); + depots = std::vector < Node >(params.nbVehicles); + depotsEnd = std::vector < Node >(params.nbVehicles); + bestInsertClient = std::vector < std::vector >(params.nbVehicles, std::vector (params.nbClients + 1)); + + for (int i = 0; i <= params.nbClients; i++) + { + clients[i].cour = i; + clients[i].isDepot = false; + } + for (int i = 0; i < params.nbVehicles; i++) + { + routes[i].cour = i; + routes[i].depot = &depots[i]; + depots[i].cour = 0; + depots[i].isDepot = true; + depots[i].route = &routes[i]; + depotsEnd[i].cour = 0; + depotsEnd[i].isDepot = true; + depotsEnd[i].route = &routes[i]; + } + for (int i = 1 ; i <= params.nbClients ; i++) orderNodes.push_back(i); + for (int r = 0 ; r < params.nbVehicles ; r++) orderRoutes.push_back(r); +} + diff --git a/rl4co/envs/routing/cvrp/HGS-CVRP/Program/LocalSearch.h b/rl4co/envs/routing/cvrp/HGS-CVRP/Program/LocalSearch.h new file mode 100644 index 00000000..61eb1d18 --- /dev/null +++ b/rl4co/envs/routing/cvrp/HGS-CVRP/Program/LocalSearch.h @@ -0,0 +1,193 @@ +/*MIT License + +Copyright(c) 2020 Thibaut Vidal + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files(the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions : + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. 
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE.*/ + +#ifndef LOCALSEARCH_H +#define LOCALSEARCH_H + +#include "Individual.h" + +struct Node ; + +// Structure containing a route +struct Route +{ + int cour; // Route index + int nbCustomers; // Number of customers visited in the route + int whenLastModified; // "When" this route has been last modified + int whenLastTestedSWAPStar; // "When" the SWAP* moves for this route have been last tested + Node * depot; // Pointer to the associated depot + double duration; // Total time on the route + double load; // Total load on the route + double reversalDistance; // Difference of cost if the route is reversed + double penalty; // Current sum of load and duration penalties + double polarAngleBarycenter; // Polar angle of the barycenter of the route + CircleSector sector; // Circle sector associated to the set of customers +}; + +struct Node +{ + bool isDepot; // Tells whether this node represents a depot or not + int cour; // Node index + int position; // Position in the route + int whenLastTestedRI; // "When" the RI moves for this node have been last tested + Node * next; // Next node in the route order + Node * prev; // Previous node in the route order + Route * route; // Pointer towards the associated route + double cumulatedLoad; // Cumulated load on this route until the customer (including itself) + double cumulatedTime; // Cumulated time on this route until the customer (including itself) + double cumulatedReversalDistance; // Difference of cost if the segment of route (0...cour) is reversed (useful for 
2-opt moves with asymmetric problems) + double deltaRemoval; // Difference of cost in the current route if the node is removed (used in SWAP*) +}; + +// Structure used in SWAP* to remember the three best insertion positions of a customer in a given route +struct ThreeBestInsert +{ + int whenLastCalculated; + double bestCost[3]; + Node * bestLocation[3]; + + void compareAndAdd(double costInsert, Node * placeInsert) + { + if (costInsert >= bestCost[2]) return; + else if (costInsert >= bestCost[1]) + { + bestCost[2] = costInsert; bestLocation[2] = placeInsert; + } + else if (costInsert >= bestCost[0]) + { + bestCost[2] = bestCost[1]; bestLocation[2] = bestLocation[1]; + bestCost[1] = costInsert; bestLocation[1] = placeInsert; + } + else + { + bestCost[2] = bestCost[1]; bestLocation[2] = bestLocation[1]; + bestCost[1] = bestCost[0]; bestLocation[1] = bestLocation[0]; + bestCost[0] = costInsert; bestLocation[0] = placeInsert; + } + } + + // Resets the structure (no insertion calculated) + void reset() + { + bestCost[0] = 1.e30; bestLocation[0] = NULL; + bestCost[1] = 1.e30; bestLocation[1] = NULL; + bestCost[2] = 1.e30; bestLocation[2] = NULL; + } + + ThreeBestInsert() { reset(); }; +}; + +// Structured used to keep track of the best SWAP* move +struct SwapStarElement +{ + double moveCost = 1.e30 ; + Node * U = NULL ; + Node * bestPositionU = NULL; + Node * V = NULL; + Node * bestPositionV = NULL; +}; + +// Main local learch structure +class LocalSearch +{ + +private: + + Params & params ; // Problem parameters + bool searchCompleted; // Tells whether all moves have been evaluated without success + int nbMoves; // Total number of moves (RI and SWAP*) applied during the local search. 
Attention: this is not only a simple counter, it is also used to avoid repeating move evaluations + std::vector < int > orderNodes; // Randomized order for checking the nodes in the RI local search + std::vector < int > orderRoutes; // Randomized order for checking the routes in the SWAP* local search + std::set < int > emptyRoutes; // indices of all empty routes + int loopID; // Current loop index + + /* THE SOLUTION IS REPRESENTED AS A LINKED LIST OF ELEMENTS */ + std::vector < Node > clients; // Elements representing clients (clients[0] is a sentinel and should not be accessed) + std::vector < Node > depots; // Elements representing depots + std::vector < Node > depotsEnd; // Duplicate of the depots to mark the end of the routes + std::vector < Route > routes; // Elements representing routes + std::vector < std::vector < ThreeBestInsert > > bestInsertClient; // (SWAP*) For each route and node, storing the cheapest insertion cost + + /* TEMPORARY VARIABLES USED IN THE LOCAL SEARCH LOOPS */ + // nodeUPrev -> nodeU -> nodeX -> nodeXNext + // nodeVPrev -> nodeV -> nodeY -> nodeYNext + Node * nodeU ; + Node * nodeX ; + Node * nodeV ; + Node * nodeY ; + Route * routeU ; + Route * routeV ; + int nodeUPrevIndex, nodeUIndex, nodeXIndex, nodeXNextIndex ; + int nodeVPrevIndex, nodeVIndex, nodeYIndex, nodeYNextIndex ; + double loadU, loadX, loadV, loadY; + double serviceU, serviceX, serviceV, serviceY; + double penaltyCapacityLS, penaltyDurationLS ; + bool intraRouteMove ; + + void setLocalVariablesRouteU(); // Initializes some local variables and distances associated to routeU to avoid always querying the same values in the distance matrix + void setLocalVariablesRouteV(); // Initializes some local variables and distances associated to routeV to avoid always querying the same values in the distance matrix + + inline double penaltyExcessDuration(double myDuration) {return std::max(0., myDuration - params.durationLimit)*penaltyDurationLS;} + inline double 
penaltyExcessLoad(double myLoad) {return std::max(0., myLoad - params.vehicleCapacity)*penaltyCapacityLS;} + + /* RELOCATE MOVES */ + // (Legacy notations: move1...move9 from Prins 2004) + bool move1(); // If U is a client node, remove U and insert it after V + bool move2(); // If U and X are client nodes, remove them and insert (U,X) after V + bool move3(); // If U and X are client nodes, remove them and insert (X,U) after V + + /* SWAP MOVES */ + bool move4(); // If U and V are client nodes, swap U and V + bool move5(); // If U, X and V are client nodes, swap (U,X) and V + bool move6(); // If (U,X) and (V,Y) are client nodes, swap (U,X) and (V,Y) + + /* 2-OPT and 2-OPT* MOVES */ + bool move7(); // If route(U) == route(V), replace (U,X) and (V,Y) by (U,V) and (X,Y) + bool move8(); // If route(U) != route(V), replace (U,X) and (V,Y) by (U,V) and (X,Y) + bool move9(); // If route(U) != route(V), replace (U,X) and (V,Y) by (U,Y) and (V,X) + + /* SUB-ROUTINES FOR EFFICIENT SWAP* EVALUATIONS */ + bool swapStar(); // Calculates all SWAP* between routeU and routeV and apply the best improving move + double getCheapestInsertSimultRemoval(Node * U, Node * V, Node *& bestPosition); // Calculates the insertion cost and position in the route of V, where V is omitted + void preprocessInsertions(Route * R1, Route * R2); // Preprocess all insertion costs of nodes of route R1 in route R2 + + /* ROUTINES TO UPDATE THE SOLUTIONS */ + static void insertNode(Node * U, Node * V); // Solution update: Insert U after V + static void swapNode(Node * U, Node * V) ; // Solution update: Swap U and V + void updateRouteData(Route * myRoute); // Updates the preprocessed data of a route + + public: + + // Run the local search with the specified penalty values + void run(Individual & indiv, double penaltyCapacityLS, double penaltyDurationLS, int count=INT_MAX); + + // Loading an initial solution into the local search + void loadIndividual(const Individual & indiv); + + // Exporting the LS 
solution into an individual and calculating the penalized cost according to the original penalty weights from Params + void exportIndividual(Individual & indiv); + + // Constructor + LocalSearch(Params & params); +}; + +#endif diff --git a/rl4co/envs/routing/cvrp/HGS-CVRP/Program/Params.cpp b/rl4co/envs/routing/cvrp/HGS-CVRP/Program/Params.cpp new file mode 100644 index 00000000..4c277741 --- /dev/null +++ b/rl4co/envs/routing/cvrp/HGS-CVRP/Program/Params.cpp @@ -0,0 +1,123 @@ +#include "Params.h" + +// The universal constructor for both executable and shared library +// When the executable is run from the commandline, +// it will first generate an CVRPLIB instance from .vrp file, then supply necessary information. +Params::Params( + const std::vector& x_coords, + const std::vector& y_coords, + const std::vector>& dist_mtx, + const std::vector& service_time, + const std::vector& demands, + double vehicleCapacity, + double durationLimit, + int nbVeh, + bool isDurationConstraint, + bool verbose, + const AlgorithmParameters& ap +) + : ap(ap), isDurationConstraint(isDurationConstraint), nbVehicles(nbVeh), durationLimit(durationLimit), + vehicleCapacity(vehicleCapacity), timeCost(dist_mtx), verbose(verbose) +{ + // This marks the starting time of the algorithm + startTime = clock(); + + nbClients = (int)demands.size() - 1; // Need to substract the depot from the number of nodes + totalDemand = 0.; + maxDemand = 0.; + + // Initialize RNG + ran.seed(ap.seed); + + // check if valid coordinates are provided + areCoordinatesProvided = (demands.size() == x_coords.size()) && (demands.size() == y_coords.size()); + + cli = std::vector(nbClients + 1); + for (int i = 0; i <= nbClients; i++) + { + // If useSwapStar==false, x_coords and y_coords may be empty. + if (ap.useSwapStar == 1 && areCoordinatesProvided) + { + cli[i].coordX = x_coords[i]; + cli[i].coordY = y_coords[i]; + cli[i].polarAngle = CircleSector::positive_mod( + 32768. 
* atan2(cli[i].coordY - cli[0].coordY, cli[i].coordX - cli[0].coordX) / PI); + } + else + { + cli[i].coordX = 0.0; + cli[i].coordY = 0.0; + cli[i].polarAngle = 0.0; + } + + cli[i].serviceDuration = service_time[i]; + cli[i].demand = demands[i]; + if (cli[i].demand > maxDemand) maxDemand = cli[i].demand; + totalDemand += cli[i].demand; + } + + if (verbose && ap.useSwapStar == 1 && !areCoordinatesProvided) + std::cout << "----- NO COORDINATES HAVE BEEN PROVIDED, SWAP* NEIGHBORHOOD WILL BE DEACTIVATED BY DEFAULT" << std::endl; + + // Default initialization if the number of vehicles has not been provided by the user + if (nbVehicles == INT_MAX) + { + nbVehicles = (int)std::ceil(1.3*totalDemand/vehicleCapacity) + 3; // Safety margin: 30% + 3 more vehicles than the trivial bin packing LB + if (verbose) + std::cout << "----- FLEET SIZE WAS NOT SPECIFIED: DEFAULT INITIALIZATION TO " << nbVehicles << " VEHICLES" << std::endl; + } + else + { + if (verbose) + std::cout << "----- FLEET SIZE SPECIFIED: SET TO " << nbVehicles << " VEHICLES" << std::endl; + } + + // Calculation of the maximum distance + maxDist = 0.; + for (int i = 0; i <= nbClients; i++) + for (int j = 0; j <= nbClients; j++) + if (timeCost[i][j] > maxDist) maxDist = timeCost[i][j]; + + // Calculation of the correlated vertices for each customer (for the granular restriction) + correlatedVertices = std::vector >(nbClients + 1); + std::vector > setCorrelatedVertices = std::vector >(nbClients + 1); + std::vector > orderProximity; + for (int i = 1; i <= nbClients; i++) + { + orderProximity.clear(); + for (int j = 1; j <= nbClients; j++) + if (i != j) orderProximity.emplace_back(timeCost[i][j], j); + std::sort(orderProximity.begin(), orderProximity.end()); + + for (int j = 0; j < std::min(ap.nbGranular, nbClients - 1); j++) + { + // If i is correlated with j, then j should be correlated with i + setCorrelatedVertices[i].insert(orderProximity[j].second); + setCorrelatedVertices[orderProximity[j].second].insert(i); + 
} + } + + // Filling the vector of correlated vertices + for (int i = 1; i <= nbClients; i++) + for (int x : setCorrelatedVertices[i]) + correlatedVertices[i].push_back(x); + + // Safeguards to avoid possible numerical instability in case of instances containing arbitrarily small or large numerical values + if (maxDist < 0.1 || maxDist > 100000) + throw std::string( + "The distances are of very small or large scale. This could impact numerical stability. Please rescale the dataset and run again."); + if (maxDemand < 0.1 || maxDemand > 100000) + throw std::string( + "The demand quantities are of very small or large scale. This could impact numerical stability. Please rescale the dataset and run again."); + if (nbVehicles < std::ceil(totalDemand / vehicleCapacity)) + throw std::string("Fleet size is insufficient to service the considered clients."); + + // A reasonable scale for the initial values of the penalties + penaltyDuration = 1; + penaltyCapacity = std::max(0.1, std::min(1000., maxDist / maxDemand)); + + if (verbose) + std::cout << "----- INSTANCE SUCCESSFULLY LOADED WITH " << nbClients << " CLIENTS AND " << nbVehicles << " VEHICLES" << std::endl; +} + + diff --git a/rl4co/envs/routing/cvrp/HGS-CVRP/Program/Params.h b/rl4co/envs/routing/cvrp/HGS-CVRP/Program/Params.h new file mode 100644 index 00000000..1de772ed --- /dev/null +++ b/rl4co/envs/routing/cvrp/HGS-CVRP/Program/Params.h @@ -0,0 +1,99 @@ +/*MIT License + +Copyright(c) 2020 Thibaut Vidal + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files(the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions : + +The above copyright notice and this permission notice shall be included in all 
+copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE.*/ + +#ifndef PARAMS_H +#define PARAMS_H + +#include "CircleSector.h" +#include "AlgorithmParameters.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#define MY_EPSILON 0.00001 // Precision parameter, used to avoid numerical instabilities +#define PI 3.14159265359 + +struct Client +{ + double coordX; // Coordinate X + double coordY; // Coordinate Y + double serviceDuration; // Service duration + double demand; // Demand + int polarAngle; // Polar angle of the client around the depot, measured in degrees and truncated for convenience +}; + +class Params +{ +public: + + /* PARAMETERS OF THE GENETIC ALGORITHM */ + bool verbose; // Controls verbose level through the iterations + AlgorithmParameters ap; // Main parameters of the HGS algorithm + + /* ADAPTIVE PENALTY COEFFICIENTS */ + double penaltyCapacity; // Penalty for one unit of capacity excess (adapted through the search) + double penaltyDuration; // Penalty for one unit of duration excess (adapted through the search) + + /* START TIME OF THE ALGORITHM */ + clock_t startTime; // Start time of the optimization (set when Params is constructed) + + /* RANDOM NUMBER GENERATOR */ + std::minstd_rand ran; // Using the fastest and simplest LCG. 
The quality of random numbers is not critical for the LS, but speed is + + /* DATA OF THE PROBLEM INSTANCE */ + bool isDurationConstraint ; // Indicates if the problem includes duration constraints + int nbClients ; // Number of clients (excluding the depot) + int nbVehicles ; // Number of vehicles + double durationLimit; // Route duration limit + double vehicleCapacity; // Capacity limit + double totalDemand ; // Total demand required by the clients + double maxDemand; // Maximum demand of a client + double maxDist; // Maximum distance between two clients + std::vector< Client > cli ; // Vector containing information on each client + const std::vector< std::vector< double > >& timeCost; // Distance matrix + std::vector< std::vector< int > > correlatedVertices; // Neighborhood restrictions: For each client, list of nearby customers + bool areCoordinatesProvided; // Check if valid coordinates are provided + + // Initialization from a given data set + Params(const std::vector& x_coords, + const std::vector& y_coords, + const std::vector>& dist_mtx, + const std::vector& service_time, + const std::vector& demands, + double vehicleCapacity, + double durationLimit, + int nbVeh, + bool isDurationConstraint, + bool verbose, + const AlgorithmParameters& ap); +}; +#endif + diff --git a/rl4co/envs/routing/cvrp/HGS-CVRP/Program/Population.cpp b/rl4co/envs/routing/cvrp/HGS-CVRP/Program/Population.cpp new file mode 100644 index 00000000..f08bd22d --- /dev/null +++ b/rl4co/envs/routing/cvrp/HGS-CVRP/Program/Population.cpp @@ -0,0 +1,307 @@ +#include "Population.h" + +void Population::generatePopulation() +{ + if (params.verbose) std::cout << "----- BUILDING INITIAL POPULATION" << std::endl; + for (int i = 0; i < 4*params.ap.mu && (i == 0 || params.ap.timeLimit == 0 || (double)(clock() - params.startTime) / (double)CLOCKS_PER_SEC < params.ap.timeLimit) ; i++) + { + Individual randomIndiv(params); + split.generalSplit(randomIndiv, params.nbVehicles); + localSearch.run(randomIndiv, 
params.penaltyCapacity, params.penaltyDuration); + addIndividual(randomIndiv, true); + if (!randomIndiv.eval.isFeasible && params.ran() % 2 == 0) // Repair half of the solutions in case of infeasibility + { + localSearch.run(randomIndiv, params.penaltyCapacity*10., params.penaltyDuration*10.); + if (randomIndiv.eval.isFeasible) addIndividual(randomIndiv, false); + } + } +} + +bool Population::addIndividual(const Individual & indiv, bool updateFeasible) +{ + if (updateFeasible) + { + listFeasibilityLoad.push_back(indiv.eval.capacityExcess < MY_EPSILON); + listFeasibilityDuration.push_back(indiv.eval.durationExcess < MY_EPSILON); + listFeasibilityLoad.pop_front(); + listFeasibilityDuration.pop_front(); + } + + // Find the adequate subpopulation in relation to the individual feasibility + SubPopulation & subpop = (indiv.eval.isFeasible) ? feasibleSubpop : infeasibleSubpop; + + // Create a copy of the individual and updade the proximity structures calculating inter-individual distances + Individual * myIndividual = new Individual(indiv); + for (Individual * myIndividual2 : subpop) + { + double myDistance = brokenPairsDistance(*myIndividual,*myIndividual2); + myIndividual2->indivsPerProximity.insert({ myDistance, myIndividual }); + myIndividual->indivsPerProximity.insert({ myDistance, myIndividual2 }); + } + + // Identify the correct location in the subpopulation and insert the individual + int place = (int)subpop.size(); + while (place > 0 && subpop[place - 1]->eval.penalizedCost > indiv.eval.penalizedCost - MY_EPSILON) place--; + subpop.emplace(subpop.begin() + place, myIndividual); + + // Trigger a survivor selection if the maximimum subpopulation size is exceeded + if ((int)subpop.size() > params.ap.mu + params.ap.lambda) + while ((int)subpop.size() > params.ap.mu) + removeWorstBiasedFitness(subpop); + + // Track best solution + if (indiv.eval.isFeasible && indiv.eval.penalizedCost < bestSolutionRestart.eval.penalizedCost - MY_EPSILON) + { + bestSolutionRestart = 
indiv; // Copy + if (indiv.eval.penalizedCost < bestSolutionOverall.eval.penalizedCost - MY_EPSILON) + { + bestSolutionOverall = indiv; + searchProgress.push_back({ clock() - params.startTime , bestSolutionOverall.eval.penalizedCost }); + } + return true; + } + else + return false; +} + +void Population::updateBiasedFitnesses(SubPopulation & pop) +{ + // Ranking the individuals based on their diversity contribution (decreasing order of distance) + std::vector > ranking; + for (int i = 0 ; i < (int)pop.size(); i++) + ranking.push_back({-averageBrokenPairsDistanceClosest(*pop[i],params.ap.nbClose),i}); + std::sort(ranking.begin(), ranking.end()); + + // Updating the biased fitness values + if (pop.size() == 1) + pop[0]->biasedFitness = 0; + else + { + for (int i = 0; i < (int)pop.size(); i++) + { + double divRank = (double)i / (double)(pop.size() - 1); // Ranking from 0 to 1 + double fitRank = (double)ranking[i].second / (double)(pop.size() - 1); + if ((int)pop.size() <= params.ap.nbElite) // Elite individuals cannot be smaller than population size + pop[ranking[i].second]->biasedFitness = fitRank; + else + pop[ranking[i].second]->biasedFitness = fitRank + (1.0 - (double)params.ap.nbElite / (double)pop.size()) * divRank; + } + } +} + +void Population::removeWorstBiasedFitness(SubPopulation & pop) +{ + updateBiasedFitnesses(pop); + if (pop.size() <= 1) throw std::string("Eliminating the best individual: this should not occur in HGS"); + + Individual * worstIndividual = NULL; + int worstIndividualPosition = -1; + bool isWorstIndividualClone = false; + double worstIndividualBiasedFitness = -1.e30; + for (int i = 1; i < (int)pop.size(); i++) + { + bool isClone = (averageBrokenPairsDistanceClosest(*pop[i],1) < MY_EPSILON); // A distance equal to 0 indicates that a clone exists + if ((isClone && !isWorstIndividualClone) || (isClone == isWorstIndividualClone && pop[i]->biasedFitness > worstIndividualBiasedFitness)) + { + worstIndividualBiasedFitness = pop[i]->biasedFitness; 
+ isWorstIndividualClone = isClone; + worstIndividualPosition = i; + worstIndividual = pop[i]; + } + } + + // Removing the individual from the population and freeing memory + pop.erase(pop.begin() + worstIndividualPosition); + + // Cleaning its distances from the other individuals in the population + for (Individual * indiv2 : pop) + { + auto it = indiv2->indivsPerProximity.begin(); + while (it->second != worstIndividual) ++it; + indiv2->indivsPerProximity.erase(it); + } + + // Freeing memory + delete worstIndividual; +} + +void Population::restart() +{ + if (params.verbose) std::cout << "----- RESET: CREATING A NEW POPULATION -----" << std::endl; + for (Individual * indiv : feasibleSubpop) delete indiv ; + for (Individual * indiv : infeasibleSubpop) delete indiv; + feasibleSubpop.clear(); + infeasibleSubpop.clear(); + bestSolutionRestart = Individual(params); + generatePopulation(); +} + +void Population::managePenalties() +{ + // Setting some bounds [0.1,100000] to the penalty values for safety + double fractionFeasibleLoad = (double)std::count(listFeasibilityLoad.begin(), listFeasibilityLoad.end(), true) / (double)listFeasibilityLoad.size(); + if (fractionFeasibleLoad < params.ap.targetFeasible - 0.05 && params.penaltyCapacity < 100000.) + params.penaltyCapacity = std::min(params.penaltyCapacity * params.ap.penaltyIncrease, 100000.); + else if (fractionFeasibleLoad > params.ap.targetFeasible + 0.05 && params.penaltyCapacity > 0.1) + params.penaltyCapacity = std::max(params.penaltyCapacity * params.ap.penaltyDecrease, 0.1); + + // Setting some bounds [0.1,100000] to the penalty values for safety + double fractionFeasibleDuration = (double)std::count(listFeasibilityDuration.begin(), listFeasibilityDuration.end(), true) / (double)listFeasibilityDuration.size(); + if (fractionFeasibleDuration < params.ap.targetFeasible - 0.05 && params.penaltyDuration < 100000.) 
+ params.penaltyDuration = std::min(params.penaltyDuration * params.ap.penaltyIncrease, 100000.); + else if (fractionFeasibleDuration > params.ap.targetFeasible + 0.05 && params.penaltyDuration > 0.1) + params.penaltyDuration = std::max(params.penaltyDuration * params.ap.penaltyDecrease, 0.1); + + // Update the evaluations + for (int i = 0; i < (int)infeasibleSubpop.size(); i++) + infeasibleSubpop[i]->eval.penalizedCost = infeasibleSubpop[i]->eval.distance + + params.penaltyCapacity * infeasibleSubpop[i]->eval.capacityExcess + + params.penaltyDuration * infeasibleSubpop[i]->eval.durationExcess; + + // If needed, reorder the individuals in the infeasible subpopulation since the penalty values have changed (simple bubble sort for the sake of simplicity) + for (int i = 0; i < (int)infeasibleSubpop.size(); i++) + { + for (int j = 0; j < (int)infeasibleSubpop.size() - i - 1; j++) + { + if (infeasibleSubpop[j]->eval.penalizedCost > infeasibleSubpop[j + 1]->eval.penalizedCost + MY_EPSILON) + { + Individual * indiv = infeasibleSubpop[j]; + infeasibleSubpop[j] = infeasibleSubpop[j + 1]; + infeasibleSubpop[j + 1] = indiv; + } + } + } +} + +const Individual & Population::getBinaryTournament () +{ + // Picking two individuals with uniform distribution over the union of the feasible and infeasible subpopulations + std::uniform_int_distribution<> distr(0, feasibleSubpop.size() + infeasibleSubpop.size() - 1); + int place1 = distr(params.ran); + int place2 = distr(params.ran); + Individual * indiv1 = (place1 >= (int)feasibleSubpop.size()) ? infeasibleSubpop[place1 - feasibleSubpop.size()] : feasibleSubpop[place1]; + Individual * indiv2 = (place2 >= (int)feasibleSubpop.size()) ? 
infeasibleSubpop[place2 - feasibleSubpop.size()] : feasibleSubpop[place2]; + + // Keeping the best of the two in terms of biased fitness + updateBiasedFitnesses(feasibleSubpop); + updateBiasedFitnesses(infeasibleSubpop); + if (indiv1->biasedFitness < indiv2->biasedFitness) return *indiv1 ; + else return *indiv2 ; +} + +const Individual * Population::getBestFeasible () +{ + if (!feasibleSubpop.empty()) return feasibleSubpop[0] ; + else return NULL ; +} + +const Individual * Population::getBestInfeasible () +{ + if (!infeasibleSubpop.empty()) return infeasibleSubpop[0] ; + else return NULL ; +} + +const Individual * Population::getBestFound() +{ + if (bestSolutionOverall.eval.penalizedCost < 1.e29) return &bestSolutionOverall; + else return NULL; +} + +void Population::printState(int nbIter, int nbIterNoImprovement) +{ + if (params.verbose) + { + std::printf("It %6d %6d | T(s) %.2f", nbIter, nbIterNoImprovement, (double)(clock()-params.startTime)/(double)CLOCKS_PER_SEC); + + if (getBestFeasible() != NULL) std::printf(" | Feas %zu %.2f %.2f", feasibleSubpop.size(), getBestFeasible()->eval.penalizedCost, getAverageCost(feasibleSubpop)); + else std::printf(" | NO-FEASIBLE"); + + if (getBestInfeasible() != NULL) std::printf(" | Inf %zu %.2f %.2f", infeasibleSubpop.size(), getBestInfeasible()->eval.penalizedCost, getAverageCost(infeasibleSubpop)); + else std::printf(" | NO-INFEASIBLE"); + + std::printf(" | Div %.2f %.2f", getDiversity(feasibleSubpop), getDiversity(infeasibleSubpop)); + std::printf(" | Feas %.2f %.2f", (double)std::count(listFeasibilityLoad.begin(), listFeasibilityLoad.end(), true) / (double)listFeasibilityLoad.size(), (double)std::count(listFeasibilityDuration.begin(), listFeasibilityDuration.end(), true) / (double)listFeasibilityDuration.size()); + std::printf(" | Pen %.2f %.2f", params.penaltyCapacity, params.penaltyDuration); + std::cout << std::endl; + } +} + +double Population::brokenPairsDistance(const Individual & indiv1, const Individual & indiv2) 
+{ + int differences = 0; + for (int j = 1; j <= params.nbClients; j++) + { + if (indiv1.successors[j] != indiv2.successors[j] && indiv1.successors[j] != indiv2.predecessors[j]) differences++; + if (indiv1.predecessors[j] == 0 && indiv2.predecessors[j] != 0 && indiv2.successors[j] != 0) differences++; + } + return (double)differences / (double)params.nbClients; +} + +double Population::averageBrokenPairsDistanceClosest(const Individual & indiv, int nbClosest) +{ + double result = 0.; + int maxSize = std::min(nbClosest, indiv.indivsPerProximity.size()); + auto it = indiv.indivsPerProximity.begin(); + for (int i = 0; i < maxSize; i++) + { + result += it->first; + ++it; + } + return result / (double)maxSize; +} + +double Population::getDiversity(const SubPopulation & pop) +{ + double average = 0.; + int size = std::min(params.ap.mu, pop.size()); // Only monitoring the "mu" better solutions to avoid too much noise in the measurements + for (int i = 0; i < size; i++) average += averageBrokenPairsDistanceClosest(*pop[i],size); + if (size > 0) return average / (double)size; + else return -1.0; +} + +double Population::getAverageCost(const SubPopulation & pop) +{ + double average = 0.; + int size = std::min(params.ap.mu, pop.size()); // Only monitoring the "mu" better solutions to avoid too much noise in the measurements + for (int i = 0; i < size; i++) average += pop[i]->eval.penalizedCost; + if (size > 0) return average / (double)size; + else return -1.0; +} + +void Population::exportSearchProgress(std::string fileName, std::string instanceName) +{ + std::ofstream myfile(fileName); + for (std::pair state : searchProgress) + myfile << instanceName << ";" << params.ap.seed << ";" << state.second << ";" << (double)state.first / (double)CLOCKS_PER_SEC << std::endl; +} + +void Population::exportCVRPLibFormat(const Individual & indiv, std::string fileName) +{ + std::ofstream myfile(fileName); + if (myfile.is_open()) + { + for (int k = 0; k < (int)indiv.chromR.size(); k++) + { 
+ if (!indiv.chromR[k].empty()) + { + myfile << "Route #" << k + 1 << ":"; // Route IDs start at 1 in the file format + for (int i : indiv.chromR[k]) myfile << " " << i; + myfile << std::endl; + } + } + myfile << "Cost " << indiv.eval.penalizedCost << std::endl; + } + else std::cout << "----- IMPOSSIBLE TO OPEN: " << fileName << std::endl; +} + +Population::Population(Params & params, Split & split, LocalSearch & localSearch) : params(params), split(split), localSearch(localSearch), bestSolutionRestart(params), bestSolutionOverall(params) +{ + listFeasibilityLoad = std::list(params.ap.nbIterPenaltyManagement, true); + listFeasibilityDuration = std::list(params.ap.nbIterPenaltyManagement, true); +} + +Population::~Population() +{ + for (int i = 0; i < (int)feasibleSubpop.size(); i++) delete feasibleSubpop[i]; + for (int i = 0; i < (int)infeasibleSubpop.size(); i++) delete infeasibleSubpop[i]; +} \ No newline at end of file diff --git a/rl4co/envs/routing/cvrp/HGS-CVRP/Program/Population.h b/rl4co/envs/routing/cvrp/HGS-CVRP/Program/Population.h new file mode 100644 index 00000000..3a90dc1c --- /dev/null +++ b/rl4co/envs/routing/cvrp/HGS-CVRP/Program/Population.h @@ -0,0 +1,108 @@ +/*MIT License + +Copyright(c) 2020 Thibaut Vidal + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files(the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions : + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. 
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE.*/ + +#ifndef POPULATION_H +#define POPULATION_H + +#include "Individual.h" +#include "LocalSearch.h" +#include "Split.h" + +typedef std::vector SubPopulation ; + +class Population +{ + private: + + Params & params ; // Problem parameters + Split & split; // Split algorithm + LocalSearch & localSearch; // Local search structure + SubPopulation feasibleSubpop; // Feasible subpopulation, kept ordered by increasing penalized cost + SubPopulation infeasibleSubpop; // Infeasible subpopulation, kept ordered by increasing penalized cost + std::list listFeasibilityLoad ; // Load feasibility of recent individuals generated by LS + std::list listFeasibilityDuration ; // Duration feasibility of recent individuals generated by LS + std::vector> searchProgress; // Keeps tracks of the time stamps of successive best solutions + Individual bestSolutionRestart; // Best solution found during the current restart of the algorithm + Individual bestSolutionOverall; // Best solution found during the complete execution of the algorithm + + // Evaluates the biased fitness of all individuals in the population + void updateBiasedFitnesses(SubPopulation & pop); + + // Removes the worst individual in terms of biased fitness + void removeWorstBiasedFitness(SubPopulation & subpop); + + public: + + // Creates an initial population of individuals + void generatePopulation(); + + // Add an individual in the population (survivor selection is automatically triggered whenever the population reaches its maximum size) + // Returns TRUE if a new 
best solution of the run has been found + bool addIndividual (const Individual & indiv, bool updateFeasible); + + // Cleans all solutions and generates a new initial population (only used when running HGS until a time limit, in which case the algorithm restarts until the time limit is reached) + void restart(); + + // Adaptation of the penalty parameters + void managePenalties(); + + // Select an individal by binary tournament in the union of the feasible and infeasible subpopulations. + const Individual & getBinaryTournament(); + + // Accesses the best feasible individual + const Individual * getBestFeasible(); + + // Accesses the best infeasible individual + const Individual * getBestInfeasible(); + + // Accesses the best found solution at all time + const Individual * getBestFound(); + + // Prints population state + void printState(int nbIter, int nbIterNoImprovement); + + // Distance measure between two individuals, used for diversity calculations + double brokenPairsDistance(const Individual & indiv1, const Individual & indiv2); + + // Returns the average broken pairs distance of this individual with the nbClosest individuals in the population + double averageBrokenPairsDistanceClosest(const Individual & indiv, int nbClosest); + + // Returns the average diversity value among the 50% best individuals in the subpopulation + double getDiversity(const SubPopulation & pop); + + // Returns the average solution value among the 50% best individuals in the subpopulation + double getAverageCost(const SubPopulation & pop); + + // Exports in a file the history of solution improvements + void exportSearchProgress(std::string fileName, std::string instanceName); + + // Exports an Individual in CVRPLib format + void exportCVRPLibFormat(const Individual & indiv, std::string fileName); + + // Constructor + Population(Params & params, Split & split, LocalSearch & localSearch); + + // Destructor + ~Population(); +}; + +#endif diff --git 
a/rl4co/envs/routing/cvrp/HGS-CVRP/Program/Split.cpp b/rl4co/envs/routing/cvrp/HGS-CVRP/Program/Split.cpp new file mode 100644 index 00000000..546ffb2e --- /dev/null +++ b/rl4co/envs/routing/cvrp/HGS-CVRP/Program/Split.cpp @@ -0,0 +1,221 @@ +#include "Split.h" + +void Split::generalSplit(Individual & indiv, int nbMaxVehicles) +{ + // Do not apply Split with fewer vehicles than the trivial (LP) bin packing bound + maxVehicles = std::max(nbMaxVehicles, std::ceil(params.totalDemand/params.vehicleCapacity)); + + // Initialization of the data structures for the linear split algorithms + // Direct application of the code located at https://github.com/vidalt/Split-Library + for (int i = 1; i <= params.nbClients; i++) + { + cliSplit[i].demand = params.cli[indiv.chromT[i - 1]].demand; + cliSplit[i].serviceTime = params.cli[indiv.chromT[i - 1]].serviceDuration; + cliSplit[i].d0_x = params.timeCost[0][indiv.chromT[i - 1]]; + cliSplit[i].dx_0 = params.timeCost[indiv.chromT[i - 1]][0]; + if (i < params.nbClients) cliSplit[i].dnext = params.timeCost[indiv.chromT[i - 1]][indiv.chromT[i]]; + else cliSplit[i].dnext = -1.e30; + sumLoad[i] = sumLoad[i - 1] + cliSplit[i].demand; + sumService[i] = sumService[i - 1] + cliSplit[i].serviceTime; + sumDistance[i] = sumDistance[i - 1] + cliSplit[i - 1].dnext; + } + + // We first try the simple split, and then the Split with limited fleet if this is not successful + if (splitSimple(indiv) == 0) + splitLF(indiv); + + // Build up the rest of the Individual structure + indiv.evaluateCompleteCost(params); +} + +int Split::splitSimple(Individual & indiv) +{ + // Reinitialize the potential structures + potential[0][0] = 0; + for (int i = 1; i <= params.nbClients; i++) + potential[0][i] = 1.e30; + + // MAIN ALGORITHM -- Simple Split using Bellman's algorithm in topological order + // This code has been maintained as it is very simple and can be easily adapted to a variety of constraints, whereas the O(n) Split has a more restricted application scope 
+ if (params.isDurationConstraint) + { + for (int i = 0; i < params.nbClients; i++) + { + double load = 0.; + double distance = 0.; + double serviceDuration = 0.; + for (int j = i + 1; j <= params.nbClients && load <= 1.5 * params.vehicleCapacity ; j++) + { + load += cliSplit[j].demand; + serviceDuration += cliSplit[j].serviceTime; + if (j == i + 1) distance += cliSplit[j].d0_x; + else distance += cliSplit[j - 1].dnext; + double cost = distance + cliSplit[j].dx_0 + + params.penaltyCapacity * std::max(load - params.vehicleCapacity, 0.) + + params.penaltyDuration * std::max(distance + cliSplit[j].dx_0 + serviceDuration - params.durationLimit, 0.); + if (potential[0][i] + cost < potential[0][j]) + { + potential[0][j] = potential[0][i] + cost; + pred[0][j] = i; + } + } + } + } + else + { + Trivial_Deque queue = Trivial_Deque(params.nbClients + 1, 0); + for (int i = 1; i <= params.nbClients; i++) + { + // The front is the best predecessor for i + potential[0][i] = propagate(queue.get_front(), i, 0); + pred[0][i] = queue.get_front(); + + if (i < params.nbClients) + { + // If i is not dominated by the last of the pile + if (!dominates(queue.get_back(), i, 0)) + { + // then i will be inserted, need to remove whoever is dominated by i. 
+ while (queue.size() > 0 && dominatesRight(queue.get_back(), i, 0)) + queue.pop_back(); + queue.push_back(i); + } + // Check iteratively if front is dominated by the next front + while (queue.size() > 1 && propagate(queue.get_front(), i + 1, 0) > propagate(queue.get_next_front(), i + 1, 0) - MY_EPSILON) + queue.pop_front(); + } + } + } + + if (potential[0][params.nbClients] > 1.e29) + throw std::string("ERROR : no Split solution has been propagated until the last node"); + + // Filling the chromR structure + for (int k = params.nbVehicles - 1; k >= maxVehicles; k--) + indiv.chromR[k].clear(); + + int end = params.nbClients; + for (int k = maxVehicles - 1; k >= 0; k--) + { + indiv.chromR[k].clear(); + int begin = pred[0][end]; + for (int ii = begin; ii < end; ii++) + indiv.chromR[k].push_back(indiv.chromT[ii]); + end = begin; + } + + // Return OK in case the Split algorithm reached the beginning of the routes + return (end == 0); +} + +// Split for problems with limited fleet +int Split::splitLF(Individual & indiv) +{ + // Initialize the potential structures + potential[0][0] = 0; + for (int k = 0; k <= maxVehicles; k++) + for (int i = 1; i <= params.nbClients; i++) + potential[k][i] = 1.e30; + + // MAIN ALGORITHM -- Simple Split using Bellman's algorithm in topological order + // This code has been maintained as it is very simple and can be easily adapted to a variety of constraints, whereas the O(n) Split has a more restricted application scope + if (params.isDurationConstraint) + { + for (int k = 0; k < maxVehicles; k++) + { + for (int i = k; i < params.nbClients && potential[k][i] < 1.e29 ; i++) + { + double load = 0.; + double serviceDuration = 0.; + double distance = 0.; + for (int j = i + 1; j <= params.nbClients && load <= 1.5 * params.vehicleCapacity ; j++) // Setting a maximum limit on load infeasibility to accelerate the algorithm + { + load += cliSplit[j].demand; + serviceDuration += cliSplit[j].serviceTime; + if (j == i + 1) distance += 
cliSplit[j].d0_x; + else distance += cliSplit[j - 1].dnext; + double cost = distance + cliSplit[j].dx_0 + + params.penaltyCapacity * std::max(load - params.vehicleCapacity, 0.) + + params.penaltyDuration * std::max(distance + cliSplit[j].dx_0 + serviceDuration - params.durationLimit, 0.); + if (potential[k][i] + cost < potential[k + 1][j]) + { + potential[k + 1][j] = potential[k][i] + cost; + pred[k + 1][j] = i; + } + } + } + } + } + else // MAIN ALGORITHM -- Without duration constraints in O(n), from "Vidal, T. (2016). Split algorithm in O(n) for the capacitated vehicle routing problem. C&OR" + { + Trivial_Deque queue = Trivial_Deque(params.nbClients + 1, 0); + for (int k = 0; k < maxVehicles; k++) + { + // in the Split problem there is always one feasible solution with k routes that reaches the index k in the tour. + queue.reset(k); + + // The range of potentials < 1.29 is always an interval. + // The size of the queue will stay >= 1 until we reach the end of this interval. + for (int i = k + 1; i <= params.nbClients && queue.size() > 0; i++) + { + // The front is the best predecessor for i + potential[k + 1][i] = propagate(queue.get_front(), i, k); + pred[k + 1][i] = queue.get_front(); + + if (i < params.nbClients) + { + // If i is not dominated by the last of the pile + if (!dominates(queue.get_back(), i, k)) + { + // then i will be inserted, need to remove whoever he dominates + while (queue.size() > 0 && dominatesRight(queue.get_back(), i, k)) + queue.pop_back(); + queue.push_back(i); + } + + // Check iteratively if front is dominated by the next front + while (queue.size() > 1 && propagate(queue.get_front(), i + 1, k) > propagate(queue.get_next_front(), i + 1, k) - MY_EPSILON) + queue.pop_front(); + } + } + } + } + + if (potential[maxVehicles][params.nbClients] > 1.e29) + throw std::string("ERROR : no Split solution has been propagated until the last node"); + + // It could be cheaper to use a smaller number of vehicles + double minCost = 
potential[maxVehicles][params.nbClients]; + int nbRoutes = maxVehicles; + for (int k = 1; k < maxVehicles; k++) + if (potential[k][params.nbClients] < minCost) + {minCost = potential[k][params.nbClients]; nbRoutes = k;} + + // Filling the chromR structure + for (int k = params.nbVehicles-1; k >= nbRoutes ; k--) + indiv.chromR[k].clear(); + + int end = params.nbClients; + for (int k = nbRoutes - 1; k >= 0; k--) + { + indiv.chromR[k].clear(); + int begin = pred[k+1][end]; + for (int ii = begin; ii < end; ii++) + indiv.chromR[k].push_back(indiv.chromT[ii]); + end = begin; + } + + // Return OK in case the Split algorithm reached the beginning of the routes + return (end == 0); +} + +Split::Split(const Params & params): params(params) +{ + // Structures of the linear Split + cliSplit = std::vector (params.nbClients + 1); + sumDistance = std::vector (params.nbClients + 1,0.); + sumLoad = std::vector (params.nbClients + 1,0.); + sumService = std::vector (params.nbClients + 1, 0.); + potential = std::vector < std::vector >(params.nbVehicles + 1, std::vector (params.nbClients + 1,1.e30)); + pred = std::vector < std::vector >(params.nbVehicles + 1, std::vector (params.nbClients + 1,0)); +} diff --git a/rl4co/envs/routing/cvrp/HGS-CVRP/Program/Split.h b/rl4co/envs/routing/cvrp/HGS-CVRP/Program/Split.h new file mode 100644 index 00000000..f3cf96ec --- /dev/null +++ b/rl4co/envs/routing/cvrp/HGS-CVRP/Program/Split.h @@ -0,0 +1,118 @@ +/*MIT License + +Copyright(c) 2020 Thibaut Vidal + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files(the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions : + +The above copyright notice and this permission notice shall 
be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE.*/ + +#ifndef SPLIT_H +#define SPLIT_H + +#include "Params.h" +#include "Individual.h" + +struct ClientSplit +{ + double demand; + double serviceTime; + double d0_x; + double dx_0; + double dnext; + ClientSplit() : demand(0.), serviceTime(0.), d0_x(0.), dx_0(0.), dnext(0.) {}; +}; + +// Simple Deque which is used for all Linear Split algorithms +struct Trivial_Deque +{ + std::vector myDeque; // Simply a vector structure to keep the elements of the queue + int indexFront; // Index of the front element + int indexBack; // Index of the back element + inline void pop_front(){indexFront++;} // Removes the front element of the queue D + inline void pop_back(){indexBack--;} // Removes the back element of the queue D + inline void push_back(int i){indexBack++; myDeque[indexBack] = i;} // Appends a new element to the back of the queue D + inline int get_front(){return myDeque[indexFront];} + inline int get_next_front(){return myDeque[indexFront + 1];} + inline int get_back(){return myDeque[indexBack];} + void reset(int firstNode) { myDeque[0] = firstNode; indexBack = 0; indexFront = 0; } + inline int size(){return indexBack - indexFront + 1;} + + Trivial_Deque(int nbElements, int firstNode) + { + myDeque = std::vector (nbElements); + myDeque[0] = firstNode; + indexBack = 0; + indexFront = 0; + } +}; + +class Split +{ + + private: + + // Problem parameters + const Params & params ; + int maxVehicles ; + + /* Auxiliary data structures to run the 
Linear Split algorithm */ + std::vector < ClientSplit > cliSplit; + std::vector < std::vector < double > > potential; // Potential vector + std::vector < std::vector < int > > pred; // Indice of the predecessor in an optimal path + std::vector sumDistance; // sumDistance[i] for i > 1 contains the sum of distances : sum_{k=1}^{i-1} d_{k,k+1} + std::vector sumLoad; // sumLoad[i] for i >= 1 contains the sum of loads : sum_{k=1}^{i} q_k + std::vector sumService; // sumService[i] for i >= 1 contains the sum of service time : sum_{k=1}^{i} s_k + + // To be called with i < j only + // Computes the cost of propagating the label i until j + inline double propagate(int i, int j, int k) + { + return potential[k][i] + sumDistance[j] - sumDistance[i + 1] + cliSplit[i + 1].d0_x + cliSplit[j].dx_0 + + params.penaltyCapacity * std::max(sumLoad[j] - sumLoad[i] - params.vehicleCapacity, 0.); + } + + // Tests if i dominates j as a predecessor for all nodes x >= j+1 + // We assume that i < j + inline bool dominates(int i, int j, int k) + { + return potential[k][j] + cliSplit[j + 1].d0_x > potential[k][i] + cliSplit[i + 1].d0_x + sumDistance[j + 1] - sumDistance[i + 1] + + params.penaltyCapacity * (sumLoad[j] - sumLoad[i]); + } + + // Tests if j dominates i as a predecessor for all nodes x >= j+1 + // We assume that i < j + inline bool dominatesRight(int i, int j, int k) + { + return potential[k][j] + cliSplit[j + 1].d0_x < potential[k][i] + cliSplit[i + 1].d0_x + sumDistance[j + 1] - sumDistance[i + 1] + MY_EPSILON; + } + + // Split for unlimited fleet + int splitSimple(Individual & indiv); + + // Split for limited fleet + int splitLF(Individual & indiv); + +public: + + // General Split function (tests the unlimited fleet, and only if it does not produce a feasible solution, runs the Split algorithm for limited fleet) + void generalSplit(Individual & indiv, int nbMaxVehicles); + + // Constructor + Split(const Params & params); + +}; +#endif diff --git 
a/rl4co/envs/routing/cvrp/HGS-CVRP/Program/commandline.h b/rl4co/envs/routing/cvrp/HGS-CVRP/Program/commandline.h new file mode 100644 index 00000000..b47847f8 --- /dev/null +++ b/rl4co/envs/routing/cvrp/HGS-CVRP/Program/commandline.h @@ -0,0 +1,125 @@ +/*MIT License + +Copyright(c) 2020 Thibaut Vidal + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files(the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions : + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE.*/ + +#ifndef COMMAND_LINE_H +#define COMMAND_LINE_H + +#include +#include +#include +#include "AlgorithmParameters.h" + +class CommandLine +{ +public: + AlgorithmParameters ap = default_algorithm_parameters(); + + int nbVeh = INT_MAX; // Number of vehicles. 
Default value: infinity + std::string pathInstance; // Instance path + std::string pathSolution; // Solution path + bool verbose = true; + bool isRoundingInteger = true; + + // Reads the line of command and extracts possible options + CommandLine(int argc, char* argv[]) + { + if (argc % 2 != 1 || argc > 35 || argc < 3) + { + std::cout << "----- NUMBER OF COMMANDLINE ARGUMENTS IS INCORRECT: " << argc << std::endl; + display_help(); throw std::string("Incorrect line of command"); + } + else + { + pathInstance = std::string(argv[1]); + pathSolution = std::string(argv[2]); + for (int i = 3; i < argc; i += 2) + { + if (std::string(argv[i]) == "-t") + ap.timeLimit = atof(argv[i+1]); + else if (std::string(argv[i]) == "-it") + ap.nbIter = atoi(argv[i+1]); + else if (std::string(argv[i]) == "-seed") + ap.seed = atoi(argv[i+1]); + else if (std::string(argv[i]) == "-veh") + nbVeh = atoi(argv[i+1]); + else if (std::string(argv[i]) == "-round") + isRoundingInteger = atoi(argv[i+1]); + else if (std::string(argv[i]) == "-log") + verbose = atoi(argv[i+1]); + else if (std::string(argv[i]) == "-nbGranular") + ap.nbGranular = atoi(argv[i+1]); + else if (std::string(argv[i]) == "-mu") + ap.mu = atoi(argv[i+1]); + else if (std::string(argv[i]) == "-lambda") + ap.lambda = atoi(argv[i+1]); + else if (std::string(argv[i]) == "-nbElite") + ap.nbElite = atoi(argv[i+1]); + else if (std::string(argv[i]) == "-nbClose") + ap.nbClose = atoi(argv[i+1]); + else if (std::string(argv[i]) == "-nbIterPenaltyManagement") + ap.nbIterPenaltyManagement = atoi(argv[i+1]); + else if (std::string(argv[i]) == "-nbIterTraces") + ap.nbIterTraces = atoi(argv[i + 1]); + else if (std::string(argv[i]) == "-targetFeasible") + ap.targetFeasible = atof(argv[i+1]); + else if (std::string(argv[i]) == "-penaltyIncrease") + ap.penaltyIncrease = atof(argv[i+1]); + else if (std::string(argv[i]) == "-penaltyDecrease") + ap.penaltyDecrease = atof(argv[i+1]); + else + { + std::cout << "----- ARGUMENT NOT RECOGNIZED: " << 
std::string(argv[i]) << std::endl; + display_help(); throw std::string("Incorrect line of command"); + } + } + } + } + + // Printing information about how to use the code + void display_help() + { + std::cout << std::endl; + std::cout << "-------------------------------------------------- HGS-CVRP algorithm (2020) ---------------------------------------------------" << std::endl; + std::cout << "Call with: ./hgs instancePath solPath [-it nbIter] [-t myCPUtime] [-seed mySeed] [-veh nbVehicles] [-log verbose] " << std::endl; + std::cout << "[-it ] sets a maximum number of iterations without improvement. Defaults to 20,000 " << std::endl; + std::cout << "[-t ] sets a time limit in seconds. If this parameter is set the code will be run iteratively until the time limit " << std::endl; + std::cout << "[-seed ] sets a fixed seed. Defaults to 0 " << std::endl; + std::cout << "[-veh ] sets a prescribed fleet size. Otherwise a reasonable UB on the the fleet size is calculated " << std::endl; + std::cout << "[-round ] rounding the distance to the nearest integer or not. It can be 0 (not rounding) or 1 (rounding). Defaults to 1. " << std::endl; + std::cout << "[-log ] sets the verbose level of the algorithm log. It can be 0 or 1. Defaults to 1. " << std::endl; + std::cout << std::endl; + std::cout << "Additional Arguments: " << std::endl; + std::cout << "[-nbIterTraces ] Number of iterations between traces display during HGS execution. Defaults to 500 " << std::endl; + std::cout << "[-nbGranular ] Granular search parameter, limits the number of moves in the RI local search. Defaults to 20 " << std::endl; + std::cout << "[-mu ] Minimum population size. Defaults to 25 " << std::endl; + std::cout << "[-lambda ] Number of solutions created before reaching the maximum population size (i.e., generation size). Defaults to 40 " << std::endl; + std::cout << "[-nbElite ] Number of elite individuals. 
Defaults to 5 " << std::endl; + std::cout << "[-nbClose ] Number of closest solutions/individuals considered when calculating diversity contribution. Defaults to 4 " << std::endl; + std::cout << "[-nbIterPenaltyManagement ] Number of iterations between penalty updates. Defaults to 100 " << std::endl; + std::cout << "[-targetFeasible ] target ratio of feasible individuals between penalty updates. Defaults to 0.2 " << std::endl; + std::cout << "[-penaltyIncrease ] penalty increase if insufficient feasible individuals between penalty updates. Defaults to 1.2 " << std::endl; + std::cout << "[-penaltyDecrease ] penalty decrease if sufficient feasible individuals between penalty updates. Defaults to 0.85 " << std::endl; + std::cout << "--------------------------------------------------------------------------------------------------------------------------------" << std::endl; + std::cout << std::endl; + }; +}; +#endif diff --git a/rl4co/envs/routing/cvrp/HGS-CVRP/Program/main.cpp b/rl4co/envs/routing/cvrp/HGS-CVRP/Program/main.cpp new file mode 100644 index 00000000..029f7b13 --- /dev/null +++ b/rl4co/envs/routing/cvrp/HGS-CVRP/Program/main.cpp @@ -0,0 +1,40 @@ +#include "Genetic.h" +#include "commandline.h" +#include "LocalSearch.h" +#include "Split.h" +#include "InstanceCVRPLIB.h" +using namespace std; + +int main(int argc, char *argv[]) +{ + try + { + // Reading the arguments of the program + CommandLine commandline(argc, argv); + + // Print all algorithm parameter values + if (commandline.verbose) print_algorithm_parameters(commandline.ap); + + // Reading the data file and initializing some data structures + if (commandline.verbose) std::cout << "----- READING INSTANCE: " << commandline.pathInstance << std::endl; + InstanceCVRPLIB cvrp(commandline.pathInstance, commandline.isRoundingInteger); + + Params params(cvrp.x_coords,cvrp.y_coords,cvrp.dist_mtx,cvrp.service_time,cvrp.demands, + 
cvrp.vehicleCapacity,cvrp.durationLimit,commandline.nbVeh,cvrp.isDurationConstraint,commandline.verbose,commandline.ap); + + // Running HGS + Genetic solver(params); + solver.run(); + + // Exporting the best solution + if (solver.population.getBestFound() != NULL) + { + if (params.verbose) std::cout << "----- WRITING BEST SOLUTION IN : " << commandline.pathSolution << std::endl; + solver.population.exportCVRPLibFormat(*solver.population.getBestFound(),commandline.pathSolution); + solver.population.exportSearchProgress(commandline.pathSolution + ".PG.csv", commandline.pathInstance); + } + } + catch (const string& e) { std::cout << "EXCEPTION | " << e << std::endl; } + catch (const std::exception& e) { std::cout << "EXCEPTION | " << e.what() << std::endl; } + return 0; +} diff --git a/rl4co/envs/routing/cvrp/HGS-CVRP/README.md b/rl4co/envs/routing/cvrp/HGS-CVRP/README.md new file mode 100644 index 00000000..587ea5d7 --- /dev/null +++ b/rl4co/envs/routing/cvrp/HGS-CVRP/README.md @@ -0,0 +1,170 @@ + +[![CI_Build](https://github.com/vidalt/HGS-CVRP/actions/workflows/CI_Build.yml/badge.svg?branch=main)](https://github.com/vidalt/HGS-CVRP/actions/workflows/CI_Build.yml) + +# HGS-CVRP: A modern implementation of the Hybrid Genetic Search for the CVRP + +This is a modern implementation of the Hybrid Genetic Search (HGS) with Advanced Diversity Control of [1], specialized to the Capacitated Vehicle Routing Problem (CVRP). + +This algorithm has been designed to be transparent, specialized, and highly concise, retaining only the core elements that make this method successful. +Beyond a simple reimplementation of the original algorithm, this code also includes speed-up strategies and methodological improvements learned over the past decade of research and dedicated to the CVRP. +In particular, it relies on an additional neighborhood called SWAP*, which consists in exchanging two customers between different routes without an insertion in place. 
+ +## References + +When using this algorithm (or part of it) in derived academic studies, please refer to the following works: + +[1] Vidal, T., Crainic, T. G., Gendreau, M., Lahrichi, N., Rei, W. (2012). +A hybrid genetic algorithm for multidepot and periodic vehicle routing problems. Operations Research, 60(3), 611-624. +https://doi.org/10.1287/opre.1120.1048 (Available [HERE](https://w1.cirrelt.ca/~vidalt/papers/HGS-CIRRELT-2011.pdf) in technical report form). + +[2] Vidal, T. (2022). Hybrid genetic search for the CVRP: Open-source implementation and SWAP* neighborhood. Computers & Operations Research, 140, 105643. +https://doi.org/10.1016/j.cor.2021.105643 (Available [HERE](https://arxiv.org/abs/2012.10384) in technical report form). + +We also recommend referring to the Github version of the code used, as future versions may achieve better performance as the code evolves. +The version associated with the results presented in [2] is [v1.0.0](https://github.com/vidalt/HGS-CVRP/releases/tag/v1.0.0). + +## Other programming languages + +There exist wrappers for this code in the following languages: +* **C**: The **C_Interface** file contains a simple C API +* **Python**: The [PyHygese](https://github.com/chkwon/PyHygese) package is maintained to interact with the latest release of this algorithm +* **Julia**: The [Hygese.jl](https://github.com/chkwon/Hygese.jl) package is maintained to interact with the latest release of this algorithm + +We encourage you to consider using these wrappers in your different projects. +Please contact me if you wish to list other wrappers and interfaces in this section. + +## Scope + +This code has been designed to solve the "canonical" Capacitated Vehicle Routing Problem (CVRP). +It can also directly handle asymmetric distances as well as duration constraints. + +This code version has been designed and calibrated for medium-scale instances with up to 1,000 customers. 
+It is **not** designed in its current form to run very-large scale instances (e.g., with over 5,000 customers), as this requires additional solution strategies (e.g., decompositions and additional neighborhood limitations). +If you need to solve problems outside of this algorithm's scope, do not hesitate to contact me at <thibaut.vidal@polymtl.ca>. + +## Compiling the executable + +You need [`CMake`](https://cmake.org) to compile. + +Build with: +```console +mkdir build +cd build +cmake .. -DCMAKE_BUILD_TYPE=Release -G "Unix Makefiles" +make bin +``` +This will generate the executable file `hgs` in the `build` directory. + +Test with: +```console +ctest -R bin --verbose +``` + +## Running the algorithm + +After building the executable, try an example: +```console +./hgs ../Instances/CVRP/X-n157-k13.vrp mySolution.sol -seed 1 -t 30 +``` + +The following options are supported: +``` +Call with: ./hgs instancePath solPath [-it nbIter] [-t myCPUtime] [-bks bksPath] [-seed mySeed] [-veh nbVehicles] [-log verbose] +[-it <nbIter>] sets a maximum number of iterations without improvement. Defaults to 20,000 +[-t <myCPUtime>] sets a time limit in seconds. If this parameter is set, the code will be run iteratively until the time limit +[-seed <mySeed>] sets a fixed seed. Defaults to 0 +[-veh <nbVehicles>] sets a prescribed fleet size. Otherwise a reasonable UB on the fleet size is calculated +[-round <round>] rounding the distance to the nearest integer or not. It can be 0 (not rounding) or 1 (rounding). Defaults to 1. +[-log <verbose>] sets the verbose level of the algorithm log. It can be 0 or 1. Defaults to 1. + +Additional Arguments: +[-nbIterTraces <nbIterTraces>] Number of iterations between traces display during HGS execution. Defaults to 500 +[-nbGranular <nbGranular>] Granular search parameter, limits the number of moves in the RI local search. Defaults to 20 +[-mu <mu>] Minimum population size. Defaults to 25 +[-lambda <lambda>] Number of solutions created before reaching the maximum population size (i.e., generation size). Defaults to 40 +[-nbElite <nbElite>] Number of elite individuals. 
Defaults to 5
[-nbClose ] Number of closest solutions/individuals considered when calculating diversity contribution. Defaults to 4
[-nbIterPenaltyManagement ] Number of iterations between penalty updates. Defaults to 100
[-targetFeasible ] target ratio of feasible individuals between penalty updates. Defaults to 0.2
[-penaltyIncrease ] penalty increase if insufficient feasible individuals between penalty updates. Defaults to 1.2
[-penaltyDecrease ] penalty decrease if sufficient feasible individuals between penalty updates. Defaults to 0.85
```

There exist different conventions regarding distance calculations in the academic literature.
The default code behavior is to apply integer rounding, as it should be done on the X instances of Uchoa et al. (2017).
To change this behavior (e.g., when testing on the CMT or Golden instances), give a flag `-round 0`, when you run the executable.

The progress of the algorithm in the standard output will be displayed as:

``
It [N1] [N2] | T(s) [T] | Feas [NF] [BestF] [AvgF] | Inf [NI] [BestI] [AvgI] | Div [DivF] [DivI] | Feas [FeasC] [FeasD] | Pen [PenC] [PenD]
``
```
[N1] and [N2]: Total number of iterations and iterations without improvement
[T]: CPU time spent until now
[NF] and [NI]: Number of feasible and infeasible solutions in the subpopulations
[BestF] and [BestI]: Value of the best feasible and infeasible solution in the subpopulations
[AvgF] and [AvgI]: Average value of the solutions in the feasible and infeasible subpopulations
[DivF] and [DivI]: Diversity of the feasible and infeasible subpopulations
[FeasC] and [FeasD]: Percentage of naturally feasible solutions in relation to the capacity and duration constraints
[PenC] and [PenD]: Current penalty level per unit of excess capacity and duration
```

## Code structure

The main classes containing the logic of the algorithm are the following:
* **Params**: Stores the main data structures for the method
* **Individual**: Represents an individual 
solution in the genetic algorithm, also provides I/O functions to read and write individual solutions in CVRPLib format. +* **Population**: Stores the solutions of the genetic algorithm into two different groups according to their feasibility. Also includes the functions in charge of diversity management. +* **Genetic**: Contains the main procedures of the genetic algorithm as well as the crossover +* **LocalSearch**: Includes the local search functions, including the SWAP* neighborhood +* **Split**: Algorithms designed to decode solutions represented as giant tours into complete CVRP solutions +* **CircleSector**: Small code used to represent and manage arc sectors (to efficiently restrict the SWAP* neighborhood) + +In addition, additional classes have been created to facilitate interfacing: +* **AlgorithmParameters**: Stores the parameters of the algorithm +* **CVRPLIB** Contains the instance data and functions designed to read input data as text files according to the CVRPLIB conventions +* **commandline**: Reads the line of command +* **main**: Main code to start the algorithm +* **C_Interface**: Provides a C interface for the method + +## Compiling the shared library + +You can also build a shared library to call the HGS-CVRP algorithm from your code. + +```console +mkdir build +cd build +cmake .. -DCMAKE_BUILD_TYPE=Release -G "Unix Makefiles" +make lib +``` +This will generate the library file, `libhgscvrp.so` (Linux), `libhgscvrp.dylib` (macOS), or `hgscvrp.dll` (Windows), +in the `build` directory. + +To test calling the shared library from a C code: +```console +make lib_test_c +ctest -R lib --verbose +``` + +## Contributing + +Thank you very much for your interest in this code. +This code is still actively maintained and evolving. Pull requests and contributions seeking to improve the code in terms of readability, usability, and performance are welcome. Development is conducted in the `dev` branch. 
I recommend contacting me beforehand before any major rework.

As a general guideline, the goal of this code is to stay **simple**, **stand-alone**, and **specialized** to the CVRP.
Contributions that aim to extend this approach to different variants of the vehicle routing problem should usually remain in a separate repository.
Similarly, additional libraries or significant increases of conceptual complexity will be avoided. Indeed, when developing (meta-)heuristics, it seems always possible to do a bit better at the cost of extra conceptual complexity. The overarching goal of this code is to find a good trade-off between algorithm simplicity and performance.

There are two main types of contributions:
* Changes that do not impact the sequence of solutions found by the HGS algorithm when running `ctest` and testing other instances with a fixed seed.
This is visible by comparing the average solution value in the population and diversity through a test run. Such contributions include refactoring, simplification, and code optimization. Pull requests of this type are likely to be integrated more quickly.
* Changes that impact the sequence of solutions found by the algorithm.
In this case, I recommend contacting me beforehand with (i) a detailed description of the changes, (ii) detailed results on 10 runs of the algorithm for each of the 100 instances of Uchoa et al. (2017) before and after the changes, using the same termination criterion as used in [2](https://arxiv.org/abs/2012.10384). 
+ +## License + +[![License](http://img.shields.io/:license-mit-blue.svg?style=flat-square)](http://badges.mit-license.org) + +- **[MIT license](http://opensource.org/licenses/mit-license.php)** +- Copyright(c) 2020 Thibaut Vidal diff --git a/rl4co/envs/routing/cvrp/HGS-CVRP/build.sh b/rl4co/envs/routing/cvrp/HGS-CVRP/build.sh new file mode 100644 index 00000000..de789797 --- /dev/null +++ b/rl4co/envs/routing/cvrp/HGS-CVRP/build.sh @@ -0,0 +1,10 @@ +#!/usr/bin/env bash + +PWD=$(pwd) + +cd "$(dirname "$0")" +mkdir build +cd build +cmake .. -DCMAKE_BUILD_TYPE=Release -G "Unix Makefiles" +make lib +cd $PWD diff --git a/rl4co/envs/routing/cvrp/env.py b/rl4co/envs/routing/cvrp/env.py index b01083f4..5ba802fb 100644 --- a/rl4co/envs/routing/cvrp/env.py +++ b/rl4co/envs/routing/cvrp/env.py @@ -14,8 +14,7 @@ try: from .local_search import local_search -except Exception: - # In case some dependencies are not installed (e.g., pyvrp) +except: # In case when we fail to build HGS local_search = None from .render import render @@ -133,7 +132,7 @@ def _reset( @staticmethod def get_action_mask(td: TensorDict) -> torch.Tensor: # For demand steps_dim is inserted by indexing with id, for used_capacity insert node dim for broadcasting - exceeds_cap = td["demand"] + td["used_capacity"] > td["vehicle_capacity"] + exceeds_cap = td["demand"] + td["used_capacity"] > td["vehicle_capacity"] + 1e-5 # Nodes that cannot be visited are already visited or too much demand to be served now mask_loc = td["visited"][..., 1:].to(exceeds_cap.dtype) | exceeds_cap @@ -182,7 +181,7 @@ def check_solution_validity(td: TensorDict, actions: torch.Tensor): # Cannot use less than 0 used_cap[used_cap < 0] = 0 assert ( - used_cap <= td["vehicle_capacity"] + 1e-5 + used_cap <= td["vehicle_capacity"][:, 0] + 1e-5 ).all(), "Used more than capacity" @staticmethod @@ -257,7 +256,7 @@ def replace_selected_actions( def local_search(td: TensorDict, actions: torch.Tensor, **kwargs) -> torch.Tensor: assert ( local_search 
is not None - ), "Cannot import local_search module. Check if `pyvrp` is installed." + ), "Cannot import local_search module. Check if HGS is built." return local_search(td, actions, **kwargs) @staticmethod diff --git a/rl4co/envs/routing/cvrp/local_search.py b/rl4co/envs/routing/cvrp/local_search.py index 829368aa..3b6b1c46 100644 --- a/rl4co/envs/routing/cvrp/local_search.py +++ b/rl4co/envs/routing/cvrp/local_search.py @@ -1,49 +1,45 @@ -from functools import partial -from multiprocessing import Pool -from typing import Tuple +import os +import platform +from ctypes import ( + Structure, CDLL, POINTER, c_int, c_double, c_char, sizeof, cast, byref +) +from dataclasses import dataclass +from typing import List +import concurrent.futures import numpy as np +import sys +import random +import time import torch - -from pyvrp import ( - Client, - CostEvaluator, - Depot, - ProblemData, - RandomNumberGenerator, - Solution, - VehicleType, -) -from pyvrp.search import ( - NODE_OPERATORS, - ROUTE_OPERATORS, - LocalSearch, - NeighbourhoodParams, - compute_neighbours, -) from tensordict.tensordict import TensorDict from rl4co.utils.ops import get_distance_matrix from rl4co.utils.pylogger import get_pylogger + log = get_pylogger(__name__) -C = ( - 10**4 -) # Scaling factor for the data, to convert the float values to integers as required by PyVRP +def get_lib_filename(hgs_dir: str) -> str: + path = os.path.join(hgs_dir, "build", "libhgscvrp.so") + if not os.path.isfile(path): + raise FileNotFoundError(f"Shared library file `{path}` not found") + return path -def local_search( - td: TensorDict, - actions: torch.Tensor, - max_trials: int = 10, - neighbourhood_params: dict | None = None, - load_penalty: float = 0.2, - allow_infeasible_solution: bool = False, - seed: int = 0, - num_workers: int = 1, -): +# Check if HGS-CVRP is installed +hgs_dir = os.path.join(os.path.dirname(os.path.realpath(__file__)), "HGS-CVRP") +try: + HGS_LIBRARY_FILEPATH = get_lib_filename(hgs_dir) +except 
FileNotFoundError: + log.info("HGS-CVRP is not installed. Running the build script...") + os.popen(f"sh {hgs_dir}/build.sh").read() + HGS_LIBRARY_FILEPATH = get_lib_filename(hgs_dir) + log.info("HGS-CVRP is installed successfully.") + + +def local_search(td: TensorDict, actions: torch.Tensor, max_iterations: int = 1000) -> torch.Tensor: """ Improve the solution using local search for CVRP, based on PyVRP. @@ -51,18 +47,16 @@ def local_search( td: TensorDict, td from env with shape [batch_size,] actions: torch.Tensor, Tour indices with shape [batch_size, max_seq_len] max_trials: int, maximum number of trials for local search - neighbourhood_params: dict, parameters for neighbourhood search - load_penalty: int, penalty for exceeding the vehicle capacity allow_infeasible_solution: bool, whether to allow infeasible solutions seed: int, random seed for local search - num_workers: int, number of workers for parallel processing Returns: torch.Tensor, Improved tour indices with shape [batch_size, max_seq_len] """ # Convert tensors to numpy arrays # Note: to avoid the overhead of device transfer, we recommend to pass the tensors in cpu - actions_np = actions.detach().cpu().numpy() + actions_np = actions.detach().cpu().numpy() # [batch_size, max_seq_len] + actions_np = np.pad(actions_np, ((0, 0), (1, 1)), mode="constant") positions_np = td["locs"].detach().cpu().numpy() # [batch_size, num_loc + 1, 2] demands_np = td["demand"].detach().cpu().numpy() # [batch_size, num_loc] demands_np = np.pad(demands_np, ((0, 0), (1, 0)), mode="constant") # Add depot demand @@ -72,144 +66,409 @@ def local_search( else: distances_np = distances.detach().cpu().numpy() - max_trials = 1 if allow_infeasible_solution else max_trials - - partial_func = partial( - local_search_single, - neighbourhood_params=neighbourhood_params, - load_penalty=load_penalty, - allow_infeasible_solution=allow_infeasible_solution, - max_trials=max_trials, - seed=seed, - ) - - if num_workers > 1: - with 
Pool(processes=num_workers) as pool: - new_actions = pool.starmap( - partial_func, zip(actions_np, positions_np, demands_np, distances_np) + subroutes_all: List[List[List[int]]] = [get_subroutes(path) for path in actions_np] + with concurrent.futures.ThreadPoolExecutor() as executor: + futures = [] + for i in range(len(subroutes_all)): + future = executor.submit( + swapstar, + demands_np[i], + distances_np[i], + positions_np[i], + subroutes_all[i], + count=max_iterations, ) - else: - new_actions = [ - partial_func(*args) - for args in zip(actions_np, positions_np, demands_np, distances_np) + futures.append((i, future)) + + worst_seq_len = positions_np.shape[1] * 2 + new_actions = np.zeros((actions_np.shape[0], worst_seq_len), dtype=np.int64) + + for i, future in futures: + new_actions[i] = merge_subroutes(future.result(), worst_seq_len) + + # Remove heading and tailing zeros + max_pos = np.max(np.where(new_actions != 0)[1]) + new_actions = new_actions[:, 1: max_pos + 1] + new_actions = torch.from_numpy(new_actions).to(td.device) + + # Check the validity of the solution and use the original solution if the new solution is invalid + isvalid = check_validity(td, new_actions) + if not isvalid.all(): + new_actions[~isvalid] = 0 + orig_valid_actions = actions[~isvalid] + # pad if needed + orig_max_pos = torch.max(torch.where(orig_valid_actions != 0)[1]) + 1 + if orig_max_pos > max_pos: + new_actions = torch.nn.functional.pad( + new_actions, (0, orig_max_pos - max_pos, 0, 0), mode="constant", value=0 # type: ignore + ) + new_actions[~isvalid, :orig_max_pos] = orig_valid_actions[:, :orig_max_pos] + return new_actions + + +def get_subroutes(path, end_with_zero = True) -> List[List[int]]: + x = np.where(path == 0)[0] + subroutes = [] + for i, j in zip(x, x[1:]): + if j - i > 1: + if end_with_zero: + j = j + 1 + subroutes.append(path[i: j]) + return subroutes + + +def merge_subroutes(subroutes, length): + route = np.zeros(length, dtype=np.int64) + i = 0 + for r in subroutes: 
+ if len(r) > 2: + r = r[:-1] # remove the last zero + route[i: i + len(r)] = r + i += len(r) + return route + + +def check_validity(td: TensorDict, actions: torch.Tensor) -> torch.Tensor: + """ + Check the validity of the solution for CVRP and return a boolean tensor. + Modified from CVRPEnv.check_solution_validity in rl4co/envs/routing/cvrp/env.py + """ + # Check if tour is valid, i.e. contain 0 to n-1 + batch_size, graph_size = td["demand"].size() + sorted_pi = actions.data.sort(1)[0] + + # Sorting it should give all zeros at front and then 1...n + assert ( + torch.arange(1, graph_size + 1, out=sorted_pi.data.new()) + .view(1, -1) + .expand(batch_size, graph_size) + == sorted_pi[:, -graph_size:] + ).all() and (sorted_pi[:, :-graph_size] == 0).all(), "Invalid tour" + + # Visiting depot resets capacity so we add demand = -capacity (we make sure it does not become negative) + demand_with_depot = torch.cat((-td["vehicle_capacity"], td["demand"]), 1) + d = demand_with_depot.gather(1, actions) + + used_cap = torch.zeros_like(td["demand"][:, 0]) + valid = torch.ones(batch_size, dtype=torch.bool) + for i in range(actions.size(1)): + used_cap += d[ + :, i + ] # This will reset/make capacity negative if i == 0, e.g. 
depot visited + # Cannot use less than 0 + used_cap[used_cap < 0] = 0 + valid &= ( + used_cap <= td["vehicle_capacity"][:, 0] + 1e-5 + ) + return valid + + +########################## HGS-CVRP python wrapper ########################### +# Adapted from https://github.com/chkwon/PyHygese/blob/master/hygese/hygese.py + + +c_double_p = POINTER(c_double) +c_int_p = POINTER(c_int) +C_INT_MAX = 2 ** (sizeof(c_int) * 8 - 1) - 1 +C_DBL_MAX = sys.float_info.max + + +def write_routes(routes: List[np.ndarray], filepath: str): + with open(filepath, "w") as f: + for i, r in enumerate(routes): + f.write(f"Route #{i + 1}: "+' '.join([str(x) for x in r if x > 0])+"\n") + return + + +def read_routes(filepath) -> List[np.ndarray]: + routes = [] + with open(filepath, "r") as f: + while 1: + line = f.readline().strip() + if line.startswith("Route"): + routes.append(np.array([0, *map(int, line.split(":")[1].split()), 0])) + else: + break + return routes + + +# Must match with AlgorithmParameters.h in HGS-CVRP: https://github.com/vidalt/HGS-CVRP +class CAlgorithmParameters(Structure): + _fields_ = [ + ("nbGranular", c_int), + ("mu", c_int), + ("lambda", c_int), + ("nbElite", c_int), + ("nbClose", c_int), + ("targetFeasible", c_double), + ("seed", c_int), + ("nbIter", c_int), + ("timeLimit", c_double), + ("useSwapStar", c_int), + ] + + +@dataclass +class AlgorithmParameters: + nbGranular: int = 20 + mu: int = 25 + lambda_: int = 40 + nbElite: int = 4 + nbClose: int = 5 + targetFeasible: float = 0.2 + seed: int = 0 + nbIter: int = 20000 + timeLimit: float = 0.0 + useSwapStar: bool = True + + @property + def ctypes(self) -> CAlgorithmParameters: + return CAlgorithmParameters( + self.nbGranular, + self.mu, + self.lambda_, + self.nbElite, + self.nbClose, + self.targetFeasible, + self.seed, + self.nbIter, + self.timeLimit, + int(self.useSwapStar), + ) + + +class _SolutionRoute(Structure): + _fields_ = [("length", c_int), ("path", c_int_p)] + + +class _Solution(Structure): + _fields_ = [ + 
("cost", c_double), + ("time", c_double), + ("n_routes", c_int), + ("routes", POINTER(_SolutionRoute)), + ] + + +class RoutingSolution: + def __init__(self, sol_ptr): + if not sol_ptr: + raise TypeError("The solution pointer is null.") + + self.cost = sol_ptr[0].cost + self.time = sol_ptr[0].time + self.n_routes = sol_ptr[0].n_routes + self.routes = [] + for i in range(self.n_routes): + r = sol_ptr[0].routes[i] + path = r.path[0 : r.length] + self.routes.append(path) + + +class Solver: + def __init__(self, parameters=AlgorithmParameters(), verbose=False): + if platform.system() == "Windows": + hgs_library = CDLL(HGS_LIBRARY_FILEPATH, winmode=0) + else: + hgs_library = CDLL(HGS_LIBRARY_FILEPATH) + + self.algorithm_parameters = parameters + self.verbose = verbose + + # solve_cvrp + self._c_api_solve_cvrp = hgs_library.solve_cvrp + self._c_api_solve_cvrp.argtypes = [ + c_int, + c_double_p, + c_double_p, + c_double_p, + c_double_p, + c_double, + c_double, + c_char, + c_char, + c_int, + POINTER(CAlgorithmParameters), + c_char, ] + self._c_api_solve_cvrp.restype = POINTER(_Solution) - # padding with zero - lengths = [len(act) for act in new_actions] - max_length = max(lengths) - new_actions = np.array( - [ - np.pad(act, (0, max_length - length), mode="constant") - for act, length in zip(new_actions, lengths) + # solve_cvrp_dist_mtx + self._c_api_local_search = hgs_library.local_search + self._c_api_local_search.argtypes = [ + c_int, + c_double_p, + c_double_p, + c_double_p, + c_double_p, + c_double_p, + c_double, + c_double, + c_char, + c_int, + POINTER(CAlgorithmParameters), + c_char, + c_int, + c_int, ] - ) - return torch.from_numpy(new_actions[:, :-1].astype(np.int64)).to( - td.device - ) # We can remove the last zero - - -def local_search_single( - path: np.ndarray, - positions: np.ndarray, - demands: np.ndarray, - distances: np.ndarray, - neighbourhood_params: dict | None = None, - allow_infeasible_solution: bool = False, - load_penalty: float = 0.2, - max_trials: 
int = 10, - seed: int = 0, -) -> np.ndarray: - data = make_data(positions, demands, distances) - solution = make_solution(data, path) - ls_operator = make_search_operator(data, seed, neighbourhood_params) - - improved_solution, is_feasible = perform_local_search( - ls_operator, - solution, - int(load_penalty * C), # * C as we scale the data in `make_data` - remaining_trials=max_trials, - ) - - # Return the original path if no feasible solution is found - if not is_feasible and not allow_infeasible_solution: - return path - - # Recover the path from the sub-routes in the solution - route_list = [ - idx for route in improved_solution.routes() for idx in [0] + route.visits() - ] + [0] - return np.array(route_list) - - -def make_data( - positions: np.ndarray, demands: np.ndarray, distances: np.ndarray -) -> ProblemData: - positions = (positions * C).astype(int) - distances = (distances * C).astype(int) - - capacity = C - demands = np.round(demands * capacity).astype(int) - - return ProblemData( - clients=[ - Client(x=pos[0], y=pos[1], delivery=d) - for pos, d in zip(positions[1:], demands[1:]) - ], - depots=[Depot(x=positions[0][0], y=positions[0][1])], - vehicle_types=[ - VehicleType( - len(positions) - 1, - capacity, - 0, - name=",".join(map(str, range(1, len(positions)))), + self._c_api_local_search.restype = c_int + + # delete_solution + self._c_api_delete_sol = hgs_library.delete_solution + self._c_api_delete_sol.restype = None + self._c_api_delete_sol.argtypes = [POINTER(_Solution)] + + def local_search(self, data, routes: List[np.ndarray], count: int = 1) -> List[np.ndarray]: + # required data + demand = np.asarray(data["demands"]) + vehicle_capacity = data["vehicle_capacity"] + n_nodes = len(demand) + + # optional depot + depot = data.get("depot", 0) + if depot != 0: + raise ValueError("In HGS, the depot location must be 0.") + + # optional num_vehicles + maximum_number_of_vehicles = data.get("num_vehicles", C_INT_MAX) + + # optional service_times + 
service_times = data.get("service_times") + if service_times is None: + service_times = np.zeros(n_nodes) + else: + service_times = np.asarray(service_times) + + # optional duration_limit + duration_limit = data.get("duration_limit") + if duration_limit is None: + is_duration_constraint = False + duration_limit = C_DBL_MAX + else: + is_duration_constraint = True + + x_coords = data.get("x_coordinates") + y_coords = data.get("y_coordinates") + dist_mtx = data.get("distance_matrix") + + if x_coords is None or y_coords is None: + assert dist_mtx is not None + x_coords = np.zeros(n_nodes) + y_coords = np.zeros(n_nodes) + else: + x_coords = np.asarray(x_coords) + y_coords = np.asarray(y_coords) + + assert len(x_coords) == len(y_coords) == len(service_times) == len(demand) + assert (x_coords >= 0.0).all() + assert (y_coords >= 0.0).all() + assert (service_times >= 0.0).all() + assert (demand >= 0.0).all() + + dist_mtx = np.asarray(dist_mtx) + assert dist_mtx.shape[0] == dist_mtx.shape[1] + assert (dist_mtx >= 0.0).all() + + callid = (time.time_ns()*10000+random.randint(0,10000))%C_INT_MAX + + tmppath = "/tmp/route-{}".format(callid) + resultpath = "/tmp/swapstar-result-{}".format(callid) + write_routes(routes, tmppath) + try: + self._local_search( + x_coords, + y_coords, + dist_mtx, + service_times, + demand, + vehicle_capacity, + duration_limit, + is_duration_constraint, + maximum_number_of_vehicles, + self.algorithm_parameters, + self.verbose, + callid, + count, ) - ], - distance_matrices=[distances], - duration_matrices=[np.zeros_like(distances)], - ) + result = read_routes(resultpath) + except Exception as e: + print(e) + result = routes + else: + os.remove(resultpath) + finally: + os.remove(tmppath) + + return result + def _local_search( + self, + x_coords: np.ndarray, + y_coords: np.ndarray, + dist_mtx: np.ndarray, + service_times: np.ndarray, + demand: np.ndarray, + vehicle_capacity: int, + duration_limit: float, + is_duration_constraint: bool, + 
maximum_number_of_vehicles: int, + algorithm_parameters: AlgorithmParameters, + verbose: bool, + callid: int, + count:int, + ): + n_nodes = x_coords.size -def make_solution(data: ProblemData, path: np.ndarray) -> Solution: - # Split the paths into sub-routes by the zeros - routes = [ - arr[1:].tolist() for arr in np.split(path, np.where(path == 0)[0]) if len(arr) > 1 - ] - return Solution(data, routes) - - -def make_search_operator( - data: ProblemData, seed=0, neighbourhood_params: dict | None = None -) -> LocalSearch: - rng = RandomNumberGenerator(seed) - neighbours = compute_neighbours( - data, NeighbourhoodParams(**(neighbourhood_params or {})) - ) - ls = LocalSearch(data, rng, neighbours) - for node_op in NODE_OPERATORS: - ls.add_node_operator(node_op(data)) - for route_op in ROUTE_OPERATORS: - ls.add_route_operator(route_op(data)) - return ls - - -def perform_local_search( - ls_operator: LocalSearch, - solution: Solution, - load_penalty: int, - remaining_trials: int = 5, -) -> Tuple[Solution, bool]: - cost_evaluator = CostEvaluator( - load_penalty=load_penalty, tw_penalty=0, dist_penalty=0 - ) - improved_solution = ls_operator(solution, cost_evaluator) - remaining_trials -= 1 - if is_feasible := improved_solution.is_feasible() or remaining_trials == 0: - return improved_solution, is_feasible - - # print("Warning: Infeasible solution found from local search.", - # "This will slow down the search due to the repeated local search runs.") - - # If infeasible, run the local search again with a higher penalty - return perform_local_search( - ls_operator, solution, load_penalty * 10, remaining_trials=remaining_trials - ) + x_ct = x_coords.astype(c_double).ctypes + y_ct = y_coords.astype(c_double).ctypes + s_ct = service_times.astype(c_double).ctypes + d_ct = demand.astype(c_double).ctypes + + m_ct = dist_mtx.reshape(n_nodes * n_nodes).astype(c_double).ctypes + ap_ct = algorithm_parameters.ctypes + + + # struct Solution *solve_cvrp_dist_mtx( + # int n, double* x, 
double* y, double *dist_mtx, double *serv_time, double *dem, + # double vehicleCapacity, double durationLimit, char isDurationConstraint, + # int max_nbVeh, const struct AlgorithmParameters *ap, char verbose); + sol_p = self._c_api_local_search( + n_nodes, + cast(x_ct, c_double_p), # type: ignore + cast(y_ct, c_double_p), # type: ignore + cast(m_ct, c_double_p), # type: ignore + cast(s_ct, c_double_p), # type: ignore + cast(d_ct, c_double_p), # type: ignore + vehicle_capacity, + duration_limit, + is_duration_constraint, + maximum_number_of_vehicles, + byref(ap_ct), + verbose, + callid, + count, + ) + + result = sol_p + return result + + +def swapstar(demands, matrix, positions, routes: List[np.ndarray], count=1): + ap = AlgorithmParameters() + hgs_solver = Solver(parameters=ap, verbose=False) + + data = dict() + x = positions[:, 0] + y = positions[:, 1] + data['x_coordinates'] = x + data['y_coordinates'] = y + + data['depot'] = 0 + data['demands'] = demands * 1000 + data["num_vehicles"] = len(routes) + data['vehicle_capacity'] = 1000.001 # to avoid floating-point error + + # Solve with calculated distances + data['distance_matrix'] = matrix + result = hgs_solver.local_search(data, routes, count) + return result diff --git a/rl4co/models/__init__.py b/rl4co/models/__init__.py index 339c3b01..6bf6d7d9 100644 --- a/rl4co/models/__init__.py +++ b/rl4co/models/__init__.py @@ -26,6 +26,7 @@ from rl4co.models.zoo.dact import DACT, DACTPolicy from rl4co.models.zoo.deepaco import DeepACO, DeepACOPolicy from rl4co.models.zoo.eas import EAS, EASEmb, EASLay +from rl4co.models.zoo.gfacs import GFACS, GFACSPolicy from rl4co.models.zoo.ham import ( HeterogeneousAttentionModel, HeterogeneousAttentionModelPolicy, diff --git a/rl4co/models/common/constructive/base.py b/rl4co/models/common/constructive/base.py index 9abcdf70..804bb867 100644 --- a/rl4co/models/common/constructive/base.py +++ b/rl4co/models/common/constructive/base.py @@ -58,7 +58,7 @@ def forward( def 
pre_decoder_hook( self, td: TensorDict, env: RL4COEnvBase, hidden: Any = None, num_starts: int = 0 - ) -> Tuple[TensorDict, Any, RL4COEnvBase]: + ) -> Tuple[TensorDict, RL4COEnvBase, Any]: """By default, we don't need to do anything here. Args: @@ -68,7 +68,7 @@ def pre_decoder_hook( num_starts: Number of starts for multistart decoding Returns: - Tuple containing the updated hidden state, TensorDict, and environment + Tuple containing the updated Tensordict, environment, and hidden state """ return td, env, hidden diff --git a/rl4co/models/nn/env_embeddings/edge.py b/rl4co/models/nn/env_embeddings/edge.py index 97fa3fb5..fda40684 100644 --- a/rl4co/models/nn/env_embeddings/edge.py +++ b/rl4co/models/nn/env_embeddings/edge.py @@ -26,11 +26,11 @@ def env_edge_embedding(env_name: str, config: dict) -> nn.Module: embedding_registry = { "tsp": TSPEdgeEmbedding, "atsp": ATSPEdgeEmbedding, - "cvrp": TSPEdgeEmbedding, + "cvrp": CVRPEdgeEmbedding, "sdvrp": TSPEdgeEmbedding, - "pctsp": TSPEdgeEmbedding, + "pctsp": CVRPEdgeEmbedding, "spctsp": TSPEdgeEmbedding, - "op": TSPEdgeEmbedding, + "op": CVRPEdgeEmbedding, "dpp": TSPEdgeEmbedding, "mdpp": TSPEdgeEmbedding, "pdp": TSPEdgeEmbedding, @@ -93,12 +93,70 @@ def _cost_matrix_to_graph(self, batch_cost_matrix: Tensor, init_embeddings: Tens edge_index = get_full_graph_edge_index( cost_matrix.shape[0], self_loop=False ).to(cost_matrix.device) - edge_attr = cost_matrix[edge_index[0], edge_index[1]] + edge_attr = cost_matrix[edge_index[0], edge_index[1]].unsqueeze(-1) graph = Data( - x=init_embeddings[index], - edge_index=edge_index, - edge_attr=edge_attr, + x=init_embeddings[index], edge_index=edge_index, edge_attr=edge_attr + ) + graph_data.append(graph) + + batch = Batch.from_data_list(graph_data) + batch.edge_attr = self.edge_embed(batch.edge_attr) + return batch + +class CVRPEdgeEmbedding(TSPEdgeEmbedding): + """Edge embedding module for the Capacitated Vehicle Routing Problem (CVRP). 
+ Unlike the TSP, all nodes in the CVRP should be connected to the depot, + so each node will have k_sparse + 1 edges. + """ + + def _cost_matrix_to_graph(self, batch_cost_matrix: Tensor, init_embeddings: Tensor): + """Convert batched cost_matrix to batched PyG graph, and calculate edge embeddings. + + Args: + batch_cost_matrix: Tensor of shape [batch_size, n, n] + init_embedding: init embeddings + """ + graph_data = [] + for index, cost_matrix in enumerate(batch_cost_matrix): + if self.sparsify: + edge_index, edge_attr = sparsify_graph( + cost_matrix[1:, 1:], self.k_sparse, self_loop=False + ) + edge_index = edge_index + 1 # because we removed the depot + # Note here + edge_index = torch.cat( + [ + edge_index, + # All nodes should be connected to the depot + torch.stack( + [ + torch.arange(1, cost_matrix.shape[0]), + torch.zeros(cost_matrix.shape[0] - 1, dtype=torch.long), + ] + ).to(edge_index.device), + # Depot should be connected to all nodes + torch.stack( + [ + torch.zeros(cost_matrix.shape[0] - 1, dtype=torch.long), + torch.arange(1, cost_matrix.shape[0]), + ] + ).to(edge_index.device), + ], + dim=1, + ) + edge_attr = torch.cat( + [edge_attr, cost_matrix[1:, [0]], cost_matrix[[0], 1:].t()], dim=0 + ) + + else: + edge_index = get_full_graph_edge_index( + cost_matrix.shape[0], self_loop=False + ).to(cost_matrix.device) + edge_attr = cost_matrix[edge_index[0], edge_index[1]].unsqueeze(-1) + + graph = Data( + x=init_embeddings[index], edge_index=edge_index, edge_attr=edge_attr ) graph_data.append(graph) diff --git a/rl4co/models/zoo/deepaco/antsystem.py b/rl4co/models/zoo/deepaco/antsystem.py index 1965cd3d..bfa8ef18 100644 --- a/rl4co/models/zoo/deepaco/antsystem.py +++ b/rl4co/models/zoo/deepaco/antsystem.py @@ -25,9 +25,7 @@ class AntSystem: beta: Importance of heuristic information in the decision-making process. Defaults to 1.0. decay: Rate at which pheromone evaporates. Should be between 0 and 1. Defaults to 0.95. Q: Rate at which pheromone deposits. 
Defaults to `1 / n_ants`. - temperature: Temperature for the softmax during decoding. Defaults to 0.1. pheromone: Initial pheromone matrix. Defaults to `torch.ones_like(log_heuristic)`. - require_logprobs: Whether to require the log probability of actions. Defaults to False. use_local_search: Whether to use local_search provided by the env. Default to False. use_nls: Whether to use neural-guided local search provided by the env. Default to False. n_perturbations: Number of perturbations to be used for nls. Defaults to 5. @@ -43,9 +41,7 @@ def __init__( beta: float = 1.0, decay: float = 0.95, Q: Optional[float] = None, - temperature: float = 0.1, pheromone: Optional[Tensor] = None, - require_logprobs: bool = False, use_local_search: bool = False, use_nls: bool = False, n_perturbations: int = 5, @@ -59,9 +55,8 @@ def __init__( self.beta = beta self.decay = decay self.Q = 1 / self.n_ants if Q is None else Q - self.temperature = temperature - self.log_heuristic = log_heuristic / self.temperature + self.log_heuristic = log_heuristic if pheromone is None: self.pheromone = torch.ones_like(log_heuristic) @@ -70,8 +65,7 @@ def __init__( self.pheromone = pheromone self.final_actions = self.final_reward = None - self.require_logprobs = require_logprobs - self.all_records = [] + self.final_reward_cache = torch.zeros(self.batch_size, 0, device=log_heuristic.device) self.use_local_search = use_local_search assert not (use_nls and not use_local_search), "use_nls requires use_local_search" @@ -92,7 +86,7 @@ def heuristic_dist(self) -> torch.Tensor: @staticmethod def select_start_node_fn( - td: TensorDict, env: RL4COEnvBase, num_starts: int, start_node: Optional[int]=None + td: TensorDict, env: RL4COEnvBase, num_starts: int, start_node: Optional[int] = None ): if env.name == "tsp" and start_node is not None: # For now, only TSP supports explicitly setting the start node @@ -126,8 +120,8 @@ def run( td = td_initial.clone() self._one_step(td, env) + assert self.final_reward is not 
None action_matrix = self._convert_final_action_to_matrix() - assert action_matrix is not None and self.final_reward is not None td, env = self._recreate_final_routes(td_initial, env, action_matrix) return td, action_matrix, self.final_reward @@ -187,9 +181,6 @@ def _sampling( logprobs, actions, td, env = decode_strategy.post_decoder_hook(td, env) reward = env.get_reward(td, actions) - if self.require_logprobs: - self.all_records.append((logprobs, actions, reward, td.get("mask", None))) - return td, env, actions, reward def local_search( @@ -249,6 +240,9 @@ def _update_results(self, actions, reward): self.final_actions[index] = best_actions[index] self.final_reward[require_update] = best_reward[require_update] + self.final_reward_cache = torch.cat( + [self.final_reward_cache, self.final_reward.unsqueeze(-1)], -1 + ) return best_index def _update_pheromone(self, actions, reward): @@ -288,53 +282,14 @@ def _recreate_final_routes(self, td, env, action_matrix): assert td["done"].all() return td, env - def get_logp(self): - """Get the log probability (logprobs) values recorded during the execution of the algorithm. - - Returns: - results: Tuple containing the log probability values, - actions chosen, rewards obtained, and mask values (if available). - - Raises: - AssertionError: If `require_logp` is not enabled. 
- """ - - assert ( - self.require_logprobs - ), "Please enable `require_logp` to record logprobs values" - - logprobs_list, actions_list, reward_list, mask_list = [], [], [], [] - - for logprobs, actions, reward, mask in self.all_records: - logprobs_list.append(logprobs) - actions_list.append(actions) - reward_list.append(reward) - mask_list.append(mask) - - if mask_list[0] is None: - mask_list = None - else: - mask_list = torch.stack(mask_list, 0) - - # reset records - self.all_records = [] - - return ( - torch.stack(logprobs_list, 0), - torch.stack(actions_list, 0), - torch.stack(reward_list, 0), - mask_list, - ) - @staticmethod @lru_cache(5) def _batch_action_indices(batch_size: int, n_actions: int, device: torch.device): batchindex = torch.arange(batch_size, device=device) return batchindex.unsqueeze(1).repeat(1, n_actions).view(-1) - def _convert_final_action_to_matrix(self) -> Optional[Tensor]: - if self.final_actions is None: - return None + def _convert_final_action_to_matrix(self) -> Tensor: + assert self.final_actions is not None action_count = max(len(actions) for actions in self.final_actions) mat_actions = torch.zeros( (self.batch_size, action_count), @@ -343,5 +298,4 @@ def _convert_final_action_to_matrix(self) -> Optional[Tensor]: ) for index, action in enumerate(self.final_actions): mat_actions[index, : len(action)] = action - return mat_actions diff --git a/rl4co/models/zoo/deepaco/model.py b/rl4co/models/zoo/deepaco/model.py index b4dfdbd1..376c28bf 100644 --- a/rl4co/models/zoo/deepaco/model.py +++ b/rl4co/models/zoo/deepaco/model.py @@ -1,5 +1,8 @@ from typing import Any, Optional +from tensordict import TensorDict +import torch + from rl4co.envs.common.base import RL4COEnvBase from rl4co.models.rl import REINFORCE from rl4co.models.rl.reinforce.baselines import REINFORCEBaseline @@ -12,7 +15,9 @@ class DeepACO(REINFORCE): Args: env: Environment to use for the algorithm policy: Policy to use for the algorithm - baseline: REINFORCE baseline. 
Defaults to exponential + baseline: REINFORCE baseline. Defaults to "no" because the shared baseline is manually implemented. + train_with_local_search: Whether to train with local search. Defaults to False. + ls_reward_aug_W: Coefficient to be used for the reward augmentation with the local search. Defaults to 0.95. policy_kwargs: Keyword arguments for policy baseline_kwargs: Keyword arguments for baseline **kwargs: Keyword arguments passed to the superclass @@ -22,16 +27,23 @@ def __init__( self, env: RL4COEnvBase, policy: Optional[DeepACOPolicy] = None, - baseline: REINFORCEBaseline | str = "no", + baseline: REINFORCEBaseline | str = "no", # Shared baseline is manually implemented + train_with_local_search: bool = True, + ls_reward_aug_W: float = 0.95, policy_kwargs: dict = {}, baseline_kwargs: dict = {}, **kwargs, ): if policy is None: - policy = DeepACOPolicy(env_name=env.name, **policy_kwargs) + policy = DeepACOPolicy( + env_name=env.name, train_with_local_search=train_with_local_search, **policy_kwargs + ) super().__init__(env, policy, baseline, baseline_kwargs, **kwargs) + self.train_with_local_search = train_with_local_search + self.ls_reward_aug_W = ls_reward_aug_W + def shared_step( self, batch: Any, @@ -45,7 +57,36 @@ def shared_step( # Compute loss if phase == "train": - out["loss"] = -(out["advantage"] * out["log_likelihood"]).mean() + out["loss"] = self.calculate_loss(td, batch, out) metrics = self.log_metrics(out, phase, dataloader_idx=dataloader_idx) return {"loss": out.get("loss", None), **metrics} + + def calculate_loss( + self, + td: TensorDict, + batch: TensorDict, + policy_out: dict, + reward: Optional[torch.Tensor] = None, + log_likelihood: Optional[torch.Tensor] = None, + ): + """Calculate loss for REINFORCE algorithm. + + Args: + td: TensorDict containing the current state of the environment + batch: Batch of data. 
This is used to get the extra loss terms, e.g., REINFORCE baseline + policy_out: Output of the policy network + reward: Reward tensor. If None, it is taken from `policy_out` + log_likelihood: Log-likelihood tensor. If None, it is taken from `policy_out` + """ + reward = policy_out["reward"] + advantage = reward - reward.mean(dim=1, keepdim=True) # Shared baseline + + if self.train_with_local_search: + ls_reward = policy_out["ls_reward"] + ls_advantage = ls_reward - ls_reward.mean(dim=1, keepdim=True) # Shared baseline + weighted_advantage = advantage * (1 - self.ls_reward_aug_W) + ls_advantage * self.ls_reward_aug_W + else: + weighted_advantage = advantage + + return -(weighted_advantage * policy_out["log_likelihood"]).mean() diff --git a/rl4co/models/zoo/deepaco/policy.py b/rl4co/models/zoo/deepaco/policy.py index af05eed5..34494a28 100644 --- a/rl4co/models/zoo/deepaco/policy.py +++ b/rl4co/models/zoo/deepaco/policy.py @@ -10,6 +10,7 @@ ) from rl4co.models.zoo.deepaco.antsystem import AntSystem from rl4co.models.zoo.nargnn.encoder import NARGNNEncoder +from rl4co.utils.decoding import modify_logits_for_top_k_filtering, modify_logits_for_top_p_filtering from rl4co.utils.ops import batchify, unbatchify from rl4co.utils.utils import merge_with_defaults @@ -22,12 +23,12 @@ class DeepACOPolicy(NonAutoregressivePolicy): Args: encoder: Encoder module. Can be passed by sub-classes env_name: Name of the environment used to initialize embeddings - temperature: Temperature for the softmax during decoding. Defaults to 0.1. + temperature: Temperature for the softmax during decoding. Defaults to 1.0. aco_class: Class representing the ACO algorithm to be used. Defaults to :class:`AntSystem`. aco_kwargs: Additional arguments to be passed to the ACO algorithm. + train_with_local_search: Whether to train with local search. Defaults to False. n_ants: Number of ants to be used in the ACO algorithm. Can be an integer or dictionary. Defaults to 20. 
n_iterations: Number of iterations to run the ACO algorithm. Can be an integer or dictionary. Defaults to `dict(train=1, val=20, test=100)`. - ls_reward_aug_W: Coefficient to be used for the reward augmentation with the local search. Defaults to 0.95. encoder_kwargs: Additional arguments to be passed to the encoder. """ @@ -36,16 +37,17 @@ def __init__( encoder: Optional[NonAutoregressiveEncoder] = None, env_name: str = "tsp", temperature: float = 1.0, + top_p: float = 0.0, + top_k: int = 0, aco_class: Optional[Type[AntSystem]] = None, aco_kwargs: dict = {}, train_with_local_search: bool = True, n_ants: Optional[int | dict] = None, n_iterations: Optional[int | dict] = None, - ls_reward_aug_W: float = 0.95, **encoder_kwargs, ): if encoder is None: - encoder = NARGNNEncoder(**encoder_kwargs) + encoder = NARGNNEncoder(env_name=env_name, **encoder_kwargs) super(DeepACOPolicy, self).__init__( encoder=encoder, @@ -56,23 +58,28 @@ def __init__( test_decode_type="multistart_sampling", ) + self.top_p = top_p + self.top_k = top_k + self.aco_class = AntSystem if aco_class is None else aco_class self.aco_kwargs = aco_kwargs self.train_with_local_search = train_with_local_search + if train_with_local_search: + assert self.aco_kwargs.get("use_local_search", False) self.n_ants = merge_with_defaults(n_ants, train=30, val=48, test=48) self.n_iterations = merge_with_defaults(n_iterations, train=1, val=5, test=10) - self.ls_reward_aug_W = ls_reward_aug_W + self.top_p = top_p + self.top_k = top_k def forward( self, td_initial: TensorDict, env: Optional[str | RL4COEnvBase] = None, - calc_reward: bool = True, phase: str = "train", - actions=None, return_actions: bool = True, return_hidden: bool = True, - **kwargs, + actions=None, + **decoding_kwargs, ): """ Forward method. During validation and testing, the policy runs the ACO algorithm to construct solutions. 
@@ -80,75 +87,70 @@ def forward( """ n_ants = self.n_ants[phase] # Instantiate environment if needed - if (phase != "train" or self.ls_reward_aug_W > 0) and ( + if (phase != "train" or self.train_with_local_search) and ( env is None or isinstance(env, str) ): env_name = self.env_name if env is None else env env = get_env(env_name) + else: + assert isinstance(env, RL4COEnvBase), "env must be an instance of RL4COEnvBase" if phase == "train": select_start_nodes_fn = partial( self.aco_class.select_start_node_fn, start_node=self.aco_kwargs.get("start_node", None), ) - kwargs.update({"select_start_nodes_fn": select_start_nodes_fn}) + decoding_kwargs.update( + { + "select_start_nodes_fn": select_start_nodes_fn, + # TODO: Are they useful for training too? + # "top_p": self.top_p, + # "top_k": self.top_k, + } + ) # we just use the constructive policy outdict = super().forward( td_initial, env, phase=phase, decode_type="multistart_sampling", - calc_reward=calc_reward, + calc_reward=True, num_starts=n_ants, actions=actions, return_actions=return_actions, return_hidden=return_hidden, - **kwargs, + **decoding_kwargs, ) - # manually compute the advantage - reward = unbatchify(outdict["reward"], n_ants) - advantage = reward - reward.mean(dim=1, keepdim=True) + outdict["reward"] = unbatchify(outdict["reward"], n_ants) - if self.ls_reward_aug_W > 0 and self.train_with_local_search: + if self.train_with_local_search: heatmap_logits = outdict["hidden"] - aco = self.aco_class( - heatmap_logits, - n_ants=n_ants, - temperature=self.aco_kwargs.get("temperature", self.temperature), - **self.aco_kwargs, - ) - - actions = outdict["actions"] + # TODO: Refactor this so that we don't need to use the aco object + aco = self.aco_class(heatmap_logits, n_ants=n_ants, **self.aco_kwargs) _, ls_reward = aco.local_search( - batchify(td_initial, n_ants), env, actions + batchify(td_initial, n_ants), env, outdict["actions"] # type:ignore ) + outdict["ls_reward"] = unbatchify(ls_reward, n_ants) - 
ls_reward = unbatchify(ls_reward, n_ants) - ls_advantage = ls_reward - ls_reward.mean(dim=1, keepdim=True) - advantage = ( - advantage * (1 - self.ls_reward_aug_W) - + ls_advantage * self.ls_reward_aug_W - ) - - outdict["advantage"] = advantage outdict["log_likelihood"] = unbatchify(outdict["log_likelihood"], n_ants) - return outdict heatmap_logits, _ = self.encoder(td_initial) + heatmap_logits /= self.temperature - aco = self.aco_class( - heatmap_logits, - n_ants=self.n_ants[phase], - temperature=self.aco_kwargs.get("temperature", self.temperature), - **self.aco_kwargs, - ) + if self.top_k > 0: + self.top_k = min(self.top_k, heatmap_logits.size(-1)) # safety check + heatmap_logits = modify_logits_for_top_k_filtering(heatmap_logits, self.top_k) + + if self.top_p > 0: + assert self.top_p <= 1.0, "top-p should be in (0, 1]." + heatmap_logits = modify_logits_for_top_p_filtering(heatmap_logits, self.top_p) + + aco = self.aco_class(heatmap_logits, n_ants=n_ants, **self.aco_kwargs) td, actions, reward = aco.run(td_initial, env, self.n_iterations[phase]) - out = {} - if calc_reward: - out["reward"] = reward + out = {"reward": reward} if return_actions: out["actions"] = actions diff --git a/rl4co/models/zoo/gfacs/__init__.py b/rl4co/models/zoo/gfacs/__init__.py new file mode 100644 index 00000000..564477f8 --- /dev/null +++ b/rl4co/models/zoo/gfacs/__init__.py @@ -0,0 +1,2 @@ +from rl4co.models.zoo.gfacs.model import GFACS +from rl4co.models.zoo.gfacs.policy import GFACSPolicy diff --git a/rl4co/models/zoo/gfacs/encoder.py b/rl4co/models/zoo/gfacs/encoder.py new file mode 100644 index 00000000..dbeaa81a --- /dev/null +++ b/rl4co/models/zoo/gfacs/encoder.py @@ -0,0 +1,69 @@ +from typing import Optional +from tensordict import TensorDict +import torch.nn as nn + +from rl4co.models.zoo.nargnn.encoder import NARGNNEncoder + + +class GFACSEncoder(NARGNNEncoder): + """ + NARGNNEncoder with log-partition function estimation for training with + Trajectory Balance (TB) loss (Malkin 
et al., https://arxiv.org/abs/2201.13259) + """ + def __init__( + self, + embed_dim: int = 64, + env_name: str = "tsp", + # TODO: pass network + init_embedding: Optional[nn.Module] = None, + edge_embedding: Optional[nn.Module] = None, + graph_network: Optional[nn.Module] = None, + heatmap_generator: Optional[nn.Module] = None, + num_layers_heatmap_generator: int = 5, + num_layers_graph_encoder: int = 15, + act_fn="silu", + agg_fn="mean", + linear_bias: bool = True, + k_sparse: Optional[int] = None, + z_out_dim: int = 1, + ): + super().__init__( + embed_dim=embed_dim, + env_name=env_name, + init_embedding=init_embedding, + edge_embedding=edge_embedding, + graph_network=graph_network, + heatmap_generator=heatmap_generator, + num_layers_heatmap_generator=num_layers_heatmap_generator, + num_layers_graph_encoder=num_layers_graph_encoder, + act_fn=act_fn, + agg_fn=agg_fn, + linear_bias=linear_bias, + k_sparse=k_sparse, + ) + self.Z_net = nn.Sequential( + nn.Linear(embed_dim, embed_dim), nn.SiLU(), nn.Linear(embed_dim, z_out_dim) + ) + self.z_out_dim = z_out_dim + + def forward(self, td: TensorDict): + """Forward pass of the encoder. + Transform the input TensorDict into the latent representation. + """ + # Transfer to embedding space + node_embed = self.init_embedding(td) + graph = self.edge_embedding(td, node_embed) + + # Process embedding into graph + # TODO: standardize? + graph.x, graph.edge_attr = self.graph_network( + graph.x, graph.edge_index, graph.edge_attr + ) + + logZ = self.Z_net(graph.edge_attr).reshape(-1, len(td), self.z_out_dim).mean(0) + + # Generate heatmap logits + heatmap_logits = self.heatmap_generator(graph) + + # Return latent representation (i.e. 
heatmap logits), initial embeddings and logZ + return heatmap_logits, node_embed, logZ diff --git a/rl4co/models/zoo/gfacs/model.py b/rl4co/models/zoo/gfacs/model.py new file mode 100644 index 00000000..89d4ef90 --- /dev/null +++ b/rl4co/models/zoo/gfacs/model.py @@ -0,0 +1,146 @@ +import math + +from typing import Optional + +import numpy as np +import scipy +import torch + +from tensordict import TensorDict + +from rl4co.envs.common.base import RL4COEnvBase +from rl4co.models.rl.reinforce.baselines import REINFORCEBaseline +from rl4co.models.zoo.deepaco import DeepACO +from rl4co.models.zoo.gfacs.policy import GFACSPolicy +from rl4co.utils.ops import unbatchify + + +class GFACS(DeepACO): + """Implements GFACS: https://arxiv.org/abs/2403.07041. + + Args: + env: Environment to use for the algorithm + policy: Policy to use for the algorithm + baseline: REINFORCE baseline. Defaults to no baseline + train_with_local_search: Whether to train with local search. Defaults to False. + ls_reward_aug_W: Coefficient to be used for the reward augmentation with the local search. Defaults to 0.95. 
+ policy_kwargs: Keyword arguments for policy + baseline_kwargs: Keyword arguments for baseline + **kwargs: Keyword arguments passed to the superclass + """ + + def __init__( + self, + env: RL4COEnvBase, + policy: Optional[GFACSPolicy] = None, + baseline: REINFORCEBaseline | str = "no", + train_with_local_search: bool = True, + ls_reward_aug_W: float = 0.95, + policy_kwargs: dict = {}, + baseline_kwargs: dict = {}, + beta_min: float = 1.0, + beta_max: float = 1.0, + beta_flat_epochs: int = 0, + **kwargs, + ): + if policy is None: + policy = GFACSPolicy( + env_name=env.name, + train_with_local_search=train_with_local_search, + **policy_kwargs, + ) + + super().__init__( + env, + policy, + baseline, + train_with_local_search, + ls_reward_aug_W, + policy_kwargs, + baseline_kwargs, + **kwargs, + ) + + self.beta_min = beta_min + self.beta_max = beta_max + self.beta_flat_epochs = beta_flat_epochs + + @property + def beta(self) -> float: + return self.beta_min + (self.beta_max - self.beta_min) * min( + math.log(self.current_epoch + 1) + / math.log(self.trainer.max_epochs - self.beta_flat_epochs), + 1.0, + ) + + def calculate_loss( + self, + td: TensorDict, + batch: TensorDict, + policy_out: dict, + reward: Optional[torch.Tensor] = None, + log_likelihood: Optional[torch.Tensor] = None, + ): + """Calculate loss for REINFORCE algorithm. + + Args: + td: TensorDict containing the current state of the environment + batch: Batch of data. This is used to get the extra loss terms, e.g., REINFORCE baseline + policy_out: Output of the policy network + reward: Reward tensor. If None, it is taken from `policy_out` + log_likelihood: Log-likelihood tensor. 
If None, it is taken from `policy_out`
+        """
+        reward = policy_out["reward"]
+        n_ants = reward.size(1)
+        advantage = reward - reward.mean(dim=1, keepdim=True)
+
+        if self.train_with_local_search:
+            ls_reward = policy_out["ls_reward"]
+            ls_advantage = ls_reward - ls_reward.mean(dim=1, keepdim=True)
+            weighted_advantage = (
+                advantage * (1 - self.ls_reward_aug_W)
+                + ls_advantage * self.ls_reward_aug_W
+            )
+        else:
+            weighted_advantage = advantage
+
+        # On-policy loss
+        forward_flow = policy_out["log_likelihood"] + policy_out["logZ"].repeat(1, n_ants)
+        backward_flow = (
+            self.calculate_log_pb_uniform(policy_out["actions"], n_ants)
+            + weighted_advantage.detach() * self.beta
+        )
+        tb_loss = torch.pow(forward_flow - backward_flow, 2).mean()
+
+        # Off-policy loss
+        if self.train_with_local_search:
+            ls_forward_flow = policy_out["ls_log_likelihood"] + policy_out[
+                "ls_logZ"
+            ].repeat(1, n_ants)
+            ls_backward_flow = (
+                self.calculate_log_pb_uniform(policy_out["ls_actions"], n_ants)
+                + ls_advantage.detach() * self.beta
+            )
+            ls_tb_loss = torch.pow(ls_forward_flow - ls_backward_flow, 2).mean()
+            tb_loss = tb_loss + ls_tb_loss
+
+        return tb_loss
+
+    def calculate_log_pb_uniform(self, actions: torch.Tensor, n_ants: int):
+        match self.env.name:
+            case "tsp":
+                return math.log(1 / (2 * actions.size(1)))
+            case "cvrp":
+                _a1 = actions.detach().cpu().numpy()
+                # shape: (batch, max_tour_length)
+                n_nodes = np.count_nonzero(_a1, axis=1)
+                _a2 = _a1[:, 1:] - _a1[:, :-1]
+                n_routes = np.count_nonzero(_a2, axis=1) - n_nodes
+                _a3 = _a1[:, 2:] - _a1[:, :-2]
+                n_multinode_routes = np.count_nonzero(_a3, axis=1) - n_nodes
+                log_b_p = -scipy.special.gammaln(
+                    n_routes + 1
+                ) - n_multinode_routes * math.log(2)
+                return unbatchify(torch.from_numpy(log_b_p).to(actions.device), n_ants)
+            case _:
+                raise ValueError(f"Unknown environment: {self.env.name}")
diff --git a/rl4co/models/zoo/gfacs/policy.py b/rl4co/models/zoo/gfacs/policy.py
new file mode 100644
index 00000000..01304477
---
/dev/null +++ b/rl4co/models/zoo/gfacs/policy.py @@ -0,0 +1,252 @@ +from functools import partial +from typing import Optional, Type + +from tensordict import TensorDict +import torch + +from rl4co.envs import RL4COEnvBase, get_env +from rl4co.models.zoo.deepaco import DeepACOPolicy +from rl4co.models.zoo.deepaco.antsystem import AntSystem +from rl4co.models.zoo.gfacs.encoder import GFACSEncoder +from rl4co.utils.decoding import ( + DecodingStrategy, + get_decoding_strategy, + get_log_likelihood, + modify_logits_for_top_k_filtering, + modify_logits_for_top_p_filtering +) +from rl4co.utils.ops import batchify, unbatchify +from rl4co.utils.pylogger import get_pylogger + + +log = get_pylogger(__name__) + + +class GFACSPolicy(DeepACOPolicy): + """Implememts GFACS policy based on :class:`NonAutoregressivePolicy`. Introduced by Kim et al. (2024): https://arxiv.org/abs/2403.07041. + This policy uses a Non-Autoregressive Graph Neural Network to generate heatmaps, + which are then used to run Ant Colony Optimization (ACO) to construct solutions. + + Args: + encoder: Encoder module. Can be passed by sub-classes + env_name: Name of the environment used to initialize embeddings + temperature: Temperature for the softmax during decoding. Defaults to 1.0. + aco_class: Class representing the ACO algorithm to be used. Defaults to :class:`AntSystem`. + aco_kwargs: Additional arguments to be passed to the ACO algorithm. + train_with_local_search: Whether to train with local search. Defaults to False. + n_ants: Number of ants to be used in the ACO algorithm. Can be an integer or dictionary. Defaults to 20. + n_iterations: Number of iterations to run the ACO algorithm. Can be an integer or dictionary. Defaults to `dict(train=1, val=20, test=100)`. + encoder_kwargs: Additional arguments to be passed to the encoder. 
+ """ + + def __init__( + self, + encoder: Optional[GFACSEncoder] = None, + env_name: str = "tsp", + temperature: float = 1.0, + top_p: float = 0.0, + top_k: int = 0, + aco_class: Optional[Type[AntSystem]] = None, + aco_kwargs: dict = {}, + train_with_local_search: bool = True, + n_ants: Optional[int | dict] = None, + n_iterations: Optional[int | dict] = None, + **encoder_kwargs, + ): + if encoder is None: + encoder_kwargs["z_out_dim"] = 2 if train_with_local_search else 1 + encoder = GFACSEncoder(env_name=env_name, **encoder_kwargs) + + super().__init__( + encoder=encoder, + env_name=env_name, + temperature=temperature, + top_p=top_p, + top_k=top_k, + aco_class=aco_class, + aco_kwargs=aco_kwargs, + train_with_local_search=train_with_local_search, + n_ants=n_ants, + n_iterations=n_iterations, + ) + + def forward( + self, + td_initial: TensorDict, + env: Optional[str | RL4COEnvBase] = None, + phase: str = "train", + return_actions: bool = True, + return_hidden: bool = False, + actions=None, + **decoding_kwargs, + ) -> dict: + """ + Forward method. During validation and testing, the policy runs the ACO algorithm to construct solutions. + See :class:`NonAutoregressivePolicy` for more details during the training phase. 
+ """ + n_ants = self.n_ants[phase] + # Instantiate environment if needed + if (phase != "train" or self.train_with_local_search) and ( + env is None or isinstance(env, str) + ): + env_name = self.env_name if env is None else env + env = get_env(env_name) + else: + assert isinstance(env, RL4COEnvBase), "env must be an instance of RL4COEnvBase" + + if phase == "train": + # Encoder: get encoder output and initial embeddings from initial state + hidden, init_embeds, logZ = self.encoder(td_initial) + if self.train_with_local_search: + logZ, ls_logZ = logZ[:, [0]], logZ[:, [1]] + else: + logZ = logZ[:, [0]] + + select_start_nodes_fn = partial( + self.aco_class.select_start_node_fn, + start_node=self.aco_kwargs.get("start_node", None), + ) + decoding_kwargs.update( + { + "select_start_nodes_fn": select_start_nodes_fn, + # These are only for inference; TODO: Are they useful for training too? + # "top_p": self.top_p, + # "top_k": self.top_k, + } + ) + logprobs, actions, td, env = self.common_decoding( + "multistart_sampling", + td_initial, + env, + hidden, + n_ants, + actions, + **decoding_kwargs, + ) + td.set("reward", env.get_reward(td, actions)) + + # Output dictionary construction + outdict = { + "logZ": logZ, + "reward": unbatchify(td["reward"], n_ants), + "log_likelihood": unbatchify( + get_log_likelihood(logprobs, actions, td.get("mask", None), True), + n_ants, + ) + } + + if return_actions: + outdict["actions"] = actions + + ######################################################################## + # Local search + if self.train_with_local_search: + # TODO: Refactor this so that we don't need to use the aco object + aco = self.aco_class(hidden, n_ants=n_ants, **self.aco_kwargs) + ls_actions, ls_reward = aco.local_search( + batchify(td_initial, n_ants), env, actions # type:ignore + ) + ls_logprobs, ls_actions, td, env = self.common_decoding( + "evaluate", + td_initial, + env, + hidden, + n_ants, + ls_actions, + **decoding_kwargs, + ) + td.set("ls_reward", ls_reward) 
+ outdict.update( + { + "ls_logZ": ls_logZ, + "ls_reward": unbatchify(ls_reward, n_ants), + "ls_log_likelihood": unbatchify( + get_log_likelihood( + ls_logprobs, ls_actions, td.get("mask", None), True + ), + n_ants, + ) + } + ) + if return_actions: + outdict["ls_actions"] = ls_actions + ######################################################################## + + if return_hidden: + outdict["hidden"] = hidden + + return outdict + + heatmap_logits, _, _ = self.encoder(td_initial) + heatmap_logits /= self.temperature + + if self.top_k > 0: + self.top_k = min(self.top_k, heatmap_logits.size(-1)) # safety check + heatmap_logits = modify_logits_for_top_k_filtering(heatmap_logits, self.top_k) + + if self.top_p > 0: + assert self.top_p <= 1.0, "top-p should be in (0, 1]." + heatmap_logits = modify_logits_for_top_p_filtering(heatmap_logits, self.top_p) + + aco = self.aco_class(heatmap_logits, n_ants=n_ants, **self.aco_kwargs) + td, actions, reward = aco.run(td_initial, env, self.n_iterations[phase]) + + out = {"reward": reward} + if return_actions: + out["actions"] = actions + + return out + + def common_decoding( + self, + decode_type: str | DecodingStrategy, + td: TensorDict, + env: RL4COEnvBase, + hidden: TensorDict, + num_starts: int, + actions: Optional[torch.Tensor] = None, + max_steps: int = 1_000_000, + **decoding_kwargs, + ): + multistart = True if num_starts > 1 else False + decoding_strategy: DecodingStrategy = get_decoding_strategy( + decoding_strategy=decode_type, + temperature=decoding_kwargs.pop("temperature", self.temperature), + mask_logits=decoding_kwargs.pop("mask_logits", self.mask_logits), + tanh_clipping=decoding_kwargs.pop("tanh_clipping", self.tanh_clipping), + num_starts=num_starts, + multistart=multistart, + select_start_nodes_fn=decoding_kwargs.pop("select_start_nodes_fn", None), + store_all_logp=decoding_kwargs.pop("store_all_logp", False), + **decoding_kwargs, + ) + if actions is not None: + assert decoding_strategy.name == "evaluate", "decoding 
strategy must be 'evaluate' when actions are provided"
+
+        # Pre-decoding hook: used for the initial step(s) of the decoding strategy
+        td, env, num_starts = decoding_strategy.pre_decoder_hook(td, env, actions[:, 0] if actions is not None else None)
+
+        # Additionally call a decoder hook if needed before main decoding
+        td, env, hidden = self.decoder.pre_decoder_hook(td, env, hidden, num_starts)
+
+        # Main decoding: loop until all sequences are done
+        step = 1 if multistart else 0
+        while not td["done"].all():
+            logits, mask = self.decoder(td, hidden, num_starts)
+            td = decoding_strategy.step(
+                logits,
+                mask,
+                td,
+                action=actions[..., step] if actions is not None else None,
+            )
+            td = env.step(td)["next"]
+            step += 1
+            if step > max_steps:
+                log.error(
+                    f"Exceeded maximum number of steps ({max_steps}) during decoding"
+                )
+                break
+
+        # Post-decoding hook: used for the final step(s) of the decoding strategy
+        logprobs, actions, td, env = decoding_strategy.post_decoder_hook(td, env)
+        return logprobs, actions, td, env
diff --git a/rl4co/models/zoo/nargnn/encoder.py b/rl4co/models/zoo/nargnn/encoder.py
index 3d12968b..e63dd84b 100644
--- a/rl4co/models/zoo/nargnn/encoder.py
+++ b/rl4co/models/zoo/nargnn/encoder.py
@@ -80,8 +80,15 @@ def _make_heatmap_logits(self, batch_graph: Batch) -> Tensor:  # type: ignore
         # if self.undirected_graph:
         #     heatmap = (heatmap + heatmap.transpose(1, 2)) * 0.5
-        heatmap += 1e-10 if heatmap.dtype != torch.float16 else 3e-8
-        # 3e-8 is the smallest positive number such that log(3e-8) is not -inf
+        # Avoid log(0) by adding a small value
+        if heatmap.dtype == torch.float32 or heatmap.dtype == torch.bfloat16:
+            small_value = 1e-12
+        elif heatmap.dtype == torch.float16:
+            small_value = 3e-8  # the smallest positive number such that log(small_value) is not -inf
+        else:
+            raise ValueError(f"Unsupported dtype: {heatmap.dtype}")
+
+        heatmap += small_value
         heatmap_logits = torch.log(heatmap)
 
         return heatmap_logits
 
diff --git
a/rl4co/utils/decoding.py b/rl4co/utils/decoding.py index b16e5cf5..4790b237 100644 --- a/rl4co/utils/decoding.py +++ b/rl4co/utils/decoding.py @@ -267,8 +267,8 @@ def _step( self, logprobs: torch.Tensor, mask: torch.Tensor, - td: TensorDict, - action: torch.Tensor = None, + td: Optional[TensorDict] = None, + action: Optional[torch.Tensor] = None, **kwargs, ) -> Tuple[torch.Tensor, torch.Tensor, TensorDict]: """Main decoding operation. This method should be called in a loop until all sequences are done. @@ -282,7 +282,7 @@ def _step( raise NotImplementedError("Must be implemented by subclass") def pre_decoder_hook( - self, td: TensorDict, env: RL4COEnvBase, action: torch.Tensor = None + self, td: TensorDict, env: RL4COEnvBase, action: Optional[torch.Tensor] = None ): """Pre decoding hook. This method is called before the main decoding operation.""" @@ -291,13 +291,13 @@ def pre_decoder_hook( if self.num_starts is None: self.num_starts = env.get_num_starts(td) if self.multisample: - log.warn( + log.warning( f"num_starts is not provided for sampling, using num_starts={self.num_starts}" ) else: if self.num_starts is not None: if self.num_starts >= 1: - log.warn( + log.warning( f"num_starts={self.num_starts} is ignored for decode_type={self.name}" ) @@ -347,8 +347,8 @@ def step( self, logits: torch.Tensor, mask: torch.Tensor, - td: TensorDict = None, - action: torch.Tensor = None, + td: Optional[TensorDict] = None, + action: Optional[torch.Tensor] = None, **kwargs, ) -> TensorDict: """Main decoding operation. This method should be called in a loop until all sequences are done. 
@@ -378,7 +378,9 @@ def step( # directly return for improvement methods, since the action for improvement methods is finalized in its own policy if self.improvement_method_mode: return logprobs, selected_action + # for others + assert td is not None, "td must be provided" if not self.store_all_logp: logprobs = gather_by_index(logprobs, selected_action, dim=1) td.set("action", selected_action) diff --git a/tests/test_training.py b/tests/test_training.py index 07d23a25..6622252c 100644 --- a/tests/test_training.py +++ b/tests/test_training.py @@ -201,14 +201,16 @@ def test_nargnn(): "torch_geometric" not in sys.modules, reason="PyTorch Geometric not installed" ) @pytest.mark.skipfif("numba" not in sys.modules, reason="Numba not installed") -def test_deepaco(): +@pytest.mark.parametrize("use_local_search", [False]) +def test_deepaco(use_local_search): env = TSPEnv(generator_params=dict(num_loc=20)) model = DeepACO( env, train_data_size=10, val_data_size=10, test_data_size=10, - policy_kwargs={"n_ants": 5}, + train_with_local_search=use_local_search, + policy_kwargs={"n_ants": 5, "aco_kwargs": {"use_local_search": use_local_search}}, ) trainer = RL4COTrainer( max_epochs=1, gradient_clip_val=1, devices=1, accelerator=accelerator