From 9cc25bd7511983f3fab3f1ef46277e0d80a8c4db Mon Sep 17 00:00:00 2001 From: nychiang Date: Thu, 23 Feb 2023 22:42:38 -0800 Subject: [PATCH 01/10] add sparse raja example --- src/Drivers/Sparse/NlpSparseRajaEx2.cpp | 476 ++++++++++++++++++++++++ src/Drivers/Sparse/NlpSparseRajaEx2.hpp | 309 +++++++++++++++ 2 files changed, 785 insertions(+) create mode 100644 src/Drivers/Sparse/NlpSparseRajaEx2.cpp create mode 100644 src/Drivers/Sparse/NlpSparseRajaEx2.hpp diff --git a/src/Drivers/Sparse/NlpSparseRajaEx2.cpp b/src/Drivers/Sparse/NlpSparseRajaEx2.cpp new file mode 100644 index 000000000..d1b9c2552 --- /dev/null +++ b/src/Drivers/Sparse/NlpSparseRajaEx2.cpp @@ -0,0 +1,476 @@ +// Copyright (c) 2017, Lawrence Livermore National Security, LLC. +// Produced at the Lawrence Livermore National Laboratory (LLNL). +// LLNL-CODE-742473. All rights reserved. +// +// This file is part of HiOp. For details, see https://github.com/LLNL/hiop. HiOp +// is released under the BSD 3-clause license (https://opensource.org/licenses/BSD-3-Clause). +// Please also read "Additional BSD Notice" below. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// i. Redistributions of source code must retain the above copyright notice, this list +// of conditions and the disclaimer below. +// ii. Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the disclaimer (as noted below) in the documentation and/or +// other materials provided with the distribution. +// iii. Neither the name of the LLNS/LLNL nor the names of its contributors may be used to +// endorse or promote products derived from this software without specific prior written +// permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT +// SHALL LAWRENCE LIVERMORE NATIONAL SECURITY, LLC, THE U.S. DEPARTMENT OF ENERGY OR +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +// OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED +// AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, +// EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Additional BSD Notice +// 1. This notice is required to be provided under our contract with the U.S. Department +// of Energy (DOE). This work was produced at Lawrence Livermore National Laboratory under +// Contract No. DE-AC52-07NA27344 with the DOE. +// 2. Neither the United States Government nor Lawrence Livermore National Security, LLC +// nor any of their employees, makes any warranty, express or implied, or assumes any +// liability or responsibility for the accuracy, completeness, or usefulness of any +// information, apparatus, product, or process disclosed, or represents that its use would +// not infringe privately-owned rights. +// 3. 
Also, reference herein to any specific commercial products, process, or services by
+// trade name, trademark, manufacturer or otherwise does not necessarily constitute or
+// imply its endorsement, recommendation, or favoring by the United States Government or
+// Lawrence Livermore National Security, LLC. The views and opinions of authors expressed
+// herein do not necessarily state or reflect those of the United States Government or
+// Lawrence Livermore National Security, LLC, and shall not be used for advertising or
+// product endorsement purposes.
+
+/**
+ * @file NlpSparseRajaEx2.cpp
+ *
+ * @author Nai-Yuan Chiang , LLNL
+ *
+ */
+
+#include "SparseRajaEx2.hpp"
+
+#include
+#include
+#include
+
+#include
+
+#include
+#include <cstring> //for memcpy
+#include
+
+
+//TODO: It would be a good idea to not use the internal HiOp RAJA policies here and, instead, to give
+// self-contained definitions of the policies here, so that the user gets a better grasp of the concept
+// and does not rely on the internals of HiOp. For example:
+// #define RAJA_LAMBDA [=] __device__
+// using ex2_raja_exec = RAJA::cuda_exec<128>;
+// more defs here
+
+
+#if defined(HIOP_USE_CUDA)
+#include "ExecPoliciesRajaCudaImpl.hpp"
+using ex2_raja_exec = hiop::ExecRajaPoliciesBackend<hiop::ExecPolicyRajaCuda>::hiop_raja_exec;
+using ex2_raja_reduce = hiop::ExecRajaPoliciesBackend<hiop::ExecPolicyRajaCuda>::hiop_raja_reduce;
+using hiopMatrixRajaSparse = hiop::hiopMatrixRajaSparseTriplet<hiop::MemBackendUmpire, hiop::ExecPolicyRajaCuda>;
+#elif defined(HIOP_USE_HIP)
+#include
+using ex2_raja_exec = hiop::ExecRajaPoliciesBackend<hiop::ExecPolicyRajaHip>::hiop_raja_exec;
+using ex2_raja_reduce = hiop::ExecRajaPoliciesBackend<hiop::ExecPolicyRajaHip>::hiop_raja_reduce;
+using hiopMatrixRajaSparse = hiop::hiopMatrixRajaSparseTriplet<hiop::MemBackendUmpire, hiop::ExecPolicyRajaHip>;
+#else
+//#if !defined(HIOP_USE_CUDA) && !defined(HIOP_USE_HIP)
+#include
+using ex2_raja_exec = hiop::ExecRajaPoliciesBackend<hiop::ExecPolicyRajaOmp>::hiop_raja_exec;
+using ex2_raja_reduce = hiop::ExecRajaPoliciesBackend<hiop::ExecPolicyRajaOmp>::hiop_raja_reduce;
+using hiopMatrixRajaSparse = hiop::hiopMatrixRajaSparseTriplet<hiop::MemBackendUmpire, hiop::ExecPolicyRajaOmp>;
+#endif
+
+
+/** Nonlinear *highly nonconvex* and *rank deficient* problem test for the Filter IPM
+ * Newton of HiOp. It uses a Sparse NLP formulation. The problem is based on SparseEx1.
+ *
+ *  min   (2*convex_obj-1)*scal*sum 1/4* { (x_{i}-1)^4 : i=1,...,n} + 0.5x^Tx
+ *  s.t.
+ *        4*x_1 + 2*x_2 == 10
+ *  5 <=  2*x_1 + x_3
+ *  1 <=  2*x_1 + 0.5*x_i <= 2*n, for i=4,...,n
+ *        x_1 free
+ *        0.0 <= x_2
+ *        1.0 <= x_3 <= 10
+ *        x_i >= 0.5, i=4,...,n
+ *
+ * Optionally, one can add the following constraints to obtain a rank-deficient Jacobian
+ *
+ *  s.t.  [-inf] <= 4*x_1 + 2*x_3 <= [ 19 ]   (rnkdef-con1)
+ *        4*x_1 + 2*x_2 == 10                 (rnkdef-con2)
+ *
+ * The other parameters are:
+ *  convex_obj: set to 1 to have a convex problem, otherwise set it to 0.
+ *  scal_neg_obj: scaling factor for the quartic term in the objective (1.0 by default).
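+ *
+ * For orientation, a minimal sketch of how this interface is typically driven
+ * (it mirrors the NlpSparseRajaEx2Driver.cpp added later in this series; the
+ * concrete argument and option values below are illustrative only):
+ *
+ *   SparseRajaEx2 nlp_interface("device", 500, false, true, true, 0.1);
+ *   hiop::hiopNlpSparse nlp(nlp_interface);
+ *   nlp.options->SetStringValue("mem_space", "device");
+ *   hiop::hiopAlgFilterIPMNewton solver(&nlp);
+ *   hiop::hiopSolveStatus status = solver.run();
+ *   double obj_value = solver.getObjective();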
+ *
+ */
+SparseRajaEx2::SparseRajaEx2(std::string mem_space,
+                             int n,
+                             bool convex_obj,
+                             bool rankdefic_Jac_eq,
+                             bool rankdefic_Jac_ineq,
+                             double scal_neg_obj)
+  : mem_space_{mem_space},
+    convex_obj_{convex_obj},
+    rankdefic_eq_{rankdefic_Jac_eq},
+    rankdefic_ineq_{rankdefic_Jac_ineq},
+    n_vars_{n},
+    scal_neg_obj_{scal_neg_obj},
+    n_cons_{2}
+{
+  // Make sure mem_space_ is uppercase
+  transform(mem_space_.begin(), mem_space_.end(), mem_space_.begin(), ::toupper);
+
+  assert(n>=3 && "number of variables should be greater than or equal to 3 for this example");
+  if(n>3) {
+    n_cons_ += n-3;
+  }
+  n_cons_ += rankdefic_eq_ + rankdefic_ineq_;
+}
+
+SparseRajaEx2::~SparseRajaEx2()
+{
+}
+
+bool SparseRajaEx2::get_prob_sizes(size_type& n, size_type& m)
+{
+  n = n_vars_;
+  m = n_cons_;
+  return true;
+}
+
+bool SparseRajaEx2::get_vars_info(const size_type& n, double* xlow, double* xupp, NonlinearityType* type)
+{
+  assert(n==n_vars_);
+
+  RAJA::forall<ex2_raja_exec>(RAJA::RangeSegment(0, 3),
+    RAJA_LAMBDA(RAJA::Index_type i)
+    {
+      if(i==0) {
+        xlow[i] = -1e20;
+        xupp[i] = 1e20;
+      } else if(i==1) {
+        xlow[i] = 0.0;
+        xupp[i] = 1e20;
+      } else {
+        xlow[i] = 1.0;
+        xupp[i] = 10.0;
+      }
+    });
+
+  if(n>3) {
+    RAJA::forall<ex2_raja_exec>(RAJA::RangeSegment(3, n),
+      RAJA_LAMBDA(RAJA::Index_type i)
+      {
+        xlow[i] = 0.5;
+        xupp[i] = 1e20;
+      });
+  }
+
+  RAJA::forall<ex2_raja_exec>(RAJA::RangeSegment(0, n),
+    RAJA_LAMBDA(RAJA::Index_type i)
+    {
+      type[i] = hiopNonlinear;
+    });
+  return true;
+}
+
+bool SparseRajaEx2::get_cons_info(const size_type& m, double* clow, double* cupp, NonlinearityType* type)
+{
+  assert(m==n_cons_);
+
+  RAJA::forall<ex2_raja_exec>(RAJA::RangeSegment(0, 2),
+    RAJA_LAMBDA(RAJA::Index_type i)
+    {
+      if(i==0) {
+        clow[i] = 10.0;
+        cupp[i] = 10.0;
+      } else {
+        clow[i] = 5.0;
+        cupp[i] = 1e20;
+      }
+    });
+
+  if(n>3) {
+    RAJA::forall<ex2_raja_exec>(RAJA::RangeSegment(3, m),
+      RAJA_LAMBDA(RAJA::Index_type conidx)
+      {
+        clow[conidx] = 1.0;
+        cupp[conidx] = 2*n;
+      });
+  }
+
+  if(rankdefic_ineq_) {
+    RAJA::forall<ex2_raja_exec>(RAJA::RangeSegment(m, m+1),
+      RAJA_LAMBDA(RAJA::Index_type conidx)
+      {
+        // [-inf] <= 4*x_1 + 2*x_3 <= [ 19 ]
+        clow[conidx] = -1e+20;
+        cupp[conidx] = 19.;
+      });
+  }
+
+  if(rankdefic_eq_) {
+    RAJA::forall<ex2_raja_exec>(RAJA::RangeSegment(m+rankdefic_ineq_, m+rankdefic_ineq_+1),
+      RAJA_LAMBDA(RAJA::Index_type conidx)
+      {
+        // 4*x_1 + 2*x_2 == 10
+        clow[conidx] = 10;
+        cupp[conidx] = 10;
+      });
+  }
+
+  RAJA::forall<ex2_raja_exec>(RAJA::RangeSegment(0, m),
+    RAJA_LAMBDA(RAJA::Index_type i)
+    {
+      type[i] = hiopInterfaceBase::hiopNonlinear;
+    });
+
+  return true;
+}
+
+bool SparseRajaEx2::get_sparse_blocks_info(int& nx,
+                                           int& nnz_sparse_Jaceq,
+                                           int& nnz_sparse_Jacineq,
+                                           int& nnz_sparse_Hess_Lagr)
+{
+  nx = n_vars_;
+  nnz_sparse_Jaceq = 2 + 2*rankdefic_eq_;
+  nnz_sparse_Jacineq = 2 + 2*(n_vars_-3) + 2*rankdefic_ineq_;
+  nnz_sparse_Hess_Lagr = n_vars_;
+  return true;
+}
+
+bool SparseRajaEx2::eval_f(const size_type& n, const double* x, bool new_x, double& obj_value)
+{
+  assert(n==n_vars_);
+  obj_value=0.;
+  {
+    int convex_obj = (int) convex_obj_;
+    double scal_neg_obj = scal_neg_obj_;
+
+    RAJA::ReduceSum<ex2_raja_reduce, double> aux(0);
+    RAJA::forall<ex2_raja_exec>(RAJA::RangeSegment(0, n),
+      RAJA_LAMBDA(RAJA::Index_type i)
+      {
+        aux += (2*convex_obj-1) * scal_neg_obj * 0.25 * std::pow(x[i]-1., 4) + 0.5 * std::pow(x[i], 2);
+      });
+    obj_value += aux.get();
+  }
+  return true;
+}
+
+bool SparseRajaEx2::eval_grad_f(const size_type& n, const double* x, bool new_x, double* gradf)
+{
+  assert(n==n_vars_);
+  {
+    int convex_obj = (int) convex_obj_;
+    double scal_neg_obj = scal_neg_obj_;
+
+    RAJA::forall<ex2_raja_exec>(RAJA::RangeSegment(0, n),
      RAJA_LAMBDA(RAJA::Index_type i)
+      {
+        gradf[i] = (2*convex_obj-1) * scal_neg_obj * std::pow(x[i]-1.,3) + x[i];
+      });
+  }
+  return true;
+}
+
+bool SparseRajaEx2::eval_cons(const size_type& n, const size_type& m, const double* x, bool new_x, double* cons)
+{
+  assert(n==n_vars_); assert(m==n_cons_);
+  assert(n_cons_==2+n-3+rankdefic_eq_+rankdefic_ineq_);
+
+  RAJA::forall<ex2_raja_exec>(RAJA::RangeSegment(0, 2),
+    RAJA_LAMBDA(RAJA::Index_type i)
+    {
+      if(i==0) {
+        // --- constraint 1 body ---> 4*x_1 + 2*x_2 == 10
+        cons[i] = 4*x[0] + 2*x[1];
+      } else if(i==1) {
+        // --- constraint 2 body ---> 2*x_1 + x_3
+        cons[i] = 2*x[0] + 1*x[2];
+      }
+    });
+
+  RAJA::forall<ex2_raja_exec>(RAJA::RangeSegment(2, n-1),
+    RAJA_LAMBDA(RAJA::Index_type i)
+    {
+      // --- constraint 3 body ---> 2*x_1 + 0.5*x_i, for i>=4
+      cons[i] = 2*x[0] + 0.5*x[i+1];
+    });
+
+
+  if(rankdefic_ineq_) {
+    RAJA::forall<ex2_raja_exec>(RAJA::RangeSegment(n-1, n),
+      RAJA_LAMBDA(RAJA::Index_type i)
+      {
+        // [-inf] <= 4*x_1 + 2*x_3 <= [ 19 ]
+        cons[i] = 4*x[0] + 2*x[2];
+      });
+  }
+
+  if(rankdefic_eq_) {
+    RAJA::forall<ex2_raja_exec>(RAJA::RangeSegment(n-1+rankdefic_ineq_, n+rankdefic_ineq_),
+      RAJA_LAMBDA(RAJA::Index_type i)
+      {
+        // 4*x_1 + 2*x_2 == 10
+        cons[i] = 4*x[0] + 2*x[1];
+      });
+  }
+  assert(conidx==m);
+
+  return true;
+}
+
+bool SparseRajaEx2::eval_Jac_cons(const size_type& n,
+                                  const size_type& m,
+                                  const double* x,
+                                  bool new_x,
+                                  const int& nnzJacS,
+                                  index_type* iJacS,
+                                  index_type* jJacS,
+                                  double* MJacS)
+{
+  assert(n==n_vars_); assert(m==n_cons_);
+  assert(n>=3);
+
+  assert(nnzJacS == 4 + 2*(n-3) + 2*rankdefic_eq_ + 2*rankdefic_ineq_);
+
+  RAJA::forall<ex2_raja_exec>(RAJA::RangeSegment(0, 2),
+    RAJA_LAMBDA(RAJA::Index_type itrow)
+    {
+      if(itrow==0) {
+        // --- constraint 1 body ---> 4*x_1 + 2*x_2 == 10
+        iJacS[2*itrow] = itrow;
+        jJacS[2*itrow] = 0;
+        iJacS[2*itrow+1] = itrow;
+        jJacS[2*itrow+1] = 1;
+        if(MJacS != nullptr) {
+          MJacS[2*itrow] = 4.0;
+          MJacS[2*itrow+1] = 2.0;
+        }
+      } else if(itrow==1) {
+        // --- constraint 2 body ---> 2*x_1 + x_3
+        iJacS[2*itrow] = itrow;
+        jJacS[2*itrow] = 0;
+        iJacS[2*itrow+1] = itrow;
+        jJacS[2*itrow+1] = 2;
+        if(MJacS != nullptr) {
+          MJacS[2*itrow] = 2.0;
+          MJacS[2*itrow+1] = 1.0;
+        }
+      }
+    });
+
+  RAJA::forall<ex2_raja_exec>(RAJA::RangeSegment(2, n-1),
+    RAJA_LAMBDA(RAJA::Index_type itrow)
+    {
+      // --- constraint 3 body ---> 2*x_1 + 0.5*x_i, for i>=4
+      iJacS[2*itrow] = itrow;
+      jJacS[2*itrow] = 0;
+      iJacS[2*itrow+1] = itrow;
+      jJacS[2*itrow+1] = itrow+1;
+      if(MJacS != nullptr) {
+        MJacS[2*itrow] = 2.0;
+        MJacS[2*itrow+1] = 0.5;
+      }
+    });
+
+
+  if(rankdefic_ineq_) {
+    RAJA::forall<ex2_raja_exec>(RAJA::RangeSegment(n-1, n),
+      RAJA_LAMBDA(RAJA::Index_type itrow)
+      {
+        // [-inf] <= 4*x_1 + 2*x_3 <= [ 19 ]
+        iJacS[2*itrow] = itrow;
+        jJacS[2*itrow] = 0;
+        iJacS[2*itrow+1] = itrow;
+        jJacS[2*itrow+1] = 2;
+        if(MJacS != nullptr) {
+          MJacS[2*itrow] = 4.0;
+          MJacS[2*itrow+1] = 2.0;
+        }
+      });
+  }
+
+  if(rankdefic_eq_) {
+    RAJA::forall<ex2_raja_exec>(RAJA::RangeSegment(n-1+rankdefic_ineq_, n+rankdefic_ineq_),
+      RAJA_LAMBDA(RAJA::Index_type i)
+      {
+        // 4*x_1 + 2*x_2 == 10
+        iJacS[2*itrow] = itrow;
+        jJacS[2*itrow] = 0;
+        iJacS[2*itrow+1] = itrow;
+        jJacS[2*itrow+1] = 1;
+        if(MJacS != nullptr) {
+          MJacS[2*itrow] = 4.0;
+          MJacS[2*itrow+1] = 2.0;
+        }
+      });
+  }
+
+  return true;
+}
+
+bool SparseRajaEx2::eval_Hess_Lagr(const size_type& n,
+                                   const size_type& m,
+                                   const double* x,
+                                   bool new_x,
+                                   const double& obj_factor,
+                                   const double* lambda,
+                                   bool new_lambda,
+                                   const size_type& nnzHSS,
+                                   index_type* iHSS,
+                                   index_type* jHSS,
+                                   double* MHSS)
+{
+  //Note: lambda is not used since all the constraints are linear and, therefore, do
+  //not contribute to the Hessian of the Lagrangian
+  assert(nnzHSS == n);
+
+  if(iHSS!=NULL && jHSS!=NULL) {
+    RAJA::forall<ex2_raja_exec>(RAJA::RangeSegment(0, n),
+      RAJA_LAMBDA(RAJA::Index_type i)
+      {
+        iHSS[i] = i;
+        jHSS[i] = i;
+      });
+  }
+
+  int convex_obj = (int) convex_obj_;
+  double scal_neg_obj = scal_neg_obj_;
+  if(MHSS!=NULL) {
+    RAJA::forall<ex2_raja_exec>(RAJA::RangeSegment(0, n),
+      RAJA_LAMBDA(RAJA::Index_type i)
+      {
+        MHSS[i] = obj_factor * ( (2*convex_obj-1) * scal_neg_obj * 3 * std::pow(x[i]-1., 2) + 1);
+      });
+  }
+  return true;
+}
+
+
+
+
+bool SparseRajaEx2::get_starting_point(const size_type& n, double* x0)
+{
+  assert(n==n_vars_);
+  RAJA::forall<ex2_raja_exec>(RAJA::RangeSegment(0, n),
+    RAJA_LAMBDA(RAJA::Index_type i)
+    {
+      x0[i] = 0.0;
+    });
+  return true;
+}
diff --git a/src/Drivers/Sparse/NlpSparseRajaEx2.hpp b/src/Drivers/Sparse/NlpSparseRajaEx2.hpp
new file mode 100644
index 000000000..acec2cb13
--- /dev/null
+++ b/src/Drivers/Sparse/NlpSparseRajaEx2.hpp
@@ -0,0 +1,309 @@
+// Copyright (c) 2017, Lawrence Livermore National Security, LLC.
+// Produced at the Lawrence Livermore National Laboratory (LLNL).
+// LLNL-CODE-742473. All rights reserved.
+//
+// This file is part of HiOp. For details, see https://github.com/LLNL/hiop. HiOp
+// is released under the BSD 3-clause license (https://opensource.org/licenses/BSD-3-Clause).
+// Please also read "Additional BSD Notice" below.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+// i. Redistributions of source code must retain the above copyright notice, this list
+// of conditions and the disclaimer below.
+// ii. Redistributions in binary form must reproduce the above copyright notice,
+// this list of conditions and the disclaimer (as noted below) in the documentation and/or
+// other materials provided with the distribution.
+// iii. Neither the name of the LLNS/LLNL nor the names of its contributors may be used to
+// endorse or promote products derived from this software without specific prior written
+// permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
+// SHALL LAWRENCE LIVERMORE NATIONAL SECURITY, LLC, THE U.S. DEPARTMENT OF ENERGY OR
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+// OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+// AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
+// EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Additional BSD Notice
+// 1. This notice is required to be provided under our contract with the U.S. Department
+// of Energy (DOE). This work was produced at Lawrence Livermore National Laboratory under
+// Contract No. DE-AC52-07NA27344 with the DOE.
+// 2. Neither the United States Government nor Lawrence Livermore National Security, LLC
+// nor any of their employees, makes any warranty, express or implied, or assumes any
+// liability or responsibility for the accuracy, completeness, or usefulness of any
+// information, apparatus, product, or process disclosed, or represents that its use would
+// not infringe privately-owned rights.
+// 3. Also, reference herein to any specific commercial products, process, or services by
+// trade name, trademark, manufacturer or otherwise does not necessarily constitute or
+// imply its endorsement, recommendation, or favoring by the United States Government or
+// Lawrence Livermore National Security, LLC. The views and opinions of authors expressed
+// herein do not necessarily state or reflect those of the United States Government or
+// Lawrence Livermore National Security, LLC, and shall not be used for advertising or
+// product endorsement purposes.
+
+/**
+ * @file NlpSparseRajaEx2.hpp
+ *
+ * @author Nai-Yuan Chiang , LLNL
+ *
+ */
+
+#ifndef HIOP_EXAMPLE_SPARSE_RAJA_EX2
+#define HIOP_EXAMPLE_SPARSE_RAJA_EX2
+
+#include "hiopInterface.hpp"
+#include "LinAlgFactory.hpp"
+
+#ifdef HIOP_USE_MPI
+#include "mpi.h"
+#else
+#define MPI_COMM_WORLD 0
+#define MPI_Comm int
+#endif
+
+#include
+#include <cstring> //for memcpy
+#include
+#include
+
+using size_type = hiop::size_type;
+using index_type = hiop::index_type;
+
+/** Nonlinear *highly nonconvex* and *rank deficient* problem test for the Filter IPM
+ * Newton of HiOp. It uses a Sparse NLP formulation. The problem is based on SparseEx2.
+ *
+ *  min   (2*convex_obj-1)*scal*sum 1/4* { (x_{i}-1)^4 : i=1,...,n} + 0.5x^Tx
+ *  s.t.
+ *        4*x_1 + 2*x_2 == 10
+ *  5 <=  2*x_1 + x_3
+ *  1 <=  2*x_1 + 0.5*x_i <= 2*n, for i=4,...,n
+ *        x_1 free
+ *        0.0 <= x_2
+ *        1.0 <= x_3 <= 10
+ *        x_i >= 0.5, i=4,...,n
+ *
+ * Optionally, one can add the following constraints to obtain a rank-deficient Jacobian
+ *
+ *  s.t.  [-inf] <= 4*x_1 + 2*x_3 <= [ 19 ]   (rnkdef-con1 --- ineq con)
+ *        4*x_1 + 2*x_2 == 10                 (rnkdef-con2 --- eq con)
+ *
+ * The other parameters are:
+ *  convex_obj: set to 1 to have a convex problem, otherwise set it to 0
+ *  scal_neg_obj: scaling factor for the quartic term in the objective (1.0 by default).
+ *
+ * @note All pointers marked as "managed by Umpire" are allocated by HiOp using
+ * Umpire's API. They all are addresses in the same memory space; however, the memory
+ * space can be host (typically CPU), device (typically GPU), or unified memory (um)
+ * spaces as per Umpire specification. The selection of the memory space is done via
+ * the option "mem_space" of HiOp. It is the responsibility of the implementers of
+ * HiOp's interfaces (such as the hiop::hiopInterfaceSparse used in this example) to
+ * work with the "managed by Umpire" pointers in the same memory space as the one
+ * specified by the "mem_space" option.
+ *
+ */
+class SparseRajaEx2 : public hiop::hiopInterfaceSparse
+{
+public:
+  SparseRajaEx2(std::string mem_space,
+                int n,
+                bool convex_obj,
+                bool rankdefic_Jac_eq,
+                bool rankdefic_Jac_ineq,
+                double scal_neg_obj = 1.0);
+  virtual ~SparseRajaEx2();
+
+  /**
+   * @brief Number of variables and constraints.
+   */
+  virtual bool get_prob_sizes(size_type& n, size_type& m);
+
+  /**
+   * @brief Get types and bounds on the variables.
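+   *
+   * A sketch of how these Umpire-managed bound arrays are filled on the device in
+   * the accompanying .cpp (ex2_raja_exec is the RAJA execution-policy alias chosen
+   * there at build time; shown here only for illustration):
+   *
+   *   RAJA::forall<ex2_raja_exec>(RAJA::RangeSegment(3, n),
+   *     RAJA_LAMBDA(RAJA::Index_type i) { xlow[i] = 0.5; xupp[i] = 1e20; });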
+ * + * @param[in] n number of variables + * @param[out] ixlow array with lower bounds (managed by Umpire) + * @param[out] ixupp array with upper bounds (managed by Umpire) + * @param[out] type array with the variable types (on host) + */ + virtual bool get_vars_info(const size_type& n, double *xlow, double* xupp, NonlinearityType* type); + + /** + * Get types and bounds corresponding to constraints. An equality constraint is specified + * by setting the lower and upper bounds equal. + * + * @param[in] m Number of constraints + * @param[out] iclow array with lower bounds (managed by Umpire) + * @param[out] icupp array with upper bounds (managed by Umpire) + * @param[out] type array with the variable types (on host) + */ + virtual bool get_cons_info(const size_type& m, double* clow, double* cupp, NonlinearityType* type); + + /** + * Returns the sizes and number of nonzeros of the sparse blocks + * + * @param[out] nx number of variables + * @param[out] nnz_sparse_Jace number of nonzeros in the Jacobian of the equalities w.r.t. + * sparse variables + * @param[out] nnz_sparse_Jaci number of nonzeros in the Jacobian of the inequalities w.r.t. + * sparse variables + * @param[out] nnz_sparse_Hess_Lagr number of nonzeros in the (sparse) Hessian + */ + virtual bool get_sparse_blocks_info(index_type& nx, + index_type& nnz_sparse_Jace, + index_type& nnz_sparse_Jaci, + index_type& nnz_sparse_Hess_Lagr); + + /** + * Evaluate objective. + * + * @param[in] n number of variables + * @param[in] x array with the optimization variables or point at which to evaluate + * (managed by Umpire) + * @param[in] new_x indicates whether any of the other eval functions have been + * evaluated previously (false) or not (true) at x + * @param[out] obj_value the objective function value. + */ + virtual bool eval_f(const size_type& n, const double* x, bool new_x, double& obj_value); + + /** + * Evaluate a subset of the constraints specified by idx_cons + * + * @param[in] num_cons number of constraints to evaluate (size of idx_cons array) + * @param[in] idx_cons indexes of the constraints to evaluate (managed by Umpire) + * @param[in] x the point at which to evaluate (managed by Umpire) + * @param[in] new_x indicates whether any of the other eval functions have been evaluated + * previously (false) or not (true) at x + * @param[out] cons array with values of the constraints (managed by Umpire, size num_cons) + */ + virtual bool eval_cons(const size_type& n, + const size_type& m, + const size_type& num_cons, + const index_type* idx_cons, + const double* x, + bool new_x, + double* cons) + { + //return false so that HiOp will rely on the constraint evaluator defined below + return false; + } + + virtual bool eval_cons(const size_type& n, + const size_type& m, + const double* x, + bool new_x, + double* cons); + + /** + * Evaluation of the gradient of the objective. + * + * @param[in] n number of variables + * @param[in] x array with the optimization variables or point at which to evaluate + * (managed by Umpire) + * @param[in] new_x indicates whether any of the other eval functions have been evaluated + * previously (false) or not (true) at x + * @param[out] gradf array with the values of the gradient (managed by Umpire) + */ + virtual bool eval_grad_f(const size_type& n, + const double* x, + bool new_x, + double* gradf); + + /** + * Evaluates the Jacobian of the constraints. 
Please check the user manual and the + * documentation of hiop::hiopInterfaceMDS for a detailed discussion of how the last + * four arguments are expected to behave. + * + * @param[in] n number of variables + * @param[in] m Number of constraints + * @param[in] num_cons number of constraints to evaluate (size of idx_cons array) + * @param[in] idx_cons indexes of the constraints to evaluate (managed by Umpire) + * @param[in] x the point at which to evaluate (managed by Umpire) + * @param[in] new_x indicates whether any of the other eval functions have been evaluated + * previously (false) or not (true) at x + * @param[in] nnzJacS number of nonzeros in the sparse Jacobian + * @param[out] iJacS array of row indexes in the sparse Jacobian (managed by Umpire) + * @param[out] jJacS array of column indexes in the sparse Jacobian (managed by Umpire) + * @param[out] MJacS array of nonzero values in the sparse Jacobian (managed by Umpire) + */ + virtual bool eval_Jac_cons(const size_type& n, + const size_type& m, + const size_type& num_cons, + const index_type* idx_cons, + const double* x, + bool new_x, + const size_type& nnzJacS, + index_type* iJacS, + index_type* jJacS, + double* MJacS) + { + //return false so that HiOp will rely on the Jacobian evaluator defined below + return false; + } + + /// Similar to the above, but not used in this example. + virtual bool eval_Jac_cons(const size_type& n, + const size_type& m, + const double* x, + bool new_x, + const size_type& nnzJacS, + index_type* iJacS, + index_type* jJacS, + double* MJacS); + + + /** + * Evaluate the Hessian of the Lagrangian function. Please consult the user manual for a + * detailed discussion of the form the Lagrangian function takes. + * + * @param[in] n number of variables + * @param[in] m Number of constraints + * @param[in] x the point at which to evaluate (managed by Umpire) + * @param[in] new_x indicates whether any of the other eval functions have been evaluated + * previously (false) or not (true) at x + * @param[in] obj_factor scalar that multiplies the objective term in the Lagrangian function + * @param[in] lambda array with values of the multipliers used by the Lagrangian function + * @param[in] new_lambda indicates whether lambda values changed since last call + * @param[in] nnzHSS number of nonzeros in the (sparse) Hessian w.r.t. sparse variables + * @param[out] iHSS array of row indexes in the Hessian w.r.t. sparse variables + * (managed by Umpire) + * @param[out] jHSS array of column indexes in the Hessian w.r.t. sparse variables + * (managed by Umpire) + * @param[out] MHSS array of nonzero values in the Hessian w.r.t. 
sparse variables + * (managed by Umpire) + */ + virtual bool eval_Hess_Lagr(const size_type& n, + const size_type& m, + const double* x, + bool new_x, + const double& obj_factor, + const double* lambda, + bool new_lambda, + const size_type& nnzHSS, + index_type* iHSS, + index_type* jHSS, + double* MHSS); + + /** + * Implementation of the primal starting point specification + * + * @param[in] n number of variables + * @param[in] x0 the primal starting point(managed by Umpire) + */ + virtual bool get_starting_point(const size_type&n, double* x0); + +private: + int n_vars_; + int n_cons_; + bool convex_obj_; + bool rankdefic_eq_; + bool rankdefic_ineq_; + double scal_neg_obj_; + + std::string mem_space_; +}; + +#endif From 994f748c09991503d12513b108f7021d265c33da Mon Sep 17 00:00:00 2001 From: nychiang Date: Thu, 23 Feb 2023 22:43:33 -0800 Subject: [PATCH 02/10] minor changes --- src/Drivers/MDS/NlpMdsRajaEx1.hpp | 4 ++-- src/Drivers/Sparse/NlpSparseEx2.cpp | 3 +-- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/src/Drivers/MDS/NlpMdsRajaEx1.hpp b/src/Drivers/MDS/NlpMdsRajaEx1.hpp index c43346ccd..fef3747d6 100644 --- a/src/Drivers/MDS/NlpMdsRajaEx1.hpp +++ b/src/Drivers/MDS/NlpMdsRajaEx1.hpp @@ -170,7 +170,7 @@ class MdsEx1 : public hiop::hiopInterfaceMDS * Returns the sizes and number of nonzeros of the sparse and dense blocks within MDS * * @param[out] nx_sparse number of sparse variables - * @param[out] nx_ense number of dense variables + * @param[out] nx_dense number of dense variables * @param[out] nnz_sparse_Jace number of nonzeros in the Jacobian of the equalities w.r.t. * sparse variables * @param[out] nnz_sparse_Jaci number of nonzeros in the Jacobian of the inequalities w.r.t. @@ -233,7 +233,7 @@ class MdsEx1 : public hiop::hiopInterfaceMDS * @param[in] x array with the optimization variables or point at which to evaluate * (managed by Umpire) * @param[in] new_x indicates whether any of the other eval functions have been evaluated - * previously (false) or not (true) at x + * previously (false) or not (true) at x * @param[out] gradf array with the values of the gradient (managed by Umpire) */ bool eval_grad_f(const size_type& n, const double* x, bool new_x, double* gradf); diff --git a/src/Drivers/Sparse/NlpSparseEx2.cpp b/src/Drivers/Sparse/NlpSparseEx2.cpp index 72073e914..ce4a0615e 100644 --- a/src/Drivers/Sparse/NlpSparseEx2.cpp +++ b/src/Drivers/Sparse/NlpSparseEx2.cpp @@ -153,7 +153,6 @@ bool SparseEx2::eval_cons(const size_type& n, return false; } -/* Four constraints no matter how large n is */ bool SparseEx2::eval_cons(const size_type& n, const size_type& m, const double* x, bool new_x, double* cons) { assert(n==n_vars); assert(m==n_cons); @@ -170,7 +169,7 @@ bool SparseEx2::eval_cons(const size_type& n, const size_type& m, const double* // --- constraint 2 body ---> 2*x_1 + x_3 cons[conidx++] += 2*x[0] + 1*x[2]; - // --- constraint 3 body ---> 2*x_1 + 0.5*x_i, for i>=4 + // --- constraint 3 body ---> 2*x_1 + 0.5*x_i, for i>=4 for(auto i=3; i Date: Mon, 27 Feb 2023 18:00:50 -0800 Subject: [PATCH 03/10] update cmake file --- src/Drivers/Sparse/CMakeLists.txt | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/src/Drivers/Sparse/CMakeLists.txt b/src/Drivers/Sparse/CMakeLists.txt index 646c6e43b..1d6c505a5 100644 --- a/src/Drivers/Sparse/CMakeLists.txt +++ b/src/Drivers/Sparse/CMakeLists.txt @@ -7,6 +7,19 @@ target_link_libraries(NlpSparseEx2.exe HiOp::HiOp) add_executable(NlpSparseEx3.exe NlpSparseEx3.cpp NlpSparseEx3Driver.cpp) 
target_link_libraries(NlpSparseEx3.exe HiOp::HiOp) +if(HIOP_USE_RAJA) + if(HIOP_USE_GPU AND HIOP_USE_CUDA) + set_source_files_properties( + NlpSparseRajaEx2.cpp + NlpSparseRajaEx2Driver.cpp + PROPERTIES LANGUAGE CUDA + ) + endif() + add_executable(NlpSparseRajaEx2.exe NlpSparseRajaEx2Driver.cpp NlpSparseRajaEx2.cpp) + target_link_libraries(NlpSparseRajaEx2.exe HiOp::HiOp) + install(TARGETS NlpSparseRajaEx2.exe DESTINATION bin) +endif() + if(HIOP_BUILD_SHARED) add_executable(NlpSparseCEx1.exe NlpSparseCEx1.c) target_link_libraries(NlpSparseCEx1.exe HiOp::HiOp) @@ -52,6 +65,11 @@ if(HIOP_USE_GINKGO) add_test(NAME NlpSparse2_6 COMMAND ${RUNCMD} "$" "500" "-ginkgo_hip" "-inertiafree" "-selfcheck") endif(HIOP_USE_HIP) endif(HIOP_USE_GINKGO) + +if(HIOP_USE_RAJA) + add_test(NAME NlpSparseRaja2_1 COMMAND ${RUNCMD} bash -c "$ 500 -selfcheck ") +endif() + add_test(NAME NlpSparse3_1 COMMAND ${RUNCMD} "$" "500" "-selfcheck") if(HIOP_BUILD_SHARED AND NOT HIOP_USE_GPU) add_test(NAME NlpSparseCinterface COMMAND ${RUNCMD} "$") From f051f7d066491d95aaadd8b4136e156ff6570f34 Mon Sep 17 00:00:00 2001 From: nychiang Date: Mon, 27 Feb 2023 18:30:35 -0800 Subject: [PATCH 04/10] add driver --- src/Drivers/Sparse/NlpSparseRajaEx2.cpp | 94 +++--- src/Drivers/Sparse/NlpSparseRajaEx2Driver.cpp | 318 ++++++++++++++++++ src/LinAlg/hiopLinSolver.cpp | 8 +- src/LinAlg/hiopLinSolver.hpp | 6 +- src/LinAlg/hiopLinSolverSparseCUSOLVER.cpp | 78 ++++- src/LinAlg/hiopLinSolverSparseCUSOLVER.hpp | 26 +- src/LinAlg/hiopMatrixRajaSparseTriplet.hpp | 4 +- .../hiopMatrixRajaSparseTripletImpl.hpp | 14 +- src/Optimization/hiopKKTLinSysSparse.cpp | 44 ++- .../hiopKKTLinSysSparseCondensed.hpp | 1 - 10 files changed, 509 insertions(+), 84 deletions(-) create mode 100644 src/Drivers/Sparse/NlpSparseRajaEx2Driver.cpp diff --git a/src/Drivers/Sparse/NlpSparseRajaEx2.cpp b/src/Drivers/Sparse/NlpSparseRajaEx2.cpp index d1b9c2552..51c8a7232 100644 --- a/src/Drivers/Sparse/NlpSparseRajaEx2.cpp +++ b/src/Drivers/Sparse/NlpSparseRajaEx2.cpp @@ -52,13 +52,13 @@ * */ -#include "SparseRajaEx2.hpp" +#include "NlpSparseRajaEx2.hpp" #include #include #include -#include +//#include #include #include //for memcpy @@ -77,18 +77,18 @@ #include "ExecPoliciesRajaCudaImpl.hpp" using ex2_raja_exec = hiop::ExecRajaPoliciesBackend::hiop_raja_exec; using ex2_raja_reduce = hiop::ExecRajaPoliciesBackend::hiop_raja_reduce; -using hiopMatrixRajaSparse = hiop::hiopMatrixRajaSparseTriplet; +//using hiopMatrixRajaSparse = hiop::hiopMatrixRajaSparseTriplet; #elif defined(HIOP_USE_HIP) #include using ex2_raja_exec = hiop::ExecRajaPoliciesBackend::hiop_raja_exec; using ex2_raja_reduce = hiop::ExecRajaPoliciesBackend::hiop_raja_reduce; -using hiopMatrixRajaSparse = hiop::hiopMatrixRajaSparseTriplet; +//using hiopMatrixRajaSparse = hiop::hiopMatrixRajaSparseTriplet; #else //#if !defined(HIOP_USE_CUDA) && !defined(HIOP_USE_HIP) #include using ex2_raja_exec = hiop::ExecRajaPoliciesBackend::hiop_raja_exec; using ex2_raja_reduce = hiop::ExecRajaPoliciesBackend::hiop_raja_reduce; -using hiopMatrixRajaSparse = hiop::hiopMatrixRajaSparseTriplet; +//using hiopMatrixRajaSparse = hiop::hiopMatrixRajaSparseTriplet; #endif @@ -154,7 +154,7 @@ bool SparseRajaEx2::get_vars_info(const size_type& n, double *xlow, double* xupp { assert(n==n_vars_); - RAJA::forall(RAJA::RangeSegment(0, 3), + RAJA::forall(RAJA::RangeSegment(0, 3), RAJA_LAMBDA(RAJA::Index_type i) { if(i==0) { @@ -170,7 +170,7 @@ bool SparseRajaEx2::get_vars_info(const size_type& n, double *xlow, double* xupp }); if(n>3) { - 
RAJA::forall(RAJA::RangeSegment(3, n), + RAJA::forall(RAJA::RangeSegment(3, n), RAJA_LAMBDA(RAJA::Index_type i) { xlow[i] = 0.5; @@ -178,7 +178,7 @@ bool SparseRajaEx2::get_vars_info(const size_type& n, double *xlow, double* xupp }); } - RAJA::forall(RAJA::RangeSegment(0, n), + RAJA::forall(RAJA::RangeSegment(0, n), RAJA_LAMBDA(RAJA::Index_type i) { type[i] = hiopNonlinear; @@ -189,8 +189,9 @@ bool SparseRajaEx2::get_vars_info(const size_type& n, double *xlow, double* xupp bool SparseRajaEx2::get_cons_info(const size_type& m, double* clow, double* cupp, NonlinearityType* type) { assert(m==n_cons_); + size_type n = n_vars_; - RAJA::forall(RAJA::RangeSegment(0, 2), + RAJA::forall(RAJA::RangeSegment(0, 2), RAJA_LAMBDA(RAJA::Index_type i) { if(i==0) { @@ -203,16 +204,16 @@ bool SparseRajaEx2::get_cons_info(const size_type& m, double* clow, double* cupp }); if(n>3) { - RAJA::forall(RAJA::RangeSegment(3, m), + RAJA::forall(RAJA::RangeSegment(2, 2+n-3), RAJA_LAMBDA(RAJA::Index_type conidx) { clow[conidx] = 1.0; - cupp[conidx] = 2*n; + cupp[conidx] = 2*n_vars_; }); } if(rankdefic_ineq_) { - RAJA::forall(RAJA::RangeSegment(m, m+1), + RAJA::forall(RAJA::RangeSegment(2+n-3, 2+n-3+1), RAJA_LAMBDA(RAJA::Index_type conidx) { // [-inf] <= 4*x_1 + 2*x_3 <= [ 19 ] @@ -222,7 +223,7 @@ bool SparseRajaEx2::get_cons_info(const size_type& m, double* clow, double* cupp } if(rankdefic_eq_) { - RAJA::forall(RAJA::RangeSegment(m+rankdefic_ineq_, m+rankdefic_ineq_+1), + RAJA::forall(RAJA::RangeSegment(2+n-3+rankdefic_ineq_, m), RAJA_LAMBDA(RAJA::Index_type conidx) { // 4*x_1 + 2*x_2 == 10 @@ -231,7 +232,7 @@ bool SparseRajaEx2::get_cons_info(const size_type& m, double* clow, double* cupp }); } - RAJA::forall(RAJA::RangeSegment(0, m), + RAJA::forall(RAJA::RangeSegment(0, m), RAJA_LAMBDA(RAJA::Index_type i) { type[i] = hiopInterfaceBase::hiopNonlinear; @@ -304,16 +305,15 @@ bool SparseRajaEx2::eval_cons(const size_type& n, const size_type& m, const doub } }); - RAJA::forall(RAJA::RangeSegment(2, n-1), + RAJA::forall(RAJA::RangeSegment(2, 2+n-3), RAJA_LAMBDA(RAJA::Index_type i) { // --- constraint 3 body ---> 2*x_1 + 0.5*x_i, for i>=4 cons[i] = 2*x[0] + 0.5*x[i+1]; }); - if(rankdefic_ineq_) { - RAJA::forall(RAJA::RangeSegment(n-1, n), + RAJA::forall(RAJA::RangeSegment(2+n-3, 2+n-3+1), RAJA_LAMBDA(RAJA::Index_type i) { // [-inf] <= 4*x_1 + 2*x_3 <= [ 19 ] @@ -322,14 +322,13 @@ bool SparseRajaEx2::eval_cons(const size_type& n, const size_type& m, const doub } if(rankdefic_eq_) { - RAJA::forall(RAJA::RangeSegment(n-1+rankdefic_ineq_, n+rankdefic_ineq_), + RAJA::forall(RAJA::RangeSegment(2+n-3+rankdefic_ineq_, m), RAJA_LAMBDA(RAJA::Index_type i) { // 4*x_1 + 2*x_2 == 10 cons[i] = 4*x[0] + 2*x[1]; }); } - assert(conidx==m); return true; } @@ -353,20 +352,24 @@ bool SparseRajaEx2::eval_Jac_cons(const size_type& n, { if(itrow==0) { // --- constraint 1 body ---> 4*x_1 + 2*x_2 == 10 - iJacS[2*itrow] = itrow; - jJacS[2*itrow] = 0; - iJacS[2*itrow+1] = itrow; - jJacS[2*itrow+1] = 1; + if(iJacS !=nullptr && jJacS != nullptr) { + iJacS[2*itrow] = itrow; + jJacS[2*itrow] = 0; + iJacS[2*itrow+1] = itrow; + jJacS[2*itrow+1] = 1; + } if(MJacS != nullptr) { MJacS[2*itrow] = 4.0; MJacS[2*itrow+1] = 2.0; } } else if(itrow==1) { // --- constraint 2 body ---> 2*x_1 + x_3 - iJacS[2*itrow] = itrow; - jJacS[2*itrow] = 0; - iJacS[2*itrow+1] = itrow; - jJacS[2*itrow+1] = 2; + if(iJacS !=nullptr && jJacS != nullptr) { + iJacS[2*itrow] = itrow; + jJacS[2*itrow] = 0; + iJacS[2*itrow+1] = itrow; + jJacS[2*itrow+1] = 2; + } if(MJacS != nullptr) { 
MJacS[2*itrow] = 2.0; MJacS[2*itrow+1] = 1.0; @@ -378,26 +381,29 @@ bool SparseRajaEx2::eval_Jac_cons(const size_type& n, RAJA_LAMBDA(RAJA::Index_type itrow) { // --- constraint 3 body ---> 2*x_1 + 0.5*x_i, for i>=4 - iJacS[2*itrow] = itrow; - jJacS[2*itrow] = 0; - iJacS[2*itrow+1] = itrow; - jJacS[2*itrow+1] = itrow+1; + if(iJacS !=nullptr && jJacS != nullptr) { + iJacS[2*itrow] = itrow; + jJacS[2*itrow] = 0; + iJacS[2*itrow+1] = itrow; + jJacS[2*itrow+1] = itrow+1; + } if(MJacS != nullptr) { MJacS[2*itrow] = 2.0; MJacS[2*itrow+1] = 0.5; } }); - if(rankdefic_ineq_) { RAJA::forall(RAJA::RangeSegment(n-1, n), RAJA_LAMBDA(RAJA::Index_type itrow) { // [-inf] <= 4*x_1 + 2*x_3 <= [ 19 ] - iJacS[2*itrow] = itrow; - jJacS[2*itrow] = 0; - iJacS[2*itrow+1] = itrow; - jJacS[2*itrow+1] = 2; + if(iJacS !=nullptr && jJacS != nullptr) { + iJacS[2*itrow] = itrow; + jJacS[2*itrow] = 0; + iJacS[2*itrow+1] = itrow; + jJacS[2*itrow+1] = 2; + } if(MJacS != nullptr) { MJacS[2*itrow] = 4.0; MJacS[2*itrow+1] = 2.0; @@ -407,13 +413,15 @@ bool SparseRajaEx2::eval_Jac_cons(const size_type& n, if(rankdefic_eq_) { RAJA::forall(RAJA::RangeSegment(n-1+rankdefic_ineq_, n+rankdefic_ineq_), - RAJA_LAMBDA(RAJA::Index_type i) + RAJA_LAMBDA(RAJA::Index_type itrow) { // 4*x_1 + 2*x_2 == 10 - iJacS[2*itrow] = itrow; - jJacS[2*itrow] = 0; - iJacS[2*itrow+1] = itrow; - jJacS[2*itrow+1] = 1; + if(iJacS !=nullptr && jJacS != nullptr) { + iJacS[2*itrow] = itrow; + jJacS[2*itrow] = 0; + iJacS[2*itrow+1] = itrow; + jJacS[2*itrow+1] = 1; + } if(MJacS != nullptr) { MJacS[2*itrow] = 4.0; MJacS[2*itrow+1] = 2.0; @@ -440,7 +448,7 @@ bool SparseRajaEx2::eval_Hess_Lagr(const size_type& n, //not contribute to the Hessian of the Lagrangian assert(nnzHSS == n); - if(iHSS!=NULL && jHSS!=NULL) { + if(iHSS!=nullptr && jHSS!=nullptr) { RAJA::forall(RAJA::RangeSegment(0, n), RAJA_LAMBDA(RAJA::Index_type i) { @@ -451,7 +459,7 @@ bool SparseRajaEx2::eval_Hess_Lagr(const size_type& n, int convex_obj = (int) convex_obj_; double scal_neg_obj = scal_neg_obj_; - if(MHSS!=NULL) { + if(MHSS!=nullptr) { RAJA::forall(RAJA::RangeSegment(0, n), RAJA_LAMBDA(RAJA::Index_type i) { diff --git a/src/Drivers/Sparse/NlpSparseRajaEx2Driver.cpp b/src/Drivers/Sparse/NlpSparseRajaEx2Driver.cpp new file mode 100644 index 000000000..4df767d99 --- /dev/null +++ b/src/Drivers/Sparse/NlpSparseRajaEx2Driver.cpp @@ -0,0 +1,318 @@ +#include "NlpSparseRajaEx2.hpp" +#include "hiopNlpFormulation.hpp" +#include "hiopAlgFilterIPM.hpp" + +#include +#include + +#include +#include +#include + +using namespace hiop; + +static bool self_check(size_type n, double obj_value, const bool inertia_free); + +static bool parse_arguments(int argc, + char **argv, + size_type& n, + bool& self_check, + bool& inertia_free, + bool& use_cusolver, + bool& use_cusolver_lu, + bool& use_ginkgo, + bool& use_ginkgo_cuda, + bool& use_ginkgo_hip) +{ + self_check = false; + n = 3; + inertia_free = false; + use_cusolver = false; + use_cusolver_lu = false; + use_ginkgo = false; + use_ginkgo_cuda = false; + use_ginkgo_cuda = false; + switch(argc) { + case 1: + //no arguments + return true; + break; + case 5: //4 arguments + { + if(std::string(argv[4]) == "-selfcheck") { + self_check = true; + } else if(std::string(argv[4]) == "-inertiafree") { + inertia_free = true; + } else if(std::string(argv[4]) == "-cusolver") { + use_cusolver = true; + } else if(std::string(argv[4]) == "-ginkgo"){ + use_ginkgo = true; + } else if(std::string(argv[4]) == "-ginkgo_cuda"){ + use_ginkgo = true; + use_ginkgo_cuda = true; + } else 
if(std::string(argv[4]) == "-ginkgo_hip"){ + use_ginkgo = true; + use_ginkgo_hip = true; + } else { + n = std::atoi(argv[4]); + if(n<=0) { + return false; + } + } + } + case 4: //3 arguments + { + if(std::string(argv[3]) == "-selfcheck") { + self_check = true; + } else if(std::string(argv[3]) == "-inertiafree") { + inertia_free = true; + } else if(std::string(argv[3]) == "-cusolver") { + use_cusolver = true; + } else if(std::string(argv[3]) == "-ginkgo"){ + use_ginkgo = true; + } else if(std::string(argv[3]) == "-ginkgo_cuda"){ + use_ginkgo = true; + use_ginkgo_cuda = true; + } else if(std::string(argv[3]) == "-ginkgo_hip"){ + use_ginkgo = true; + use_ginkgo_hip = true; + } else { + n = std::atoi(argv[3]); + if(n<=0) { + return false; + } + } + } + case 3: //2 arguments + { + if(std::string(argv[2]) == "-selfcheck") { + self_check = true; + } else if(std::string(argv[2]) == "-inertiafree") { + inertia_free = true; + } else if(std::string(argv[2]) == "-cusolver") { + use_cusolver = true; + } else if(std::string(argv[2]) == "-ginkgo"){ + use_ginkgo = true; + } else if(std::string(argv[2]) == "-ginkgo_cuda"){ + use_ginkgo = true; + use_ginkgo_cuda = true; + } else if(std::string(argv[2]) == "-ginkgo_hip"){ + use_ginkgo = true; + use_ginkgo_hip = true; + } else { + n = std::atoi(argv[2]); + if(n<=0) { + return false; + } + } + } + case 2: //1 argument + { + if(std::string(argv[1]) == "-selfcheck") { + self_check = true; + } else if(std::string(argv[1]) == "-inertiafree") { + inertia_free = true; + } else if(std::string(argv[1]) == "-cusolver") { + use_cusolver = true; + } else if(std::string(argv[1]) == "-ginkgo"){ + use_ginkgo = true; + } else if(std::string(argv[1]) == "-ginkgo_cuda"){ + use_ginkgo = true; + use_ginkgo_cuda = true; + } else if(std::string(argv[1]) == "-ginkgo_hip"){ + use_ginkgo = true; + use_ginkgo_hip = true; + } else { + n = std::atoi(argv[1]); + if(n<=0) { + return false; + } + } + } + break; + default: + return false; // 4 or more arguments + } + +// If CUDA is not available, de-select cuSOLVER +#ifndef HIOP_USE_CUDA + if(use_cusolver) { + printf("HiOp built without CUDA support. "); + printf("Using default instead of cuSOLVER ...\n"); + use_cusolver = false; + } +#endif + +// Use cuSOLVER's LU factorization, if it was configured +#ifdef HIOP_USE_CUSOLVER_LU + if(use_cusolver) { + use_cusolver_lu = true; + } +#endif + + // If cuSOLVER was selected, but inertia free approach was not, add inertia-free + if(use_cusolver && !(inertia_free)) { + inertia_free = true; + printf("LU solver from cuSOLVER library requires inertia free approach. "); + printf("Enabling now ...\n"); + } + +// If Ginkgo is not available, de-select it. +#ifndef HIOP_USE_GINKGO + if(use_ginkgo) { + printf("HiOp not built with GINKGO support, using default linear solver ...\n"); + use_ginkgo = false; + } +#endif + + // If Ginkgo was selected, but inertia free approach was not, add inertia-free + if(use_ginkgo && !(inertia_free)) { + inertia_free = true; + printf("LU solver from GINKGO library requires inertia free approach. 
"); + printf("Enabling now ...\n"); + } + + return true; +}; + +static void usage(const char* exeName) +{ + printf("hiOp driver %s that solves a synthetic convex problem of variable size.\n", exeName); + printf("Usage: \n"); + printf(" '$ %s problem_size -inertiafree -selfcheck'\n", exeName); + printf("Arguments:\n"); + printf(" 'problem_size': number of decision variables [optional, default is 50]\n"); + printf(" '-inertiafree': indicate if inertia free approach should be used [optional]\n"); + printf(" '-selfcheck': compares the optimal objective with a previously saved value for the " + "problem specified by 'problem_size'. [optional]\n"); + printf(" '-cusolver': use cuSOLVER linear solver [optional]\n"); + printf(" '-ginkgo': use GINKGO linear solver [optional]\n"); +} + + +int main(int argc, char **argv) +{ + int rank=0; +#ifdef HIOP_USE_MPI + MPI_Init(&argc, &argv); + int comm_size; + int ierr = MPI_Comm_size(MPI_COMM_WORLD, &comm_size); assert(MPI_SUCCESS==ierr); + if(comm_size != 1) { + printf("[error] driver detected more than one rank but the driver should be run " + "in serial only; will exit\n"); + MPI_Finalize(); + return 1; + } +#endif + + // Set memory space where to create models and perform NLP solve +#ifdef HIOP_USE_GPU + std::string mem_space = "device"; +#else + std::string mem_space = "host"; +#endif + + bool selfCheck = false; + size_type n = 50; + bool inertia_free = false; + bool use_cusolver = false; + bool use_cusolver_lu = false; + bool use_ginkgo = false; + bool use_ginkgo_cuda = false; + bool use_ginkgo_hip = false; + if(!parse_arguments(argc, argv, n, selfCheck, inertia_free, use_cusolver, use_cusolver_lu, use_ginkgo, use_ginkgo_cuda, use_ginkgo_hip)) { + usage(argv[0]); +#ifdef HIOP_USE_MPI + MPI_Finalize(); +#endif + return 1; + } + + bool convex_obj = false; + bool rankdefic_Jac_eq = true; + bool rankdefic_Jac_ineq = true; + double scal_neg_obj = 0.1; + + //first test + { + SparseRajaEx2 nlp_interface(mem_space, n, convex_obj, rankdefic_Jac_eq, rankdefic_Jac_ineq, scal_neg_obj); + hiopNlpSparse nlp(nlp_interface); + nlp.options->SetStringValue("compute_mode", "gpu"); + nlp.options->SetStringValue("KKTLinsys", "xdycyd"); + + // only support cusolverLU right now, 2023.02.28 + //lsq initialization of the duals fails for this example since the Jacobian is rank deficient + //use zero initialization + nlp.options->SetStringValue("duals_init", "zero"); + nlp.options->SetStringValue("mem_space", "device"); + nlp.options->SetStringValue("fact_acceptor", "inertia_free"); + nlp.options->SetStringValue("linsol_mode", "speculative"); + + hiopAlgFilterIPMNewton solver(&nlp); + hiopSolveStatus status = solver.run(); + + double obj_value = solver.getObjective(); + + if(status<0) { + if(rank==0) { + printf("solver returned negative solve status: %d (with objective is %18.12e)\n", status, obj_value); + } +#ifdef HIOP_USE_MPI + MPI_Finalize(); +#endif + return -1; + } + + //this is used for "regression" testing when the driver is called with -selfcheck + if(selfCheck) { + if(!self_check(n, obj_value, inertia_free)) { +#ifdef HIOP_USE_MPI + MPI_Finalize(); +#endif + return -1; + } + } else { + if(rank==0) { + printf("Optimal objective: %22.14e. 
Solver status: %d\n", obj_value, status); + } + } + } + + +#ifdef HIOP_USE_MPI + MPI_Finalize(); +#endif + + return 0; +} + + +static bool self_check(size_type n, double objval, const bool inertia_free) +{ +#define num_n_saved 3 //keep this is sync with n_saved and objval_saved + const size_type n_saved[] = {50, 500, 10000}; + const double objval_saved[] = { 8.7754974e+00, 6.4322371e+01, 1.2369786e+03}; + +#define relerr 1e-6 + bool found=false; + for(int it=0; it relerr) { + printf("selfcheck failure. Objective (%18.12e) does not agree (%d digits) with the saved value (%18.12e) for n=%d.\n", + objval, -(int)log10(relerr), objval_saved[it], n); + return false; + } else { + printf("selfcheck success (%d digits)\n", -(int)log10(relerr)); + } + break; + } + } + + if(!found) { + printf("selfcheck: driver does not have the objective for n=%d saved. BTW, obj=%18.12e was obtained for this n.\n", n, objval); + return false; + } + + return true; +} diff --git a/src/LinAlg/hiopLinSolver.cpp b/src/LinAlg/hiopLinSolver.cpp index 0c3760dcc..480aa9e28 100644 --- a/src/LinAlg/hiopLinSolver.cpp +++ b/src/LinAlg/hiopLinSolver.cpp @@ -99,7 +99,8 @@ namespace hiop { //we default to triplet matrix for now; derived classes using CSR matrices will not call //this constructor (will call the 1-parameter constructor below) so they avoid creating //the triplet matrix - M_ = new hiopMatrixSparseTriplet(n, n, nnz); + M_ = LinearAlgebraFactory::create_matrix_sparse(nlp->options->GetString("mem_space"), n, n, nnz); + M_host_ = LinearAlgebraFactory::create_matrix_sparse("default", n, n, nnz); //this class will own `M_` sys_mat_owned_ = true; nlp_ = nlp; @@ -112,19 +113,22 @@ namespace hiop { sys_mat_owned_ = false; nlp_ = nlp; perf_report_ = "on"==hiop::tolower(nlp->options->GetString("time_kkt")); + M_host_ = nullptr; } hiopLinSolverSymSparse::hiopLinSolverSymSparse(hiopMatrixSparse* M, hiopNlpFormulation* nlp) { M_ = M; sys_mat_owned_ = false; + M_host_ = nullptr; nlp_ = nlp; perf_report_ = "on"==hiop::tolower(nlp->options->GetString("time_kkt")); } hiopLinSolverNonSymSparse::hiopLinSolverNonSymSparse(int n, int nnz, hiopNlpFormulation* nlp) { - M_ = new hiopMatrixSparseTriplet(n, n, nnz); + M_ = LinearAlgebraFactory::create_matrix_sparse(nlp->options->GetString("mem_space"), n, n, nnz); + M_host_ = nullptr; sys_mat_owned_ = false; nlp_ = nlp; perf_report_ = "on"==hiop::tolower(nlp->options->GetString("time_kkt")); diff --git a/src/LinAlg/hiopLinSolver.hpp b/src/LinAlg/hiopLinSolver.hpp index 79fe53bc5..db953be1f 100644 --- a/src/LinAlg/hiopLinSolver.hpp +++ b/src/LinAlg/hiopLinSolver.hpp @@ -136,7 +136,8 @@ class hiopLinSolverSparseBase : public hiopLinSolver public: hiopLinSolverSparseBase() : M_(nullptr), - sys_mat_owned_(true) + sys_mat_owned_(true), + M_host_(nullptr) { } @@ -144,6 +145,7 @@ class hiopLinSolverSparseBase : public hiopLinSolver { if(sys_mat_owned_) { delete M_; + delete M_host_; } } @@ -157,12 +159,14 @@ class hiopLinSolverSparseBase : public hiopLinSolver if(sys_mat_owned_) { assert(false && "system matrix should not have been allocated internally when calling set_sys_matrix"); delete M_; + delete M_host_; } sys_mat_owned_ = false; M_ = M; } protected: hiopMatrixSparse* M_; + hiopMatrixSparse* M_host_; // TODO: for cusolver, delete this later! 
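+  // Host-resident mirror of M_: when M_ lives in device memory, host-based sparse
+  // solvers (e.g., the cuSOLVER-based ones) copy the triplet data into M_host_
+  // before factorizing.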
bool sys_mat_owned_; }; diff --git a/src/LinAlg/hiopLinSolverSparseCUSOLVER.cpp b/src/LinAlg/hiopLinSolverSparseCUSOLVER.cpp index 957b31f33..e4a51f5f7 100644 --- a/src/LinAlg/hiopLinSolverSparseCUSOLVER.cpp +++ b/src/LinAlg/hiopLinSolverSparseCUSOLVER.cpp @@ -252,12 +252,12 @@ namespace hiop } else { // update matrix for(int k = 0; k < nnz_; k++) { - kVal_[k] = M_->M()[index_covert_CSR2Triplet_[k]]; + kVal_[k] = M_host_->M()[index_covert_CSR2Triplet_[k]]; } for(int i = 0; i < n_; i++) { if(index_covert_extra_Diag2CSR_[i] != -1) kVal_[index_covert_extra_Diag2CSR_[i]] - += M_->M()[M_->numberOfNonzeros() - n_ + i]; + += M_host_->M()[M_host_->numberOfNonzeros() - n_ + i]; } } // else @@ -469,10 +469,10 @@ namespace hiop // // off-diagonal part kRowPtr_[0] = 0; - for(int k = 0; k < M_->numberOfNonzeros() - n_; k++) { - if(M_->i_row()[k] != M_->j_col()[k]) { - kRowPtr_[M_->i_row()[k] + 1]++; - kRowPtr_[M_->j_col()[k] + 1]++; + for(int k = 0; k < M_host_->numberOfNonzeros() - n_; k++) { + if(M_host_->i_row()[k] != M_host_->j_col()[k]) { + kRowPtr_[M_host_->i_row()[k] + 1]++; + kRowPtr_[M_host_->j_col()[k] + 1]++; nnz_ += 2; } } @@ -502,16 +502,16 @@ namespace hiop index_covert_extra_Diag2CSR_[k] = -1; } - for(int k = 0; k < M_->numberOfNonzeros() - n_; k++) { - rowID_tmp = M_->i_row()[k]; - colID_tmp = M_->j_col()[k]; + for(int k = 0; k < M_host_->numberOfNonzeros() - n_; k++) { + rowID_tmp = M_host_->i_row()[k]; + colID_tmp = M_host_->j_col()[k]; if(rowID_tmp == colID_tmp) { nnz_tmp = nnz_each_row_tmp[rowID_tmp] + kRowPtr_[rowID_tmp]; jCol_[nnz_tmp] = colID_tmp; - kVal_[nnz_tmp] = M_->M()[k]; + kVal_[nnz_tmp] = M_host_->M()[k]; index_covert_CSR2Triplet_[nnz_tmp] = k; - kVal_[nnz_tmp] += M_->M()[M_->numberOfNonzeros() - n_ + rowID_tmp]; + kVal_[nnz_tmp] += M_host_->M()[M_host_->numberOfNonzeros() - n_ + rowID_tmp]; index_covert_extra_Diag2CSR_[rowID_tmp] = nnz_tmp; nnz_each_row_tmp[rowID_tmp]++; @@ -519,12 +519,12 @@ namespace hiop } else { nnz_tmp = nnz_each_row_tmp[rowID_tmp] + kRowPtr_[rowID_tmp]; jCol_[nnz_tmp] = colID_tmp; - kVal_[nnz_tmp] = M_->M()[k]; + kVal_[nnz_tmp] = M_host_->M()[k]; index_covert_CSR2Triplet_[nnz_tmp] = k; nnz_tmp = nnz_each_row_tmp[colID_tmp] + kRowPtr_[colID_tmp]; jCol_[nnz_tmp] = rowID_tmp; - kVal_[nnz_tmp] = M_->M()[k]; + kVal_[nnz_tmp] = M_host_->M()[k]; index_covert_CSR2Triplet_[nnz_tmp] = k; nnz_each_row_tmp[rowID_tmp]++; @@ -538,8 +538,8 @@ namespace hiop assert(nnz_each_row_tmp[i] == kRowPtr_[i + 1] - kRowPtr_[i] - 1); nnz_tmp = nnz_each_row_tmp[i] + kRowPtr_[i]; jCol_[nnz_tmp] = i; - kVal_[nnz_tmp] = M_->M()[M_->numberOfNonzeros() - n_ + i]; - index_covert_CSR2Triplet_[nnz_tmp] = M_->numberOfNonzeros() - n_ + i; + kVal_[nnz_tmp] = M_host_->M()[M_host_->numberOfNonzeros() - n_ + i]; + index_covert_CSR2Triplet_[nnz_tmp] = M_host_->numberOfNonzeros() - n_ + i; total_nnz_tmp += 1; std::vector ind_temp(kRowPtr_[i + 1] - kRowPtr_[i]); @@ -1619,4 +1619,52 @@ namespace hiop break; } // switch } // GramSchmidt + + + + hiopLinSolverSymSparseCUSOLVERGPU::hiopLinSolverSymSparseCUSOLVERGPU(const int& n, + const int& nnz, + hiopNlpFormulation* nlp) + : hiopLinSolverSymSparseCUSOLVER(n, nnz, nlp), + rhs_host_{nullptr} + { + rhs_host_ = LinearAlgebraFactory::create_vector("default", n); + } + + hiopLinSolverSymSparseCUSOLVERGPU::~hiopLinSolverSymSparseCUSOLVERGPU() + { + delete rhs_host_; + } + + int hiopLinSolverSymSparseCUSOLVERGPU::matrixChanged() + { + size_type nnz = M_->numberOfNonzeros(); + double* mval_dev = M_->M(); + double* mval_host= M_host_->M(); + 
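+    // Mirror the device triplet data (values and row/column indices) into the host
+    // copy M_host_, which the base-class cuSOLVER factorization consumes.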
checkCudaErrors(cudaMemcpy(mval_host, mval_dev, sizeof(double) * nnz, cudaMemcpyDeviceToHost)); + + index_type* irow_dev = M_->i_row(); + index_type* irow_host= M_host_->i_row(); + checkCudaErrors(cudaMemcpy(irow_host, irow_dev, sizeof(index_type) * nnz, cudaMemcpyDeviceToHost)); + + index_type* jcol_dev = M_->j_col(); + index_type* jcol_host= M_host_->j_col(); + checkCudaErrors(cudaMemcpy(jcol_host, jcol_dev, sizeof(index_type) * nnz, cudaMemcpyDeviceToHost)); + + return hiopLinSolverSymSparseCUSOLVER::matrixChanged(); + } + + bool hiopLinSolverSymSparseCUSOLVERGPU::solve(hiopVector& x) + { + double* mval_dev = x.local_data(); + double* mval_host= rhs_host_->local_data(); + + checkCudaErrors(cudaMemcpy(mval_host, mval_dev, sizeof(double) * n_, cudaMemcpyDeviceToHost)); + + bool bret = hiopLinSolverSymSparseCUSOLVER::solve(*rhs_host_); + + checkCudaErrors(cudaMemcpy(mval_dev, mval_host, sizeof(double) * n_, cudaMemcpyHostToDevice)); + return bret; + } + } // namespace hiop diff --git a/src/LinAlg/hiopLinSolverSparseCUSOLVER.hpp b/src/LinAlg/hiopLinSolverSparseCUSOLVER.hpp index 16518052c..db6ea84e0 100644 --- a/src/LinAlg/hiopLinSolverSparseCUSOLVER.hpp +++ b/src/LinAlg/hiopLinSolverSparseCUSOLVER.hpp @@ -88,12 +88,12 @@ class hiopLinSolverSymSparseCUSOLVER : public hiopLinSolverSymSparse /** Triggers a refactorization of the matrix, if necessary. * Overload from base class. * In this case, KLU (SuiteSparse) is used to refactor*/ - int matrixChanged(); + virtual int matrixChanged(); /** solves a linear system. * param 'x' is on entry the right hand side(s) of the system to be solved. * On exit is contains the solution(s). */ - bool solve(hiopVector& x_); + virtual bool solve(hiopVector& x_); /** Multiple rhs not supported yet */ virtual bool @@ -103,7 +103,7 @@ class hiopLinSolverSymSparseCUSOLVER : public hiopLinSolverSymSparse return false; } -private: +protected: // int m_; // number of rows of the whole matrix int n_; // number of cols of the whole matrix @@ -204,6 +204,26 @@ class hiopLinSolverSymSparseCUSOLVER : public hiopLinSolverSymSparse void IRsetup(); }; +class hiopLinSolverSymSparseCUSOLVERGPU : public hiopLinSolverSymSparseCUSOLVER +{ +public: + hiopLinSolverSymSparseCUSOLVERGPU(const int& n, const int& nnz, hiopNlpFormulation* nlp); + virtual ~hiopLinSolverSymSparseCUSOLVERGPU(); + + virtual int matrixChanged(); + virtual bool solve(hiopVector& x_); + + /** Multiple rhs not supported yet */ + virtual bool + solve(hiopMatrix& /* x */) + { + assert(false && "not yet supported"); + return false; + } + +private: + hiopVector* rhs_host_; +}; // Forward declaration of LU class class hiopLinSolverSymSparseCUSOLVERLU; diff --git a/src/LinAlg/hiopMatrixRajaSparseTriplet.hpp b/src/LinAlg/hiopMatrixRajaSparseTriplet.hpp index 4769d47fa..00bc19a08 100644 --- a/src/LinAlg/hiopMatrixRajaSparseTriplet.hpp +++ b/src/LinAlg/hiopMatrixRajaSparseTriplet.hpp @@ -390,11 +390,13 @@ class hiopMatrixRajaSparseTriplet : public hiopMatrixSparse { index_type *idx_start_; //size num_rows+1 index_type *idx_start_host_; //size num_rows+1 + index_type register_row_st_; size_type num_rows_; std::string mem_space_; RowStartsInfo() : idx_start_(nullptr), - num_rows_(0) + num_rows_(0), + register_row_st_{0} {} RowStartsInfo(size_type n_rows, std::string memspace); virtual ~RowStartsInfo(); diff --git a/src/LinAlg/hiopMatrixRajaSparseTripletImpl.hpp b/src/LinAlg/hiopMatrixRajaSparseTripletImpl.hpp index 11919070f..fd538740f 100644 --- a/src/LinAlg/hiopMatrixRajaSparseTripletImpl.hpp +++ 
b/src/LinAlg/hiopMatrixRajaSparseTripletImpl.hpp @@ -1123,8 +1123,6 @@ copyRowsBlockFrom(const hiopMatrix& src_gen, assert(n_rows + rows_src_idx_st <= src.m()); assert(n_rows + rows_dst_idx_st <= this->m()); - index_type* src_row_st = src.row_starts_->idx_start_; - const index_type* iRow_src = src.i_row(); const index_type* jCol_src = src.j_col(); const double* values_src = src.M(); @@ -1143,6 +1141,8 @@ copyRowsBlockFrom(const hiopMatrix& src_gen, } assert(src.row_starts_); + index_type* src_row_st = src.row_starts_->idx_start_; + // // The latest CPU code can be found in 342eb99ec16d45f57a492be1bf1e39cce73995a5 // It is replaced by RAJA::inclusive_scan after that commit @@ -1165,10 +1165,12 @@ copyRowsBlockFrom(const hiopMatrix& src_gen, umpire::Allocator hostalloc = rm.getAllocator("HOST"); int *next_row_nnz = static_cast(hostalloc.allocate(sizeof(size_type))); + index_type register_row_st = row_starts_->register_row_st_; rm.copy(next_row_nnz, dst_row_st_dev+1+rows_dst_idx_st, 1*sizeof(size_type)); if(next_row_nnz[0] == 0) { + assert(rows_dst_idx_st >= register_row_st); // comput nnz in each row from source RAJA::forall( RAJA::RangeSegment(0, n_rows), @@ -1179,7 +1181,8 @@ copyRowsBlockFrom(const hiopMatrix& src_gen, dst_row_st_dev[row_dst+1] = src_row_st[row_src+1] - src_row_st[row_src]; } ); - RAJA::inclusive_scan_inplace(RAJA::make_span(dst_row_st_dev, n_rows+1), RAJA::operators::plus()); + RAJA::inclusive_scan_inplace(RAJA::make_span(dst_row_st_dev+register_row_st, n_rows+1+rows_dst_idx_st-register_row_st), RAJA::operators::plus()); + row_starts_->register_row_st_ = n_rows + rows_dst_idx_st; } index_type* dst_row_st = row_starts_->idx_start_; @@ -1191,7 +1194,7 @@ copyRowsBlockFrom(const hiopMatrix& src_gen, const index_type row_src = rows_src_idx_st + row_add; const index_type row_dst = rows_dst_idx_st + row_add; index_type k_src = src_row_st[row_src]; - index_type k_dst = dst_row_st[row_dst]; + index_type k_dst = dst_row_st[row_dst] - dst_row_st[rows_dst_idx_st] + dest_nnz_st; // copy from src while(k_src < src_row_st[row_src+1]) { @@ -1283,7 +1286,8 @@ template hiopMatrixRajaSparseTriplet::RowStartsInfo:: RowStartsInfo(size_type n_rows, std::string memspace) : num_rows_(n_rows), - mem_space_(memspace) + mem_space_(memspace), + register_row_st_{0} { auto& resmgr = umpire::ResourceManager::getInstance(); umpire::Allocator alloc = resmgr.getAllocator(mem_space_); diff --git a/src/Optimization/hiopKKTLinSysSparse.cpp b/src/Optimization/hiopKKTLinSysSparse.cpp index 7a1360bf2..8a6419f7e 100644 --- a/src/Optimization/hiopKKTLinSysSparse.cpp +++ b/src/Optimization/hiopKKTLinSysSparse.cpp @@ -63,6 +63,10 @@ #ifdef HIOP_USE_GINKGO #include "hiopLinSolverSparseGinkgo.hpp" #endif + +#ifdef HIOP_USE_GPU +#include "hiopMatrixRajaSparseTriplet.hpp" +#endif #endif namespace hiop @@ -184,7 +188,12 @@ namespace hiop write_linsys_counter_++; } if(write_linsys_counter_>=0) { - csr_writer_.writeMatToFile(*Msys, write_linsys_counter_, nx, neq, nineq); +#ifndef HIOP_USE_GPU + auto* MsysSp = dynamic_cast(linSys->sys_matrix()); + csr_writer_.writeMatToFile(*MsysSp, write_linsys_counter_, nx, neq, nineq); +#else + //TODO csr_writer_.writeMatToFile(*Msys, write_linsys_counter_, nx, neq, nineq); +#endif } return true; @@ -418,14 +427,13 @@ namespace hiop delta_wd_ = perturb_calc_->get_curr_delta_wd(); delta_cc_ = perturb_calc_->get_curr_delta_cc(); delta_cd_ = perturb_calc_->get_curr_delta_cd(); - - HessSp_ = dynamic_cast(Hess_); - if(!HessSp_) { assert(false); return false; } - Jac_cSp_ = 
dynamic_cast(Jac_c_); + HessSp_ = dynamic_cast(Hess_); + Jac_cSp_ = dynamic_cast(Jac_c_); + Jac_dSp_ = dynamic_cast(Jac_d_); + + if(!HessSp_) { assert(false); return false; } if(!Jac_cSp_) { assert(false); return false; } - - Jac_dSp_ = dynamic_cast(Jac_d_); if(!Jac_dSp_) { assert(false); return false; } size_type nx = HessSp_->n(), nd=Jac_dSp_->m(), neq=Jac_cSp_->m(), nineq=Jac_dSp_->m(); @@ -436,7 +444,7 @@ namespace hiop auto* linSys = dynamic_cast (linSys_); assert(linSys); - auto* Msys = dynamic_cast(linSys->sys_matrix()); + auto* Msys = dynamic_cast(linSys->sys_matrix()); assert(Msys); if(perf_report_) { nlp_->log->printf(hovSummary, @@ -510,7 +518,12 @@ namespace hiop write_linsys_counter_++; } if(write_linsys_counter_>=0) { - csr_writer_.writeMatToFile(*Msys, write_linsys_counter_, nx, neq, nineq); +#ifndef HIOP_USE_GPU + auto* MsysSp = dynamic_cast(linSys->sys_matrix()); + csr_writer_.writeMatToFile(*MsysSp, write_linsys_counter_, nx, neq, nineq); +#else + //TODO csr_writer_.writeMatToFile(*Msys, write_linsys_counter_, nx, neq, nineq); +#endif } return true; @@ -750,13 +763,13 @@ namespace hiop // // We don't allow CPU linear solvers. ///////////////////////////////////////////////////////////////////////////////////////////// - assert(false && "KKT_SPARSE_XDYcYd linsys: GPU compute mode not yet supported."); - assert(false == safe_mode_); + // assert(false && "KKT_SPARSE_XDYcYd linsys: GPU compute mode not yet supported."); + // assert(false == safe_mode_); assert(nullptr == linSys_); if(linear_solver == "cusolver-lu" || linear_solver == "auto") { #if defined(HIOP_USE_CUSOLVER_LU) - linSys_ = new hiopLinSolverSymSparseCUSOLVER(n, nnz, nlp_); + linSys_ = new hiopLinSolverSymSparseCUSOLVERGPU(n, nnz, nlp_); nlp_->log->printf(hovScalars, "KKT_SPARSE_XDYcYd linsys: alloc CUSOLVER-LU size %d (%d cons) (gpu)\n", n, @@ -1046,7 +1059,12 @@ namespace hiop write_linsys_counter_++; } if(write_linsys_counter_>=0) { - csr_writer_.writeMatToFile(*Msys, write_linsys_counter_, nx, neq, nineq); +#ifndef HIOP_USE_GPU + auto* MsysSp = dynamic_cast(linSys->sys_matrix()); + csr_writer_.writeMatToFile(*MsysSp, write_linsys_counter_, nx, neq, nineq); +#else + //TODO csr_writer_.writeMatToFile(*Msys, write_linsys_counter_, nx, neq, nineq); +#endif } return true; diff --git a/src/Optimization/hiopKKTLinSysSparseCondensed.hpp b/src/Optimization/hiopKKTLinSysSparseCondensed.hpp index 1e0747c61..cb9c6fc8d 100644 --- a/src/Optimization/hiopKKTLinSysSparseCondensed.hpp +++ b/src/Optimization/hiopKKTLinSysSparseCondensed.hpp @@ -210,7 +210,6 @@ class hiopKKTLinSysCondensedSparse : public hiopKKTLinSysCompressedSparseXDYcYd } else { //(opt_compute_mode == "hybrid" || opt_compute_mode == "gpu") { #ifdef HIOP_USE_CUDA - assert(opt_compute_mode != "gpu" && "When code is GPU-ready, remove this method"); return "CUDA"; #else assert(false && "compute mode not supported without HIOP_USE_CUDA build"); From cbe3a5331a710a2294a2a741b6377ce342c55d1d Mon Sep 17 00:00:00 2001 From: nychiang Date: Wed, 1 Mar 2023 21:29:17 -0800 Subject: [PATCH 05/10] fix cusolver for hybrid mode --- src/LinAlg/hiopLinSolverSparseCUSOLVER.cpp | 32 ++++++++++++++++------ src/Optimization/hiopKKTLinSysSparse.cpp | 4 +-- 2 files changed, 26 insertions(+), 10 deletions(-) diff --git a/src/LinAlg/hiopLinSolverSparseCUSOLVER.cpp b/src/LinAlg/hiopLinSolverSparseCUSOLVER.cpp index e4a51f5f7..5403d29a0 100644 --- a/src/LinAlg/hiopLinSolverSparseCUSOLVER.cpp +++ b/src/LinAlg/hiopLinSolverSparseCUSOLVER.cpp @@ -1641,15 +1641,22 @@ namespace hiop 
size_type nnz = M_->numberOfNonzeros(); double* mval_dev = M_->M(); double* mval_host= M_host_->M(); - checkCudaErrors(cudaMemcpy(mval_host, mval_dev, sizeof(double) * nnz, cudaMemcpyDeviceToHost)); index_type* irow_dev = M_->i_row(); index_type* irow_host= M_host_->i_row(); - checkCudaErrors(cudaMemcpy(irow_host, irow_dev, sizeof(index_type) * nnz, cudaMemcpyDeviceToHost)); - + index_type* jcol_dev = M_->j_col(); index_type* jcol_host= M_host_->j_col(); - checkCudaErrors(cudaMemcpy(jcol_host, jcol_dev, sizeof(index_type) * nnz, cudaMemcpyDeviceToHost)); + + if("device" == nlp_->options->GetString("mem_space")) { + checkCudaErrors(cudaMemcpy(mval_host, mval_dev, sizeof(double) * nnz, cudaMemcpyDeviceToHost)); + checkCudaErrors(cudaMemcpy(irow_host, irow_dev, sizeof(index_type) * nnz, cudaMemcpyDeviceToHost)); + checkCudaErrors(cudaMemcpy(jcol_host, jcol_dev, sizeof(index_type) * nnz, cudaMemcpyDeviceToHost)); + } else { + checkCudaErrors(cudaMemcpy(mval_host, mval_dev, sizeof(double) * nnz, cudaMemcpyHostToHost)); + checkCudaErrors(cudaMemcpy(irow_host, irow_dev, sizeof(index_type) * nnz, cudaMemcpyHostToHost)); + checkCudaErrors(cudaMemcpy(jcol_host, jcol_dev, sizeof(index_type) * nnz, cudaMemcpyHostToHost)); + } return hiopLinSolverSymSparseCUSOLVER::matrixChanged(); } @@ -1658,12 +1665,21 @@ namespace hiop { double* mval_dev = x.local_data(); double* mval_host= rhs_host_->local_data(); - - checkCudaErrors(cudaMemcpy(mval_host, mval_dev, sizeof(double) * n_, cudaMemcpyDeviceToHost)); + + if("device" == nlp_->options->GetString("mem_space")) { + checkCudaErrors(cudaMemcpy(mval_host, mval_dev, sizeof(double) * n_, cudaMemcpyDeviceToHost)); + } else { + checkCudaErrors(cudaMemcpy(mval_host, mval_dev, sizeof(double) * n_, cudaMemcpyHostToHost)); + } bool bret = hiopLinSolverSymSparseCUSOLVER::solve(*rhs_host_); - - checkCudaErrors(cudaMemcpy(mval_dev, mval_host, sizeof(double) * n_, cudaMemcpyHostToDevice)); + + if("device" == nlp_->options->GetString("mem_space")) { + checkCudaErrors(cudaMemcpy(mval_dev, mval_host, sizeof(double) * n_, cudaMemcpyHostToDevice)); + } else { + checkCudaErrors(cudaMemcpy(mval_dev, mval_host, sizeof(double) * n_, cudaMemcpyHostToHost)); + } + return bret; } diff --git a/src/Optimization/hiopKKTLinSysSparse.cpp b/src/Optimization/hiopKKTLinSysSparse.cpp index 8a6419f7e..517b9fd79 100644 --- a/src/Optimization/hiopKKTLinSysSparse.cpp +++ b/src/Optimization/hiopKKTLinSysSparse.cpp @@ -338,7 +338,7 @@ namespace hiop if( (nullptr == linSys_ && linear_solver == "auto") || linear_solver == "cusolver-lu") { #if defined(HIOP_USE_CUSOLVER_LU) - linSys_ = new hiopLinSolverSymSparseCUSOLVER(n, nnz, nlp_); + linSys_ = new hiopLinSolverSymSparseCUSOLVERGPU(n, nnz, nlp_); linsol_actual = "CUSOLVER-LU"; auto* fact_acceptor_ic = dynamic_cast (fact_acceptor_); if(fact_acceptor_ic) { @@ -698,7 +698,7 @@ namespace hiop if(linear_solver == "cusolver-lu" || linear_solver == "auto") { #if defined(HIOP_USE_CUSOLVER_LU) actual_lin_solver = "CUSOLVER-LU"; - linSys_ = new hiopLinSolverSymSparseCUSOLVER(n, nnz, nlp_); + linSys_ = new hiopLinSolverSymSparseCUSOLVERGPU(n, nnz, nlp_); auto* fact_acceptor_ic = dynamic_cast (fact_acceptor_); if(fact_acceptor_ic) { nlp_->log->printf(hovError, From bed6ac821d30c0d3286ead023535b5b635841cf7 Mon Sep 17 00:00:00 2001 From: nychiang Date: Wed, 1 Mar 2023 21:58:31 -0800 Subject: [PATCH 06/10] update cmake file --- src/Drivers/Sparse/CMakeLists.txt | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git 
a/src/Drivers/Sparse/CMakeLists.txt b/src/Drivers/Sparse/CMakeLists.txt
index 1d6c505a5..7c96910e4 100644
--- a/src/Drivers/Sparse/CMakeLists.txt
+++ b/src/Drivers/Sparse/CMakeLists.txt
@@ -14,10 +14,11 @@ if(HIOP_USE_RAJA)
     NlpSparseRajaEx2Driver.cpp
     PROPERTIES LANGUAGE CUDA
   )
+
+    add_executable(NlpSparseRajaEx2.exe NlpSparseRajaEx2Driver.cpp NlpSparseRajaEx2.cpp)
+    target_link_libraries(NlpSparseRajaEx2.exe HiOp::HiOp)
+    install(TARGETS NlpSparseRajaEx2.exe DESTINATION bin)
   endif()
-  add_executable(NlpSparseRajaEx2.exe NlpSparseRajaEx2Driver.cpp NlpSparseRajaEx2.cpp)
-  target_link_libraries(NlpSparseRajaEx2.exe HiOp::HiOp)
-  install(TARGETS NlpSparseRajaEx2.exe DESTINATION bin)
 endif()

 if(HIOP_BUILD_SHARED)
@@ -66,7 +67,7 @@ if(HIOP_USE_GINKGO)
   endif(HIOP_USE_HIP)
 endif(HIOP_USE_GINKGO)

-if(HIOP_USE_RAJA)
+if(HIOP_USE_RAJA AND HIOP_USE_GPU AND HIOP_USE_CUDA)
   add_test(NAME NlpSparseRaja2_1 COMMAND ${RUNCMD} bash -c "$<TARGET_FILE:NlpSparseRajaEx2.exe> 500 -selfcheck ")
 endif()

From 8d16e81b90561647c85220bc9acad82f3f6e876a Mon Sep 17 00:00:00 2001
From: nychiang
Date: Thu, 2 Mar 2023 10:11:38 -0800
Subject: [PATCH 07/10] re-engineer the code

---
 src/Drivers/Sparse/NlpSparseRajaEx2.cpp    | 308 ++++++++++-----------
 src/LinAlg/hiopLinSolver.cpp               |   4 -
 src/LinAlg/hiopLinSolver.hpp               |   6 +-
 src/LinAlg/hiopLinSolverSparseCUSOLVER.cpp |  57 ++--
 src/LinAlg/hiopLinSolverSparseCUSOLVER.hpp |   1 +
 5 files changed, 182 insertions(+), 194 deletions(-)

diff --git a/src/Drivers/Sparse/NlpSparseRajaEx2.cpp b/src/Drivers/Sparse/NlpSparseRajaEx2.cpp
index 51c8a7232..1d730ba39 100644
--- a/src/Drivers/Sparse/NlpSparseRajaEx2.cpp
+++ b/src/Drivers/Sparse/NlpSparseRajaEx2.cpp
@@ -154,19 +154,18 @@ bool SparseRajaEx2::get_vars_info(const size_type& n, double *xlow, double* xupp
 {
   assert(n==n_vars_);

-  RAJA::forall<ex2_raja_exec>(RAJA::RangeSegment(0, 3),
+  RAJA::forall<ex2_raja_exec>(RAJA::RangeSegment(0, 1),
     RAJA_LAMBDA(RAJA::Index_type i)
     {
-      if(i==0) {
-        xlow[i] = -1e20;
-        xupp[i] = 1e20;
-      } else if(i==1) {
-        xlow[i] = 0.0;
-        xupp[i] = 1e20;
-      } else {
-        xlow[i] = 1.0;
-        xupp[i] = 10.0;
-      }
+      xlow[0] = -1e20;
+      xupp[0] = 1e20;
+      type[0] = hiopNonlinear;
+      xlow[1] = 0.0;
+      xupp[1] = 1e20;
+      type[1] = hiopNonlinear;
+      xlow[2] = 1.0;
+      xupp[2] = 10.0;
+      type[2] = hiopNonlinear;
     });

   if(n>3) {
@@ -175,14 +174,10 @@ bool SparseRajaEx2::get_vars_info(const size_type& n, double *xlow, double* xupp
       {
         xlow[i] = 0.5;
         xupp[i] = 1e20;
+        type[i] = hiopNonlinear;
       });
   }

-  RAJA::forall<ex2_raja_exec>(RAJA::RangeSegment(0, n),
-    RAJA_LAMBDA(RAJA::Index_type i)
-    {
-      type[i] = hiopNonlinear;
-    });
   return true;
 }

@@ -190,54 +185,44 @@ bool SparseRajaEx2::get_cons_info(const size_type& m, double* clow, double* cupp
 {
   assert(m==n_cons_);
   size_type n = n_vars_;
-
-  RAJA::forall<ex2_raja_exec>(RAJA::RangeSegment(0, 2),
+  assert(m == n-1+rankdefic_eq_+rankdefic_ineq_);
+
+  // serial part
+  RAJA::forall<ex2_raja_exec>(RAJA::RangeSegment(0, 1),
     RAJA_LAMBDA(RAJA::Index_type i)
-    {
-      if(i==0) {
-        clow[i] = 10.0;
-        cupp[i] = 10.0;
-      } else {
-        clow[i] = 5.0;
-        cupp[i] = 1e20;
+    {
+      clow[0] = 10.0;
+      cupp[0] = 10.0;
+      type[0] = hiopInterfaceBase::hiopNonlinear;
+      clow[1] = 5.0;
+      cupp[1] = 1e20;
+      type[1] = hiopInterfaceBase::hiopNonlinear;
+
+      if(rankdefic_ineq_) {
+        // [-inf] <= 4*x_1 + 2*x_3 <= [ 19 ]
+        clow[n-1] = -1e+20;
+        cupp[n-1] = 19.;
+        type[n-1] = hiopInterfaceBase::hiopNonlinear;
+      }
+
+      if(rankdefic_eq_) {
+        // 4*x_1 + 2*x_2 == 10
+        clow[m-1] = 10;
+        cupp[m-1] = 10;
+        type[m-1] = hiopInterfaceBase::hiopNonlinear;
       }
     });

   if(n>3) {
-    RAJA::forall<ex2_raja_exec>(RAJA::RangeSegment(2, 2+n-3),
+    RAJA::forall<ex2_raja_exec>(RAJA::RangeSegment(2, n-1),
       RAJA_LAMBDA(RAJA::Index_type conidx)
       {
        clow[conidx] = 1.0;
-       cupp[conidx] = 2*n_vars_;
+       cupp[conidx] = 2*n;
+       type[conidx] = hiopInterfaceBase::hiopNonlinear;
       });
   }

-  if(rankdefic_ineq_) {
-    RAJA::forall<ex2_raja_exec>(RAJA::RangeSegment(2+n-3, 2+n-3+1),
-      RAJA_LAMBDA(RAJA::Index_type conidx)
-      {
-        // [-inf] <= 4*x_1 + 2*x_3 <= [ 19 ]
-        clow[conidx] = -1e+20;
-        cupp[conidx] = 19.;
-      });
-  }
-
-  if(rankdefic_eq_) {
-    RAJA::forall<ex2_raja_exec>(RAJA::RangeSegment(2+n-3+rankdefic_ineq_, m),
-      RAJA_LAMBDA(RAJA::Index_type conidx)
-      {
-        // 4*x_1 + 2*x_2 == 10
-        clow[conidx] = 10;
-        cupp[conidx] = 10;
-      });
-  }
-
-  RAJA::forall<ex2_raja_exec>(RAJA::RangeSegment(0, m),
-    RAJA_LAMBDA(RAJA::Index_type i)
-    {
-      type[i] = hiopInterfaceBase::hiopNonlinear;
-    });
-
   return true;
 }

@@ -290,46 +275,37 @@ bool SparseRajaEx2::eval_grad_f(const size_type& n, const double* x, bool new_x,

 bool SparseRajaEx2::eval_cons(const size_type& n, const size_type& m, const double* x, bool new_x, double* cons)
 {
-  assert(n==n_vars_); assert(m==n_cons_);
+  assert(n==n_vars_);
+  assert(m==n_cons_);
   assert(n_cons_==2+n-3+rankdefic_eq_+rankdefic_ineq_);

-  RAJA::forall<ex2_raja_exec>(RAJA::RangeSegment(0, 2),
+  // serial part
+  RAJA::forall<ex2_raja_exec>(RAJA::RangeSegment(0, 1),
     RAJA_LAMBDA(RAJA::Index_type i)
-    {
-      if(i==0) {
-        // --- constraint 1 body ---> 4*x_1 + 2*x_2 == 10
-        cons[i] = 4*x[0] + 2*x[1];
-      } else if(i==1) {
-        // --- constraint 2 body ---> 2*x_1 + x_3
-        cons[i] = 2*x[0] + 1*x[2];
+    {
+      // --- constraint 1 body ---> 4*x_1 + 2*x_2 == 10
+      cons[0] = 4*x[0] + 2*x[1];
+      // --- constraint 2 body ---> 2*x_1 + x_3
+      cons[1] = 2*x[0] + 1*x[2];
+
+      if(rankdefic_ineq_) {
+        // [-inf] <= 4*x_1 + 2*x_3 <= [ 19 ]
+        cons[n-1] = 4*x[0] + 2*x[2];
+      }
+
+      if(rankdefic_eq_) {
+        // 4*x_1 + 2*x_2 == 10
+        cons[m-1] = 4*x[0] + 2*x[1];
       }
     });

-  RAJA::forall<ex2_raja_exec>(RAJA::RangeSegment(2, 2+n-3),
+  RAJA::forall<ex2_raja_exec>(RAJA::RangeSegment(2, n-1),
     RAJA_LAMBDA(RAJA::Index_type i)
     {
       // --- constraint 3 body ---> 2*x_1 + 0.5*x_i, for i>=4
       cons[i] = 2*x[0] + 0.5*x[i+1];
     });

-  if(rankdefic_ineq_) {
-    RAJA::forall<ex2_raja_exec>(RAJA::RangeSegment(2+n-3, 2+n-3+1),
-      RAJA_LAMBDA(RAJA::Index_type i)
-      {
-        // [-inf] <= 4*x_1 + 2*x_3 <= [ 19 ]
-        cons[i] = 4*x[0] + 2*x[2];
-      });
-  }
-
-  if(rankdefic_eq_) {
-    RAJA::forall<ex2_raja_exec>(RAJA::RangeSegment(2+n-3+rankdefic_ineq_, m),
-      RAJA_LAMBDA(RAJA::Index_type i)
-      {
-        // 4*x_1 + 2*x_2 == 10
-        cons[i] = 4*x[0] + 2*x[1];
-      });
-  }
-
   return true;
 }

@@ -346,87 +322,85 @@ bool SparseRajaEx2::eval_Jac_cons(const size_type& n,
   assert(n>=3);
   assert(nnzJacS == 4 + 2*(n-3) + 2*rankdefic_eq_ + 2*rankdefic_ineq_);
-
-  RAJA::forall<ex2_raja_exec>(RAJA::RangeSegment(0, 2),
-    RAJA_LAMBDA(RAJA::Index_type itrow)
-    {
-      if(itrow==0) {
+
+  if(iJacS !=nullptr && jJacS != nullptr) {
+    // serial part
+    RAJA::forall<ex2_raja_exec>(RAJA::RangeSegment(0, 1),
+      RAJA_LAMBDA(RAJA::Index_type itrow)
+      {
         // --- constraint 1 body ---> 4*x_1 + 2*x_2 == 10
-        if(iJacS !=nullptr && jJacS != nullptr) {
-          iJacS[2*itrow] = itrow;
-          jJacS[2*itrow] = 0;
-          iJacS[2*itrow+1] = itrow;
-          jJacS[2*itrow+1] = 1;
-        }
-        if(MJacS != nullptr) {
-          MJacS[2*itrow] = 4.0;
-          MJacS[2*itrow+1] = 2.0;
-        }
-      } else if(itrow==1) {
+        iJacS[0] = 0;
+        jJacS[0] = 0;
+        iJacS[1] = 0;
+        jJacS[1] = 1;
         // --- constraint 2 body ---> 2*x_1 + x_3
-        if(iJacS !=nullptr && jJacS != nullptr) {
-          iJacS[2*itrow] = itrow;
-          jJacS[2*itrow] = 0;
-          iJacS[2*itrow+1] = itrow;
-          jJacS[2*itrow+1] = 2;
+        iJacS[2] = 1;
+        jJacS[2] = 0;
+        iJacS[3] = 1;
+        jJacS[3] = 2;
+
+        if(rankdefic_ineq_) {
+          // [-inf] <= 4*x_1 + 2*x_3 <= [ 19 ]
+          iJacS[2*n-2] = n-1;
+          jJacS[2*n-2] = 0;
+          iJacS[2*n-1] = n-1;
+          jJacS[2*n-1] = 2;
        }

+        if(rankdefic_eq_) {
+          // 4*x_1 + 2*x_2 == 10
+          iJacS[2*m-2] = m-1;
+          jJacS[2*m-2] = 0;
+          iJacS[2*m-1] = m-1;
+          jJacS[2*m-1] = 1;
+        }
+      });
+
+    RAJA::forall<ex2_raja_exec>(RAJA::RangeSegment(2, n-1),
+      RAJA_LAMBDA(RAJA::Index_type itrow)
+      {
+        // --- constraint 3 body ---> 2*x_1 + 0.5*x_i, for i>=4
        iJacS[2*itrow] = itrow;
        jJacS[2*itrow] = 0;
        iJacS[2*itrow+1] = itrow;
        jJacS[2*itrow+1] = itrow+1;
-      }
-      if(MJacS != nullptr) {
-        MJacS[2*itrow] = 2.0;
-        MJacS[2*itrow+1] = 1.0;
-      }
-      }
-    });
-  RAJA::forall<ex2_raja_exec>(RAJA::RangeSegment(2, n-1),
-    RAJA_LAMBDA(RAJA::Index_type itrow)
-    {
-      // --- constraint 3 body ---> 2*x_1 + 0.5*x_i, for i>=4
-      if(iJacS !=nullptr && jJacS != nullptr) {
-        MJacS[2*itrow] = 2.0;
-        MJacS[2*itrow+1] = 0.5;
-      }
-    });
+      });
+
+  }

-  if(rankdefic_ineq_) {
-    RAJA::forall<ex2_raja_exec>(RAJA::RangeSegment(n-1, n),
+  if(MJacS != nullptr) {
+    // serial part
+    RAJA::forall<ex2_raja_exec>(RAJA::RangeSegment(0, 1),
      RAJA_LAMBDA(RAJA::Index_type itrow)
      {
-        // [-inf] <= 4*x_1 + 2*x_3 <= [ 19 ]
-        if(iJacS !=nullptr && jJacS != nullptr) {
-          iJacS[2*itrow] = itrow;
-          jJacS[2*itrow] = 0;
-          iJacS[2*itrow+1] = itrow;
-          jJacS[2*itrow+1] = 2;
-        }
-        if(MJacS != nullptr) {
-          MJacS[2*itrow] = 4.0;
-          MJacS[2*itrow+1] = 2.0;
+        // --- constraint 1 body ---> 4*x_1 + 2*x_2 == 10
+        MJacS[0] = 4.0;
+        MJacS[1] = 2.0;
+        // --- constraint 2 body ---> 2*x_1 + x_3
+        MJacS[2] = 2.0;
+        MJacS[3] = 1.0;
+
+        if(rankdefic_ineq_) {
+          // [-inf] <= 4*x_1 + 2*x_3 <= [ 19 ]
+          MJacS[2*n-2] = 4.0;
+          MJacS[2*n-1] = 2.0;
+        }
+
+        if(rankdefic_eq_) {
+          // 4*x_1 + 2*x_2 == 10
+          MJacS[2*m-2] = 4.0;
+          MJacS[2*m-1] = 2.0;
        }
      });

-  }
-  if(rankdefic_eq_) {
-    RAJA::forall<ex2_raja_exec>(RAJA::RangeSegment(n-1+rankdefic_ineq_, n+rankdefic_ineq_),
+    RAJA::forall<ex2_raja_exec>(RAJA::RangeSegment(2, n-1),
      RAJA_LAMBDA(RAJA::Index_type itrow)
      {
-        // 4*x_1 + 2*x_2 == 10
-        if(iJacS !=nullptr && jJacS != nullptr) {
-          iJacS[2*itrow] = itrow;
-          jJacS[2*itrow] = 0;
-          iJacS[2*itrow+1] = itrow;
-          jJacS[2*itrow+1] = 1;
-        }
-        if(MJacS != nullptr) {
-          MJacS[2*itrow] = 4.0;
-          MJacS[2*itrow+1] = 2.0;
-        }
+        // --- constraint 3 body ---> 2*x_1 + 0.5*x_i, for i>=4
+        MJacS[2*itrow] = 2.0;
+        MJacS[2*itrow+1] = 0.5;
      });
+  }

  return true;

@@ -444,29 +418,29 @@ bool SparseRajaEx2::eval_Hess_Lagr(const size_type& n,
                                    index_type* jHSS,
                                    double* MHSS)
 {
-  //Note: lambda is not used since all the constraints are linear and, therefore, do
-  //not contribute to the Hessian of the Lagrangian
-  assert(nnzHSS == n);
-
-  if(iHSS!=nullptr && jHSS!=nullptr) {
-    RAJA::forall<ex2_raja_exec>(RAJA::RangeSegment(0, n),
-      RAJA_LAMBDA(RAJA::Index_type i)
-      {
-        iHSS[i] = i;
-        jHSS[i] = i;
-      });
-  }
+  //Note: lambda is not used since all the constraints are linear and, therefore, do
+  //not contribute to the Hessian of the Lagrangian
+  assert(nnzHSS == n);

-  int convex_obj = (int) convex_obj_;
-  double scal_neg_obj = scal_neg_obj_;
-  if(MHSS!=nullptr) {
-    RAJA::forall<ex2_raja_exec>(RAJA::RangeSegment(0, n),
-      RAJA_LAMBDA(RAJA::Index_type i)
-      {
-        MHSS[i] = obj_factor * ( (2*convex_obj-1) * scal_neg_obj * 3 * std::pow(x[i]-1., 2) + 1);
-      });
-  }
-  return true;
+  if(iHSS!=nullptr && jHSS!=nullptr) {
+    RAJA::forall<ex2_raja_exec>(RAJA::RangeSegment(0, n),
+      RAJA_LAMBDA(RAJA::Index_type i)
+      {
+        iHSS[i] = i;
+        jHSS[i] = i;
+      });
+  }
+
+  int convex_obj = (int) convex_obj_;
+  double scal_neg_obj = scal_neg_obj_;
+  if(MHSS!=nullptr) {
+    RAJA::forall<ex2_raja_exec>(RAJA::RangeSegment(0, n),
+      RAJA_LAMBDA(RAJA::Index_type i)
+      {
+        MHSS[i] = obj_factor * ( (2*convex_obj-1) * scal_neg_obj * 3 * std::pow(x[i]-1., 2) + 1);
+      });
+  }
+  return true;
 }
 diff --git a/src/LinAlg/hiopLinSolver.cpp index 480aa9e28..868981175
100644 --- a/src/LinAlg/hiopLinSolver.cpp +++ b/src/LinAlg/hiopLinSolver.cpp @@ -100,7 +100,6 @@ namespace hiop { //this constructor (will call the 1-parameter constructor below) so they avoid creating //the triplet matrix M_ = LinearAlgebraFactory::create_matrix_sparse(nlp->options->GetString("mem_space"), n, n, nnz); - M_host_ = LinearAlgebraFactory::create_matrix_sparse("default", n, n, nnz); //this class will own `M_` sys_mat_owned_ = true; nlp_ = nlp; @@ -113,14 +112,12 @@ namespace hiop { sys_mat_owned_ = false; nlp_ = nlp; perf_report_ = "on"==hiop::tolower(nlp->options->GetString("time_kkt")); - M_host_ = nullptr; } hiopLinSolverSymSparse::hiopLinSolverSymSparse(hiopMatrixSparse* M, hiopNlpFormulation* nlp) { M_ = M; sys_mat_owned_ = false; - M_host_ = nullptr; nlp_ = nlp; perf_report_ = "on"==hiop::tolower(nlp->options->GetString("time_kkt")); } @@ -128,7 +125,6 @@ namespace hiop { hiopLinSolverNonSymSparse::hiopLinSolverNonSymSparse(int n, int nnz, hiopNlpFormulation* nlp) { M_ = LinearAlgebraFactory::create_matrix_sparse(nlp->options->GetString("mem_space"), n, n, nnz); - M_host_ = nullptr; sys_mat_owned_ = false; nlp_ = nlp; perf_report_ = "on"==hiop::tolower(nlp->options->GetString("time_kkt")); diff --git a/src/LinAlg/hiopLinSolver.hpp b/src/LinAlg/hiopLinSolver.hpp index db953be1f..79fe53bc5 100644 --- a/src/LinAlg/hiopLinSolver.hpp +++ b/src/LinAlg/hiopLinSolver.hpp @@ -136,8 +136,7 @@ class hiopLinSolverSparseBase : public hiopLinSolver public: hiopLinSolverSparseBase() : M_(nullptr), - sys_mat_owned_(true), - M_host_(nullptr) + sys_mat_owned_(true) { } @@ -145,7 +144,6 @@ class hiopLinSolverSparseBase : public hiopLinSolver { if(sys_mat_owned_) { delete M_; - delete M_host_; } } @@ -159,14 +157,12 @@ class hiopLinSolverSparseBase : public hiopLinSolver if(sys_mat_owned_) { assert(false && "system matrix should not have been allocated internally when calling set_sys_matrix"); delete M_; - delete M_host_; } sys_mat_owned_ = false; M_ = M; } protected: hiopMatrixSparse* M_; - hiopMatrixSparse* M_host_; // TODO: for cusolver, delete this later! 
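  // `sys_mat_owned_` below tracks whether this object allocated `M_` itself and must
  // delete it in the destructor; constructors taking an externally created matrix
  // leave ownership with the caller.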
bool sys_mat_owned_; }; diff --git a/src/LinAlg/hiopLinSolverSparseCUSOLVER.cpp b/src/LinAlg/hiopLinSolverSparseCUSOLVER.cpp index 5403d29a0..b50537324 100644 --- a/src/LinAlg/hiopLinSolverSparseCUSOLVER.cpp +++ b/src/LinAlg/hiopLinSolverSparseCUSOLVER.cpp @@ -252,12 +252,12 @@ namespace hiop } else { // update matrix for(int k = 0; k < nnz_; k++) { - kVal_[k] = M_host_->M()[index_covert_CSR2Triplet_[k]]; + kVal_[k] = M_->M()[index_covert_CSR2Triplet_[k]]; } for(int i = 0; i < n_; i++) { if(index_covert_extra_Diag2CSR_[i] != -1) kVal_[index_covert_extra_Diag2CSR_[i]] - += M_host_->M()[M_host_->numberOfNonzeros() - n_ + i]; + += M_->M()[M_->numberOfNonzeros() - n_ + i]; } } // else @@ -469,10 +469,10 @@ namespace hiop // // off-diagonal part kRowPtr_[0] = 0; - for(int k = 0; k < M_host_->numberOfNonzeros() - n_; k++) { - if(M_host_->i_row()[k] != M_host_->j_col()[k]) { - kRowPtr_[M_host_->i_row()[k] + 1]++; - kRowPtr_[M_host_->j_col()[k] + 1]++; + for(int k = 0; k < M_->numberOfNonzeros() - n_; k++) { + if(M_->i_row()[k] != M_->j_col()[k]) { + kRowPtr_[M_->i_row()[k] + 1]++; + kRowPtr_[M_->j_col()[k] + 1]++; nnz_ += 2; } } @@ -502,16 +502,16 @@ namespace hiop index_covert_extra_Diag2CSR_[k] = -1; } - for(int k = 0; k < M_host_->numberOfNonzeros() - n_; k++) { - rowID_tmp = M_host_->i_row()[k]; - colID_tmp = M_host_->j_col()[k]; + for(int k = 0; k < M_->numberOfNonzeros() - n_; k++) { + rowID_tmp = M_->i_row()[k]; + colID_tmp = M_->j_col()[k]; if(rowID_tmp == colID_tmp) { nnz_tmp = nnz_each_row_tmp[rowID_tmp] + kRowPtr_[rowID_tmp]; jCol_[nnz_tmp] = colID_tmp; - kVal_[nnz_tmp] = M_host_->M()[k]; + kVal_[nnz_tmp] = M_->M()[k]; index_covert_CSR2Triplet_[nnz_tmp] = k; - kVal_[nnz_tmp] += M_host_->M()[M_host_->numberOfNonzeros() - n_ + rowID_tmp]; + kVal_[nnz_tmp] += M_->M()[M_->numberOfNonzeros() - n_ + rowID_tmp]; index_covert_extra_Diag2CSR_[rowID_tmp] = nnz_tmp; nnz_each_row_tmp[rowID_tmp]++; @@ -519,12 +519,12 @@ namespace hiop } else { nnz_tmp = nnz_each_row_tmp[rowID_tmp] + kRowPtr_[rowID_tmp]; jCol_[nnz_tmp] = colID_tmp; - kVal_[nnz_tmp] = M_host_->M()[k]; + kVal_[nnz_tmp] = M_->M()[k]; index_covert_CSR2Triplet_[nnz_tmp] = k; nnz_tmp = nnz_each_row_tmp[colID_tmp] + kRowPtr_[colID_tmp]; jCol_[nnz_tmp] = rowID_tmp; - kVal_[nnz_tmp] = M_host_->M()[k]; + kVal_[nnz_tmp] = M_->M()[k]; index_covert_CSR2Triplet_[nnz_tmp] = k; nnz_each_row_tmp[rowID_tmp]++; @@ -538,8 +538,8 @@ namespace hiop assert(nnz_each_row_tmp[i] == kRowPtr_[i + 1] - kRowPtr_[i] - 1); nnz_tmp = nnz_each_row_tmp[i] + kRowPtr_[i]; jCol_[nnz_tmp] = i; - kVal_[nnz_tmp] = M_host_->M()[M_host_->numberOfNonzeros() - n_ + i]; - index_covert_CSR2Triplet_[nnz_tmp] = M_host_->numberOfNonzeros() - n_ + i; + kVal_[nnz_tmp] = M_->M()[M_->numberOfNonzeros() - n_ + i]; + index_covert_CSR2Triplet_[nnz_tmp] = M_->numberOfNonzeros() - n_ + i; total_nnz_tmp += 1; std::vector ind_temp(kRowPtr_[i + 1] - kRowPtr_[i]); @@ -1626,14 +1626,17 @@ namespace hiop const int& nnz, hiopNlpFormulation* nlp) : hiopLinSolverSymSparseCUSOLVER(n, nnz, nlp), - rhs_host_{nullptr} + rhs_host_{nullptr}, + M_host_{nullptr} { rhs_host_ = LinearAlgebraFactory::create_vector("default", n); + M_host_ = LinearAlgebraFactory::create_matrix_sparse("default", n, n, nnz); } hiopLinSolverSymSparseCUSOLVERGPU::~hiopLinSolverSymSparseCUSOLVERGPU() { delete rhs_host_; + delete M_host_; } int hiopLinSolverSymSparseCUSOLVERGPU::matrixChanged() @@ -1657,8 +1660,18 @@ namespace hiop checkCudaErrors(cudaMemcpy(irow_host, irow_dev, sizeof(index_type) * nnz, cudaMemcpyHostToHost)); 
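    // with a non-"device" mem_space both pointers already reference host buffers, so
    // these cudaMemcpyHostToHost transfers are equivalent to plain std::memcpy calls;
    // the CUDA API is used here only to keep the checkCudaErrors handling uniform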
    checkCudaErrors(cudaMemcpy(jcol_host, jcol_dev, sizeof(index_type) * nnz, cudaMemcpyHostToHost));
   }
+
+  hiopMatrixSparse* swap_ptr = M_;
+  M_ = M_host_;
+  M_host_ = swap_ptr;
+
+  int vret = hiopLinSolverSymSparseCUSOLVER::matrixChanged();
-  return hiopLinSolverSymSparseCUSOLVER::matrixChanged();
+  swap_ptr = M_;
+  M_ = M_host_;
+  M_host_ = swap_ptr;
+
+  return vret;
 }

 bool hiopLinSolverSymSparseCUSOLVERGPU::solve(hiopVector& x)
@@ -1672,8 +1685,16 @@ namespace hiop
     checkCudaErrors(cudaMemcpy(mval_host, mval_dev, sizeof(double) * n_, cudaMemcpyHostToHost));
   }

+  hiopMatrixSparse* swap_ptr = M_;
+  M_ = M_host_;
+  M_host_ = swap_ptr;
+
   bool bret = hiopLinSolverSymSparseCUSOLVER::solve(*rhs_host_);
-
+
+  swap_ptr = M_;
+  M_ = M_host_;
+  M_host_ = swap_ptr;
+
   if("device" == nlp_->options->GetString("mem_space")) {
     checkCudaErrors(cudaMemcpy(mval_dev, mval_host, sizeof(double) * n_, cudaMemcpyHostToDevice));
   } else {
diff --git a/src/LinAlg/hiopLinSolverSparseCUSOLVER.hpp b/src/LinAlg/hiopLinSolverSparseCUSOLVER.hpp
index db6ea84e0..1833afe67 100644
--- a/src/LinAlg/hiopLinSolverSparseCUSOLVER.hpp
+++ b/src/LinAlg/hiopLinSolverSparseCUSOLVER.hpp
@@ -223,6 +223,7 @@ class hiopLinSolverSymSparseCUSOLVERGPU : public hiopLinSolverSymSparseCUSOLVER
 private:
   hiopVector* rhs_host_;
+  hiopMatrixSparse* M_host_;
 };
 // Forward declaration of LU class

From 353268c27052b65deab71d585a38b5b08495e5a8 Mon Sep 17 00:00:00 2001
From: nychiang
Date: Mon, 6 Mar 2023 16:42:19 -0800
Subject: [PATCH 08/10] try to fix marianas raja issue

---
 src/Drivers/Sparse/NlpSparseRajaEx2.cpp | 32 +++++++++++++++++--------
 1 file changed, 22 insertions(+), 10 deletions(-)

diff --git a/src/Drivers/Sparse/NlpSparseRajaEx2.cpp b/src/Drivers/Sparse/NlpSparseRajaEx2.cpp
index 1d730ba39..55628a224 100644
--- a/src/Drivers/Sparse/NlpSparseRajaEx2.cpp
+++ b/src/Drivers/Sparse/NlpSparseRajaEx2.cpp
@@ -186,6 +186,10 @@ bool SparseRajaEx2::get_cons_info(const size_type& m, double* clow, double* cupp
   assert(m==n_cons_);
   size_type n = n_vars_;
   assert(m == n-1+rankdefic_eq_+rankdefic_ineq_);
+
+  // copy members into locals: a RAJA device lambda would otherwise capture `this` and dereference host memory on the device
+  bool rankdefic_eq = rankdefic_eq_;
+  bool rankdefic_ineq = rankdefic_ineq_;

   // serial part
   RAJA::forall<ex2_raja_exec>(RAJA::RangeSegment(0, 1),
@@ -198,14 +202,14 @@ bool SparseRajaEx2::get_cons_info(const size_type& m, double* clow, double* cupp
       cupp[1] = 1e20;
       type[1] = hiopInterfaceBase::hiopNonlinear;

-      if(rankdefic_ineq_) {
+      if(rankdefic_ineq) {
         // [-inf] <= 4*x_1 + 2*x_3 <= [ 19 ]
         clow[n-1] = -1e+20;
         cupp[n-1] = 19.;
         type[n-1] = hiopInterfaceBase::hiopNonlinear;
       }

-      if(rankdefic_eq_) {
+      if(rankdefic_eq) {
         // 4*x_1 + 2*x_2 == 10
         clow[m-1] = 10;
         cupp[m-1] = 10;
@@ -279,6 +283,10 @@ bool SparseRajaEx2::eval_cons(const size_type& n, const size_type& m, const doub
   assert(m==n_cons_);
   assert(n_cons_==2+n-3+rankdefic_eq_+rankdefic_ineq_);

+  // copy members into locals: a RAJA device lambda would otherwise capture `this` and dereference host memory on the device
+  bool rankdefic_eq = rankdefic_eq_;
+  bool rankdefic_ineq = rankdefic_ineq_;
+
   // serial part
   RAJA::forall<ex2_raja_exec>(RAJA::RangeSegment(0, 1),
     RAJA_LAMBDA(RAJA::Index_type i)
@@ -296,12 +304,12 @@ bool SparseRajaEx2::eval_cons(const size_type& n, const size_type& m, const doub
       // --- constraint 2 body ---> 2*x_1 + x_3
       cons[1] = 2*x[0] + 1*x[2];

-      if(rankdefic_ineq_) {
+      if(rankdefic_ineq) {
         // [-inf] <= 4*x_1 + 2*x_3 <= [ 19 ]
         cons[n-1] = 4*x[0] + 2*x[2];
       }

-      if(rankdefic_eq_) {
+      if(rankdefic_eq) {
         // 4*x_1 + 2*x_2 == 10
         cons[m-1] = 4*x[0] + 2*x[1];
       }
@@ -322,7 +330,11 @@ bool SparseRajaEx2::eval_Jac_cons(const size_type& n,
   assert(n>=3);
   assert(nnzJacS == 4 + 2*(n-3) + 2*rankdefic_eq_ + 2*rankdefic_ineq_);
-
+
+  // copy members into locals: a RAJA device lambda would otherwise capture `this` and dereference host memory on the device
+  bool rankdefic_eq = rankdefic_eq_;
+  bool rankdefic_ineq = rankdefic_ineq_;
+
   if(iJacS !=nullptr && jJacS != nullptr) {
     // serial part
     RAJA::forall<ex2_raja_exec>(RAJA::RangeSegment(0, 1),
@@ -339,7 +351,7 @@ bool SparseRajaEx2::eval_Jac_cons(const size_type& n,
         iJacS[3] = 1;
         jJacS[3] = 2;

-        if(rankdefic_ineq_) {
+        if(rankdefic_ineq) {
           // [-inf] <= 4*x_1 + 2*x_3 <= [ 19 ]
           iJacS[2*n-2] = n-1;
           jJacS[2*n-2] = 0;
@@ -347,7 +359,7 @@ bool SparseRajaEx2::eval_Jac_cons(const size_type& n,
           jJacS[2*n-1] = 2;
         }

-        if(rankdefic_eq_) {
+        if(rankdefic_eq) {
           // 4*x_1 + 2*x_2 == 10
           iJacS[2*m-2] = m-1;
           jJacS[2*m-2] = 0;
@@ -380,13 +392,13 @@ bool SparseRajaEx2::eval_Jac_cons(const size_type& n,
         MJacS[2] = 2.0;
         MJacS[3] = 1.0;

-        if(rankdefic_ineq_) {
+        if(rankdefic_ineq) {
           // [-inf] <= 4*x_1 + 2*x_3 <= [ 19 ]
           MJacS[2*n-2] = 4.0;
           MJacS[2*n-1] = 2.0;
-        }
+        }

-        if(rankdefic_eq_) {
+        if(rankdefic_eq) {
           // 4*x_1 + 2*x_2 == 10
           MJacS[2*m-2] = 4.0;
           MJacS[2*m-1] = 2.0;

From 5fdb4d547560f5bfbc2ccfebc872fd394006792e Mon Sep 17 00:00:00 2001
From: nychiang
Date: Fri, 10 Mar 2023 16:02:57 -0800
Subject: [PATCH 09/10] host data for NonlinearityType

---
 src/Drivers/Sparse/NlpSparseRajaEx2.cpp | 32 ++++++++++++++++++-------
 1 file changed, 23 insertions(+), 9 deletions(-)

diff --git a/src/Drivers/Sparse/NlpSparseRajaEx2.cpp b/src/Drivers/Sparse/NlpSparseRajaEx2.cpp
index 55628a224..bd9fca748 100644
--- a/src/Drivers/Sparse/NlpSparseRajaEx2.cpp
+++ b/src/Drivers/Sparse/NlpSparseRajaEx2.cpp
@@ -159,13 +159,13 @@ bool SparseRajaEx2::get_vars_info(const size_type& n, double *xlow, double* xupp
     {
       xlow[0] = -1e20;
       xupp[0] = 1e20;
-      type[0] = hiopNonlinear;
+//      type[0] = hiopNonlinear;
       xlow[1] = 0.0;
       xupp[1] = 1e20;
-      type[1] = hiopNonlinear;
+//      type[1] = hiopNonlinear;
       xlow[2] = 1.0;
       xupp[2] = 10.0;
-      type[2] = hiopNonlinear;
+//      type[2] = hiopNonlinear;
     });

   if(n>3) {
@@ -174,10 +174,17 @@ bool SparseRajaEx2::get_vars_info(const size_type& n, double *xlow, double* xupp
       {
         xlow[i] = 0.5;
         xupp[i] = 1e20;
-        type[i] = hiopNonlinear;
+//        type[i] = hiopNonlinear;
       });
   }

+  // Use a sequential policy for host computations for now
+  RAJA::forall<RAJA::seq_exec>(RAJA::RangeSegment(0, n),
+    [=] (RAJA::Index_type i)
+    {
+      type[i] = hiopNonlinear;
+    });
+
   return true;
 }

@@ -197,23 +204,23 @@ bool SparseRajaEx2::get_cons_info(const size_type& m, double* clow, double* cupp
     {
       clow[0] = 10.0;
       cupp[0] = 10.0;
-      type[0] = hiopInterfaceBase::hiopNonlinear;
+//      type[0] = hiopInterfaceBase::hiopNonlinear;
       clow[1] = 5.0;
       cupp[1] = 1e20;
-      type[1] = hiopInterfaceBase::hiopNonlinear;
+//      type[1] = hiopInterfaceBase::hiopNonlinear;

       if(rankdefic_ineq) {
         // [-inf] <= 4*x_1 + 2*x_3 <= [ 19 ]
         clow[n-1] = -1e+20;
         cupp[n-1] = 19.;
-        type[n-1] = hiopInterfaceBase::hiopNonlinear;
+//        type[n-1] = hiopInterfaceBase::hiopNonlinear;
       }

       if(rankdefic_eq) {
         // 4*x_1 + 2*x_2 == 10
         clow[m-1] = 10;
         cupp[m-1] = 10;
-        type[m-1] = hiopInterfaceBase::hiopNonlinear;
+//        type[m-1] = hiopInterfaceBase::hiopNonlinear;
       }
     });

@@ -223,10 +230,17 @@ bool SparseRajaEx2::get_cons_info(const size_type& m, double* clow, double* cupp
     {
       clow[conidx] = 1.0;
       cupp[conidx] = 2*n;
-      type[conidx] = hiopInterfaceBase::hiopNonlinear;
+//      type[conidx] = hiopInterfaceBase::hiopNonlinear;
     });
   }

+  // Must be a sequential host policy for now
+  RAJA::forall<RAJA::seq_exec>(RAJA::RangeSegment(0, m),
+    [=] (RAJA::Index_type i)
+    {
+      type[i] = hiopNonlinear;
+    });
+
   return true;
 }

From 3a4ee914bf651ec1451aaa748c7523c94ee8d95d Mon Sep 17 00:00:00 2001
From: nychiang
Date:
Mon, 13 Mar 2023 10:52:53 -0700 Subject: [PATCH 10/10] clean the code --- src/Optimization/hiopKKTLinSysSparse.cpp | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/Optimization/hiopKKTLinSysSparse.cpp b/src/Optimization/hiopKKTLinSysSparse.cpp index 517b9fd79..c3c43c832 100644 --- a/src/Optimization/hiopKKTLinSysSparse.cpp +++ b/src/Optimization/hiopKKTLinSysSparse.cpp @@ -64,9 +64,6 @@ #include "hiopLinSolverSparseGinkgo.hpp" #endif -#ifdef HIOP_USE_GPU -#include "hiopMatrixRajaSparseTriplet.hpp" -#endif #endif namespace hiop
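The net effect of PATCH 05 through PATCH 07 above is a host-mirror scheme in the CUSOLVER wrapper: the triplet matrix and right-hand side are copied into host storage, and `M_` is pointed at the mirror while the inherited host-only routines run, then pointed back. Below is a minimal self-contained C++ sketch of that pointer-swap pattern. It is an illustration only: `HostMirroredSolver`, `TripletMatrix`, and `factorize_on_host` are hypothetical stand-ins rather than HiOp API, and `std::memcpy` stands in for the `cudaMemcpy` traffic shown in the diffs.

#include <cstdio>
#include <cstring>
#include <utility>
#include <vector>

// a sparse triplet matrix reduced to its value array; the i_row/j_col index
// arrays are omitted because only the value traffic matters for the swap idea
struct TripletMatrix {
  std::vector<double> vals;
};

class HostMirroredSolver {
public:
  explicit HostMirroredSolver(std::size_t nnz)
    : M_(new TripletMatrix{std::vector<double>(nnz)}),
      M_host_(new TripletMatrix{std::vector<double>(nnz)})
  {}
  ~HostMirroredSolver() { delete M_; delete M_host_; }

  // mirrors matrixChanged(): refresh the host copy of the values, then let the
  // host-only factorization read them through M_ by swapping the two pointers
  int matrixChanged() {
    // in the real solver this is cudaMemcpy (DeviceToHost when mem_space=="device",
    // HostToHost otherwise)
    std::memcpy(M_host_->vals.data(), M_->vals.data(), sizeof(double) * M_->vals.size());

    std::swap(M_, M_host_);              // base-class code now sees host data in M_
    const int ret = factorize_on_host(); // stand-in for the inherited matrixChanged()
    std::swap(M_, M_host_);              // restore the device-resident matrix
    return ret;
  }

private:
  int factorize_on_host() {
    std::printf("factorizing %zu nonzeros on the host\n", M_->vals.size());
    return 0;
  }

  TripletMatrix* M_;      // device-resident matrix in the real solver
  TripletMatrix* M_host_; // host mirror owned by the wrapper, as in PATCH 07
};

int main() {
  HostMirroredSolver solver(5);
  return solver.matrixChanged();
}

Bracketing both `matrixChanged()` and `solve()` with the same swap is what keeps the base class unaware of device memory, and it is why PATCH 07 can delete `M_host_` from the base `hiopLinSolverSparseBase`: the mirror becomes a private detail of the GPU subclass.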