diff --git a/src/LinAlg/VectorCudaKernels.cu b/src/LinAlg/VectorCudaKernels.cu index b1c9a5911..405dfff78 100644 --- a/src/LinAlg/VectorCudaKernels.cu +++ b/src/LinAlg/VectorCudaKernels.cu @@ -640,6 +640,89 @@ __global__ void copyToStartingAt_w_pattern_cu(int n_src, } } +/** @brief process variable bounds */ +__global__ void process_bounds_cu(int n, + const double* xl, + const double* xu, + double* ixl, + double* ixu, + int* bnds_low, + int* bnds_upp, + int* bnds_lu, + int* fixed_vars, + double fixed_var_tol) +{ + const int num_threads = blockDim.x * gridDim.x; + const int tid = blockIdx.x * blockDim.x + threadIdx.x; + + for (int i = tid; i < n; i += num_threads) { + // preemptive loop to reduce number of iterations? + bnds_low[i] = 0; + bnds_upp[i] = 0; + bnds_lu[i] = 0; + fixed_vars[i] = 0; + + if(xl[i] > -1e20) { + ixl[i] = 1.; + bnds_low[i] = 1; + if(xu[i] < 1e20) { + bnds_lu[i] = 1; + } + } else { + ixl[i] = 0.; + } + + if(xu[i] < 1e20) { + ixu[i] = 1.; + bnds_upp[i] = 1; + } else { + ixu[i] = 0.; + } + + if(xu[i] < 1e20 && + fabs(xl[i]-xu[i]) <= fixed_var_tol*fmax(1.,std::fabs(xu[i]))) { + fixed_vars[i] = 1; + } + } +} + +/** @brief relax variable bounds */ +__global__ void relax_bounds_cu(int n, + double* xla, + double* xua, + double fixed_var_tol, + double fixed_var_perturb) +{ + const int num_threads = blockDim.x * gridDim.x; + const int tid = blockIdx.x * blockDim.x + threadIdx.x; + + for (int i = tid; i < n; i += num_threads) { + double xuabs = fabs(xua[i]); + if(fabs(xua[i]-xla[i]) <= fixed_var_tol*fmax(1.,xuabs)) { + xua[i] += fixed_var_perturb * std::fmax(1.,xuabs); + xla[i] -= fixed_var_perturb * std::fmax(1.,xuabs); + } + } +} + +/** @brief set d_ptr[i] = 1 if d1[i] == d2[i], otherwirse 0 */ +__global__ void set_if_match_cu(int n, + int* d_ptr, + const double* d1, + const double* d2) +{ + const int num_threads = blockDim.x * gridDim.x; + const int tid = blockIdx.x * blockDim.x + threadIdx.x; + + for (int i = tid; i < n; i += num_threads) { + if(d1[i]==d2[i]) { + d_ptr[i] = 1; + } else { + d_ptr[i] = 0; + } + } +} + namespace hiop { namespace cuda @@ -1112,7 +1195,6 @@ double min_w_pattern_kernel(int n, const double* d1, const double* id, double ma thrust::device_ptr ret_dev_ptr = thrust::min_element(thrust::device, dv_ptr, dv_ptr+n); - // TODO: how to return double from device to host? double *ret_host = new double[1]; double *ret_ptr = thrust::raw_pointer_cast(ret_dev_ptr); cudaError_t cuerr = cudaMemcpy(ret_host, ret_ptr, (1)*sizeof(double), cudaMemcpyDeviceToHost); @@ -1276,7 +1358,88 @@ void copyToStartingAt_w_pattern_kernel(int n_src, dd); } +int num_match_local_kernel(int n, const double* d1, const double* d2) +{ + int num_blocks = (n+block_size-1)/block_size; + + // TODO: how to avoid this temp vec? + thrust::device_ptr dv_ptr = thrust::device_malloc(n*sizeof(int)); + int* d_ptr = thrust::raw_pointer_cast(dv_ptr); + + // set d_ptr[i] = 1 if d1[i] == d2[i], otherwirse 0 + set_if_match_cu<<>>(n, d_ptr, d1, d2); + + int rval = thrust::reduce(thrust::device, d_ptr, d_ptr+n, 0, thrust::plus()); + + thrust::device_free(dv_ptr); + + return rval; +} +/** @brief process variable bounds */ +void process_bounds_local_kernel(int n_local, + const double* xl, + const double* xu, + double* ixl, + double* ixu, + int& n_bnds_low, + int& n_bnds_upp, + int& n_bnds_lu, + int& n_fixed_vars, + double fixed_var_tol) +{ + int num_blocks = (n_local+block_size-1)/block_size; + + thrust::device_ptr bnds_low_d_ptr = thrust::device_malloc(n_local*sizeof(int)); + int* bnds_low_r_ptr = thrust::raw_pointer_cast(bnds_low_d_ptr); + + thrust::device_ptr bnds_upp_d_ptr = thrust::device_malloc(n_local*sizeof(int)); + int* bnds_upp_r_ptr = thrust::raw_pointer_cast(bnds_upp_d_ptr); + + thrust::device_ptr bnds_lu_d_ptr = thrust::device_malloc(n_local*sizeof(int)); + int* bnds_lu_r_ptr = thrust::raw_pointer_cast(bnds_lu_d_ptr); + + thrust::device_ptr n_fixed_vars_d_ptr = thrust::device_malloc(n_local*sizeof(int)); + int* n_fixed_vars_r_ptr = thrust::raw_pointer_cast(n_fixed_vars_d_ptr); + + // set values + process_bounds_cu<<>>(n_local, + xl, + xu, + ixl, + ixu, + bnds_low_r_ptr, + bnds_upp_r_ptr, + bnds_lu_r_ptr, + n_fixed_vars_r_ptr, + fixed_var_tol); + + // compute sum + n_bnds_low = thrust::reduce(thrust::device, bnds_low_d_ptr, bnds_low_d_ptr+n_local, 0.0, thrust::plus()); + n_bnds_upp = thrust::reduce(thrust::device, bnds_upp_d_ptr, bnds_upp_d_ptr+n_local, 0.0, thrust::plus()); + n_bnds_lu = thrust::reduce(thrust::device, bnds_lu_d_ptr, bnds_lu_d_ptr+n_local, 0.0, thrust::plus()); + n_fixed_vars = thrust::reduce(thrust::device, n_fixed_vars_d_ptr, n_fixed_vars_d_ptr+n_local, 0.0, thrust::plus()); + + thrust::device_free(bnds_low_d_ptr); + thrust::device_free(bnds_upp_d_ptr); + thrust::device_free(bnds_lu_d_ptr); + thrust::device_free(n_fixed_vars_d_ptr); +} + +/** @brief relax variable bounds */ +void relax_bounds_kernel(int n_local, + double* xl, + double* xu, + double fixed_var_tol, + double fixed_var_perturb) +{ + int num_blocks = (n_local+block_size-1)/block_size; + relax_bounds_cu<<>>(n_local, + xl, + xu, + fixed_var_tol, + fixed_var_perturb); +} /// for hiopVectorIntCuda /** diff --git a/src/LinAlg/VectorCudaKernels.hpp b/src/LinAlg/VectorCudaKernels.hpp index 90fe1f355..3d8cf6cba 100644 --- a/src/LinAlg/VectorCudaKernels.hpp +++ b/src/LinAlg/VectorCudaKernels.hpp @@ -296,6 +296,28 @@ void copyToStartingAt_w_pattern_kernel(int n_src, double *vd, const double* dd); +/// @brief return the numbers of identical elements between two vectors +int num_match_local_kernel(int n, const double* d1, const double* d2); + +/** @brief process variable bounds */ +void process_bounds_local_kernel(int n_local, + const double* xl, + const double* xu, + double* ixl, + double* ixu, + int& n_bnds_low, + int& n_bnds_upp, + int& n_bnds_lu, + int& n_fixed_vars, + double fixed_var_tol); + +/** @brief relax variable bounds */ +void relax_bounds_kernel(int n_local, + double* xl, + double* xu, + double fixed_var_tol, + double fixed_var_perturb); + /// for hiopVectorIntCuda /** * @brief Set the vector entries to be a linear space of starting at i0 containing evenly diff --git a/src/LinAlg/VectorHipKernels.cpp b/src/LinAlg/VectorHipKernels.cpp index 413ee183f..bcea53a66 100644 --- a/src/LinAlg/VectorHipKernels.cpp +++ b/src/LinAlg/VectorHipKernels.cpp @@ -635,6 +635,89 @@ __global__ void copyToStartingAt_w_pattern_hip(int n_src, } } +/** @brief process variable bounds */ +__global__ void process_bounds_hip(int n, + const double* xl, + const double* xu, + double* ixl, + double* ixu, + double* bnds_low, + double* bnds_upp, + double* bnds_lu, + double* fixed_vars, + double fixed_var_tol) +{ + const int num_threads = blockDim.x * gridDim.x; + const int tid = blockIdx.x * blockDim.x + threadIdx.x; + + for (int i = tid; i < n; i += num_threads) { + // preemptive loop to reduce number of iterations? + bnds_low[i] = 0; + bnds_upp[i] = 0; + bnds_lu[i] = 0; + fixed_vars[i] = 0; + + if(xl[i] > -1e20) { + ixl[i] = 1.; + bnds_low[i] = 1; + if(xu[i] < 1e20) { + bnds_lu[i] = 1; + } + } else { + ixl[i] = 0.; + } + + if(xu[i] < 1e20) { + ixu[i] = 1.; + bnds_upp[i] = 1; + } else { + ixu[i] = 0.; + } + + if(xu[i] < 1e20 && + fabs(xl[i]-xu[i]) <= fixed_var_tol*fmax(1.,std::fabs(xu[i]))) { + fixed_vars[i] = 1; + } + } +} + +/** @brief relax variable bounds */ +__global__ void relax_bounds_hip(int n, + double* xla, + double* xua, + double fixed_var_tol, + double fixed_var_perturb) +{ + const int num_threads = blockDim.x * gridDim.x; + const int tid = blockIdx.x * blockDim.x + threadIdx.x; + + for (int i = tid; i < n; i += num_threads) { + double xuabs = fabs(xua[i]); + if(fabs(xua[i]-xla[i]) <= fixed_var_tol*fmax(1.,xuabs)) { + xua[i] += fixed_var_perturb * std::fmax(1.,xuabs); + xla[i] -= fixed_var_perturb * std::fmax(1.,xuabs); + } + } +} + +/** @brief set d_ptr[i] = 1 if d1[i] == d2[i], otherwirse 0 */ +__global__ void set_if_match_hip(int n, + int* d_ptr, + const double* d1, + const double* d2) +{ + const int num_threads = blockDim.x * gridDim.x; + const int tid = blockIdx.x * blockDim.x + threadIdx.x; + + for (int i = tid; i < n; i += num_threads) { + if(d1[i]==d2[i]) { + d_ptr[i] = 1; + } else { + d_ptr[i] = 0; + } + } +} + namespace hiop { namespace hip @@ -1107,7 +1190,6 @@ double min_w_pattern_kernel(int n, const double* d1, const double* id, double ma thrust::device_ptr ret_dev_ptr = thrust::min_element(thrust::device, dv_ptr, dv_ptr+n); - // TODO: how to return double from device to host? double *ret_host = new double[1]; double *ret_ptr = thrust::raw_pointer_cast(ret_dev_ptr); hipError_t cuerr = hipMemcpy(ret_host, ret_ptr, (1)*sizeof(double), hipMemcpyDeviceToHost); @@ -1271,7 +1353,88 @@ void copyToStartingAt_w_pattern_kernel(int n_src, dd); } +int num_match_local_kernel(int n, const double* d1, const double* d2) +{ + int num_blocks = (n+block_size-1)/block_size; + + // TODO: how to avoid this temp vec? + thrust::device_ptr dv_ptr = thrust::device_malloc(n*sizeof(int)); + int* d_ptr = thrust::raw_pointer_cast(dv_ptr); + + // set d_ptr[i] = 1 if d1[i] == d2[i], otherwirse 0 + set_if_match_hip<<>>(n, d_ptr, d1, d2); + + int rval = thrust::reduce(thrust::device, d_ptr, d_ptr+n, 0, thrust::plus()); + + thrust::device_free(dv_ptr); + + return rval; +} +/** @brief process variable bounds */ +void process_bounds_local_kernel(int n_local, + const double* xl, + const double* xu, + double* ixl, + double* ixu, + int& n_bnds_low, + int& n_bnds_upp, + int& n_bnds_lu, + int& n_fixed_vars, + double fixed_var_tol) +{ + int num_blocks = (n_local+block_size-1)/block_size; + + thrust::device_ptr bnds_low_d_ptr = thrust::device_malloc(n_local*sizeof(int)); + int* bnds_low_r_ptr = thrust::raw_pointer_cast(bnds_low_d_ptr); + + thrust::device_ptr bnds_upp_d_ptr = thrust::device_malloc(n_local*sizeof(int)); + int* bnds_upp_r_ptr = thrust::raw_pointer_cast(bnds_upp_d_ptr); + + thrust::device_ptr bnds_lu_d_ptr = thrust::device_malloc(n_local*sizeof(int)); + int* bnds_lu_r_ptr = thrust::raw_pointer_cast(bnds_lu_d_ptr); + + thrust::device_ptr n_fixed_vars_d_ptr = thrust::device_malloc(n_local*sizeof(int)); + int* n_fixed_vars_r_ptr = thrust::raw_pointer_cast(n_fixed_vars_d_ptr); + + // set values + process_bounds_hip<<>>(n, + xl, + xu, + ixl, + ixu, + bnds_low_r_ptr, + bnds_upp_r_ptr, + bnds_lu_r_ptr, + n_fixed_vars_r_ptr, + fixed_var_tol); + + // compute sum + n_bnds_low = thrust::reduce(thrust::device, bnds_low_d_ptr, bnds_low_d_ptr+n_local, 0.0, thrust::plus()); + n_bnds_upp = thrust::reduce(thrust::device, bnds_upp_d_ptr, bnds_upp_d_ptr+n_local, 0.0, thrust::plus()); + n_bnds_lu = thrust::reduce(thrust::device, bnds_lu_d_ptr, bnds_lu_d_ptr+n_local, 0.0, thrust::plus()); + n_fixed_vars = thrust::reduce(thrust::device, n_fixed_vars_d_ptr, n_fixed_vars_d_ptr+n_local, 0.0, thrust::plus()); + + thrust::device_free(bnds_low_d_ptr); + thrust::device_free(bnds_upp_d_ptr); + thrust::device_free(bnds_lu_d_ptr); + thrust::device_free(n_fixed_vars_d_ptr); +} + +/** @brief relax variable bounds */ +void relax_bounds_kernel(int n_local, + double* xl, + double* xu, + double fixed_var_tol, + double fixed_var_perturb) +{ + int num_blocks = (n_local+block_size-1)/block_size; + relax_bounds_hip<<>>(n_local, + xl, + xu, + fixed_var_tol, + fixed_var_perturb); +} /// for hiopVectorIntHip /** diff --git a/src/LinAlg/VectorHipKernels.hpp b/src/LinAlg/VectorHipKernels.hpp index b1c0b8022..ece799f18 100644 --- a/src/LinAlg/VectorHipKernels.hpp +++ b/src/LinAlg/VectorHipKernels.hpp @@ -296,6 +296,28 @@ void copyToStartingAt_w_pattern_kernel(int n_src, double *vd, const double* dd); +/// @brief return the numbers of identical elements between two vectors +int num_match_local_kernel(int n, const double* d1, const double* d2); + +/** @brief process variable bounds */ +void process_bounds_local_kernel(int n_local, + const double* xl, + const double* xu, + double* ixl, + double* ixu, + int& n_bnds_low, + int& n_bnds_upp, + int& n_bnds_lu, + int& n_fixed_vars, + double fixed_var_tol); + +/** @brief relax variable bounds */ +void relax_bounds_kernel(int n_local, + double* xl, + double* xu, + double fixed_var_tol, + double fixed_var_perturb); + /// for hiopVectorIntHip /** * @brief Set the vector entries to be a linear space of starting at i0 containing evenly diff --git a/src/LinAlg/hiopVector.hpp b/src/LinAlg/hiopVector.hpp index 289583481..80c720004 100644 --- a/src/LinAlg/hiopVector.hpp +++ b/src/LinAlg/hiopVector.hpp @@ -998,10 +998,58 @@ class hiopVector * @brief check if `this` vector is identical to `vec` * * @param[in] vec - vector used to be compared with `this` - * @todo: add unit test, or should we remove this function? + * @todo: should we remove this function? */ virtual bool is_equal(const hiopVector& vec) const = 0; + /** + * @brief return the numbers of identical elements between two vectors + * + * @param[in] vec - vector used to be compared with `this` + * @post `vec` is not modified + */ + virtual size_type num_match(const hiopVector& vec) const = 0; + + /** + * @brief preprocess bounds in a form supported by the NLP formulation. Returns counts of + * the variables with lower, upper, and lower and upper bounds, as well of the fixed + * variables. + * + * @param[in] this - lower bound of primal variable `x` + * @param[in] xu - lower bound of primal variable `x` + * @param[in] ixl - index of the variables with lower bounds + * @param[in] ixu - index of the variables with upper bounds + * @param[out] n_bnds_low - number of variables with lower bounds only + * @param[out] n_bnds_upp - number of variables with upper bounds only + * @param[out] n_bnds_lu - number of variables with both lower and upper bounds + * @param[out] n_fixed_vars - number of fixed variables + * @param[in] fixed_var_tol - tolerance used to define fixed variables + * + * @pre this is a local method + */ + virtual bool process_bounds_local(const hiopVector& xu, + hiopVector& ixl, + hiopVector& ixu, + size_type& n_bnds_low, + size_type& n_bnds_upp, + size_type& n_bnds_lu, + size_type& n_fixed_vars, + const double& fixed_var_tol) = 0; + + /** + * @brief relax variable bounds + * + * @param[in] this - lower bound of primal variable `x` + * @param[in] xu - lower bound of primal variable `x` + * @param[in] fixed_var_tol - tolerance used to define fixed variables + * @param[in] fixed_var_perturb - perturbation added to bounds + * + * @pre this is a local method + */ + virtual void relax_bounds_vec(hiopVector& xu, + const double& fixed_var_tol, + const double& fixed_var_perturb) = 0; + protected: size_type n_; //we assume sequential data protected: diff --git a/src/LinAlg/hiopVectorCuda.cpp b/src/LinAlg/hiopVectorCuda.cpp index 0be1e2eaf..1bf877992 100644 --- a/src/LinAlg/hiopVectorCuda.cpp +++ b/src/LinAlg/hiopVectorCuda.cpp @@ -1087,6 +1087,69 @@ bool hiopVectorCuda::is_equal(const hiopVector& vec) const assert(false&&"NOT needed. Remove this func. TODO"); } +size_type hiopVectorCuda::num_match(const hiopVector& vec) const +{ + const double* dd = data_; + const double* vd = vec.local_data_const(); + int sum_match = sum_match = hiop::cuda::num_match_local_kernel(n_local_, dd, vd); + +#ifdef HIOP_USE_MPI + int sumG; + int ierr=MPI_Allreduce(&sum_match, &sumG, 1, MPI_INT, MPI_SUM, comm_); assert(MPI_SUCCESS==ierr); + return sumG; +#endif + return sum_match; +} + +bool hiopVectorCuda::process_bounds_local(const hiopVector& xu, + hiopVector& ixl, + hiopVector& ixu, + size_type& n_bnds_low, + size_type& n_bnds_upp, + size_type& n_bnds_lu, + size_type& n_fixed_vars, + const double& fixed_var_tol) +{ +#ifdef HIOP_DEEPCHECKS + assert(xu.get_local_size()==n_local_); + assert(ixl.get_local_size()==n_local_); + assert(ixu.get_local_size()==n_local_); +#endif + const double* xl_arr = data_; + const double* xu_arr = xu.local_data_const(); + double* ixl_arr = ixl.local_data(); + double* ixu_arr = ixu.local_data(); + + hiop::cuda::process_bounds_local_kernel(n_local_, + xl_arr, + xu_arr, + ixl_arr, + ixu_arr, + n_bnds_low, + n_bnds_upp, + n_bnds_lu, + n_fixed_vars, + fixed_var_tol); + + return true; +} + +void hiopVectorCuda::relax_bounds_vec(hiopVector& xu, + const double& fixed_var_tol, + const double& fixed_var_perturb) +{ +#ifdef HIOP_DEEPCHECKS + assert(xu.get_local_size()==n_local_); +#endif + double* xl_arr = data_; + double* xu_arr = xu.local_data(); + + hiop::cuda::relax_bounds_kernel(n_local_, + xl_arr, + xu_arr, + fixed_var_tol, + fixed_var_perturb); +} } // namespace hiop diff --git a/src/LinAlg/hiopVectorCuda.hpp b/src/LinAlg/hiopVectorCuda.hpp index 5d504521d..7ec1b3faa 100644 --- a/src/LinAlg/hiopVectorCuda.hpp +++ b/src/LinAlg/hiopVectorCuda.hpp @@ -347,6 +347,21 @@ class hiopVectorCuda : public hiopVector /// @brief check if `this` vector is identical to `vec` virtual bool is_equal(const hiopVector& vec) const; + virtual size_type num_match(const hiopVector& vec) const; + + virtual bool process_bounds_local(const hiopVector& xu, + hiopVector& ixl, + hiopVector& ixu, + size_type& n_bnds_low, + size_type& n_bnds_upp, + size_type& n_bnds_lu, + size_type& n_fixed_vars, + const double& fixed_var_tol); + + virtual void relax_bounds_vec(hiopVector& xu, + const double& fixed_var_tol, + const double& fixed_var_perturb); + /* functions for this class */ inline MPI_Comm get_mpi_comm() const { return comm_; } diff --git a/src/LinAlg/hiopVectorHip.cpp b/src/LinAlg/hiopVectorHip.cpp index ab5b88f15..e2a173468 100644 --- a/src/LinAlg/hiopVectorHip.cpp +++ b/src/LinAlg/hiopVectorHip.cpp @@ -940,9 +940,9 @@ bool hiopVectorHip::matchesPattern(const hiopVector& pattern) /** @brief Adjusts duals. */ void hiopVectorHip::adjustDuals_plh(const hiopVector& xvec, - const hiopVector& ixvec, - const double& mu, - const double& kappa) + const hiopVector& ixvec, + const double& mu, + const double& kappa) { #ifdef HIOP_DEEPCHECKS assert(xvec.get_local_size()==n_local_); @@ -1091,6 +1091,69 @@ bool hiopVectorHip::is_equal(const hiopVector& vec) const assert(false&&"NOT needed. Remove this func. TODO"); } +size_type hiopVectorHip::num_match(const hiopVector& vec) const +{ + const double* dd = data_; + const double* vd = vec.local_data_const(); + int sum_match = sum_match = hiop::cuda::num_match_local_kernel(n_local_, dd, vd); + +#ifdef HIOP_USE_MPI + int sumG; + int ierr=MPI_Allreduce(&sum_match, &sumG, 1, MPI_INT, MPI_SUM, comm_); assert(MPI_SUCCESS==ierr); + return sumG; +#endif + return sum_match; +} + +bool hiopVectorHip::process_bounds_local(const hiopVector& xu, + hiopVector& ixl, + hiopVector& ixu, + size_type& n_bnds_low, + size_type& n_bnds_upp, + size_type& n_bnds_lu, + size_type& n_fixed_vars, + const double& fixed_var_tol) +{ +#ifdef HIOP_DEEPCHECKS + assert(xu.get_local_size()==n_local_); + assert(ixl.get_local_size()==n_local_); + assert(ixu.get_local_size()==n_local_); +#endif + const double* xl_arr = data_; + const double* xu_arr = xu.local_data_const(); + double* ixl_arr = ixl.local_data(); + double* ixu_arr = ixu.local_data(); + + hiop::hip::process_bounds_local_kernel(n_local_, + xl_arr, + xu_arr, + ixl_arr, + ixu_arr, + n_bnds_low, + n_bnds_upp, + n_bnds_lu, + n_fixed_vars, + fixed_var_tol); + + return true; +} + +void hiopVectorHip::relax_bounds_vec(hiopVector& xu, + const double& fixed_var_tol, + const double& fixed_var_perturb) +{ +#ifdef HIOP_DEEPCHECKS + assert(xu.get_local_size()==n_local_); +#endif + const double* xl_arr = data_; + const double* xu_arr = xu.local_data_const(); + + hiop::hip::process_bounds_local_kernel(n_local_, + xl_arr, + xu_arr, + fixed_var_tol, + fixed_var_perturb); +} } // namespace hiop diff --git a/src/LinAlg/hiopVectorHip.hpp b/src/LinAlg/hiopVectorHip.hpp index addaf1d03..de594201c 100644 --- a/src/LinAlg/hiopVectorHip.hpp +++ b/src/LinAlg/hiopVectorHip.hpp @@ -349,6 +349,21 @@ class hiopVectorHip : public hiopVector /// @brief check if `this` vector is identical to `vec` virtual bool is_equal(const hiopVector& vec) const; + virtual size_type num_match(const hiopVector& vec) const; + + virtual bool process_bounds_local(const hiopVector& xu, + hiopVector& ixl, + hiopVector& ixu, + size_type& n_bnds_low, + size_type& n_bnds_upp, + size_type& n_bnds_lu, + size_type& n_fixed_vars, + const double& fixed_var_tol); + + virtual void relax_bounds_vec(hiopVector& xu, + const double& fixed_var_tol, + const double& fixed_var_perturb); + /* functions for this class */ inline MPI_Comm get_mpi_comm() const { return comm_; } diff --git a/src/LinAlg/hiopVectorPar.cpp b/src/LinAlg/hiopVectorPar.cpp index 0fb38bd89..b50927729 100644 --- a/src/LinAlg/hiopVectorPar.cpp +++ b/src/LinAlg/hiopVectorPar.cpp @@ -1283,5 +1283,127 @@ bool hiopVectorPar::is_equal(const hiopVector& vec) const } +size_type hiopVectorPar::num_match(const hiopVector& vec) const +{ + if(n_local_ != vec.get_local_size()) { + return 0; + } + int sum_match = 0; + const double* data_v = vec.local_data_const(); + for(auto i=0; ilocal_data_const(); + const double* xu_vec = xu.local_data_const(); + double* ixl_vec = ixl.local_data(); + double* ixu_vec = ixu.local_data(); + + int nlocal = this->get_local_size(); + for(int i=0;i -1e20) { + ixl_vec[i] = 1.; + n_bnds_low++; + if(xu_vec[i] < 1e20) { + n_bnds_lu++; + } + } else { + ixl_vec[i] = 0.; + } + + if(xu_vec[i] < 1e20) { + ixu_vec[i] = 1.; + n_bnds_upp++; + } else { + ixu_vec[i] = 0.; + } + +#ifdef HIOP_DEEPCHECKS + assert(xl_vec[i] <= xu_vec[i] && "please fix the inconsistent bounds, otherwise the problem is infeasible"); +#endif + + if( xu_vec[i] < 1e20 && + fabs(xl_vec[i]-xu_vec[i]) <= fixed_var_tol*std::fmax(1.,std::fabs(xu_vec[i]))) { + n_fixed_vars++; + } else { +#ifdef HIOP_DEEPCHECKS +#define min_dist 1e-8 + const int maxBndsCloseMsgs=3; + int nBndsClose=0; + int myrank_ = 0; + int numranks = 1; +#ifdef HIOP_USE_MPI + int err = MPI_Comm_rank(comm_, &myrank_); assert(err==MPI_SUCCESS); + err = MPI_Comm_size(comm_, &numranks); assert(err==MPI_SUCCESS); +#endif + + if(fixed_var_tollocal_data(); + double *xua = xu.local_data(); + size_type n = this->get_local_size(); + + double xuabs; + for(index_type i=0; i& exec_space() const { return exec_space_; diff --git a/src/LinAlg/hiopVectorRajaImpl.hpp b/src/LinAlg/hiopVectorRajaImpl.hpp index 6dcb111a6..6e8b552cc 100644 --- a/src/LinAlg/hiopVectorRajaImpl.hpp +++ b/src/LinAlg/hiopVectorRajaImpl.hpp @@ -2232,4 +2232,123 @@ bool hiopVectorRaja::is_equal(const hiopVector& vec) const return all_equal; } +template +size_type hiopVectorRaja::num_match(const hiopVector& vec) const +{ +#ifdef HIOP_DEEPCHECKS + const hiopVectorRaja& v = dynamic_cast&>(vec); + assert(v.n_local_ == n_local_); +#endif + + const double* data_v = vec.local_data_const(); + const double* data = data_dev_; + RAJA::ReduceSum< hiop_raja_reduce, int > sum(0); + RAJA::forall< hiop_raja_exec >( RAJA::RangeSegment(0, n_local_), + RAJA_LAMBDA(RAJA::Index_type i) + { + if(data[i]==data_v[i]) { + sum += 1; + } + }); + int all_equal = sum.get(); + +#ifdef HIOP_USE_MPI + int all_equalG; + int ierr = MPI_Allreduce(&all_equal, &all_equalG, 1, MPI_INT, MPI_SUM, comm_); + assert(MPI_SUCCESS==ierr); + return all_equalG; +#endif + return all_equal; +} + +template +bool hiopVectorRaja::process_bounds_local(const hiopVector& xu, + hiopVector& ixl, + hiopVector& ixu, + size_type& n_bnds_low, + size_type& n_bnds_upp, + size_type& n_bnds_lu, + size_type& n_fixed_vars, + const double& fixed_var_tol) +{ +#ifdef HIOP_DEEPCHECKS + const hiopVectorRaja& vxu = dynamic_cast&>(xu); + const hiopVectorRaja& vixl = dynamic_cast&>(ixl); + const hiopVectorRaja& vixu = dynamic_cast&>(ixu); + assert(vxu.n_local_ == this->n_local_); + assert(vixl.n_local_ == this->n_local_); + assert(vixu.n_local_ == this->n_local_); +#endif + + const double* xl_vec = this->local_data_const(); + const double* xu_vec = xu.local_data_const(); + double* ixl_vec = ixl.local_data(); + double* ixu_vec = ixu.local_data(); + + size_type nlocal = this->get_local_size(); + + RAJA::ReduceSum< hiop_raja_reduce, int > sum_n_bnds_low(0); + RAJA::ReduceSum< hiop_raja_reduce, int > sum_n_bnds_upp(0); + RAJA::ReduceSum< hiop_raja_reduce, int > sum_n_bnds_lu(0); + RAJA::ReduceSum< hiop_raja_reduce, int > sum_n_fixed_vars(0); + + RAJA::forall< hiop_raja_exec >( RAJA::RangeSegment(0, nlocal), + RAJA_LAMBDA(RAJA::Index_type i) + { + if(xl_vec[i] > -1e20) { + ixl_vec[i] = 1.; + sum_n_bnds_low += 1; + if(xu_vec[i] < 1e20) { + sum_n_bnds_lu += 1; + } + } else { + ixl_vec[i] = 0.; + } + + if(xu_vec[i] < 1e20) { + ixu_vec[i] = 1.; + sum_n_bnds_upp += 1; + } else { + ixu_vec[i] = 0.; + } + + if(xu_vec[i] < 1e20 && + fabs(xl_vec[i]-xu_vec[i]) <= fixed_var_tol*std::fmax(1.,std::fabs(xu_vec[i]))) { + sum_n_fixed_vars += 1; + } + }); + + n_bnds_low = sum_n_bnds_low.get(); + n_bnds_upp = sum_n_bnds_upp.get(); + n_bnds_lu = sum_n_bnds_lu.get(); + n_fixed_vars = sum_n_fixed_vars.get(); + + return true; +} + +template +void hiopVectorRaja::relax_bounds_vec(hiopVector& xu, + const double& fixed_var_tol, + const double& fixed_var_perturb) +{ +#ifdef HIOP_DEEPCHECKS + const hiopVectorRaja& vxu = dynamic_cast&>(xu); + assert(vxu.n_local_ == this->n_local_); +#endif + + double *xla = this->local_data(); + double *xua = xu.local_data(); + size_type n = this->get_local_size(); + + RAJA::forall< hiop_raja_exec >( RAJA::RangeSegment(0, n), + RAJA_LAMBDA(RAJA::Index_type i) + { + double xuabs = std::fabs(xua[i]); + if(std::fabs(xua[i]-xla[i]) <= fixed_var_tol*std::fmax(1.,xuabs)) { + xua[i] += fixed_var_perturb * std::fmax(1.,xuabs); + xla[i] -= fixed_var_perturb * std::fmax(1.,xuabs); + } + }); +} + } // namespace hiop diff --git a/src/Optimization/hiopNlpFormulation.cpp b/src/Optimization/hiopNlpFormulation.cpp index b8fb9d6d3..ac50c04fa 100644 --- a/src/Optimization/hiopNlpFormulation.cpp +++ b/src/Optimization/hiopNlpFormulation.cpp @@ -193,8 +193,8 @@ bool hiopNlpFormulation::finalizeInitialization() if(strFixedVars_ != options->GetString("fixed_var")) { doinit=true; } - const double fixedVarTol = options->GetNumeric("fixed_var_tolerance"); - if(dFixedVarsTol_ != fixedVarTol) { + const double fixed_var_tol = options->GetNumeric("fixed_var_tolerance"); + if(dFixedVarsTol_ != fixed_var_tol) { doinit=true; } @@ -257,12 +257,12 @@ bool hiopNlpFormulation::finalizeInitialization() //preprocess variables bounds - this is curently done on the CPU // size_type nfixed_vars_local; - process_bounds(n_bnds_low_local_,n_bnds_upp_local_, n_bnds_lu_, nfixed_vars_local); + process_bounds(n_bnds_low_local_, n_bnds_upp_local_, n_bnds_lu_, nfixed_vars_local); /////////////////////////////////////////////////////////////////////////// // Handling of fixed variables ////////////////////////////////////////////////////////////////////////// - dFixedVarsTol_ = fixedVarTol; + dFixedVarsTol_ = fixed_var_tol; size_type nfixed_vars=nfixed_vars_local; #ifdef HIOP_USE_MPI int ierr = MPI_Allreduce(&nfixed_vars_local, &nfixed_vars, 1, MPI_HIOP_SIZE_TYPE, MPI_SUM, comm_); @@ -277,11 +277,15 @@ bool hiopNlpFormulation::finalizeInitialization() // remove free variables // log->printf(hovWarning, "Fixed variables will be removed internally.\n"); + if(options->GetString("compute_mode")=="gpu") { + assert(false && "HiOp hasn't support removing fixed variables under GPU mode yet."); + return false; + } fixedVarsRemover = new hiopFixedVarsRemover(this, *xl_, *xu_, - fixedVarTol, + fixed_var_tol, nfixed_vars, nfixed_vars_local); @@ -417,7 +421,7 @@ bool hiopNlpFormulation::finalizeInitialization() cons_body_ = nullptr; delete cons_Jac_; - cons_Jac_ = NULL; + cons_Jac_ = nullptr; delete cons_lambdas_; cons_lambdas_ = nullptr; @@ -437,91 +441,16 @@ bool hiopNlpFormulation::finalizeInitialization() bool hiopNlpFormulation::process_bounds(size_type& n_bnds_low, size_type& n_bnds_upp, size_type& n_bnds_lu, - size_type& nfixed_vars) + size_type& n_fixed_vars) { - - n_bnds_low = 0; - n_bnds_upp = 0; - n_bnds_lu = 0; - nfixed_vars = 0; - -#if !defined(HIOP_USE_MPI) - int* vec_distrib_ = nullptr; - MPI_Comm comm_ = MPI_COMM_SELF; -#endif - hiopVectorPar xl_tmp(n_vars_, vec_distrib_, comm_); - hiopVectorPar xu_tmp(n_vars_, vec_distrib_, comm_); - hiopVectorPar ixl_tmp(n_vars_, vec_distrib_, comm_); - hiopVectorPar ixu_tmp(n_vars_, vec_distrib_, comm_); - - this->xl_->copy_to_vectorpar(xl_tmp); - this->xu_->copy_to_vectorpar(xu_tmp); - this->ixl_->copy_to_vectorpar(ixl_tmp); - this->ixu_->copy_to_vectorpar(ixu_tmp); - - double *ixl_vec = ixl_tmp.local_data_host(); - double *ixu_vec = ixu_tmp.local_data_host(); - - double* xl_vec = xl_tmp.local_data_host(); - double* xu_vec = xu_tmp.local_data_host(); -#ifdef HIOP_DEEPCHECKS - const int maxBndsCloseMsgs=3; int nBndsClose=0; -#endif - const double fixedVarTol = options->GetNumeric("fixed_var_tolerance"); - int nlocal=xl_->get_local_size(); - for(int i=0;i -1e20) { - ixl_vec[i] = 1.; - n_bnds_low++; - if(xu_vec[i] < 1e20) { - n_bnds_lu++; - } - } else { - ixl_vec[i] = 0.; - } - - if(xu_vec[i] < 1e20) { - ixu_vec[i] = 1.; - n_bnds_upp++; - } else { - ixu_vec[i] = 0.; - } - -#ifdef HIOP_DEEPCHECKS - assert(xl_vec[i] <= xu_vec[i] && "please fix the inconsistent bounds, otherwise the problem is infeasible"); -#endif - - //if(xl_vec[i]==xu_vec[i]) { - if( xu_vec[i]<1e20 && - fabs(xl_vec[i]-xu_vec[i]) <= fixedVarTol*fmax(1.,fabs(xu_vec[i]))) { - nfixed_vars++; - } else { -#ifdef HIOP_DEEPCHECKS -#define min_dist 1e-8 - if(fixedVarTolprintf(hovWarning, - "Lower (%g) and upper bound (%g) for variable %d are very close. " - "Consider fixing this variable or increase 'fixed_var_tolerance'.\n", - i, xl_vec[i], xu_vec[i]); - nBndsClose++; - } - } - if(nBndsClose==maxBndsCloseMsgs) { - log->printf(hovWarning, "[further messages were surpressed]\n"); - nBndsClose++; - } - } -#endif - } - } - - this->xl_->copy_from_vectorpar(xl_tmp); - this->xu_->copy_from_vectorpar(xu_tmp); - this->ixl_->copy_from_vectorpar(ixl_tmp); - this->ixu_->copy_from_vectorpar(ixu_tmp); - + this->xl_->process_bounds_local(*this->xu_, + *this->ixl_, + *this->ixu_, + n_bnds_low, + n_bnds_upp, + n_bnds_lu, + n_fixed_vars, + options->GetNumeric("fixed_var_tolerance")); return true; } @@ -556,24 +485,18 @@ bool hiopNlpFormulation::process_constraints() assert(gl->get_local_size()==n_cons_); assert(gu->get_local_size()==n_cons_); + n_cons_eq_ = gl->num_match(*gu); + n_cons_ineq_ = n_cons_ - n_cons_eq_; + // transfer to host hiopVectorPar gl_host(n_cons_); hiopVectorPar gu_host(n_cons_); gl->copy_to_vectorpar(gl_host); gu->copy_to_vectorpar(gu_host); - + double* gl_vec = gl_host.local_data(); double* gu_vec = gu_host.local_data(); - n_cons_eq_ = 0; - n_cons_ineq_ = 0; - for(int i=0;i=0) { - M_rs[i*nrs+rs_idx] = M_fs[i*nfs+j]; + M_rs[i*nrs+rs_idx] = M_fs[i*nfs+j]; } } } @@ -291,38 +291,19 @@ hiopFixedVarsRelaxer(hiopNlpFormulation* nlp, const size_type& numFixedVars, const size_type& numFixedVars_local) : hiopNlpTransformation(nlp), - xl_copy(NULL), xu_copy(NULL), n_vars(xl.get_size()), n_vars_local(xl.get_local_size()) + n_vars(xl.get_size()), + n_vars_local(xl.get_local_size()) { - //xl_copy = xl.new_copy(); // no need to copy at this point - //xu_copy = xu.new_copy(); // no need to copy at this point } hiopFixedVarsRelaxer::~hiopFixedVarsRelaxer() { - if(xl_copy) delete xl_copy; - if(xu_copy) delete xu_copy; } void hiopFixedVarsRelaxer:: relax(const double& fixed_var_tol, const double& fixed_var_perturb, hiopVector& xl, hiopVector& xu) { - double *xla=xl.local_data(), *xua=xu.local_data(), *v; - size_type n=xl.get_local_size(); - double xuabs; - for(index_type i=0; i1.) { @@ -464,9 +443,4 @@ hiopNLPObjGradScaling::~hiopNLPObjGradScaling() if(scale_factor_cd) delete scale_factor_cd; } - - - - - } //end of namespace diff --git a/src/Optimization/hiopNlpTransforms.hpp b/src/Optimization/hiopNlpTransforms.hpp index 9c7f37d61..d0634bf7f 100644 --- a/src/Optimization/hiopNlpTransforms.hpp +++ b/src/Optimization/hiopNlpTransforms.hpp @@ -153,7 +153,7 @@ class hiopFixedVarsRemover : public hiopNlpTransformation hiopFixedVarsRemover(hiopNlpFormulation* nlp, const hiopVector& xl, const hiopVector& xu, - const double& fixedVarTol, + const double& fixed_var_tol, const size_type& numFixedVars, const size_type& numFixedVars_local); ~hiopFixedVarsRemover(); @@ -287,7 +287,7 @@ class hiopFixedVarsRemover : public hiopNlpTransformation size_type n_fixed_vars_local; size_type n_fixed_vars; - double fixedVarTol; + double fixed_var_tol_; size_type n_fs; //full-space n size_type n_rs; //reduced-space n @@ -326,20 +326,20 @@ class hiopFixedVarsRelaxer : public hiopNlpTransformation virtual ~hiopFixedVarsRelaxer(); /* number of vars in the NLP after the tranformation */ - inline size_type n_post() { /*assert(xl_copy);*/ return n_vars; } //xl_copy->get_size(); } + inline size_type n_post() {return n_vars;} /* number of vars in the NLP to which the tranformation is to be applied */ - virtual size_type n_pre () { /*assert(xl_copy);*/ return n_vars; } //xl_copy->get_size(); } + virtual size_type n_pre() {return n_vars;} - inline size_type n_post_local() { return n_vars_local; } //xl_copy->get_local_size(); } - inline size_type n_pre_local() { return n_vars_local; } //xl_copy->get_local_size(); } + inline size_type n_post_local() {return n_vars_local;} + inline size_type n_pre_local() {return n_vars_local;} inline bool setup() { return true; } void relax(const double& fixed_var_tol, const double& fixed_var_perturb, hiopVector& xl, hiopVector& xu); private: - hiopVector*xl_copy, *xu_copy; - size_type n_vars; int n_vars_local; + size_type n_vars; + size_type n_vars_local; }; /** @@ -514,10 +514,10 @@ class hiopBoundsRelaxer : public hiopNlpTransformation const hiopVector& du); virtual ~hiopBoundsRelaxer(); - inline size_type n_post() { /*assert(xl_copy);*/ return n_vars; } - virtual size_type n_pre () { /*assert(xl_copy);*/ return n_vars; } - inline size_type n_post_local() { return n_vars_local; } - inline size_type n_pre_local() { return n_vars_local; } + inline size_type n_post() {return n_vars;} + virtual size_type n_pre() {return n_vars;} + inline size_type n_post_local() {return n_vars_local;} + inline size_type n_pre_local() {return n_vars_local;} inline bool setup() { return true; } inline hiopVector* apply_to_x(hiopVector& x) diff --git a/tests/LinAlg/vectorTests.hpp b/tests/LinAlg/vectorTests.hpp index 225be242a..78d2f820e 100644 --- a/tests/LinAlg/vectorTests.hpp +++ b/tests/LinAlg/vectorTests.hpp @@ -2050,6 +2050,135 @@ class VectorTests : public TestBase return reduceReturn(fail, &x); } + /** + * @brief Test: + * the number of identical elements between x and y, i.e., x[i] == y[i] + */ + bool vector_num_match(hiop::hiopVector& x, hiop::hiopVector& y, const int rank) + { + const local_ordinal_type Nx = x.get_size(); + int fail = 0; + x.setToConstant(one); + y.setToConstant(one); + + real_type actual = x.num_match(y); + real_type expected = Nx; + + fail += !isEqual(expected, actual); + + if(rank == 0) { + setLocalElement(&x, getLocalSize(&x) - 1, two); + } + actual = x.num_match(y); + expected = Nx - 1; + fail += !isEqual(expected, actual); + + printMessage(fail, __func__, rank); + return reduceReturn(fail, &x); + } + + /** + * @brief Test that hiop correctly processes variable bounds + * + * @note This is local method only + */ + bool vector_process_bounds(hiop::hiopVector& xl, + hiop::hiopVector& xu, + hiop::hiopVector& ixl, + hiop::hiopVector& ixu, + const int rank = 0) + { + const local_ordinal_type N = getLocalSize(&xl); + assert(N == getLocalSize(&xu)); + assert(N == getLocalSize(&ixl)); + assert(N == getLocalSize(&ixu)); + assert(N >= 3); // only test N>=3 + int fail = 0; + + int n_low = 0; + int n_upp = 0; + int n_lu = 0; + int n_fixed = 0; + double fixed_var_tol = 1e-8; + + // xl = [1, .., 1, -inf] + xl.setToConstant(one); + setLocalElement(&xl, N-1, -one/zero); + + // xl = [inf, 1, 2, .., 2] + xu.setToConstant(two); + setLocalElement(&xu, 0, one/zero); + setLocalElement(&xu, 1, one); + + xl.process_bounds_local(xu, ixl, ixu, n_low, n_upp, n_lu, n_fixed, fixed_var_tol); + + // Check that the last element of rank zero's vector is + // zero, and that x_val was added to all other elements + fail += verifyAnswer(&ixl, + [=] (local_ordinal_type i) -> real_type + { + return (i == N-1) ? 0. : 1.; + }); + + fail += verifyAnswer(&ixu, + [=] (local_ordinal_type i) -> real_type + { + return (i == 0) ? 0. : 1.; + }); + + fail += (n_low != N-1); + fail += (n_upp != N-1); + fail += (n_lu != N-2); + fail += (n_fixed != 1); + + printMessage(fail, __func__, rank); + return reduceReturn(fail, &xl); + } + + /** + * @brief Test that hiop correctly relaxes variable bounds + * + * @note This is local method only + */ + bool vector_relax_bounds(hiop::hiopVector& xl, + hiop::hiopVector& xu, + const int rank = 0) + { + const local_ordinal_type N = getLocalSize(&xl); + assert(N == getLocalSize(&xu)); + int fail = 0; + + double fixed_var_tol = 1e-8; + double fixed_var_perturb = 1e-1; + + // xl = [1, .., 1, 2] + xl.setToConstant(one); + setLocalElement(&xl, N-1, two); + + // xl = [2, .., 2] + xu.setToConstant(two); + + xl.relax_bounds_vec(xu, fixed_var_tol, fixed_var_perturb); + + // Check that the last element of rank zero's vector is + // zero, and that x_val was added to all other elements + fail += verifyAnswer(&xl, + [=] (local_ordinal_type i) -> real_type + { + return (i == N-1) ? 1.8 : one; + }); + + fail += verifyAnswer(&xu, + [=] (local_ordinal_type i) -> real_type + { + return (i == N-1) ? 2.2 : two; + }); + + printMessage(fail, __func__, rank); + return reduceReturn(fail, &xl); + } + + /// Returns element _i_ of vector _x_. real_type getLocalElement(hiop::hiopVector* x, local_ordinal_type i) { diff --git a/tests/testVector.cpp b/tests/testVector.cpp index 65834293a..c1863370e 100644 --- a/tests/testVector.cpp +++ b/tests/testVector.cpp @@ -326,6 +326,9 @@ int runTests(const char* mem_space, MPI_Comm comm) fail += test.vectorMatchesPattern(*x, *y, rank); fail += test.vectorAdjustDuals_plh(*x, *y, *z, *a, rank); + fail += test.vector_num_match(*x, *y, rank); + fail += test.vector_process_bounds(*x, *y, *z, *a, rank); + fail += test.vector_relax_bounds(*x, *y, rank); if (rank == 0) { @@ -334,7 +337,7 @@ int runTests(const char* mem_space, MPI_Comm comm) fail += test.vectorIsfinite(*v); } - // TODO: remove + // TODO: remove? //fail += test.vector_is_equal(*x, *y, rank); delete a;