diff --git a/src/LinAlg/VectorCudaKernels.cu b/src/LinAlg/VectorCudaKernels.cu
index b1c9a5911..405dfff78 100644
--- a/src/LinAlg/VectorCudaKernels.cu
+++ b/src/LinAlg/VectorCudaKernels.cu
@@ -640,6 +640,89 @@ __global__ void copyToStartingAt_w_pattern_cu(int n_src,
   }
 }
 
+/** @brief process variable bounds */
+__global__ void process_bounds_cu(int n,
+                                  const double* xl,
+                                  const double* xu,
+                                  double* ixl,
+                                  double* ixu,
+                                  int* bnds_low,
+                                  int* bnds_upp,
+                                  int* bnds_lu,
+                                  int* fixed_vars,
+                                  double fixed_var_tol)
+{
+  const int num_threads = blockDim.x * gridDim.x;
+  const int tid = blockIdx.x * blockDim.x + threadIdx.x;
+
+  for (int i = tid; i < n; i += num_threads) {
+    // preemptive loop to reduce number of iterations?
+    bnds_low[i] = 0;
+    bnds_upp[i] = 0;
+    bnds_lu[i] = 0;
+    fixed_vars[i] = 0;
+
+    if(xl[i] > -1e20) {
+      ixl[i] = 1.;
+      bnds_low[i] = 1;
+      if(xu[i] < 1e20) {
+        bnds_lu[i] = 1;
+      }
+    } else {
+      ixl[i] = 0.;
+    }
+
+    if(xu[i] < 1e20) {
+      ixu[i] = 1.;
+      bnds_upp[i] = 1;
+    } else {
+      ixu[i] = 0.;
+    }
+
+    if(xu[i] < 1e20 &&
+       fabs(xl[i]-xu[i]) <= fixed_var_tol*fmax(1.,std::fabs(xu[i]))) {
+      fixed_vars[i] = 1;
+    }
+  }
+}
+
+/** @brief relax variable bounds */
+__global__ void relax_bounds_cu(int n,
+                                double* xla,
+                                double* xua,
+                                double fixed_var_tol,
+                                double fixed_var_perturb)
+{
+  const int num_threads = blockDim.x * gridDim.x;
+  const int tid = blockIdx.x * blockDim.x + threadIdx.x;
+  
+  for (int i = tid; i < n; i += num_threads) {
+    double xuabs = fabs(xua[i]);
+    if(fabs(xua[i]-xla[i]) <= fixed_var_tol*fmax(1.,xuabs)) {
+      xua[i] += fixed_var_perturb * std::fmax(1.,xuabs);
+      xla[i] -= fixed_var_perturb * std::fmax(1.,xuabs);
+    }
+  }
+}
+
+/** @brief set d_ptr[i] = 1 if d1[i] == d2[i], otherwirse 0 */
+__global__ void set_if_match_cu(int n,
+                                int* d_ptr,
+                                const double* d1,
+                                const double* d2)
+{
+  const int num_threads = blockDim.x * gridDim.x;
+  const int tid = blockIdx.x * blockDim.x + threadIdx.x;
+  
+  for (int i = tid; i < n; i += num_threads) {
+    if(d1[i]==d2[i]) {
+      d_ptr[i] = 1;
+    } else {
+      d_ptr[i] = 0;
+    }
+  }
+}
+
 namespace hiop
 {
 namespace cuda
@@ -1112,7 +1195,6 @@ double min_w_pattern_kernel(int n, const double* d1, const double* id, double ma
 
   thrust::device_ptr<double> ret_dev_ptr = thrust::min_element(thrust::device, dv_ptr, dv_ptr+n);
 
-  // TODO: how to return double from device to host?
   double *ret_host = new double[1];
   double *ret_ptr = thrust::raw_pointer_cast(ret_dev_ptr);
   cudaError_t cuerr = cudaMemcpy(ret_host, ret_ptr, (1)*sizeof(double), cudaMemcpyDeviceToHost);
@@ -1276,7 +1358,88 @@ void copyToStartingAt_w_pattern_kernel(int n_src,
                                                            dd);
 }
 
+int num_match_local_kernel(int n, const double* d1, const double* d2)
+{
+  int num_blocks = (n+block_size-1)/block_size;
+  
+  // TODO: how to avoid this temp vec?
+  thrust::device_ptr<int> dv_ptr = thrust::device_malloc(n*sizeof(int));
+  int* d_ptr = thrust::raw_pointer_cast(dv_ptr);
+
+  // set d_ptr[i] = 1 if d1[i] == d2[i], otherwirse 0
+  set_if_match_cu<<<num_blocks,block_size>>>(n, d_ptr, d1, d2);
+  
+  int rval = thrust::reduce(thrust::device, d_ptr, d_ptr+n, 0, thrust::plus<int>());
+
+  thrust::device_free(dv_ptr);
+  
+  return rval;
+}
 
+/** @brief process variable bounds */
+void process_bounds_local_kernel(int n_local,
+                                 const double* xl,
+                                 const double* xu,
+                                 double* ixl,
+                                 double* ixu,
+                                 int& n_bnds_low,
+                                 int& n_bnds_upp,
+                                 int& n_bnds_lu,
+                                 int& n_fixed_vars,
+                                 double fixed_var_tol)
+{
+  int num_blocks = (n_local+block_size-1)/block_size;
+
+  thrust::device_ptr<int> bnds_low_d_ptr = thrust::device_malloc(n_local*sizeof(int));
+  int* bnds_low_r_ptr = thrust::raw_pointer_cast(bnds_low_d_ptr);
+
+  thrust::device_ptr<int> bnds_upp_d_ptr = thrust::device_malloc(n_local*sizeof(int));
+  int* bnds_upp_r_ptr = thrust::raw_pointer_cast(bnds_upp_d_ptr);
+
+  thrust::device_ptr<int> bnds_lu_d_ptr = thrust::device_malloc(n_local*sizeof(int));
+  int* bnds_lu_r_ptr = thrust::raw_pointer_cast(bnds_lu_d_ptr);
+
+  thrust::device_ptr<int> n_fixed_vars_d_ptr = thrust::device_malloc(n_local*sizeof(int));
+  int* n_fixed_vars_r_ptr = thrust::raw_pointer_cast(n_fixed_vars_d_ptr);
+
+  // set values
+  process_bounds_cu<<<num_blocks,block_size>>>(n_local,
+                                               xl,
+                                               xu,
+                                               ixl,
+                                               ixu,
+                                               bnds_low_r_ptr,
+                                               bnds_upp_r_ptr,
+                                               bnds_lu_r_ptr,
+                                               n_fixed_vars_r_ptr,
+                                               fixed_var_tol);
+
+  // compute sum
+  n_bnds_low = thrust::reduce(thrust::device, bnds_low_d_ptr, bnds_low_d_ptr+n_local, 0.0, thrust::plus<int>());
+  n_bnds_upp = thrust::reduce(thrust::device, bnds_upp_d_ptr, bnds_upp_d_ptr+n_local, 0.0, thrust::plus<int>());
+  n_bnds_lu = thrust::reduce(thrust::device, bnds_lu_d_ptr, bnds_lu_d_ptr+n_local, 0.0, thrust::plus<int>());
+  n_fixed_vars = thrust::reduce(thrust::device, n_fixed_vars_d_ptr, n_fixed_vars_d_ptr+n_local, 0.0, thrust::plus<int>());
+
+  thrust::device_free(bnds_low_d_ptr);
+  thrust::device_free(bnds_upp_d_ptr);
+  thrust::device_free(bnds_lu_d_ptr);
+  thrust::device_free(n_fixed_vars_d_ptr);
+}
+
+/** @brief relax variable bounds */
+void relax_bounds_kernel(int n_local,
+                         double* xl,
+                         double* xu,
+                         double fixed_var_tol,
+                         double fixed_var_perturb)
+{
+  int num_blocks = (n_local+block_size-1)/block_size;
+  relax_bounds_cu<<<num_blocks,block_size>>>(n_local,
+                                             xl,
+                                             xu,
+                                             fixed_var_tol,
+                                             fixed_var_perturb);
+}
 
 /// for hiopVectorIntCuda
 /**
diff --git a/src/LinAlg/VectorCudaKernels.hpp b/src/LinAlg/VectorCudaKernels.hpp
index 90fe1f355..3d8cf6cba 100644
--- a/src/LinAlg/VectorCudaKernels.hpp
+++ b/src/LinAlg/VectorCudaKernels.hpp
@@ -296,6 +296,28 @@ void copyToStartingAt_w_pattern_kernel(int n_src,
                                        double *vd,
                                        const double* dd);
 
+/// @brief return the numbers of identical elements between two vectors
+int num_match_local_kernel(int n, const double* d1, const double* d2);
+
+/** @brief process variable bounds */
+void process_bounds_local_kernel(int n_local,
+                                 const double* xl,
+                                 const double* xu,
+                                 double* ixl,
+                                 double* ixu,
+                                 int& n_bnds_low,
+                                 int& n_bnds_upp,
+                                 int& n_bnds_lu,
+                                 int& n_fixed_vars,
+                                 double fixed_var_tol);
+
+/** @brief relax variable bounds */
+void relax_bounds_kernel(int n_local,
+                         double* xl,
+                         double* xu,
+                         double fixed_var_tol,
+                         double fixed_var_perturb);
+
 /// for hiopVectorIntCuda
 /**
  * @brief Set the vector entries to be a linear space of starting at i0 containing evenly 
diff --git a/src/LinAlg/VectorHipKernels.cpp b/src/LinAlg/VectorHipKernels.cpp
index 413ee183f..bcea53a66 100644
--- a/src/LinAlg/VectorHipKernels.cpp
+++ b/src/LinAlg/VectorHipKernels.cpp
@@ -635,6 +635,89 @@ __global__ void copyToStartingAt_w_pattern_hip(int n_src,
   }
 }
 
+/** @brief process variable bounds */
+__global__ void process_bounds_hip(int n,
+                                   const double* xl,
+                                   const double* xu,
+                                   double* ixl,
+                                   double* ixu,
+                                   double* bnds_low,
+                                   double* bnds_upp,
+                                   double* bnds_lu,
+                                   double* fixed_vars,
+                                   double fixed_var_tol)
+{
+  const int num_threads = blockDim.x * gridDim.x;
+  const int tid = blockIdx.x * blockDim.x + threadIdx.x;
+
+  for (int i = tid; i < n; i += num_threads) {
+    // preemptive loop to reduce number of iterations?
+    bnds_low[i] = 0;
+    bnds_upp[i] = 0;
+    bnds_lu[i] = 0;
+    fixed_vars[i] = 0;
+
+    if(xl[i] > -1e20) {
+      ixl[i] = 1.;
+      bnds_low[i] = 1;
+      if(xu[i] < 1e20) {
+        bnds_lu[i] = 1;
+      }
+    } else {
+      ixl[i] = 0.;
+    }
+
+    if(xu[i] < 1e20) {
+      ixu[i] = 1.;
+      bnds_upp[i] = 1;
+    } else {
+      ixu[i] = 0.;
+    }
+
+    if(xu[i] < 1e20 &&
+       fabs(xl[i]-xu[i]) <= fixed_var_tol*fmax(1.,std::fabs(xu[i]))) {
+      fixed_vars[i] = 1;
+    }
+  }
+}
+
+/** @brief relax variable bounds */
+__global__ void relax_bounds_hip(int n,
+                                 double* xla,
+                                 double* xua,
+                                 double fixed_var_tol,
+                                 double fixed_var_perturb)
+{
+  const int num_threads = blockDim.x * gridDim.x;
+  const int tid = blockIdx.x * blockDim.x + threadIdx.x;
+  
+  for (int i = tid; i < n; i += num_threads) {
+    double xuabs = fabs(xua[i]);
+    if(fabs(xua[i]-xla[i]) <= fixed_var_tol*fmax(1.,xuabs)) {
+      xua[i] += fixed_var_perturb * std::fmax(1.,xuabs);
+      xla[i] -= fixed_var_perturb * std::fmax(1.,xuabs);
+    }
+  }
+}
+
+/** @brief set d_ptr[i] = 1 if d1[i] == d2[i], otherwirse 0 */
+__global__ void set_if_match_hip(int n,
+                                 int* d_ptr,
+                                 const double* d1,
+                                 const double* d2)
+{
+  const int num_threads = blockDim.x * gridDim.x;
+  const int tid = blockIdx.x * blockDim.x + threadIdx.x;
+  
+  for (int i = tid; i < n; i += num_threads) {
+    if(d1[i]==d2[i]) {
+      d_ptr[i] = 1;
+    } else {
+      d_ptr[i] = 0;
+    }
+  }
+}
+
 namespace hiop
 {
 namespace hip
@@ -1107,7 +1190,6 @@ double min_w_pattern_kernel(int n, const double* d1, const double* id, double ma
 
   thrust::device_ptr<double> ret_dev_ptr = thrust::min_element(thrust::device, dv_ptr, dv_ptr+n);
 
-  // TODO: how to return double from device to host?
   double *ret_host = new double[1];
   double *ret_ptr = thrust::raw_pointer_cast(ret_dev_ptr);
   hipError_t cuerr = hipMemcpy(ret_host, ret_ptr, (1)*sizeof(double), hipMemcpyDeviceToHost);
@@ -1271,7 +1353,88 @@ void copyToStartingAt_w_pattern_kernel(int n_src,
                                                            dd);
 }
 
+int num_match_local_kernel(int n, const double* d1, const double* d2)
+{
+  int num_blocks = (n+block_size-1)/block_size;
+  
+  // TODO: how to avoid this temp vec?
+  thrust::device_ptr<int> dv_ptr = thrust::device_malloc(n*sizeof(int));
+  int* d_ptr = thrust::raw_pointer_cast(dv_ptr);
+
+  // set d_ptr[i] = 1 if d1[i] == d2[i], otherwirse 0
+  set_if_match_hip<<<num_blocks,block_size>>>(n, d_ptr, d1, d2);
+  
+  int rval = thrust::reduce(thrust::device, d_ptr, d_ptr+n, 0, thrust::plus<int>());
+
+  thrust::device_free(dv_ptr);
+  
+  return rval;
+}
 
+/** @brief process variable bounds */
+void process_bounds_local_kernel(int n_local,
+                                 const double* xl,
+                                 const double* xu,
+                                 double* ixl,
+                                 double* ixu,
+                                 int& n_bnds_low,
+                                 int& n_bnds_upp,
+                                 int& n_bnds_lu,
+                                 int& n_fixed_vars,
+                                 double fixed_var_tol)
+{
+  int num_blocks = (n_local+block_size-1)/block_size;
+
+  thrust::device_ptr<int> bnds_low_d_ptr = thrust::device_malloc(n_local*sizeof(int));
+  int* bnds_low_r_ptr = thrust::raw_pointer_cast(bnds_low_d_ptr);
+
+  thrust::device_ptr<int> bnds_upp_d_ptr = thrust::device_malloc(n_local*sizeof(int));
+  int* bnds_upp_r_ptr = thrust::raw_pointer_cast(bnds_upp_d_ptr);
+
+  thrust::device_ptr<int> bnds_lu_d_ptr = thrust::device_malloc(n_local*sizeof(int));
+  int* bnds_lu_r_ptr = thrust::raw_pointer_cast(bnds_lu_d_ptr);
+
+  thrust::device_ptr<int> n_fixed_vars_d_ptr = thrust::device_malloc(n_local*sizeof(int));
+  int* n_fixed_vars_r_ptr = thrust::raw_pointer_cast(n_fixed_vars_d_ptr);
+
+  // set values
+  process_bounds_hip<<<num_blocks,block_size>>>(n,
+                                                xl,
+                                                xu,
+                                                ixl,
+                                                ixu,
+                                                bnds_low_r_ptr,
+                                                bnds_upp_r_ptr,
+                                                bnds_lu_r_ptr,
+                                                n_fixed_vars_r_ptr,
+                                                fixed_var_tol);
+
+  // compute sum
+  n_bnds_low = thrust::reduce(thrust::device, bnds_low_d_ptr, bnds_low_d_ptr+n_local, 0.0, thrust::plus<int>());
+  n_bnds_upp = thrust::reduce(thrust::device, bnds_upp_d_ptr, bnds_upp_d_ptr+n_local, 0.0, thrust::plus<int>());
+  n_bnds_lu = thrust::reduce(thrust::device, bnds_lu_d_ptr, bnds_lu_d_ptr+n_local, 0.0, thrust::plus<int>());
+  n_fixed_vars = thrust::reduce(thrust::device, n_fixed_vars_d_ptr, n_fixed_vars_d_ptr+n_local, 0.0, thrust::plus<int>());
+
+  thrust::device_free(bnds_low_d_ptr);
+  thrust::device_free(bnds_upp_d_ptr);
+  thrust::device_free(bnds_lu_d_ptr);
+  thrust::device_free(n_fixed_vars_d_ptr);
+}
+
+/** @brief relax variable bounds */
+void relax_bounds_kernel(int n_local,
+                         double* xl,
+                         double* xu,
+                         double fixed_var_tol,
+                         double fixed_var_perturb)
+{
+  int num_blocks = (n_local+block_size-1)/block_size;
+  relax_bounds_hip<<<num_blocks,block_size>>>(n_local,
+                                              xl,
+                                              xu,
+                                              fixed_var_tol,
+                                              fixed_var_perturb);
+}
 
 /// for hiopVectorIntHip
 /**
diff --git a/src/LinAlg/VectorHipKernels.hpp b/src/LinAlg/VectorHipKernels.hpp
index b1c0b8022..ece799f18 100644
--- a/src/LinAlg/VectorHipKernels.hpp
+++ b/src/LinAlg/VectorHipKernels.hpp
@@ -296,6 +296,28 @@ void copyToStartingAt_w_pattern_kernel(int n_src,
                                        double *vd,
                                        const double* dd);
 
+/// @brief return the numbers of identical elements between two vectors
+int num_match_local_kernel(int n, const double* d1, const double* d2);
+
+/** @brief process variable bounds */
+void process_bounds_local_kernel(int n_local,
+                                 const double* xl,
+                                 const double* xu,
+                                 double* ixl,
+                                 double* ixu,
+                                 int& n_bnds_low,
+                                 int& n_bnds_upp,
+                                 int& n_bnds_lu,
+                                 int& n_fixed_vars,
+                                 double fixed_var_tol);
+
+/** @brief relax variable bounds */
+void relax_bounds_kernel(int n_local,
+                         double* xl,
+                         double* xu,
+                         double fixed_var_tol,
+                         double fixed_var_perturb);
+
 /// for hiopVectorIntHip
 /**
  * @brief Set the vector entries to be a linear space of starting at i0 containing evenly 
diff --git a/src/LinAlg/hiopVector.hpp b/src/LinAlg/hiopVector.hpp
index 289583481..80c720004 100644
--- a/src/LinAlg/hiopVector.hpp
+++ b/src/LinAlg/hiopVector.hpp
@@ -998,10 +998,58 @@ class hiopVector
    * @brief check if `this` vector is identical to `vec`
    *
    * @param[in] vec - vector used to be compared with `this`
-   * @todo: add unit test, or should we remove this function?
+   * @todo: should we remove this function?
    */
   virtual bool is_equal(const hiopVector& vec) const = 0;
 
+  /**
+   * @brief return the numbers of identical elements between two vectors
+   *
+   * @param[in] vec - vector used to be compared with `this`
+   * @post `vec` is not modified
+   */
+  virtual size_type num_match(const hiopVector& vec) const = 0;
+  
+  /**
+   * @brief preprocess bounds in a form supported by the NLP formulation. Returns counts of
+   * the variables with lower, upper, and lower and upper bounds, as well of the fixed 
+   * variables.
+   *
+   * @param[in] this - lower bound of primal variable `x`
+   * @param[in] xu - lower bound of primal variable `x`
+   * @param[in] ixl - index of the variables with lower bounds
+   * @param[in] ixu - index of the variables with upper bounds
+   * @param[out] n_bnds_low - number of variables with lower bounds only
+   * @param[out] n_bnds_upp - number of variables with upper bounds only
+   * @param[out] n_bnds_lu - number of variables with both lower and upper bounds
+   * @param[out] n_fixed_vars - number of fixed variables
+   * @param[in] fixed_var_tol - tolerance used to define fixed variables
+   * 
+   * @pre this is a local method
+   */
+  virtual bool process_bounds_local(const hiopVector& xu,
+                                    hiopVector& ixl,
+                                    hiopVector& ixu,
+                                    size_type& n_bnds_low,
+                                    size_type& n_bnds_upp,
+                                    size_type& n_bnds_lu,
+                                    size_type& n_fixed_vars,
+                                    const double& fixed_var_tol) = 0;
+
+  /**
+   * @brief relax variable bounds
+   *
+   * @param[in] this - lower bound of primal variable `x`
+   * @param[in] xu - lower bound of primal variable `x`
+   * @param[in] fixed_var_tol - tolerance used to define fixed variables
+   * @param[in] fixed_var_perturb - perturbation added to bounds
+   * 
+   * @pre this is a local method
+   */
+  virtual void relax_bounds_vec(hiopVector& xu,
+                                const double& fixed_var_tol,
+                                const double& fixed_var_perturb) = 0;
+
 protected:
   size_type n_; //we assume sequential data
 protected:
diff --git a/src/LinAlg/hiopVectorCuda.cpp b/src/LinAlg/hiopVectorCuda.cpp
index 0be1e2eaf..1bf877992 100644
--- a/src/LinAlg/hiopVectorCuda.cpp
+++ b/src/LinAlg/hiopVectorCuda.cpp
@@ -1087,6 +1087,69 @@ bool hiopVectorCuda::is_equal(const hiopVector& vec) const
   assert(false&&"NOT needed. Remove this func. TODO");
 }
 
+size_type hiopVectorCuda::num_match(const hiopVector& vec) const
+{
+  const double* dd = data_;
+  const double* vd = vec.local_data_const();
+  int sum_match = sum_match = hiop::cuda::num_match_local_kernel(n_local_, dd, vd);
+
+#ifdef HIOP_USE_MPI
+  int sumG;
+  int ierr=MPI_Allreduce(&sum_match, &sumG, 1, MPI_INT, MPI_SUM, comm_); assert(MPI_SUCCESS==ierr);
+  return sumG;
+#endif
+  return sum_match;
+}
+
+bool hiopVectorCuda::process_bounds_local(const hiopVector& xu,
+                                          hiopVector& ixl,
+                                          hiopVector& ixu,
+                                          size_type& n_bnds_low,
+                                          size_type& n_bnds_upp,
+                                          size_type& n_bnds_lu,
+                                          size_type& n_fixed_vars,
+                                          const double& fixed_var_tol)
+{
+#ifdef HIOP_DEEPCHECKS
+  assert(xu.get_local_size()==n_local_);
+  assert(ixl.get_local_size()==n_local_);
+  assert(ixu.get_local_size()==n_local_);
+#endif
+  const double* xl_arr = data_;
+  const double* xu_arr = xu.local_data_const();
+  double* ixl_arr = ixl.local_data();
+  double* ixu_arr = ixu.local_data();
+  
+  hiop::cuda::process_bounds_local_kernel(n_local_,
+                                          xl_arr,
+                                          xu_arr,
+                                          ixl_arr,
+                                          ixu_arr,
+                                          n_bnds_low,
+                                          n_bnds_upp,
+                                          n_bnds_lu,
+                                          n_fixed_vars,
+                                          fixed_var_tol);
+
+  return true;
+}
+
+void hiopVectorCuda::relax_bounds_vec(hiopVector& xu,
+                                      const double& fixed_var_tol,
+                                      const double& fixed_var_perturb)
+{
+#ifdef HIOP_DEEPCHECKS
+  assert(xu.get_local_size()==n_local_);
+#endif
+  double* xl_arr = data_;
+  double* xu_arr = xu.local_data();
+  
+  hiop::cuda::relax_bounds_kernel(n_local_,
+                                  xl_arr,
+                                  xu_arr,
+                                  fixed_var_tol,
+                                  fixed_var_perturb);
+}
 
 } // namespace hiop
 
diff --git a/src/LinAlg/hiopVectorCuda.hpp b/src/LinAlg/hiopVectorCuda.hpp
index 5d504521d..7ec1b3faa 100644
--- a/src/LinAlg/hiopVectorCuda.hpp
+++ b/src/LinAlg/hiopVectorCuda.hpp
@@ -347,6 +347,21 @@ class hiopVectorCuda : public hiopVector
   /// @brief check if `this` vector is identical to `vec`
   virtual bool is_equal(const hiopVector& vec) const;
 
+  virtual size_type num_match(const hiopVector& vec) const;
+
+  virtual bool process_bounds_local(const hiopVector& xu,
+                                    hiopVector& ixl,
+                                    hiopVector& ixu,
+                                    size_type& n_bnds_low,
+                                    size_type& n_bnds_upp,
+                                    size_type& n_bnds_lu,
+                                    size_type& n_fixed_vars,
+                                    const double& fixed_var_tol);
+
+  virtual void relax_bounds_vec(hiopVector& xu,
+                                const double& fixed_var_tol,
+                                const double& fixed_var_perturb);
+
   /* functions for this class */
   inline MPI_Comm get_mpi_comm() const { return comm_; }
 
diff --git a/src/LinAlg/hiopVectorHip.cpp b/src/LinAlg/hiopVectorHip.cpp
index ab5b88f15..e2a173468 100644
--- a/src/LinAlg/hiopVectorHip.cpp
+++ b/src/LinAlg/hiopVectorHip.cpp
@@ -940,9 +940,9 @@ bool hiopVectorHip::matchesPattern(const hiopVector& pattern)
 
 /** @brief Adjusts duals. */
 void hiopVectorHip::adjustDuals_plh(const hiopVector& xvec, 
-                                     const hiopVector& ixvec,
-                                     const double& mu,
-                                     const double& kappa)
+                                    const hiopVector& ixvec,
+                                    const double& mu,
+                                    const double& kappa)
 {
 #ifdef HIOP_DEEPCHECKS
   assert(xvec.get_local_size()==n_local_);
@@ -1091,6 +1091,69 @@ bool hiopVectorHip::is_equal(const hiopVector& vec) const
   assert(false&&"NOT needed. Remove this func. TODO");
 }
 
+size_type hiopVectorHip::num_match(const hiopVector& vec) const
+{
+  const double* dd = data_;
+  const double* vd = vec.local_data_const();
+  int sum_match = sum_match = hiop::cuda::num_match_local_kernel(n_local_, dd, vd);
+
+#ifdef HIOP_USE_MPI
+  int sumG;
+  int ierr=MPI_Allreduce(&sum_match, &sumG, 1, MPI_INT, MPI_SUM, comm_); assert(MPI_SUCCESS==ierr);
+  return sumG;
+#endif
+  return sum_match;
+}
+
+bool hiopVectorHip::process_bounds_local(const hiopVector& xu,
+                                         hiopVector& ixl,
+                                         hiopVector& ixu,
+                                         size_type& n_bnds_low,
+                                         size_type& n_bnds_upp,
+                                         size_type& n_bnds_lu,
+                                         size_type& n_fixed_vars,
+                                         const double& fixed_var_tol)
+{
+#ifdef HIOP_DEEPCHECKS
+  assert(xu.get_local_size()==n_local_);
+  assert(ixl.get_local_size()==n_local_);
+  assert(ixu.get_local_size()==n_local_);
+#endif
+  const double* xl_arr = data_;
+  const double* xu_arr = xu.local_data_const();
+  double* ixl_arr = ixl.local_data();
+  double* ixu_arr = ixu.local_data();
+  
+  hiop::hip::process_bounds_local_kernel(n_local_,
+                                         xl_arr,
+                                         xu_arr,
+                                         ixl_arr,
+                                         ixu_arr,
+                                         n_bnds_low,
+                                         n_bnds_upp,
+                                         n_bnds_lu,
+                                         n_fixed_vars,
+                                         fixed_var_tol);
+
+  return true;
+}
+
+void hiopVectorHip::relax_bounds_vec(hiopVector& xu,
+                                     const double& fixed_var_tol,
+                                     const double& fixed_var_perturb)
+{
+#ifdef HIOP_DEEPCHECKS
+  assert(xu.get_local_size()==n_local_);
+#endif
+  const double* xl_arr = data_;
+  const double* xu_arr = xu.local_data_const();
+  
+  hiop::hip::process_bounds_local_kernel(n_local_,
+                                         xl_arr,
+                                         xu_arr,
+                                         fixed_var_tol,
+                                         fixed_var_perturb);
+}
 
 } // namespace hiop
 
diff --git a/src/LinAlg/hiopVectorHip.hpp b/src/LinAlg/hiopVectorHip.hpp
index addaf1d03..de594201c 100644
--- a/src/LinAlg/hiopVectorHip.hpp
+++ b/src/LinAlg/hiopVectorHip.hpp
@@ -349,6 +349,21 @@ class hiopVectorHip : public hiopVector
   /// @brief check if `this` vector is identical to `vec`
   virtual bool is_equal(const hiopVector& vec) const;
 
+  virtual size_type num_match(const hiopVector& vec) const;
+
+  virtual bool process_bounds_local(const hiopVector& xu,
+                                    hiopVector& ixl,
+                                    hiopVector& ixu,
+                                    size_type& n_bnds_low,
+                                    size_type& n_bnds_upp,
+                                    size_type& n_bnds_lu,
+                                    size_type& n_fixed_vars,
+                                    const double& fixed_var_tol);
+
+  virtual void relax_bounds_vec(hiopVector& xu,
+                                const double& fixed_var_tol,
+                                const double& fixed_var_perturb);
+
   /* functions for this class */
   inline MPI_Comm get_mpi_comm() const { return comm_; }
 
diff --git a/src/LinAlg/hiopVectorPar.cpp b/src/LinAlg/hiopVectorPar.cpp
index 0fb38bd89..b50927729 100644
--- a/src/LinAlg/hiopVectorPar.cpp
+++ b/src/LinAlg/hiopVectorPar.cpp
@@ -1283,5 +1283,127 @@ bool hiopVectorPar::is_equal(const hiopVector& vec) const
 }
 
 
+size_type hiopVectorPar::num_match(const hiopVector& vec) const
+{
+  if(n_local_ != vec.get_local_size()) {
+    return 0;
+  }
+  int sum_match = 0;
+  const double* data_v = vec.local_data_const();
+  for(auto i=0; i<n_local_; ++i) {
+    if(data_[i]==data_v[i]) {
+      sum_match++;
+    }
+  }
+
+#ifdef HIOP_USE_MPI
+  int sumG;
+  int ierr=MPI_Allreduce(&sum_match, &sumG, 1, MPI_INT, MPI_SUM, comm_); assert(MPI_SUCCESS==ierr);
+  return sumG;
+#endif
+  return sum_match;
+}
+
+bool hiopVectorPar::process_bounds_local(const hiopVector& xu,
+                                         hiopVector& ixl,
+                                         hiopVector& ixu,
+                                         size_type& n_bnds_low,
+                                         size_type& n_bnds_upp,
+                                         size_type& n_bnds_lu,
+                                         size_type& n_fixed_vars,
+                                         const double& fixed_var_tol)
+{
+  n_bnds_low = 0;
+  n_bnds_upp = 0;
+  n_bnds_lu = 0;
+  n_fixed_vars = 0;
+
+  const double* xl_vec = this->local_data_const();
+  const double* xu_vec = xu.local_data_const();
+  double* ixl_vec = ixl.local_data();
+  double* ixu_vec = ixu.local_data();
+
+  int nlocal = this->get_local_size();
+  for(int i=0;i <nlocal; i++) {
+    if(xl_vec[i] > -1e20) {
+      ixl_vec[i] = 1.;
+      n_bnds_low++;
+      if(xu_vec[i] < 1e20) {
+        n_bnds_lu++;
+      }
+    } else {
+      ixl_vec[i] = 0.;
+    }
+
+    if(xu_vec[i] < 1e20) {
+      ixu_vec[i] = 1.;
+      n_bnds_upp++;
+    } else {
+      ixu_vec[i] = 0.;
+    }
+
+#ifdef HIOP_DEEPCHECKS
+    assert(xl_vec[i] <= xu_vec[i] && "please fix the inconsistent bounds, otherwise the problem is infeasible");
+#endif
+
+    if( xu_vec[i] < 1e20 &&
+        fabs(xl_vec[i]-xu_vec[i]) <= fixed_var_tol*std::fmax(1.,std::fabs(xu_vec[i]))) {
+      n_fixed_vars++;
+    } else {
+#ifdef HIOP_DEEPCHECKS
+#define min_dist 1e-8
+      const int maxBndsCloseMsgs=3;
+      int nBndsClose=0;
+      int myrank_ = 0;
+      int numranks = 1;  
+#ifdef HIOP_USE_MPI
+      int err = MPI_Comm_rank(comm_, &myrank_); assert(err==MPI_SUCCESS);
+      err = MPI_Comm_size(comm_, &numranks); assert(err==MPI_SUCCESS);
+#endif
+
+      if(fixed_var_tol<min_dist) { 
+        if(nBndsClose<maxBndsCloseMsgs) {
+          if(std::fabs(xl_vec[i]-xu_vec[i]) / std::max(1.,std::fabs(xu_vec[i]))<min_dist) {
+            if(myrank_==0) {
+              fprintf(stdout,
+                      "Lower (%g) and upper bound (%g) for variable %d are very close. "
+                      "Consider fixing this variable or increase 'fixed_var_tolerance'.\n",
+                      i, xl_vec[i], xu_vec[i]);
+            }
+            nBndsClose++;
+          }
+        }
+        if(nBndsClose==maxBndsCloseMsgs) {
+          if(myrank_==0) {
+            fprintf(stdout,
+                    "[further messages were surpressed]\n");
+          }
+          nBndsClose++;
+        }
+      }
+#endif
+    }
+  } // end of for(int i=0;i <nlocal; i++) loop
+
+  return true;
+}
+
+void hiopVectorPar::relax_bounds_vec(hiopVector& xu,
+                                     const double& fixed_var_tol,
+                                     const double& fixed_var_perturb)
+{
+  double *xla = this->local_data();
+  double *xua = xu.local_data();
+  size_type n = this->get_local_size();
+
+  double xuabs;
+  for(index_type i=0; i<n; i++) {
+    xuabs = std::fabs(xua[i]);
+    if(std::fabs(xua[i]-xla[i]) <= fixed_var_tol*std::fmax(1.,xuabs)) {
+      xua[i] += fixed_var_perturb*std::fmax(1.,xuabs);
+      xla[i] -= fixed_var_perturb*std::fmax(1.,xuabs);
+    }
+  }
+}
 
 };
diff --git a/src/LinAlg/hiopVectorPar.hpp b/src/LinAlg/hiopVectorPar.hpp
index 3f8a464bd..ae10ad7db 100644
--- a/src/LinAlg/hiopVectorPar.hpp
+++ b/src/LinAlg/hiopVectorPar.hpp
@@ -309,6 +309,21 @@ class hiopVectorPar : public hiopVector
 
   virtual bool is_equal(const hiopVector& vec) const;
 
+  virtual size_type num_match(const hiopVector& vec) const;
+
+  virtual bool process_bounds_local(const hiopVector& xu,
+                                    hiopVector& ixl,
+                                    hiopVector& ixu,
+                                    size_type& n_bnds_low,
+                                    size_type& n_bnds_upp,
+                                    size_type& n_bnds_lu,
+                                    size_type& n_fixed_vars,
+                                    const double& fixed_var_tol);
+
+  virtual void relax_bounds_vec(hiopVector& xu,
+                                const double& fixed_var_tol,
+                                const double& fixed_var_perturb);
+
   /**
    * @brief accessor to the execution policy
    */
diff --git a/src/LinAlg/hiopVectorRaja.hpp b/src/LinAlg/hiopVectorRaja.hpp
index 31cd4a2e9..ae0b936cc 100644
--- a/src/LinAlg/hiopVectorRaja.hpp
+++ b/src/LinAlg/hiopVectorRaja.hpp
@@ -312,6 +312,21 @@ class hiopVectorRaja : public hiopVector
 
   virtual bool is_equal(const hiopVector& vec) const;
 
+  virtual size_type num_match(const hiopVector& vec) const;
+
+  virtual bool process_bounds_local(const hiopVector& xu,
+                                    hiopVector& ixl,
+                                    hiopVector& ixu,
+                                    size_type& n_bnds_low,
+                                    size_type& n_bnds_upp,
+                                    size_type& n_bnds_lu,
+                                    size_type& n_fixed_vars,
+                                    const double& fixed_var_tol);
+
+  virtual void relax_bounds_vec(hiopVector& xu,
+                                const double& fixed_var_tol,
+                                const double& fixed_var_perturb);
+
   const ExecSpace<MEMBACKEND, EXECPOLICYRAJA>& exec_space() const
   {
     return exec_space_;
diff --git a/src/LinAlg/hiopVectorRajaImpl.hpp b/src/LinAlg/hiopVectorRajaImpl.hpp
index 6dcb111a6..6e8b552cc 100644
--- a/src/LinAlg/hiopVectorRajaImpl.hpp
+++ b/src/LinAlg/hiopVectorRajaImpl.hpp
@@ -2232,4 +2232,123 @@ bool hiopVectorRaja<MEM, POL>::is_equal(const hiopVector& vec) const
   return all_equal;
 }
 
+template<class MEM, class POL>
+size_type hiopVectorRaja<MEM, POL>::num_match(const hiopVector& vec) const
+{
+#ifdef HIOP_DEEPCHECKS
+  const hiopVectorRaja& v = dynamic_cast<const hiopVectorRaja<MEM, POL>&>(vec);
+  assert(v.n_local_ == n_local_);
+#endif 
+
+  const double* data_v = vec.local_data_const();
+  const double* data = data_dev_;
+  RAJA::ReduceSum< hiop_raja_reduce, int > sum(0);
+  RAJA::forall< hiop_raja_exec >( RAJA::RangeSegment(0, n_local_),
+    RAJA_LAMBDA(RAJA::Index_type i) 
+    {
+      if(data[i]==data_v[i]) {
+        sum += 1;        
+      }
+    });
+  int all_equal = sum.get();
+  
+#ifdef HIOP_USE_MPI
+  int all_equalG;
+  int ierr = MPI_Allreduce(&all_equal, &all_equalG, 1, MPI_INT, MPI_SUM, comm_);
+  assert(MPI_SUCCESS==ierr);
+  return all_equalG;
+#endif  
+  return all_equal;
+}
+
+template<class MEM, class POL>
+bool hiopVectorRaja<MEM, POL>::process_bounds_local(const hiopVector& xu,
+                                                    hiopVector& ixl,
+                                                    hiopVector& ixu,
+                                                    size_type& n_bnds_low,
+                                                    size_type& n_bnds_upp,
+                                                    size_type& n_bnds_lu,
+                                                    size_type& n_fixed_vars,
+                                                    const double& fixed_var_tol)
+{
+#ifdef HIOP_DEEPCHECKS
+  const hiopVectorRaja& vxu = dynamic_cast<const hiopVectorRaja<MEM, POL>&>(xu);
+  const hiopVectorRaja& vixl = dynamic_cast<const hiopVectorRaja<MEM, POL>&>(ixl);
+  const hiopVectorRaja& vixu = dynamic_cast<const hiopVectorRaja<MEM, POL>&>(ixu);
+  assert(vxu.n_local_ == this->n_local_);
+  assert(vixl.n_local_ == this->n_local_);
+  assert(vixu.n_local_ == this->n_local_);
+#endif
+
+  const double* xl_vec = this->local_data_const();
+  const double* xu_vec = xu.local_data_const();
+  double* ixl_vec = ixl.local_data();
+  double* ixu_vec = ixu.local_data();
+
+  size_type nlocal = this->get_local_size();
+
+  RAJA::ReduceSum< hiop_raja_reduce, int > sum_n_bnds_low(0);
+  RAJA::ReduceSum< hiop_raja_reduce, int > sum_n_bnds_upp(0);
+  RAJA::ReduceSum< hiop_raja_reduce, int > sum_n_bnds_lu(0);
+  RAJA::ReduceSum< hiop_raja_reduce, int > sum_n_fixed_vars(0);
+
+  RAJA::forall< hiop_raja_exec >( RAJA::RangeSegment(0, nlocal),
+    RAJA_LAMBDA(RAJA::Index_type i) 
+    {
+      if(xl_vec[i] > -1e20) {
+        ixl_vec[i] = 1.;
+        sum_n_bnds_low += 1;
+        if(xu_vec[i] < 1e20) {
+          sum_n_bnds_lu += 1;
+        }
+      } else {
+        ixl_vec[i] = 0.;
+      }
+
+      if(xu_vec[i] < 1e20) {
+        ixu_vec[i] = 1.;
+        sum_n_bnds_upp += 1;
+      } else {
+        ixu_vec[i] = 0.;
+      }
+
+      if(xu_vec[i] < 1e20 &&
+         fabs(xl_vec[i]-xu_vec[i]) <= fixed_var_tol*std::fmax(1.,std::fabs(xu_vec[i]))) {
+        sum_n_fixed_vars += 1;
+      }
+    });
+
+  n_bnds_low = sum_n_bnds_low.get();
+  n_bnds_upp = sum_n_bnds_upp.get();
+  n_bnds_lu = sum_n_bnds_lu.get();
+  n_fixed_vars = sum_n_fixed_vars.get();
+
+  return true;
+}
+
+template<class MEM, class POL>
+void hiopVectorRaja<MEM, POL>::relax_bounds_vec(hiopVector& xu,
+                                                const double& fixed_var_tol,
+                                                const double& fixed_var_perturb)
+{
+#ifdef HIOP_DEEPCHECKS
+  const hiopVectorRaja& vxu = dynamic_cast<const hiopVectorRaja<MEM, POL>&>(xu);
+  assert(vxu.n_local_ == this->n_local_);
+#endif
+
+  double *xla = this->local_data();
+  double *xua = xu.local_data();
+  size_type n = this->get_local_size();
+
+  RAJA::forall< hiop_raja_exec >( RAJA::RangeSegment(0, n),
+    RAJA_LAMBDA(RAJA::Index_type i) 
+    {
+      double xuabs = std::fabs(xua[i]);
+      if(std::fabs(xua[i]-xla[i]) <= fixed_var_tol*std::fmax(1.,xuabs)) {
+        xua[i] += fixed_var_perturb * std::fmax(1.,xuabs);
+        xla[i] -= fixed_var_perturb * std::fmax(1.,xuabs);
+      }
+    });
+}
+
 } // namespace hiop
diff --git a/src/Optimization/hiopNlpFormulation.cpp b/src/Optimization/hiopNlpFormulation.cpp
index b8fb9d6d3..ac50c04fa 100644
--- a/src/Optimization/hiopNlpFormulation.cpp
+++ b/src/Optimization/hiopNlpFormulation.cpp
@@ -193,8 +193,8 @@ bool hiopNlpFormulation::finalizeInitialization()
   if(strFixedVars_ != options->GetString("fixed_var")) {
     doinit=true;
   }
-  const double fixedVarTol = options->GetNumeric("fixed_var_tolerance");
-  if(dFixedVarsTol_ != fixedVarTol) {
+  const double fixed_var_tol = options->GetNumeric("fixed_var_tolerance");
+  if(dFixedVarsTol_ != fixed_var_tol) {
     doinit=true;
   }
 
@@ -257,12 +257,12 @@ bool hiopNlpFormulation::finalizeInitialization()
   //preprocess variables bounds - this is curently done on the CPU
   //
   size_type nfixed_vars_local;
-  process_bounds(n_bnds_low_local_,n_bnds_upp_local_, n_bnds_lu_, nfixed_vars_local);
+  process_bounds(n_bnds_low_local_, n_bnds_upp_local_, n_bnds_lu_, nfixed_vars_local);
 
   ///////////////////////////////////////////////////////////////////////////
   //  Handling of fixed variables
   //////////////////////////////////////////////////////////////////////////
-  dFixedVarsTol_ = fixedVarTol;  
+  dFixedVarsTol_ = fixed_var_tol;
   size_type nfixed_vars=nfixed_vars_local;
 #ifdef HIOP_USE_MPI
   int ierr = MPI_Allreduce(&nfixed_vars_local, &nfixed_vars, 1, MPI_HIOP_SIZE_TYPE, MPI_SUM, comm_); 
@@ -277,11 +277,15 @@ bool hiopNlpFormulation::finalizeInitialization()
       // remove free variables
       //
       log->printf(hovWarning, "Fixed variables will be removed internally.\n");
+      if(options->GetString("compute_mode")=="gpu") {
+        assert(false && "HiOp hasn't support removing fixed variables under GPU mode yet.");
+        return false;
+      }
 
       fixedVarsRemover = new hiopFixedVarsRemover(this,
                                                   *xl_,
                                                   *xu_,
-                                                  fixedVarTol,
+                                                  fixed_var_tol,
                                                   nfixed_vars,
                                                   nfixed_vars_local);
       
@@ -417,7 +421,7 @@ bool hiopNlpFormulation::finalizeInitialization()
   cons_body_ = nullptr;
   
   delete cons_Jac_;
-  cons_Jac_ = NULL;
+  cons_Jac_ = nullptr;
 
   delete cons_lambdas_;
   cons_lambdas_ = nullptr;
@@ -437,91 +441,16 @@ bool hiopNlpFormulation::finalizeInitialization()
 bool hiopNlpFormulation::process_bounds(size_type& n_bnds_low,
                                         size_type& n_bnds_upp,
                                         size_type& n_bnds_lu,
-                                        size_type& nfixed_vars)
+                                        size_type& n_fixed_vars)
 {
-
-  n_bnds_low = 0;
-  n_bnds_upp = 0;
-  n_bnds_lu = 0;
-  nfixed_vars = 0;
-
-#if !defined(HIOP_USE_MPI)
-  int* vec_distrib_ = nullptr;
-  MPI_Comm comm_ = MPI_COMM_SELF;
-#endif  
-  hiopVectorPar xl_tmp(n_vars_, vec_distrib_, comm_);
-  hiopVectorPar xu_tmp(n_vars_, vec_distrib_, comm_);
-  hiopVectorPar ixl_tmp(n_vars_, vec_distrib_, comm_);
-  hiopVectorPar ixu_tmp(n_vars_, vec_distrib_, comm_);
-  
-  this->xl_->copy_to_vectorpar(xl_tmp);
-  this->xu_->copy_to_vectorpar(xu_tmp);
-  this->ixl_->copy_to_vectorpar(ixl_tmp);
-  this->ixu_->copy_to_vectorpar(ixu_tmp);
-  
-  double *ixl_vec = ixl_tmp.local_data_host();
-  double *ixu_vec = ixu_tmp.local_data_host();
-
-  double* xl_vec = xl_tmp.local_data_host();
-  double* xu_vec = xu_tmp.local_data_host();
-#ifdef HIOP_DEEPCHECKS
-  const int maxBndsCloseMsgs=3; int nBndsClose=0;
-#endif
-  const double fixedVarTol = options->GetNumeric("fixed_var_tolerance");
-  int nlocal=xl_->get_local_size();
-  for(int i=0;i<nlocal; i++) {
-    if(xl_vec[i] > -1e20) {
-      ixl_vec[i] = 1.;
-      n_bnds_low++;
-      if(xu_vec[i] < 1e20) {
-        n_bnds_lu++;
-      }
-    } else {
-      ixl_vec[i] = 0.;
-    }
-
-    if(xu_vec[i] < 1e20) {
-      ixu_vec[i] = 1.;
-      n_bnds_upp++;
-    } else {
-      ixu_vec[i] = 0.;
-    }
-
-#ifdef HIOP_DEEPCHECKS
-    assert(xl_vec[i] <= xu_vec[i] && "please fix the inconsistent bounds, otherwise the problem is infeasible");
-#endif
-
-    //if(xl_vec[i]==xu_vec[i]) {
-    if( xu_vec[i]<1e20 &&
-        fabs(xl_vec[i]-xu_vec[i]) <= fixedVarTol*fmax(1.,fabs(xu_vec[i]))) {
-      nfixed_vars++;
-    } else {
-#ifdef HIOP_DEEPCHECKS
-#define min_dist 1e-8
-      if(fixedVarTol<min_dist) { 
-        if(nBndsClose<maxBndsCloseMsgs) {
-          if(fabs(xl_vec[i]-xu_vec[i]) / std::max(1.,fabs(xu_vec[i]))<min_dist) {
-            log->printf(hovWarning, 
-                        "Lower (%g) and upper bound (%g) for variable %d are very close. "
-                        "Consider fixing this variable or increase 'fixed_var_tolerance'.\n",
-                        i, xl_vec[i], xu_vec[i]);
-            nBndsClose++;
-          }
-        } 
-        if(nBndsClose==maxBndsCloseMsgs) {
-          log->printf(hovWarning, "[further messages were surpressed]\n");
-          nBndsClose++;
-        }
-      }
-#endif
-    }
-  }
-  
-  this->xl_->copy_from_vectorpar(xl_tmp);
-  this->xu_->copy_from_vectorpar(xu_tmp);
-  this->ixl_->copy_from_vectorpar(ixl_tmp);
-  this->ixu_->copy_from_vectorpar(ixu_tmp);
-
+  this->xl_->process_bounds_local(*this->xu_,
+                                  *this->ixl_,
+                                  *this->ixu_,
+                                  n_bnds_low,
+                                  n_bnds_upp,
+                                  n_bnds_lu,
+                                  n_fixed_vars,
+                                  options->GetNumeric("fixed_var_tolerance"));
   return true;
 } 
 
@@ -556,24 +485,18 @@ bool hiopNlpFormulation::process_constraints()
   assert(gl->get_local_size()==n_cons_);
   assert(gu->get_local_size()==n_cons_);
 
+  n_cons_eq_ = gl->num_match(*gu);
+  n_cons_ineq_ = n_cons_ - n_cons_eq_;
+
   // transfer to host 
   hiopVectorPar gl_host(n_cons_);
   hiopVectorPar gu_host(n_cons_);
   gl->copy_to_vectorpar(gl_host);
   gu->copy_to_vectorpar(gu_host);
-
+  
   double* gl_vec = gl_host.local_data();
   double* gu_vec = gu_host.local_data();
-  n_cons_eq_ = 0;
-  n_cons_ineq_ = 0; 
-  for(int i=0;i<n_cons_; i++) {
-    if(gl_vec[i]==gu_vec[i]) {
-      n_cons_eq_++;
-    } else {
-      n_cons_ineq_++;
-    }
-  }
-  
+
   /* Allocate host  c_rhs, dl, and du (all serial in this formulation) for on host processing. */
   hiopVectorPar c_rhs_host(n_cons_eq_);
   cons_eq_type_ = new hiopInterfaceBase::NonlinearityType[n_cons_eq_];
diff --git a/src/Optimization/hiopNlpFormulation.hpp b/src/Optimization/hiopNlpFormulation.hpp
index b8b107c0b..771dffa88 100644
--- a/src/Optimization/hiopNlpFormulation.hpp
+++ b/src/Optimization/hiopNlpFormulation.hpp
@@ -290,13 +290,13 @@ class hiopNlpFormulation
 #endif
 protected:
   /* Preprocess bounds in a form supported by the NLP formulation. Returns counts of
-   * the variables with lower, upper, and lower and lower bounds, as well of the fixed 
+   * the variables with lower, upper, and lower and upper bounds, as well of the fixed
    * variables. 
    */
   virtual bool process_bounds(size_type& n_bnds_low,
                               size_type& n_bnds_upp,
                               size_type& n_bnds_lu,
-                              size_type& nfixed_vars);
+                              size_type& n_fixed_vars);
   /* Preprocess constraints in a form supported the NLP formulation. */
   virtual bool process_constraints();
 protected:
diff --git a/src/Optimization/hiopNlpTransforms.cpp b/src/Optimization/hiopNlpTransforms.cpp
index 456cf89b4..4696a9cb0 100644
--- a/src/Optimization/hiopNlpTransforms.cpp
+++ b/src/Optimization/hiopNlpTransforms.cpp
@@ -65,11 +65,11 @@ hiopFixedVarsRemover::
 hiopFixedVarsRemover(hiopNlpFormulation* nlp,
                      const hiopVector& xl,
                      const hiopVector& xu,
-                     const double& fixedVarTol_,
+                     const double& fixed_var_tol,
                      const size_type& numFixedVars,
                      const size_type& numFixedVars_local)
   : hiopNlpTransformation(nlp),
-    n_fixed_vars_local(numFixedVars_local), fixedVarTol(fixedVarTol_),
+    n_fixed_vars_local(numFixedVars_local), fixed_var_tol_(fixed_var_tol),
     Jacc_fs(NULL), Jacd_fs(NULL),
     fs2rs_idx_map(xl.get_local_size()),
     x_rs_ref_(nullptr), Jacc_rs_ref(NULL), Jacd_rs_ref(NULL)
@@ -158,7 +158,7 @@ bool hiopFixedVarsRemover::setupDecisionVectorPart()
   int it_rs=0; 
   for(int i=0;i<n_fs_local; i++) {
     //if(xl_vec[i]==xu_vec[i]) {
-    if(fabs(xl_vec[i]-xu_vec[i])<= fixedVarTol*fmax(1.,fabs(xu_vec[i]))) {
+    if(fabs(xl_vec[i]-xu_vec[i])<= fixed_var_tol_*fmax(1.,fabs(xu_vec[i]))) {
       fs2rs_idx_map[i]=-1;
     } else {
       fs2rs_idx_map[i]=it_rs;
@@ -278,7 +278,7 @@ void hiopFixedVarsRemover::applyInvToMatrix(const double* M_fs, const int& m_in,
     for(int j=0; j<fs2rs_idx_map.size(); j++) {  
       rs_idx = fs2rs_idx_map[j];
       if(rs_idx>=0) {
-  	M_rs[i*nrs+rs_idx] = M_fs[i*nfs+j];
+        M_rs[i*nrs+rs_idx] = M_fs[i*nfs+j];
       }
     }
   }
@@ -291,38 +291,19 @@ hiopFixedVarsRelaxer(hiopNlpFormulation* nlp,
                      const size_type& numFixedVars,
                      const size_type& numFixedVars_local)
   : hiopNlpTransformation(nlp),
-    xl_copy(NULL), xu_copy(NULL), n_vars(xl.get_size()), n_vars_local(xl.get_local_size())
+    n_vars(xl.get_size()),
+    n_vars_local(xl.get_local_size())
 {
-  //xl_copy = xl.new_copy(); // no need to copy at this point
-  //xu_copy = xu.new_copy(); // no need to copy at this point
 }
 
 hiopFixedVarsRelaxer::~hiopFixedVarsRelaxer()
 {
-  if(xl_copy) delete xl_copy;
-  if(xu_copy) delete xu_copy;
 }
 
 void hiopFixedVarsRelaxer::
 relax(const double& fixed_var_tol, const double& fixed_var_perturb, hiopVector& xl, hiopVector& xu)
 {
-  double *xla=xl.local_data(), *xua=xu.local_data(), *v;
-  size_type n=xl.get_local_size();
-  double xuabs;
-  for(index_type i=0; i<n; i++) {
-    xuabs = fabs(xua[i]);
-    if(fabs(xua[i]-xla[i])<= fixed_var_tol*fmax(1.,xuabs)) {
-
-      xua[i] += fixed_var_perturb*fmax(1.,xuabs);
-      xla[i] -= fixed_var_perturb*fmax(1.,xuabs);
-      //if(xla[i]==xua[i]) {
-      // this does not apply anymore
-      //if fixed a zero or less,  increase upper bound
-      //if fixed at positive val, decrease lower bound
-      //if(xua[i]<=0.)      xua[i] += fixed_var_perturb*fmax(1.,fabs(xua[i]));
-      //else                xla[i] -= fixed_var_perturb*fmax(1.,fabs(xla[i]));
-    }
-  }
+  xl.relax_bounds_vec(xu, fixed_var_tol, fixed_var_perturb);
 }
 
 hiopBoundsRelaxer::
@@ -332,8 +313,12 @@ hiopBoundsRelaxer(hiopNlpFormulation* nlp,
                   const hiopVector& dl,
                   const hiopVector& du)
   : hiopNlpTransformation(nlp),
-    xl_ori(NULL), xu_ori(NULL), dl_ori(NULL), du_ori(NULL),
-    n_vars(xl.get_size()), n_vars_local(xl.get_local_size()),
+    xl_ori(nullptr),
+    xu_ori(nullptr),
+    dl_ori(nullptr),
+    du_ori(nullptr),
+    n_vars(xl.get_size()),
+    n_vars_local(xl.get_local_size()),
     n_ineq(dl.get_size())
 {
   xl_ori = xl.new_copy();
@@ -344,18 +329,10 @@ hiopBoundsRelaxer(hiopNlpFormulation* nlp,
 
 hiopBoundsRelaxer::~hiopBoundsRelaxer()
 {
-  if(xl_ori) {
-    delete xl_ori;
-  }
-  if(xu_ori) {
-    delete xu_ori;
-  }
-  if(dl_ori) {
-    delete dl_ori;
-  }
-  if(du_ori) {
-    delete du_ori;
-  }
+  delete xl_ori;
+  delete xu_ori;
+  delete dl_ori;
+  delete du_ori;
 }
 
 void hiopBoundsRelaxer::
@@ -425,9 +402,11 @@ hiopNLPObjGradScaling::hiopNLPObjGradScaling(hiopNlpFormulation* nlp,
                                              hiopVectorInt& cons_eq_mapping, 
                                              hiopVectorInt& cons_ineq_mapping)
   : hiopNlpTransformation(nlp),
-    n_vars(gradf.get_size()), n_vars_local(gradf.get_local_size()),
+    n_vars(gradf.get_size()),
+    n_vars_local(gradf.get_local_size()),
     scale_factor_obj(1.),
-    n_eq(c.get_size()), n_ineq(d.get_size())
+    n_eq(c.get_size()),
+    n_ineq(d.get_size())
 {
   scale_factor_obj = max_grad/gradf.infnorm();
   if(scale_factor_obj>1.) {
@@ -464,9 +443,4 @@ hiopNLPObjGradScaling::~hiopNLPObjGradScaling()
   if(scale_factor_cd) delete scale_factor_cd;
 }
 
-
-
-
-
-
 } //end of namespace
diff --git a/src/Optimization/hiopNlpTransforms.hpp b/src/Optimization/hiopNlpTransforms.hpp
index 9c7f37d61..d0634bf7f 100644
--- a/src/Optimization/hiopNlpTransforms.hpp
+++ b/src/Optimization/hiopNlpTransforms.hpp
@@ -153,7 +153,7 @@ class hiopFixedVarsRemover : public hiopNlpTransformation
   hiopFixedVarsRemover(hiopNlpFormulation* nlp,
                        const hiopVector& xl,
                        const hiopVector& xu,
-                       const double& fixedVarTol,
+                       const double& fixed_var_tol,
                        const size_type& numFixedVars,
                        const size_type& numFixedVars_local);
   ~hiopFixedVarsRemover();
@@ -287,7 +287,7 @@ class hiopFixedVarsRemover : public hiopNlpTransformation
   size_type n_fixed_vars_local;
   size_type n_fixed_vars;
 
-  double fixedVarTol;
+  double fixed_var_tol_;
 
   size_type n_fs; //full-space n
   size_type n_rs; //reduced-space n
@@ -326,20 +326,20 @@ class hiopFixedVarsRelaxer : public hiopNlpTransformation
   virtual ~hiopFixedVarsRelaxer();
 
   /* number of vars in the NLP after the tranformation */
-  inline size_type n_post()  { /*assert(xl_copy);*/ return n_vars; } //xl_copy->get_size(); }
+  inline size_type n_post() {return n_vars;}
   /* number of vars in the NLP to which the tranformation is to be applied */
-  virtual size_type n_pre () { /*assert(xl_copy);*/ return n_vars; } //xl_copy->get_size(); }
+  virtual size_type n_pre() {return n_vars;}
 
-  inline size_type n_post_local()  { return n_vars_local; } //xl_copy->get_local_size(); }
-  inline size_type n_pre_local()  { return n_vars_local; } //xl_copy->get_local_size(); }
+  inline size_type n_post_local() {return n_vars_local;}
+  inline size_type n_pre_local() {return n_vars_local;}
 
   inline bool setup() { return true; }
 
   void relax(const double& fixed_var_tol, const double& fixed_var_perturb, 
 	     hiopVector& xl, hiopVector& xu);
 private:
-  hiopVector*xl_copy, *xu_copy;
-  size_type  n_vars; int n_vars_local;
+  size_type n_vars;
+  size_type n_vars_local;
 };
 
 /** 
@@ -514,10 +514,10 @@ class hiopBoundsRelaxer : public hiopNlpTransformation
                     const hiopVector& du);
   virtual ~hiopBoundsRelaxer();
 
-  inline size_type n_post()  { /*assert(xl_copy);*/ return n_vars; }
-  virtual size_type n_pre () { /*assert(xl_copy);*/ return n_vars; }
-  inline size_type n_post_local()  { return n_vars_local; }
-  inline size_type n_pre_local()  { return n_vars_local; }
+  inline size_type n_post() {return n_vars;}
+  virtual size_type n_pre() {return n_vars;}
+  inline size_type n_post_local() {return n_vars_local;}
+  inline size_type n_pre_local() {return n_vars_local;}
   inline bool setup() { return true; }
 
   inline hiopVector* apply_to_x(hiopVector& x)
diff --git a/tests/LinAlg/vectorTests.hpp b/tests/LinAlg/vectorTests.hpp
index 225be242a..78d2f820e 100644
--- a/tests/LinAlg/vectorTests.hpp
+++ b/tests/LinAlg/vectorTests.hpp
@@ -2050,6 +2050,135 @@ class VectorTests : public TestBase
     return reduceReturn(fail, &x);
   }
 
+  /**
+   * @brief Test:
+   * the number of identical elements between x and y, i.e., x[i] == y[i] 
+   */
+  bool vector_num_match(hiop::hiopVector& x, hiop::hiopVector& y, const int rank)
+  {
+    const local_ordinal_type Nx = x.get_size();
+    int fail = 0;
+    x.setToConstant(one);
+    y.setToConstant(one);
+    
+    real_type actual = x.num_match(y);
+    real_type expected = Nx;
+    
+    fail += !isEqual(expected, actual);
+
+    if(rank == 0) {
+      setLocalElement(&x, getLocalSize(&x) - 1, two);
+    }
+    actual = x.num_match(y);
+    expected = Nx - 1;
+    fail += !isEqual(expected, actual);
+
+    printMessage(fail, __func__, rank);
+    return reduceReturn(fail, &x);
+  }
+
+  /**
+   * @brief Test that hiop correctly processes variable bounds
+   * 
+   * @note This is local method only
+   */
+  bool vector_process_bounds(hiop::hiopVector& xl,
+                             hiop::hiopVector& xu,
+                             hiop::hiopVector& ixl,
+                             hiop::hiopVector& ixu,
+                             const int rank = 0)
+  {
+    const local_ordinal_type N = getLocalSize(&xl);
+    assert(N == getLocalSize(&xu));
+    assert(N == getLocalSize(&ixl));
+    assert(N == getLocalSize(&ixu));
+    assert(N >= 3); // only test N>=3
+    int fail = 0;
+ 
+    int n_low = 0;
+    int n_upp = 0;
+    int n_lu = 0;
+    int n_fixed = 0;
+    double fixed_var_tol = 1e-8;
+
+    // xl = [1, .., 1, -inf]
+    xl.setToConstant(one);
+    setLocalElement(&xl, N-1, -one/zero);
+
+    // xl = [inf, 1, 2, .., 2]
+    xu.setToConstant(two);
+    setLocalElement(&xu, 0, one/zero);
+    setLocalElement(&xu, 1, one);
+
+    xl.process_bounds_local(xu, ixl, ixu, n_low, n_upp, n_lu, n_fixed, fixed_var_tol);
+
+    // Check that the last element of rank zero's vector is
+    // zero, and that x_val was added to all other elements
+    fail += verifyAnswer(&ixl,
+      [=] (local_ordinal_type i) -> real_type
+      {
+        return (i == N-1) ? 0. : 1.;
+      });
+
+    fail += verifyAnswer(&ixu,
+      [=] (local_ordinal_type i) -> real_type
+      {
+        return (i == 0) ? 0. : 1.;
+      });
+    
+    fail += (n_low != N-1);
+    fail += (n_upp != N-1);
+    fail += (n_lu != N-2);
+    fail += (n_fixed != 1);
+
+    printMessage(fail, __func__, rank);
+    return reduceReturn(fail, &xl);
+  }
+
+  /**
+   * @brief Test that hiop correctly relaxes variable bounds
+   * 
+   * @note This is local method only
+   */
+  bool vector_relax_bounds(hiop::hiopVector& xl,
+                           hiop::hiopVector& xu,
+                           const int rank = 0)
+  {
+    const local_ordinal_type N = getLocalSize(&xl);
+    assert(N == getLocalSize(&xu));
+    int fail = 0;
+
+    double fixed_var_tol = 1e-8;
+    double fixed_var_perturb = 1e-1;
+
+    // xl = [1, .., 1, 2]
+    xl.setToConstant(one);
+    setLocalElement(&xl, N-1, two);
+
+    // xl = [2, .., 2]
+    xu.setToConstant(two);
+
+    xl.relax_bounds_vec(xu, fixed_var_tol, fixed_var_perturb);
+
+    // Check that the last element of rank zero's vector is
+    // zero, and that x_val was added to all other elements
+    fail += verifyAnswer(&xl,
+      [=] (local_ordinal_type i) -> real_type
+      {
+        return (i == N-1) ? 1.8 : one;
+      });
+
+    fail += verifyAnswer(&xu,
+      [=] (local_ordinal_type i) -> real_type
+      {
+        return (i == N-1) ? 2.2 : two;
+      });
+
+    printMessage(fail, __func__, rank);
+    return reduceReturn(fail, &xl);
+  }
+
+
   /// Returns element _i_ of vector _x_.
   real_type getLocalElement(hiop::hiopVector* x, local_ordinal_type i)
   {
diff --git a/tests/testVector.cpp b/tests/testVector.cpp
index 65834293a..c1863370e 100644
--- a/tests/testVector.cpp
+++ b/tests/testVector.cpp
@@ -326,6 +326,9 @@ int runTests(const char* mem_space, MPI_Comm comm)
 
   fail += test.vectorMatchesPattern(*x, *y, rank);
   fail += test.vectorAdjustDuals_plh(*x, *y, *z, *a, rank);
+  fail += test.vector_num_match(*x, *y, rank);
+  fail += test.vector_process_bounds(*x, *y, *z, *a, rank);
+  fail += test.vector_relax_bounds(*x, *y, rank);
 
   if (rank == 0)
   {
@@ -334,7 +337,7 @@ int runTests(const char* mem_space, MPI_Comm comm)
     fail += test.vectorIsfinite(*v);
   }
 
-  // TODO: remove
+  // TODO: remove?
   //fail += test.vector_is_equal(*x, *y, rank);
 
   delete a;