Commit d5e90db

Merge pull request #516 from oooo26/master

Implement clipping on beta

Mamba413 committed Aug 5, 2023
2 parents d7e537c + 9b40936
Showing 10 changed files with 126 additions and 20 deletions.
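In short: fit() gains optional beta_low / beta_high arguments, and every GLM solver truncates its fitted coefficients into that interval after each primary fit. A minimal sketch of the intended semantics (illustrative only; NumPy's clip stands in for the C++ trunc helper extended in src/utilities.h):

    import numpy as np

    def clip_beta(beta, beta_low=-np.inf, beta_high=np.inf):
        # Element-wise truncation into [beta_low, beta_high],
        # mirroring the trunc() helper in src/utilities.h.
        return np.clip(beta, beta_low, beta_high)

    print(clip_beta(np.array([-11.0, 3.0, 11.0]), -10, 10))  # [-10.   3.  10.]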
16 changes: 14 additions & 2 deletions python/abess/bess_base.py

@@ -1,3 +1,4 @@
+import sys
 import numbers
 import warnings
 import numpy as np
@@ -207,7 +208,9 @@ def fit(self,
             is_normal=True,
             sample_weight=None,
             cv_fold_id=None,
-            sparse_matrix=False):
+            sparse_matrix=False,
+            beta_low=None,
+            beta_high=None):
         r"""
         The fit function is used to transfer
         the information of data and return the fit result.
@@ -584,6 +587,15 @@ def fit(self,
         else:
             always_select_list = np.array(self.always_select, dtype="int32")

+        # beta range
+        if beta_low is None:
+            beta_low = -sys.float_info.max
+        if beta_high is None:
+            beta_high = sys.float_info.max
+        if beta_low > beta_high:
+            raise ValueError(
+                "Please make sure beta_low <= beta_high.")
+
         # unused
         n_lambda = 100
         early_stop = False
@@ -607,7 +619,7 @@
             self.primary_model_fit_epsilon, early_stop,
             self.approximate_Newton, self.thread, self.covariance_update,
             sparse_matrix, self.splicing_type, self.important_search,
-            A_init_list, self.fit_intercept)
+            A_init_list, self.fit_intercept, beta_low, beta_high)

         self.coef_ = result[0].squeeze()
         self.intercept_ = result[1].squeeze()
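For reference, a quick usage sketch of the new keyword arguments (runnable against a build that includes this change; the data is made up). As the code above shows, None maps to +/-sys.float_info.max and an inverted range raises ValueError:

    import abess
    import numpy as np

    np.random.seed(1)
    X = np.random.randn(100, 20)
    y = X[:, 0] * 5.0 + np.random.randn(100)

    model = abess.LinearRegression()
    model.fit(X, y, beta_low=0, beta_high=10)   # two-sided bound on every coefficient
    model.fit(X, y, beta_low=0)                 # one-sided: beta_high stays unbounded

    try:
        model.fit(X, y, beta_low=1, beta_high=0)
    except ValueError as e:
        print(e)  # Please make sure beta_low <= beta_high.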
9 changes: 9 additions & 0 deletions python/pytest/test_check.py

@@ -209,6 +209,15 @@ def test_base():
     else:
         assert False

+    # clipping
+    try:
+        model = abess.LinearRegression()
+        model.fit([[1]], [1], beta_low=1, beta_high=0)
+    except ValueError as e:
+        print(e)
+    else:
+        assert False
+
     # incompatible shape
     try:
         model.fit([1, 1, 1], [1])
50 changes: 50 additions & 0 deletions python/pytest/test_flow.py

@@ -142,6 +142,56 @@ def test_normalize():
         assert_value(model1.coef_, model2.coef_)
         assert_value(model1.intercept_, model2.intercept_)

+    @staticmethod
+    def test_clipping():
+        np.random.seed(0)
+        n = 200
+        p = 20
+        k = 5
+
+        # range: (0, 10)
+        coef1 = np.zeros(p)
+        coef1[np.random.choice(p, k, replace=False)
+              ] = np.random.uniform(0, 10, size=k)
+        data1 = abess.make_glm_data(
+            n=n, p=p, k=k, family='gaussian', coef_=coef1)
+
+        model1 = abess.LinearRegression()
+        model1.fit(data1.x, data1.y, beta_low=0, beta_high=10)
+        assert_value(model1.coef_, data1.coef_, 0.1, 0.1)
+
+        # range: (-100, -90)
+        coef2 = np.zeros(p)
+        coef2[np.random.choice(p, k, replace=False)
+              ] = np.random.uniform(-100, -90, size=k)
+        data2 = abess.make_glm_data(
+            n=n, p=p, k=k, family='gaussian', coef_=coef2)
+
+        model2 = abess.LinearRegression()
+        model2.fit(data2.x, data2.y, beta_low=-100, beta_high=-90)
+        assert_value(model2.coef_, data2.coef_, 0.1, 0.1)
+
+        # one-side
+        model1.fit(data1.x, data1.y, beta_low=0)
+        assert_value(model1.coef_, data1.coef_, 0.1, 0.1)
+        model1.fit(data1.x, data1.y, beta_high=10)
+        assert_value(model1.coef_, data1.coef_, 0.1, 0.1)
+
+        # force range
+        coef3 = np.zeros(p)
+        coef3[np.random.choice(p, k, replace=False)
+              ] = np.random.choice([-11, 11], size=k)
+        data3 = abess.make_glm_data(
+            n=n, p=p, k=k, family='gaussian', coef_=coef3)
+
+        model3 = abess.LinearRegression()
+        model3.fit(data3.x, data3.y, beta_low=-10, beta_high=10, is_normal=False)
+        assert_fit(model3.coef_, data3.coef_)
+        assert (model3.coef_ >= -10).all()
+        assert (model3.coef_ <= 10).all()
+        assert (model3.coef_[data3.coef_ == -11] == -10).all()
+        assert (model3.coef_[data3.coef_ == 11] == 10).all()
+
     @staticmethod
     def test_possible_input():
         np.random.seed(2)
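A side note on the last case: the force-range test passes is_normal=False, presumably because fitting on normalized data and rescaling the coefficients afterwards would break the guarantee that the returned values sit exactly on the -10/10 boundary.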
4 changes: 2 additions & 2 deletions python/src/pywrap.cpp

@@ -14,13 +14,13 @@ std::tuple<Eigen::MatrixXd, Eigen::VectorXd, double, double, double> pywrap_GLM(
     double lambda_max, int n_lambda, int screening_size, Eigen::VectorXi always_select_Vec,
     int primary_model_fit_max_iter, double primary_model_fit_epsilon, bool early_stop, bool approximate_Newton,
     int thread, bool covariance_update, bool sparse_matrix, int splicing_type, int sub_search,
-    Eigen::VectorXi A_init_Vec, bool fit_intercept) {
+    Eigen::VectorXi A_init_Vec, bool fit_intercept, double beta_low, double beta_high) {
     List mylist = abessGLM_API(x_Mat, y_Mat, n, p, normalize_type, weight_Vec, algorithm_type, model_type, max_iter,
                                exchange_num, path_type, is_warm_start, eval_type, ic_coef, Kfold, sequence_Vec,
                                lambda_sequence_Vec, s_min, s_max, lambda_min, lambda_max, n_lambda, screening_size,
                                gindex_Vec, always_select_Vec, primary_model_fit_max_iter, primary_model_fit_epsilon,
                                early_stop, approximate_Newton, thread, covariance_update, sparse_matrix, splicing_type,
-                               sub_search, cv_fold_id_Vec, A_init_Vec, fit_intercept);
+                               sub_search, cv_fold_id_Vec, A_init_Vec, fit_intercept, beta_low, beta_high);

     std::tuple<Eigen::MatrixXd, Eigen::VectorXd, double, double, double> output;
     int y_col = y_Mat.cols();
13 changes: 13 additions & 0 deletions src/Algorithm.h

@@ -114,6 +114,8 @@ class Algorithm {
     int sub_search;  // size of sub_searching in splicing
     int U_size;

+    double beta_range[2] = {-DBL_MAX, DBL_MAX};
+
     Algorithm() = default;

     virtual ~Algorithm(){};
@@ -161,6 +163,17 @@

     void update_exchange_num(int exchange_num) { this->exchange_num = exchange_num; }

+    void update_beta_range(double beta_low, double beta_high) {
+        if (beta_low > beta_high) {
+            this->beta_range[0] = -DBL_MAX;
+            this->beta_range[1] = DBL_MAX;
+        } else {
+            this->beta_range[0] = beta_low;
+            this->beta_range[1] = beta_high;
+        }
+        // std::cout << "beta range: " << beta_low << "," << beta_high << std::endl;
+    }
+
     virtual void update_tau(int train_n, int N) {
         if (train_n == 1) {
             this->tau = 0.0;
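Worth noting: unlike the Python layer, which raises a ValueError up front, update_beta_range quietly falls back to the unbounded range (-DBL_MAX, DBL_MAX) when handed an inverted interval.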
8 changes: 8 additions & 0 deletions src/AlgorithmGLM.h

@@ -79,6 +79,7 @@ class _abessGLM : public Algorithm<T1, T2, T3, T4> {
             // Fitting method 2: Iteratively Reweighted Least Squares
             return this->_IRLS_fit(X, y, weights, beta, coef0, loss0, A, g_index, g_size);
         }
+        trunc(beta, this->beta_range);
         return true;
     };
     virtual double loss_function(T4 &X, T1 &y, Eigen::VectorXd &weights, T2 &beta, T3 &coef0, Eigen::VectorXi &A,
@@ -421,6 +422,8 @@ class abessLm : public _abessGLM<Eigen::VectorXd, Eigen::VectorXd, double, T4> {
         Eigen::VectorXd beta_full = XTX.ldlt().solve(XTy);

         extract_beta_coef0(beta_full, beta, coef0, this->fit_intercept);
+
+        trunc(beta, this->beta_range);
         return true;
     };

@@ -777,6 +780,7 @@ class abessCox : public _abessGLM<Eigen::VectorXd, Eigen::VectorXd, double, T4>
         }

         beta = beta0;
+        trunc(beta, this->beta_range);
         return true;
     };

@@ -990,6 +994,7 @@ class abessMLm : public _abessGLM<Eigen::MatrixXd, Eigen::MatrixXd, Eigen::Vecto
         Eigen::MatrixXd beta0 = XTX.ldlt().solve(X.adjoint() * y);

         extract_beta_coef0(beta0, beta, coef0, this->fit_intercept);
+        trunc(beta, this->beta_range);
         return true;
         // if (X.cols() == 0)
         // {
@@ -1351,6 +1356,7 @@ class abessMultinomial : public _abessGLM<Eigen::MatrixXd, Eigen::MatrixXd, Eige
         }

         extract_beta_coef0(beta0, beta, coef0, this->fit_intercept);
+        trunc(beta, this->beta_range);
         return true;
     };

@@ -1566,6 +1572,7 @@ class abessGamma : public _abessGLM<Eigen::VectorXd, Eigen::VectorXd, double, T4
             // Fitting method 2: Iteratively Reweighted Least Squares
             return this->_IRLS_fit(X, y, weights, beta, coef0, loss0, A, g_index, g_size);
         }
+        trunc(beta, this->beta_range);
         return true;
     };
 };
@@ -1799,6 +1806,7 @@ class abessOrdinal : public _abessGLM<Eigen::MatrixXd, Eigen::MatrixXd, Eigen::V
             beta.col(i) = coef.tail(p).eval();
         }
         coef0.head(k) = coef.head(k);
+        trunc(beta, this->beta_range);
         return true;
     }
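The pattern is the same in every solver: trunc() runs at the end of the primary fitting routine, so coefficients are clipped after each refit of the active set during splicing, not just once on the final estimate.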
27 changes: 15 additions & 12 deletions src/api.cpp

@@ -36,7 +36,8 @@ List abessGLM_API(Eigen::MatrixXd x, Eigen::MatrixXd y, int n, int p, int normal
                   Eigen::VectorXi g_index, Eigen::VectorXi always_select, int primary_model_fit_max_iter,
                   double primary_model_fit_epsilon, bool early_stop, bool approximate_Newton, int thread,
                   bool covariance_update, bool sparse_matrix, int splicing_type, int sub_search,
-                  Eigen::VectorXi cv_fold_id, Eigen::VectorXi A_init, bool fit_intercept) {
+                  Eigen::VectorXi cv_fold_id, Eigen::VectorXi A_init, bool fit_intercept, double beta_low,
+                  double beta_high) {
 #ifdef _OPENMP
     // Eigen::initParallel();
     int max_thread = omp_get_max_threads();
@@ -190,12 +191,12 @@ List abessGLM_API(Eigen::MatrixXd x, Eigen::MatrixXd y, int n, int p, int normal
             out_result = abessWorkflow<Eigen::VectorXd, Eigen::VectorXd, double, Eigen::MatrixXd>(
                 x, y_vec, n, p, normalize_type, weight, algorithm_type, path_type, is_warm_start, ic_type, ic_coef,
                 Kfold, parameters, screening_size, g_index, early_stop, thread, sparse_matrix, cv_fold_id, A_init,
-                algorithm_list_uni_dense);
+                beta_low, beta_high, algorithm_list_uni_dense);
         } else {
             out_result = abessWorkflow<Eigen::MatrixXd, Eigen::MatrixXd, Eigen::VectorXd, Eigen::MatrixXd>(
                 x, y, n, p, normalize_type, weight, algorithm_type, path_type, is_warm_start, ic_type, ic_coef, Kfold,
-                parameters, screening_size, g_index, early_stop, thread, sparse_matrix, cv_fold_id, A_init,
-                algorithm_list_mul_dense);
+                parameters, screening_size, g_index, early_stop, thread, sparse_matrix, cv_fold_id, A_init, beta_low,
+                beta_high, algorithm_list_mul_dense);
         }
     } else {
         Eigen::SparseMatrix<double> sparse_x(n, p);
@@ -220,12 +221,12 @@ List abessGLM_API(Eigen::MatrixXd x, Eigen::MatrixXd y, int n, int p, int normal
             out_result = abessWorkflow<Eigen::VectorXd, Eigen::VectorXd, double, Eigen::SparseMatrix<double>>(
                 sparse_x, y_vec, n, p, normalize_type, weight, algorithm_type, path_type, is_warm_start, ic_type,
                 ic_coef, Kfold, parameters, screening_size, g_index, early_stop, thread, sparse_matrix, cv_fold_id,
-                A_init, algorithm_list_uni_sparse);
+                A_init, beta_low, beta_high, algorithm_list_uni_sparse);
         } else {
             out_result = abessWorkflow<Eigen::MatrixXd, Eigen::MatrixXd, Eigen::VectorXd, Eigen::SparseMatrix<double>>(
                 sparse_x, y, n, p, normalize_type, weight, algorithm_type, path_type, is_warm_start, ic_type, ic_coef,
                 Kfold, parameters, screening_size, g_index, early_stop, thread, sparse_matrix, cv_fold_id, A_init,
-                algorithm_list_mul_sparse);
+                beta_low, beta_high, algorithm_list_mul_sparse);
         }
     }

@@ -305,6 +306,7 @@ List abessPCA_API(Eigen::MatrixXd x, int n, int p, int normalize_type, Eigen::Ve
 #endif
     List out_result_next;
     int num = 0;
+    double beta_low = -DBL_MAX, beta_high = DBL_MAX;

     if (!sparse_matrix) {
         while (num++ < pca_num) {
@@ -324,7 +326,7 @@
             out_result_next = abessWorkflow<Eigen::VectorXd, Eigen::VectorXd, double, Eigen::MatrixXd>(
                 x, y_vec, n, p, normalize_type, weight, algorithm_type, path_type, is_warm_start, ic_type, ic_coef,
                 Kfold, parameters, screening_size, g_index, early_stop, thread, sparse_matrix, cv_fold_id, A_init,
-                algorithm_list_uni_dense);
+                beta_low, beta_high, algorithm_list_uni_dense);
             Eigen::VectorXd beta_next;
 #ifdef R_BUILD
             beta_next = out_result_next["beta"];
@@ -406,7 +408,7 @@ List abessPCA_API(Eigen::MatrixXd x, int n, int p, int normalize_type, Eigen::Ve
             out_result_next = abessWorkflow<Eigen::VectorXd, Eigen::VectorXd, double, Eigen::SparseMatrix<double>>(
                 sparse_x, y_vec, n, p, normalize_type, weight, algorithm_type, path_type, is_warm_start, ic_type,
                 ic_coef, Kfold, parameters, screening_size, g_index, early_stop, thread, sparse_matrix, cv_fold_id,
-                A_init, algorithm_list_uni_sparse);
+                A_init, beta_low, beta_high, algorithm_list_uni_sparse);
             Eigen::VectorXd beta_next;
 #ifdef R_BUILD
             beta_next = out_result_next["beta"];
@@ -486,6 +488,7 @@ List abessRPCA_API(Eigen::MatrixXd x, int n, int p, int max_iter, int exchange_n
     int model_type = 10, algorithm_type = 6;
     int Kfold = 1;
     int normalize_type = 0;
+    double beta_low = -DBL_MAX, beta_high = DBL_MAX;
     Eigen::VectorXi cv_fold_id = Eigen::VectorXi::Zero(0);
     Eigen::VectorXd weight = Eigen::VectorXd::Ones(n);
     Eigen::VectorXd y_vec = Eigen::VectorXd::Zero(n);
@@ -519,8 +522,8 @@ List abessRPCA_API(Eigen::MatrixXd x, int n, int p, int max_iter, int exchange_n
     if (!sparse_matrix) {
         out_result = abessWorkflow<Eigen::VectorXd, Eigen::VectorXd, double, Eigen::MatrixXd>(
             x, y_vec, n, p, normalize_type, weight, algorithm_type, path_type, is_warm_start, ic_type, ic_coef, Kfold,
-            parameters, screening_size, g_index, early_stop, thread, sparse_matrix, cv_fold_id, A_init,
-            algorithm_list_uni_dense);
+            parameters, screening_size, g_index, early_stop, thread, sparse_matrix, cv_fold_id, A_init, beta_low,
+            beta_high, algorithm_list_uni_dense);

     } else {
         Eigen::SparseMatrix<double> sparse_x(n, p);
@@ -541,8 +544,8 @@

         out_result = abessWorkflow<Eigen::VectorXd, Eigen::VectorXd, double, Eigen::SparseMatrix<double>>(
             sparse_x, y_vec, n, p, normalize_type, weight, algorithm_type, path_type, is_warm_start, ic_type, ic_coef,
-            Kfold, parameters, screening_size, g_index, early_stop, thread, sparse_matrix, cv_fold_id, A_init,
-            algorithm_list_uni_sparse);
+            Kfold, parameters, screening_size, g_index, early_stop, thread, sparse_matrix, cv_fold_id, A_init, beta_low,
+            beta_high, algorithm_list_uni_sparse);
     }

     for (int i = 0; i < algorithm_list_size; i++) {
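Also visible here: only abessGLM_API exposes the new bounds. abessPCA_API and abessRPCA_API define local beta_low/beta_high fixed at -DBL_MAX/DBL_MAX, so (sparse) PCA and robust PCA keep their unconstrained behavior.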
3 changes: 2 additions & 1 deletion src/api.h

@@ -112,7 +112,8 @@ List abessGLM_API(Eigen::MatrixXd x, Eigen::MatrixXd y, int n, int p, int normal
                   Eigen::VectorXi g_index, Eigen::VectorXi always_select, int primary_model_fit_max_iter,
                   double primary_model_fit_epsilon, bool early_stop, bool approximate_Newton, int thread,
                   bool covariance_update, bool sparse_matrix, int splicing_type, int sub_search,
-                  Eigen::VectorXi cv_fold_id, Eigen::VectorXi A_init, bool fit_intercept);
+                  Eigen::VectorXi cv_fold_id, Eigen::VectorXi A_init, bool fit_intercept, double beta_low,
+                  double beta_high);

 List abessPCA_API(Eigen::MatrixXd x, int n, int p, int normalize_type, Eigen::VectorXd weight, Eigen::MatrixXd sigma,
                   int max_iter, int exchange_num, int path_type, bool is_warm_start, int ic_type, double ic_coef,
11 changes: 9 additions & 2 deletions src/utilities.h

@@ -131,15 +131,15 @@ class Parameters {
         Rcout << "==> Parameter List:" << endl;
 #else
         std::cout << "==> Parameter List:" << endl;
-#endif
+#endif
         for (int i = 0; i < (this->sequence).size(); i++) {
             int support_size = (this->sequence(i)).support_size;
             double lambda = (this->sequence(i)).lambda;
 #ifdef R_BUILD
             Rcout << "  support_size = " << support_size << ", lambda = " << lambda << endl;
 #else
             std::cout << "  support_size = " << support_size << ", lambda = " << lambda << endl;
-#endif
+#endif
         }
     }
 };
@@ -508,6 +508,13 @@ void trunc(Eigen::VectorXd &vec, double *trunc_range) {
     }
 }

+void trunc(Eigen::MatrixXd &mat, double *trunc_range) {
+    for (int i = 0; i < mat.rows(); i++)
+        for (int j = 0; j < mat.cols(); j++) {
+            trunc(mat(i, j), trunc_range);
+        }
+}
+
 Eigen::MatrixXd rowwise_add(Eigen::MatrixXd &m, Eigen::VectorXd &v) { return m.rowwise() + v.transpose(); }

 Eigen::MatrixXd rowwise_add(Eigen::MatrixXd &m, double &v) {
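The new matrix overload applies the existing scalar trunc element-wise, which is what lets the multi-response solvers (abessMLm, abessMultinomial, abessOrdinal) share the same clipping path as the univariate ones. (The paired #endif lines in the first hunk are a whitespace-only cleanup; the copied page loses the indentation difference.)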
5 changes: 4 additions & 1 deletion src/workflow.h

@@ -76,13 +76,16 @@ template <class T1, class T2, class T3, class T4>
 List abessWorkflow(T4 &x, T1 &y, int n, int p, int normalize_type, Eigen::VectorXd weight, int algorithm_type,
                    int path_type, bool is_warm_start, int eval_type, double ic_coef, int Kfold, Parameters parameters,
                    int screening_size, Eigen::VectorXi g_index, bool early_stop, int thread, bool sparse_matrix,
-                   Eigen::VectorXi &cv_fold_id, Eigen::VectorXi &A_init,
+                   Eigen::VectorXi &cv_fold_id, Eigen::VectorXi &A_init, double beta_low, double beta_high,
                    vector<Algorithm<T1, T2, T3, T4> *> algorithm_list) {
 #ifndef R_BUILD
     std::srand(123);
 #endif

     int algorithm_list_size = algorithm_list.size();
+    for (int i = 0; i < algorithm_list_size; i++) {
+        algorithm_list[i]->update_beta_range(beta_low, beta_high);
+    }

     // Size of the candidate set:
     // usually it is equal to `p`, the number of variable,
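This is the single point where the bounds enter the C++ core: algorithm_list can hold more than one Algorithm object (for example, when cross-validation folds run on separate threads), so the same beta range is pushed into every entry before the splicing path starts.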
