Add fit_impl (#5096)
jonpsy authored Jul 14, 2020
1 parent 73e795b commit 6351bcb
Showing 12 changed files with 41 additions and 38 deletions.
15 changes: 15 additions & 0 deletions src/shogun/preprocessor/DensePreprocessor.cpp
@@ -116,6 +116,21 @@ DensePreprocessor<ST>::inverse_transform(std::shared_ptr<Features> features, boo
     return std::make_shared<DenseFeatures<ST>>(feat_matrix);
 }

+template <class ST>
+void DensePreprocessor<ST>::fit(std::shared_ptr<Features> f)
+{
+    require(f, "No features provided");
+
+    auto dense_features = f->as<DenseFeatures<ST>>();
+    auto num_features = dense_features->get_num_features();
+    require(num_features > 0, "Dimension of provided features {} must be positive", num_features);
+
+    auto feature_matrix = dense_features->get_feature_matrix();
+    fit_impl(feature_matrix);
+
+    m_fitted.store(true);
+}
+
 template <class ST>
 SGMatrix<ST>
 DensePreprocessor<ST>::inverse_apply_to_matrix(SGMatrix<ST> matrix)
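This hunk is the heart of the commit: input validation, the cast to DenseFeatures, and the fitted-flag bookkeeping now live in the non-virtual base-class fit(), while subclasses contribute only the model-specific work through the protected fit_impl() hook, the classic template-method (non-virtual interface) pattern. A minimal self-contained sketch of the same shape, using hypothetical names (Matrix, MeanCenterer) rather than Shogun's API:

#include <atomic>
#include <numeric>
#include <stdexcept>
#include <vector>

// Hypothetical stand-ins for SGMatrix / DensePreprocessor, for illustration only.
using Matrix = std::vector<std::vector<double>>; // one row per feature

class Preprocessor
{
public:
    virtual ~Preprocessor() = default;

    // Non-virtual entry point: validate once, dispatch to the hook,
    // set the fitted flag exactly once.
    void fit(const Matrix& features)
    {
        if (features.empty())
            throw std::invalid_argument("Dimension of provided features must be positive");
        fit_impl(features);
        m_fitted.store(true);
    }

    bool is_fitted() const
    {
        return m_fitted.load();
    }

protected:
    // Hook with a no-op default, mirroring DensePreprocessor<ST>::fit_impl.
    virtual void fit_impl(const Matrix&) {}

    std::atomic<bool> m_fitted{false};
};

class MeanCenterer : public Preprocessor
{
protected:
    // Only the model-specific statistics live here; validation and
    // bookkeeping stay in the base class.
    void fit_impl(const Matrix& features) override
    {
        m_means.clear();
        for (const auto& row : features)
            m_means.push_back(row.empty() ? 0.0 : std::accumulate(row.begin(), row.end(), 0.0) / row.size());
    }

    std::vector<double> m_means;
};

Because every caller enters through fit(), no subclass can forget the dimension check or the m_fitted update.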
6 changes: 6 additions & 0 deletions src/shogun/preprocessor/DensePreprocessor.h
@@ -62,6 +62,8 @@ template <class ST> class DensePreprocessor : public Preprocessor
     /// return a type of preprocessor
     EPreprocessorType get_type() const override;

+    void fit(std::shared_ptr<Features>) override;
+
 protected:
     /** Apply preprocessor on matrix. Subclasses should try to apply in
      * place to avoid copying.
@@ -76,6 +78,10 @@ template <class ST> class DensePreprocessor : public Preprocessor
      * @return the matrix after applying the preprocessor
      */
     virtual SGMatrix<ST> inverse_apply_to_matrix(SGMatrix<ST> matrix);
+
+    virtual void fit_impl(const SGMatrix<ST>& feature_matrix)
+    {
+    }
 };

 }
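Note that fit_impl() defaults to an empty body rather than being pure virtual. That keeps the dense code path optional: preprocessors whose fitting needs more than a feature matrix (FisherLDA's fit() takes labels as well, and KernelPCA needs its kernel) continue to override fit() directly, which is why the next two files only touch their fitted-flag writes.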
2 changes: 2 additions & 0 deletions src/shogun/preprocessor/FisherLDA.cpp
@@ -136,6 +136,8 @@ void FisherLDA::fit(std::shared_ptr<Features> features, std::shared_ptr<Labels>
         solver_canvar(dense_features, mc);
     else
         solver_classic(dense_features, mc);
+
+    m_fitted.store(true);
 }

 void FisherLDA::solver_canvar(
3 changes: 1 addition & 2 deletions src/shogun/preprocessor/KernelPCA.cpp
@@ -35,7 +35,6 @@ KernelPCA::KernelPCA(std::shared_ptr<Kernel> k) : Preprocessor()

 void KernelPCA::init()
 {
-    m_fitted = false;
     m_init_features = NULL;
     m_transformation_matrix = SGMatrix<float64_t>();
     m_bias_vector = SGVector<float64_t>();
@@ -107,7 +106,7 @@ void KernelPCA::fit(std::shared_ptr<Features> features)
     m_bias_vector = SGVector<float64_t>(m_target_dim);
     linalg::matrix_prod(m_transformation_matrix, bias_tmp, m_bias_vector, true);

-    m_fitted = true;
+    m_fitted.store(true);
     io::info("Done");
 }
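The bare m_fitted = true becomes m_fitted.store(true), and the m_fitted = false default is dropped from init(). Both changes point to the flag having moved into the common base class as a std::atomic<bool> with an in-class initializer; that is an inference from this diff, not something it shows directly.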
5 changes: 1 addition & 4 deletions src/shogun/preprocessor/PCA.cpp
@@ -90,11 +90,9 @@ PCA::~PCA()
 {
 }

-void PCA::fit(std::shared_ptr<Features> features)
+void PCA::fit_impl(const SGMatrix<float64_t>& feature_matrix)
 {

-    auto feature_matrix =
-        features->as<DenseFeatures<float64_t>>()->get_feature_matrix();
     auto num_vectors = feature_matrix.num_cols;
     auto num_features = feature_matrix.num_rows;
     io::info("num_examples: {} num_features: {}", num_vectors, num_features);
@@ -127,7 +125,6 @@ void PCA::fit(std::shared_ptr<Features> features)

     // restore feature matrix
     fmatrix = fmatrix.colwise() + data_mean;
-    m_fitted = true;
 }

 void PCA::init_with_evd(const SGMatrix<float64_t>& feature_matrix, int32_t max_dim_allowed)
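PCA's fit_impl() reads the shapes straight off the matrix: Shogun's dense feature matrices hold one example per column, so num_cols is the number of vectors and num_rows the feature dimension. The same substitution (get_num_vectors()/get_num_features() calls replaced by num_cols/num_rows) recurs in PruneVarSubMean and RescaleFeatures below.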
4 changes: 2 additions & 2 deletions src/shogun/preprocessor/PCA.h
@@ -130,8 +130,6 @@ class PCA : public DensePreprocessor<float64_t>
     /** destructor */
     ~PCA() override;

-    void fit(std::shared_ptr<Features> features) override;
-
     /** apply preprocessor to feature vector
      * @param vector feature vector
      * @return processed feature vector
@@ -191,6 +189,8 @@

     SGMatrix<float64_t> apply_to_matrix(SGMatrix<float64_t>) override;

+    void fit_impl(const SGMatrix<float64_t>& feature_matrix) override;
+
 protected:

     /** transformation matrix */
11 changes: 3 additions & 8 deletions src/shogun/preprocessor/PruneVarSubMean.cpp
@@ -27,20 +27,17 @@ PruneVarSubMean::~PruneVarSubMean()

 }

-void PruneVarSubMean::fit(std::shared_ptr<Features> features)
+void PruneVarSubMean::fit_impl(const SGMatrix<float64_t>& feature_matrix)
 {

-    auto simple_features = features->as<DenseFeatures<float64_t>>();
-    auto num_examples = simple_features->get_num_vectors();
-    auto num_features = simple_features->get_num_features();
+    auto num_examples = feature_matrix.num_cols;
+    auto num_features = feature_matrix.num_rows;

     m_idx = SGVector<int32_t>();
     m_std = SGVector<float64_t>();

     SGVector<float64_t> var(num_features);

-    auto feature_matrix = simple_features->get_feature_matrix();
-
     // compute mean
     m_mean = linalg::rowwise_sum(feature_matrix);
     linalg::scale(m_mean, m_mean, 1.0 / num_examples);
@@ -85,7 +82,6 @@ void PruneVarSubMean::fit(std::shared_ptr<Features> features)
     m_num_idx = num_ok;
     m_mean = new_mean;

-    m_fitted = true;
 }

 SGMatrix<float64_t>
@@ -137,7 +133,6 @@ SGVector<float64_t> PruneVarSubMean::apply_to_feature_vector(SGVector<float64_t>

 void PruneVarSubMean::init()
 {
-    m_fitted = false;
     m_divide_by_std = false;
     m_num_idx = 0;
     m_idx = SGVector<int32_t>();
4 changes: 2 additions & 2 deletions src/shogun/preprocessor/PruneVarSubMean.h
@@ -35,8 +35,6 @@ class PruneVarSubMean : public DensePreprocessor<float64_t>
     /** destructor */
     ~PruneVarSubMean() override;

-    /// Fit preprocessor into features
-    void fit(std::shared_ptr<Features> features) override;

     /// apply preproc on single feature vector
     /// result in feature matrix
@@ -51,6 +49,8 @@ class PruneVarSubMean : public DensePreprocessor<float64_t>
 protected:
     SGMatrix<float64_t> apply_to_matrix(SGMatrix<float64_t> matrix) override;

+    void fit_impl(const SGMatrix<float64_t>& feature_matrix) override;
+
 private:
     void init();
     void register_parameters();
6 changes: 2 additions & 4 deletions src/shogun/preprocessor/RFFPreprocessor.cpp
@@ -43,14 +43,12 @@ void RFFPreprocessor::init_basis(int32_t dim_input_space)
     random::fill_array(m_offset, uniform, m_prng);
 }

-void RFFPreprocessor::fit(std::shared_ptr<Features> f)
+void RFFPreprocessor::fit_impl(const SGMatrix<float64_t>& feature_matrix)
 {
-    auto num_features = f->as<DenseFeatures<float64_t>>()->get_num_features();
-    require(num_features > 0, "Dimension of provided features {} must be positive", num_features);
+    auto num_features = feature_matrix.num_rows;
     require(m_kernel, "Kernel not set");

     init_basis(num_features);
-    m_fitted = true;
 }

 SGVector<float64_t> RFFPreprocessor::apply_to_feature_vector(SGVector<float64_t> vector)
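The require(num_features > 0, ...) guard disappears here because the new base-class fit() (first hunk of this commit) performs exactly that check before calling fit_impl(); only the RFF-specific require(m_kernel, ...) remains. Likewise, m_fitted is now set by the base class rather than by each subclass.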
4 changes: 2 additions & 2 deletions src/shogun/preprocessor/RFFPreprocessor.h
@@ -50,8 +50,6 @@ class RFFPreprocessor: public RandomMixin<DensePreprocessor<float64_t>> {
         return C_DENSE;
     };

-    void fit(std::shared_ptr<Features> f) override;
-
     virtual const char* get_name() const override
     {
         return "RFFPreprocessor";
@@ -84,6 +82,8 @@ class RFFPreprocessor: public RandomMixin<DensePreprocessor<float64_t>> {

     SGMatrix<float64_t> apply_to_matrix(SGMatrix<float64_t> matrix) override;

+    void fit_impl(const SGMatrix<float64_t>& feature_matrix) override;
+
     /** Helper method which generates random coefficients and stores in the
      * internal members. This method assumes the kernel has been set.
      *
9 changes: 3 additions & 6 deletions src/shogun/preprocessor/RescaleFeatures.cpp
@@ -21,19 +21,17 @@ RescaleFeatures::~RescaleFeatures()

 }

-void RescaleFeatures::fit(std::shared_ptr<Features> features)
+void RescaleFeatures::fit_impl(const SGMatrix<float64_t>& feature_matrix)
 {
-    auto simple_features = features->as<DenseFeatures<float64_t>>();
-    int32_t num_examples = simple_features->get_num_vectors();
-    int32_t num_features = simple_features->get_num_features();
+    int32_t num_examples = feature_matrix.num_cols;
+    int32_t num_features = feature_matrix.num_rows;
     require(
         num_examples > 1, "number of feature vectors should be at least 2!");

     io::info("Extracting min and range values for each feature");

     m_min = SGVector<float64_t>(num_features);
     m_range = SGVector<float64_t>(num_features);
-    auto feature_matrix = simple_features->get_feature_matrix();
     for (index_t i = 0; i < num_features; i++)
     {
         SGVector<float64_t> vec = feature_matrix.get_row_vector(i);
@@ -60,7 +58,6 @@ void RescaleFeatures::fit(std::shared_ptr<Features> features)
         }
     }

-    m_fitted = true;
 }

 SGMatrix<float64_t>
10 changes: 2 additions & 8 deletions src/shogun/preprocessor/RescaleFeatures.h
@@ -32,14 +32,6 @@ namespace shogun
     /** dtor */
     ~RescaleFeatures() override;

-    /**
-     * Fit preprocessor into features
-     *
-     * @param features the features to derive the min and max values
-     * from.
-     */
-    void fit(std::shared_ptr<Features> features) override;
-
     /**
      * Apply preproc on a single feature vector
      */
@@ -64,6 +56,8 @@
 protected:
     SGMatrix<float64_t> apply_to_matrix(SGMatrix<float64_t> matrix) override;

+    void fit_impl(const SGMatrix<float64_t>& feature_matrix) override;
+
     /** min */
     SGVector<float64_t> m_min;
     /** 1.0/(max[i]-min[i]) */
