|
| 1 | +""" |
| 2 | +Target Generation |
| 3 | +================= |
| 4 | +
|
| 5 | +This example uses the ``iris`` dataset and tests a regression model in which |
| 6 | +the target variable is generated from some features within the cross-validation |
| 7 | +procedure. We will use the Iris dataset and generate a target variable using |
| 8 | +PCA on the petal features. Then, we will evaluate if a regression model can |
| 9 | +predict the generated target from the sepal features. |
| 10 | +
|
| 11 | +.. include:: ../../links.inc |
| 12 | +""" |
| 13 | +# Authors: Federico Raimondo <[email protected]> |
| 14 | +# License: AGPL |
| 15 | + |
| 16 | +from seaborn import load_dataset |
| 17 | +from julearn import run_cross_validation |
| 18 | +from julearn.pipeline import PipelineCreator |
| 19 | +from julearn.utils import configure_logging |
| 20 | + |
| 21 | +############################################################################### |
| 22 | +# Set the logging level to debug to see extra information. |
| 23 | +configure_logging(level="DEBUG") |
| 24 | + |
| 25 | +############################################################################### |
| 26 | +df_iris = load_dataset("iris") |
| 27 | + |
| 28 | + |
| 29 | +############################################################################### |
| 30 | +# As features, we will use the sepal and petal lengths and widths. The target |
| 31 | +# is not a column of the dataset: it will be generated from the petal features. |
| 32 | + |
| 33 | +X = ["sepal_length", "sepal_width", "petal_length", "petal_width"] |
| 34 | +y = "__generated__" # to indicate to julearn that the target will be generated |
| 35 | + |
| 36 | + |
| 37 | +# Define our feature types |
| 38 | +X_types = { |
| 39 | + "sepal": ["sepal_length", "sepal_width"], |
| 40 | + "petal": ["petal_length", "petal_width"], |
| 41 | +} |
| 42 | + |
| 43 | +############################################################################### |
| 44 | +# We now use a Pipeline Creator to create the pipeline that will generate the |
| 45 | +# target. This special pipeline should be configured to be a "transformer" |
| 46 | +# and apply to the "petal" feature types. |
| 47 | + |
| 48 | +target_creator = PipelineCreator(problem_type="transformer", apply_to="petal") |
| 49 | +target_creator.add("pca", n_components=2) |
| 50 | +# Select only the first component |
| 51 | +target_creator.add("pick_columns", keep="pca__pca0") |
| 52 | + |
| 53 | + |
| 54 | +############################################################################### |
| 55 | +# We now create the pipeline that will be used to predict the target. This |
| 56 | +# pipeline will be a regression pipeline. The step previous to the model should |
| 57 | +# be the `generate_target` step, applying to the "petal" features and using the |
| 58 | +# target_creator pipeline as the transformer. |
| 59 | +creator = PipelineCreator(problem_type="regression") |
| 60 | +creator.add("zscore", apply_to="*") |
| 61 | +creator.add("generate_target", apply_to="petal", transformer=target_creator) |
| 62 | +creator.add("linreg", apply_to="sepal") |
| 63 | + |
| 64 | +############################################################################### |
| 65 | +# We finally evaluate the model within the cross validation. |
| 66 | +scores, model = run_cross_validation( |
| 67 | + X=X, |
| 68 | + y=y, |
| 69 | + X_types=X_types, |
| 70 | + data=df_iris, |
| 71 | + model=creator, |
| 72 | + return_estimator="final", |
| 73 | + cv=2, |
| 74 | +) |
| 75 | + |
| 76 | +print(scores["test_score"]) # type: ignore |
0 commit comments