juaml
diff --git a/‎examples/03_complex_models/run_generate_target.py‎
Lines changed: 76 additions & 0 deletions b/‎examples/03_complex_models/run_generate_target.py‎
Lines changed: 76 additions & 0 deletions
diff --git a/‎julearn/api.py‎
Lines changed: 11 additions & 0 deletions b/‎julearn/api.py‎
Lines changed: 11 additions & 0 deletions
diff --git a/‎julearn/base/column_types.py‎
Lines changed: 54 additions & 1 deletion b/‎julearn/base/column_types.py‎
Lines changed: 54 additions & 1 deletion
diff --git a/‎julearn/base/tests/test_column_types.py‎
Lines changed: 68 additions & 0 deletions b/‎julearn/base/tests/test_column_types.py‎
Lines changed: 68 additions & 0 deletions
diff --git a/‎julearn/conftest.py‎
Lines changed: 3 additions & 1 deletion b/‎julearn/conftest.py‎
Lines changed: 3 additions & 1 deletion
@@ -0,0 +1,76 @@
+"""
+Target Generation
+=================
+
+This example uses the ``iris`` dataset and tests a regression model in which
+the target variable is generated from some features within the cross-validation
+procedure. We will use the Iris dataset and generate a target variable using
+PCA on the petal features. Then, we will evaluate if a regression model can
+predict the generated target from the sepal features.
+
+.. include:: ../../links.inc
+"""
+# Authors: Federico Raimondo <[email protected]>
+# License: AGPL
+
+from seaborn import load_dataset
+from julearn import run_cross_validation
+from julearn.pipeline import PipelineCreator
+from julearn.utils import configure_logging
+
+###############################################################################
+# Set the logging level to debug to see extra information.
+configure_logging(level="DEBUG")
+
+###############################################################################
+df_iris = load_dataset("iris")
+
+
+###############################################################################
+# We pass all four measurements in ``X``: the sepal features will be the
+# predictors, while the petal features will be used to generate the target.
+
+X = ["sepal_length", "sepal_width", "petal_length", "petal_width"]
+y = "__generated__"  # to indicate to julearn that the target will be generated
+
+
+# Define our feature types
+X_types = {
+    "sepal": ["sepal_length", "sepal_width"],
+    "petal": ["petal_length", "petal_width"],
+}
+
+###############################################################################
+# We now use a Pipeline Creator to create the pipeline that will generate the
+# target. This special pipeline should be configured to be a "transformer"
+# and apply to the "petal" feature types.
+
+target_creator = PipelineCreator(problem_type="transformer", apply_to="petal")
+target_creator.add("pca", n_components=2)
+# Select only the first component
+target_creator.add("pick_columns", keep="pca__pca0")
+
+
+###############################################################################
+# We now create the pipeline that will be used to predict the target. This
+# pipeline will be a regression pipeline. The step previous to the model should
+# be the `generate_target`, applying to the "petal" features and using the
+# target_creator pipeline as the transformer.
+creator = PipelineCreator(problem_type="regression")
+creator.add("zscore", apply_to="*")
+creator.add("generate_target", apply_to="petal", transformer=target_creator)
+creator.add("linreg", apply_to="sepal")
+
+###############################################################################
+# We finally evaluate the model within the cross validation.
+scores, model = run_cross_validation(
+    X=X,
+    y=y,
+    X_types=X_types,
+    data=df_iris,
+    model=creator,
+    return_estimator="final",
+    cv=2,
+)
+
+print(scores["test_score"])  # type: ignore
@@ -209,6 +209,9 @@ def _validata_api_params(  # noqa: C901
 
     wrap_score = False
     if isinstance(model, (PipelineCreator, list)):
+        logger.debug(
+            "Generating pipeline from PipelineCreator or list of them"
+        )
         if preprocess is not None:
             raise_error(
                 "If model is a PipelineCreator (or list of), "
@@ -242,6 +245,7 @@ def _validata_api_params(  # noqa: C901
             expanded_models.extend(m.split())
 
         has_target_transformer = expanded_models[-1]._added_target_transformer
+        has_target_generator = expanded_models[-1]._added_target_generator
         all_pipelines = [
             model.to_pipeline(X_types=X_types, search_params=search_params)
             for model in expanded_models
@@ -255,12 +259,16 @@ def _validata_api_params(  # noqa: C901
             pipeline = all_pipelines[0]
 
         if has_target_transformer:
+            logger.debug("Pipeline has target transformer")
             if isinstance(pipeline, BaseSearchCV):
                 last_step = pipeline.estimator[-1]  # type: ignore
             else:
                 last_step = pipeline[-1]
             if not last_step.can_inverse_transform():
                 wrap_score = True
+        if has_target_generator:
+            logger.debug("Pipeline has target generator")
+            wrap_score = True
         problem_type = model[0].problem_type
 
     elif not isinstance(model, (str, BaseEstimator)):
@@ -317,12 +325,15 @@ def _validata_api_params(  # noqa: C901
                 f"The following model_params are incorrect: {unused_params}"
             )
         has_target_transformer = pipeline_creator._added_target_transformer
+        has_target_generator = pipeline_creator._added_target_generator
         pipeline = pipeline_creator.to_pipeline(
             X_types=X_types, search_params=search_params
         )
 
         if has_target_transformer and not pipeline[-1].can_inverse_transform():
             wrap_score = True
+        if has_target_generator:
+            wrap_score = True
 
     # Log some information
     logger.info("= Data Information =")
 
@@ -4,14 +4,15 @@
 #          Sami Hamdan <[email protected]>
 # License: AGPL
 
-from typing import Callable, Union
+from typing import Any, Callable, Union
 
 from sklearn.compose import make_column_selector
 
 from ..utils.logging import raise_error
 
 
 ColumnTypesLike = Union[list[str], set[str], str, "ColumnTypes"]
+ColumnTypesDict = dict[str, ColumnTypesLike]
 
 
 def change_column_type(column: str, new_type: str):
@@ -240,6 +241,42 @@ def __eq__(self, other: Union["ColumnTypes", str]):
         other = other if isinstance(other, ColumnTypes) else ColumnTypes(other)
         return self._column_types == other._column_types
 
+    def __and__(self, other: "ColumnTypes"):
+        """Get the intersection of the column_types.
+
+        Implements the ``&`` operator between two ColumnTypes objects.
+
+        Parameters
+        ----------
+        other : ColumnTypes
+            The other column_types to get the intersection with. It must
+            already be a ColumnTypes object: unlike ``__eq__``, no conversion
+            from other ColumnTypesLike values is done here.
+
+        Returns
+        -------
+        ColumnTypes
+            A new ColumnTypes object with the intersection of the
+            column_types of both operands.
+
+        """
+        return ColumnTypes(self._column_types & other._column_types)
+
+    def __or__(self, other: "ColumnTypes"):
+        """Get the union of the column_types.
+
+        Implements the ``|`` operator between two ColumnTypes objects.
+
+        Parameters
+        ----------
+        other : ColumnTypes
+            The other column_types to get the union with. It must already be
+            a ColumnTypes object: unlike ``__eq__``, no conversion from other
+            ColumnTypesLike values is done here.
+
+        Returns
+        -------
+        ColumnTypes
+            A new ColumnTypes object with the union of the column_types of
+            both operands.
+
+        """
+        return ColumnTypes(self._column_types | other._column_types)
+
+    def __len__(self):
+        """Get the number of column_types.
+
+        Returns
+        -------
+        int
+            The number of column_types held by this object.
+
+        """
+        return len(self._column_types)
+
     def __iter__(self):
         """Iterate over the column_types."""
 
@@ -251,6 +288,22 @@ def __repr__(self):
             f"ColumnTypes<types={self._column_types}; pattern={self.pattern}>"
         )
 
+    def filter(self, X_types: dict[str, Any]) -> dict[str, Any]:  # noqa: N803
+        """Filter the X_types based on the column_types.
+
+        Only the entries whose key is one of the column_types of this object
+        are kept. The values are passed through untouched.
+
+        Parameters
+        ----------
+        X_types : dict
+            The types of the columns, keyed by column type.
+
+        Returns
+        -------
+        dict
+            A new dictionary with the entries of X_types whose key is in the
+            column_types of this object.
+
+        """
+        return {k: v for k, v in X_types.items() if k in self._column_types}
+
     def copy(self) -> "ColumnTypes":
         """Get a copy of the ColumnTypes.
 
 
@@ -253,3 +253,71 @@ def test_ColumnTypes_add(
     """
     summed = ColumnTypes(left).add(right)
     assert summed == ColumnTypes(result)
+
+
+@pytest.mark.parametrize(
+    "left,right,result",
+    [
+        (
+            ["continuous"],
+            ["continuous"],
+            ["continuous"],
+        ),
+        (
+            ["cont", "cat"],
+            "cat",
+            ["cat"],
+        ),
+    ],
+)
+def test_ColumnTypes_and(
+    left: ColumnTypesLike, right: ColumnTypesLike, result: ColumnTypesLike
+) -> None:
+    """Test the ColumnTypes intersection (``&`` operator).
+
+    Parameters
+    ----------
+    left : ColumnTypesLike
+        The left hand side of the intersection.
+    right : ColumnTypesLike
+        The right hand side of the intersection.
+    result : ColumnTypesLike
+        The expected result.
+
+    """
+    anded = ColumnTypes(left) & ColumnTypes(right)
+    assert anded == ColumnTypes(result)
+
+
+@pytest.mark.parametrize(
+    "left,right,result",
+    [
+        (
+            ["continuous"],
+            ["continuous"],
+            ["continuous"],
+        ),
+        (
+            ["cont", "cat"],
+            "cat",
+            ["cont", "cat"],
+        ),
+    ],
+)
+def test_ColumnTypes_or(
+    left: ColumnTypesLike, right: ColumnTypesLike, result: ColumnTypesLike
+) -> None:
+    """Test the ColumnTypes union (``|`` operator).
+
+    Parameters
+    ----------
+    left : ColumnTypesLike
+        The left hand side of the union.
+    right : ColumnTypesLike
+        The right hand side of the union.
+    result : ColumnTypesLike
+        The expected result.
+
+    """
+    orred = ColumnTypes(left) | ColumnTypes(right)
+    assert orred == ColumnTypes(result)
@@ -243,7 +243,9 @@ def model(request: FixtureRequest) -> str:
     return request.param
 
 
-@fixture(params=["regression", "classification"], scope="function")
+@fixture(
+    params=["regression", "classification", "transformer"], scope="function"
+)
 def problem_type(request: FixtureRequest) -> str:
     """Return different problem types.