Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 16 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -282,6 +282,22 @@ def RECENCY() -> Column:
return F.months_between(F.lit(rfm.FeatureMaker.make_date), F.col("DATE"))
```

#### @template
Provides an option to pass a template string that is used to create a text feature. This template is saved in [metadata](#metadata).

```python
import pyspark.sql.functions as F
import rialto.maker as rfm
from pyspark.sql import Column
from rialto.metadata import ValueType as VT

@rfm.feature(VT.numerical)
@rfm.desc("Age of customer")
@rfm.template("Customer is $X years old")
def AGE() -> Column:
return F.col("AGE")
```

#### @param
Inspired by @pytest.mark.parametrize, it has a similar interface and fulfills the same role. It allows you to invoke the feature function multiple times with different values of the parameter.
If multiple @params are used, the number of final features will be a product of all parameters. The feature function has to expect a parameter with the same name as the @params name.
Expand Down
8 changes: 6 additions & 2 deletions rialto/maker/containers.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ def __init__(self, name: str, callable_object: typing.Callable, value_type: Valu
self.dependencies: typing.List[str] = []
self.type = value_type
self.description = "basic feature"
self.template = "value $X"

def __str__(self) -> str:
"""
Expand All @@ -44,7 +45,8 @@ def __str__(self) -> str:
f"Name: {self.name}\n\t"
f"Parameters: {self.parameters}\n\t"
f"Type: {self.get_type()}\n\t"
f"Description: {self.description}"
f"Description: {self.description}\n\t"
f"Template: {self.template}"
)

def metadata(self) -> FeatureMetadata:
Expand All @@ -53,7 +55,9 @@ def metadata(self) -> FeatureMetadata:

:return: metadata dict
"""
return FeatureMetadata(name=self.get_feature_name(), value_type=self.type, description=self.description)
return FeatureMetadata(
name=self.get_feature_name(), value_type=self.type, description=self.description, template=self.template
)

def get_feature_name(self) -> str:
"""
Expand Down
30 changes: 28 additions & 2 deletions rialto/maker/wrappers.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.

__all__ = ["feature", "desc", "param", "depends"]
__all__ = ["feature", "desc", "template", "param", "depends"]

import typing
from copy import deepcopy
Expand Down Expand Up @@ -89,7 +89,7 @@ def desc(feature_functions: typing.Union[typing.Callable, FeatureHolder], desc:
Wrap feature with string description, used in metadata

:param feature_functions: FeatureHolder or pure function
:param type: FeatureType enum (numerical, ordinal, nominal)
:param desc: text description of the feature
:return: FeatureHolder
"""
logger.trace(f"Wrapping {feature_functions} with description")
Expand All @@ -109,6 +109,32 @@ def wrapper() -> FeatureHolder:
return wrapper()


@decorator_with_args
def template(feature_functions: typing.Union[typing.Callable, FeatureHolder], template: str):
    """
    Attach a string template to a feature so a text feature can be created from it

    :param feature_functions: FeatureHolder or pure function
    :param template: string template of the feature
    :return: FeatureHolder
    """
    logger.trace(f"Wrapping {feature_functions} with text template")

    # Already a holder: stamp the template on every feature it contains
    # and hand the same holder back.
    if isinstance(feature_functions, FeatureHolder):
        for held_feature in feature_functions:
            held_feature.template = template
        return feature_functions

    # Plain function: wrap it in a FeatureFunction inside a fresh holder.
    holder = FeatureHolder()
    wrapped_feature = FeatureFunction(feature_functions.__name__, feature_functions)
    wrapped_feature.template = template
    holder.append(wrapped_feature)
    return holder


@decorator_with_args
def param(feature_functions: typing.Union[typing.Callable, FeatureHolder], parameter_name: str, values: typing.List):
"""
Expand Down
7 changes: 5 additions & 2 deletions rialto/metadata/data_classes/feature_metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,14 +31,16 @@ class FeatureMetadata:
value_type: ValueType
name: str
description: str
template: str = None
group: GroupMetadata = None

def __repr__(self) -> str:
"""Serialize object to string"""
return (
"FeatureMetadata("
f"name={self.name!r}, value_type={self.value_type!r}, "
f"description={self.description!r}, group={self.group!r}, "
f"description={self.description!r}, template={self.template!r}, "
f"group={self.group!r}"
")"
)

Expand All @@ -49,7 +51,7 @@ def to_tuple(self, group_name: str) -> Tuple:
:param group_name: Feature group name
:return: tuple with feature information
"""
return (self.name, self.value_type.value, self.description, group_name)
return (self.name, self.value_type.value, self.description, self.template, group_name)

def add_group(self, group: GroupMetadata) -> Self:
"""
Expand All @@ -73,4 +75,5 @@ def from_spark(cls, record: Row) -> Self:
value_type=ValueType[record.feature_type],
name=record.feature_name,
description=record.feature_description,
template=record.feature_template,
)
6 changes: 4 additions & 2 deletions tests/maker/test_FeatureFunction.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,8 +57,8 @@ def test_serialization():
func.parameters["paramC"] = 1
func.parameters["paramA"] = 4
assert (
func.__str__()
== "Name: feature\n\tParameters: {'paramC': 1, 'paramA': 4}\n\tType: nominal\n\tDescription: basic feature"
func.__str__() == "Name: feature\n\tParameters: {'paramC': 1, 'paramA': 4}\n\t"
"Type: nominal\n\tDescription: basic feature\n\tTemplate: value $X"
)


Expand All @@ -68,7 +68,9 @@ def test_metadata():
func.parameters["paramA"] = 4
func.dependencies = ["featureB", "featureC"]
func.description = "nice feature"
func.template = "value $X something"

assert func.metadata().name == "FEATURE_PARAMA_4_PARAMC_1"
assert func.metadata().value_type == ValueType.ordinal
assert func.metadata().description == "nice feature"
assert func.metadata().template == "value $X something"
9 changes: 6 additions & 3 deletions tests/metadata/resources.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
StructField("feature_name", StringType(), True),
StructField("feature_type", StringType(), True),
StructField("feature_description", StringType(), True),
StructField("feature_template", StringType(), True),
StructField("group_name", StringType(), True),
]
)
Expand All @@ -41,8 +42,8 @@
]

feature_base = [
("Feature1", "nominal", "feature1", "Group2"),
("Feature2", "nominal", "feature2", "Group2"),
("Feature1", "nominal", "feature1", "template1", "Group2"),
("Feature2", "nominal", "feature2", "template2", "Group2"),
]

group_md1 = GroupMetadata(
Expand All @@ -64,4 +65,6 @@
features=["Feature1", "Feature2"],
)

feature_md1 = FeatureMetadata(name="Feature1", value_type=ValueType.nominal, description="feature1", group=group_md2)
feature_md1 = FeatureMetadata(
name="Feature1", value_type=ValueType.nominal, description="feature1", template="template1", group=group_md2
)