Skip to content

Commit e06b440

Browse files
authored
Merge pull request #277 from mark-druffel/feat-yaml-actions
Added namespaces to yaml_interrogate
2 parents a3d7ec0 + c41105a commit e06b440

File tree

2 files changed

+437
-25
lines changed

2 files changed

+437
-25
lines changed

pointblank/yaml.py

Lines changed: 131 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,8 @@
11
from __future__ import annotations
22

3+
from importlib import import_module
34
from pathlib import Path
4-
from typing import Any, Union
5+
from typing import Any, Iterable, Mapping, Optional, Union
56

67
import yaml
78
from narwhals.typing import FrameT
@@ -17,7 +18,9 @@ class YAMLValidationError(Exception):
1718
pass
1819

1920

20-
def _safe_eval_python_code(code: str) -> Any:
21+
def _safe_eval_python_code(
22+
code: str, namespaces: Optional[Union[Iterable[str], Mapping[str, str]]] = None
23+
) -> Any:
2124
"""Safely evaluate Python code with restricted namespace.
2225
2326
This function provides a controlled environment for executing Python code embedded in YAML
@@ -68,6 +71,7 @@ def _safe_eval_python_code(code: str) -> Any:
6871
"abs": abs,
6972
"round": round,
7073
"print": print,
74+
"__import__": __import__,
7175
},
7276
}
7377

@@ -88,12 +92,25 @@ def _safe_eval_python_code(code: str) -> Any:
8892

8993
safe_namespace["pd"] = pd
9094

91-
# Check for dangerous patterns
95+
if namespaces:
96+
for alias, module_name in (
97+
namespaces.items() if isinstance(namespaces, dict) else ((m, m) for m in namespaces)
98+
):
99+
try:
100+
safe_namespace[alias] = import_module(module_name)
101+
except ImportError as e:
102+
raise ImportError(
103+
f"Could not import requested namespace '{module_name}': {e}"
104+
) from e
105+
106+
# Check for dangerous patterns; only block `__import__` calls that target os, sys, or subprocess so legitimate imports still work
92107
dangerous_patterns = [
93-
r"import\s+os",
94-
r"import\s+sys",
95-
r"import\s+subprocess",
96-
r"__import__",
108+
r"import\s+os\b",
109+
r"import\s+sys\b",
110+
r"import\s+subprocess\b",
111+
r"__import__\s*\(\s*['\"]os['\"]",
112+
r"__import__\s*\(\s*['\"]sys['\"]",
113+
r"__import__\s*\(\s*['\"]subprocess['\"]",
97114
r"exec\s*\(",
98115
r"eval\s*\(",
99116
r"open\s*\(",
@@ -142,7 +159,9 @@ def _safe_eval_python_code(code: str) -> Any:
142159
raise YAMLValidationError(f"Error executing Python code '{code}': {e}")
143160

144161

145-
def _process_python_expressions(value: Any) -> Any:
162+
def _process_python_expressions(
163+
value: Any, namespaces: Optional[Union[Iterable[str], Mapping[str, str]]] = None
164+
) -> Any:
146165
"""Process Python code snippets embedded in YAML values.
147166
148167
This function supports the python: block syntax for embedding Python code:
@@ -152,7 +171,7 @@ def _process_python_expressions(value: Any) -> Any:
152171
pl.scan_csv("data.csv").head(10)
153172
154173
Note: col_vals_expr() also supports a shortcut syntax where the expr parameter
155-
can be written directly without the python: wrapper:
174+
can be written directly without the python: wrapper:
156175
157176
col_vals_expr:
158177
expr: |
@@ -180,14 +199,14 @@ def _process_python_expressions(value: Any) -> Any:
180199
# Handle python: block syntax
181200
if "python" in value and len(value) == 1:
182201
code = value["python"]
183-
return _safe_eval_python_code(code)
202+
return _safe_eval_python_code(code, namespaces=namespaces)
184203

185204
# Recursively process dictionary values
186-
return {k: _process_python_expressions(v) for k, v in value.items()}
205+
return {k: _process_python_expressions(v, namespaces=namespaces) for k, v in value.items()}
187206

188207
elif isinstance(value, list):
189208
# Recursively process list items
190-
return [_process_python_expressions(item) for item in value]
209+
return [_process_python_expressions(item, namespaces=namespaces) for item in value]
191210

192211
else:
193212
# Return primitive types unchanged
@@ -547,7 +566,11 @@ def _parse_schema_spec(self, schema_spec: Any) -> Any:
547566
f"Schema specification must be a dictionary, got: {type(schema_spec)}"
548567
)
549568

550-
def _parse_validation_step(self, step_config: Union[str, dict]) -> tuple[str, dict]:
569+
def _parse_validation_step(
570+
self,
571+
step_config: Union[str, dict],
572+
namespaces: Optional[Union[Iterable[str], Mapping[str, str]]] = None,
573+
) -> tuple[str, dict]:
551574
"""Parse a single validation step from YAML configuration.
552575
553576
Parameters
@@ -598,14 +621,16 @@ def _parse_validation_step(self, step_config: Union[str, dict]) -> tuple[str, di
598621
# Special case: `col_vals_expr()`'s `expr=` parameter can use shortcut syntax
599622
if method_name == "col_vals_expr" and key == "expr" and isinstance(value, str):
600623
# Treat string directly as Python code (shortcut syntax)
601-
processed_parameters[key] = _safe_eval_python_code(value)
624+
processed_parameters[key] = _safe_eval_python_code(value, namespaces=namespaces)
602625
# Special case: `pre=` parameter can use shortcut syntax (like `expr=`)
603626
elif key == "pre" and isinstance(value, str):
604627
# Treat string directly as Python code (shortcut syntax)
605-
processed_parameters[key] = _safe_eval_python_code(value)
628+
processed_parameters[key] = _safe_eval_python_code(value, namespaces=namespaces)
606629
else:
607630
# Normal processing (requires python: block syntax)
608-
processed_parameters[key] = _process_python_expressions(value)
631+
processed_parameters[key] = _process_python_expressions(
632+
value, namespaces=namespaces
633+
)
609634
parameters = processed_parameters
610635

611636
# Convert `columns=` specification
@@ -658,7 +683,9 @@ def _parse_validation_step(self, step_config: Union[str, dict]) -> tuple[str, di
658683

659684
return self.validation_method_map[method_name], parameters
660685

661-
def build_validation(self, config: dict) -> Validate:
686+
def build_validation(
687+
self, config: dict, namespaces: Optional[Union[Iterable[str], Mapping[str, str]]] = None
688+
) -> Validate:
662689
"""Convert YAML config to Validate object.
663690
664691
Parameters
@@ -693,7 +720,9 @@ def build_validation(self, config: dict) -> Validate:
693720
# Set actions if provided
694721
if "actions" in config:
695722
# Process actions: handle `python:` block syntax for callables
696-
processed_actions = _process_python_expressions(config["actions"])
723+
processed_actions = _process_python_expressions(
724+
config["actions"], namespaces=namespaces
725+
)
697726
# Convert to Actions object
698727
validate_kwargs["actions"] = Actions(**processed_actions)
699728

@@ -713,7 +742,9 @@ def build_validation(self, config: dict) -> Validate:
713742

714743
# Add validation steps
715744
for step_config in config["steps"]:
716-
method_name, parameters = self._parse_validation_step(step_config)
745+
method_name, parameters = self._parse_validation_step(
746+
step_config, namespaces=namespaces
747+
)
717748

718749
# Get the method from the validation object
719750
method = getattr(validation, method_name)
@@ -728,7 +759,9 @@ def build_validation(self, config: dict) -> Validate:
728759

729760
return validation
730761

731-
def execute_workflow(self, config: dict) -> Validate:
762+
def execute_workflow(
763+
self, config: dict, namespaces: Optional[Union[Iterable[str], Mapping[str, str]]] = None
764+
) -> Validate:
732765
"""Execute a complete YAML validation workflow.
733766
734767
Parameters
@@ -742,15 +775,19 @@ def execute_workflow(self, config: dict) -> Validate:
742775
Interrogated Validate object with results.
743776
"""
744777
# Build the validation plan
745-
validation = self.build_validation(config)
778+
validation = self.build_validation(config, namespaces=namespaces)
746779

747780
# Execute interrogation to get results
748781
validation = validation.interrogate()
749782

750783
return validation
751784

752785

753-
def yaml_interrogate(yaml: Union[str, Path], set_tbl: Union[FrameT, Any, None] = None) -> Validate:
786+
def yaml_interrogate(
787+
yaml: Union[str, Path],
788+
set_tbl: Union[FrameT, Any, None] = None,
789+
namespaces: Optional[Union[Iterable[str], Mapping[str, str]]] = None,
790+
) -> Validate:
754791
"""Execute a YAML-based validation workflow.
755792
756793
This is the main entry point for YAML-based validation workflows. It takes YAML configuration
@@ -772,6 +809,10 @@ def yaml_interrogate(yaml: Union[str, Path], set_tbl: Union[FrameT, Any, None] =
772809
`tbl` field before executing the validation workflow. This can be any supported table type
773810
including DataFrame objects, Ibis table objects, CSV file paths, Parquet file paths, GitHub
774811
URLs, or database connection strings.
812+
namespaces
813+
Optional module namespaces to make available for Python code execution in YAML
814+
configurations. Can be a dictionary mapping aliases to module names or a list of module
815+
names. See the "Using Namespaces" section below for detailed examples.
775816
776817
Returns
777818
-------
@@ -786,6 +827,71 @@ def yaml_interrogate(yaml: Union[str, Path], set_tbl: Union[FrameT, Any, None] =
786827
If the YAML is invalid, malformed, or execution fails. This includes syntax errors, missing
787828
required fields, unknown validation methods, or data loading failures.
788829
830+
Using Namespaces
831+
----------------
832+
The `namespaces=` parameter makes custom Python modules and their functions available to Python code in YAML configurations.
833+
This is particularly useful for custom action functions and advanced Python expressions.
834+
835+
**Namespace formats:**
836+
837+
- Dictionary format: `{"alias": "module.name"}` maps aliases to module names
838+
- List format: `["module.name", "another.module"]` imports each module under its own name
839+
840+
**Option 1: Inline expressions (no namespaces needed)**
841+
842+
```{python}
843+
import pointblank as pb
844+
845+
# Simple inline custom action
846+
yaml_config = '''
847+
tbl: small_table
848+
thresholds:
849+
warning: 0.01
850+
actions:
851+
warning:
852+
python: "lambda: print('Custom warning triggered')"
853+
steps:
854+
- col_vals_gt:
855+
columns: [a]
856+
value: 1000
857+
'''
858+
859+
result = pb.yaml_interrogate(yaml_config)
860+
result
861+
```
862+
863+
**Option 2: External functions with namespaces**
864+
865+
```{python}
866+
# Define a custom action function
867+
def my_custom_action():
868+
print("Data validation failed: please check your data.")
869+
870+
# Add to current module for demo
871+
import sys
872+
sys.modules[__name__].my_custom_action = my_custom_action
873+
874+
# YAML that references the external function
875+
yaml_config = '''
876+
tbl: small_table
877+
thresholds:
878+
warning: 0.01
879+
actions:
880+
warning:
881+
python: actions.my_custom_action
882+
steps:
883+
- col_vals_gt:
884+
columns: [a]
885+
value: 1000 # This will fail
886+
'''
887+
888+
# Use namespaces to make the function available
889+
result = pb.yaml_interrogate(yaml_config, namespaces={'actions': '__main__'})
890+
result
891+
```
892+
893+
This approach enables modular, reusable validation workflows with custom business logic.
894+
789895
Examples
790896
--------
791897
```{python}
@@ -928,14 +1034,14 @@ def yaml_interrogate(yaml: Union[str, Path], set_tbl: Union[FrameT, Any, None] =
9281034
# If `set_tbl=` is provided, we need to build the validation workflow and then use `set_tbl()`
9291035
if set_tbl is not None:
9301036
# First build the validation object without interrogation
931-
validation = validator.build_validation(config)
1037+
validation = validator.build_validation(config, namespaces=namespaces)
9321038
# Then replace the table using set_tbl method
9331039
validation = validation.set_tbl(tbl=set_tbl)
9341040
# Finally interrogate with the new table
9351041
return validation.interrogate()
9361042
else:
9371043
# Standard execution without table override (includes interrogation)
938-
return validator.execute_workflow(config)
1044+
return validator.execute_workflow(config, namespaces=namespaces)
9391045

9401046

9411047
def load_yaml_config(file_path: Union[str, Path]) -> dict:
@@ -1414,7 +1520,7 @@ def extract_python_expressions(obj, path=""):
14141520
elif isinstance(step_params["expr"], str):
14151521
original_expressions["expr"] = step_params["expr"]
14161522

1417-
method_name, parameters = validator._parse_validation_step(step_config)
1523+
method_name, parameters = validator._parse_validation_step(step_config, namespaces=None)
14181524

14191525
# Apply the original expressions to override the converted lambda functions
14201526
if method_name == "conjointly" and "expressions" in original_expressions:

0 commit comments

Comments
 (0)