Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add replacer processor #672

Draft
wants to merge 7 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
* adds `desired_cluster_status` option to opensearch output to signal healthy cluster status
* initially run health checks on setup for every configured component
* make `imagePullPolicy` configurable for helm chart deployments
* add `replacer` processor to replace substrings in fields


### Improvements
Expand Down
Empty file.
43 changes: 43 additions & 0 deletions logprep/processor/replacer/processor.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
"""
Replacer
============

The `replacer` processor replaces substrings in fields.

Processor Configuration
^^^^^^^^^^^^^^^^^^^^^^^
.. code-block:: yaml
:linenos:

- samplename:
type: replacer
specific_rules:
- tests/testdata/rules/specific/
generic_rules:
- tests/testdata/rules/generic/

.. autoclass:: logprep.processor.replacer.processor.Replacer.Config
:members:
:undoc-members:
:inherited-members:
:noindex:

.. automodule:: logprep.processor.replacer.rule
"""

from attrs import define, field, validators

from logprep.processor.field_manager.processor import FieldManager
from logprep.processor.replacer.rule import ReplacerRule


class Replacer(FieldManager):
    """Processor intended to replace substrings in event fields.

    The rule application is still a stub: it only looks up the inputs a
    replacement would need and leaves the event unchanged.
    """

    rule_class = ReplacerRule

    def _apply_rules(self, event: dict, rule: ReplacerRule):
        """Look up the current value and the rule's actions for each mapped field.

        Parameters
        ----------
        event : dict
            The event being processed. It is only read, never modified.
        rule : ReplacerRule
            The matching rule; its ``mapping`` keys name the fields to inspect
            and ``actions`` is indexed with the same keys.
        """
        for field_name in rule.mapping:
            # Plain top-level lookup; a missing key yields ``None``.
            current_value = event.get(field_name)
            # Raises ``KeyError`` if the rule has no actions for this field.
            field_actions = rule.actions[field_name]
104 changes: 104 additions & 0 deletions logprep/processor/replacer/rule.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
"""
Replacer
============

A speaking example:

.. code-block:: yaml
:linenos:
:caption: Given replacer rule

filter: message
replacer:
...
description: '...'

.. code-block:: json
:linenos:
:caption: Incoming event

<INCOMING_EVENT>

.. code-block:: json
:linenos:
:caption: Processed event

<PROCESSED_EVENT>


.. autoclass:: logprep.processor.replacer.rule.ReplacerRule.Config
:members:
:undoc-members:
:inherited-members:
:noindex:

Examples for replacer:
------------------------------------------------

.. datatemplate:import-module:: tests.unit.processor.replacer.test_replacer
:template: testcase-renderer.tmpl

"""

import re
from typing import Callable, List, Tuple

from attrs import define, field, validators

from logprep.filter.expression.filter_expression import FilterExpression
from logprep.processor.field_manager.rule import FieldManagerRule

# A replacement token: ``%{``, at least one character, ``}``.
REPLACE_ITEM = r"%{(.+)}"

# Any text that contains at least one replacement token.
REPLACEMENT_PATTERN = rf".*{REPLACE_ITEM}.*"
# Escaped opening and closing markers of a replacement token.
START = r"%\{"
END = r"\}"
REPLACEMENT = rf"(?P<replacement>[^{END}])"
DELIMITER = r"([^%]+)"
SEPARATOR = r"(\((?P<separator>\\\)|[^)]+)\))?"
# Splits one section into three named groups:
#   partition   - text in front of the token (every character that does not
#                 begin an opening marker)
#   replacement - text between the opening marker and the last closing marker
#   delimiter   - text after the token
# The partition group has to consume characters: a bare negative lookahead for
# the opening marker placed directly in front of that marker can never match.
SECTION_MATCH = (
    rf"(?P<partition>(?:(?!{START}).)*){START}(?P<replacement>.*){END}(?P<delimiter>.*)"
)


class ReplacerRule(FieldManagerRule):
    """Rule for the replacer processor.

    In addition to the validated config, every rule carries ``actions``: for
    each field name of the mapping, the list of ``(section, replacement)``
    tuples parsed from that field's pattern.
    """

    @define(kw_only=True)
    class Config(FieldManagerRule.Config):
        """Config for ReplacerRule"""

        # Not settable by the rule author (``init=False``) and ignored when
        # comparing configs (``eq=False``).
        source_fields: list = field(init=False, factory=list, eq=False)
        target_field: str = field(init=False, default="", eq=False)
        mapping: dict = field(
            validator=[
                validators.instance_of(dict),
                validators.min_len(1),
                validators.deep_mapping(
                    key_validator=validators.instance_of(str),
                    value_validator=validators.matches_re(REPLACEMENT_PATTERN),
                ),
            ]
        )
        """A mapping of fieldnames to patterns to replace"""

    # Maps each field name of the mapping to its parsed list of actions.
    actions: dict

    def __init__(
        self, filter_rule: FilterExpression, config: "ReplacerRule.Config", processor_name: str
    ):
        """Initialise the rule and precompute the actions of every mapping entry.

        Parameters
        ----------
        filter_rule : FilterExpression
            The filter expression of the rule.
        config : ReplacerRule.Config
            The validated rule configuration.
        processor_name : str
            Name of the processor this rule belongs to.
        """
        super().__init__(filter_rule, config, processor_name)
        self._set_mapping_actions()

    def _set_mapping_actions(self):
        """Parse every mapping pattern into a list of ``(section, replacement)`` tuples.

        A pattern is cut into sections, each starting at a ``%{`` marker and
        running up to the next ``%``. ``section`` is the full section text
        (token plus the literal text that follows it) and ``replacement`` is
        the text inside the token.

        Raises
        ------
        ValueError
            If a section contains no well-formed ``%{...}`` token, e.g. an
            opening marker without a closing brace.
        """
        self.actions = {}
        for source_field, pattern in self._config.mapping.items():
            # The section scan only picks up text from a ``%{`` marker onwards.
            # Prepend an empty token so leading literal text is kept as the
            # trailing part of its own section instead of being dropped.
            if not pattern.startswith("%{"):
                pattern = "%{}" + pattern
            actions = []
            for section in re.findall(r"%\{[^%]+", pattern):
                section_match = re.match(SECTION_MATCH, section)
                if section_match is None:
                    raise ValueError(
                        f"replacer pattern for field '{source_field}' "
                        f"contains a malformed section: '{section}'"
                    )
                actions.append((section, section_match.group("replacement")))
            self.actions[source_field] = actions
2 changes: 2 additions & 0 deletions logprep/registry.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@
from logprep.processor.list_comparison.processor import ListComparison
from logprep.processor.pre_detector.processor import PreDetector
from logprep.processor.pseudonymizer.processor import Pseudonymizer
from logprep.processor.replacer.processor import Replacer
from logprep.processor.requester.processor import Requester
from logprep.processor.selective_extractor.processor import SelectiveExtractor
from logprep.processor.string_splitter.processor import StringSplitter
Expand Down Expand Up @@ -74,6 +75,7 @@ class Registry:
"pre_detector": PreDetector,
"pseudonymizer": Pseudonymizer,
"requester": Requester,
"replacer": Replacer,
"selective_extractor": SelectiveExtractor,
"string_splitter": StringSplitter,
"template_replacer": TemplateReplacer,
Expand Down
10 changes: 10 additions & 0 deletions tests/testdata/unit/replacer/generic_rules/replacer.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
[
{
"filter": "another-message",
"replacer": {
"mapping": {
"field": "message with %{replace that}"
}
}
}
]
10 changes: 10 additions & 0 deletions tests/testdata/unit/replacer/specific_rules/replacer.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
[
{
"filter": "message",
"replacer": {
"mapping": {
"field": "message with %{replace this}"
}
}
}
]
Empty file.
43 changes: 43 additions & 0 deletions tests/unit/processor/replacer/test_replacer.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
# pylint: disable=missing-docstring
import pytest

from logprep.processor.base.exceptions import ProcessingWarning
from tests.unit.processor.base import BaseProcessorTestCase

# Parametrization data for the success path. Each entry is a tuple of
# (test case name, rule definition, incoming event, expected event).
test_cases = [  # testcase, rule, event, expected
    (
        "Basic testcase",
        {
            "filter": "message",
            "replacer": {
                "mapping": {"message": "this is %{replace this}"},
            },
        },
        {"message": "this is test"},
        {"message": "this is replace this"},
    )
]

# Parametrization data for the failure path; same tuple layout as above.
# Currently empty, so no failure case is exercised.
failure_test_cases = []  # testcase, rule, event, expected


# Unit tests for the replacer processor, driven by the module-level
# `test_cases` and `failure_test_cases` lists.
class TestReplacer(BaseProcessorTestCase):

    # Processor configuration for the tests.
    # NOTE(review): presumably the base test case builds `self.object` from
    # this config — confirm against BaseProcessorTestCase.
    CONFIG: dict = {
        "type": "replacer",
        "specific_rules": ["tests/testdata/unit/replacer/specific_rules"],
        "generic_rules": ["tests/testdata/unit/replacer/generic_rules"],
    }

    # Success path: after processing, the event (mutated in place) must equal
    # the expected event.
    @pytest.mark.parametrize("testcase, rule, event, expected", test_cases)
    def test_testcases(self, testcase, rule, event, expected):
        self._load_specific_rule(rule)
        self.object.process(event)
        assert event == expected, testcase

    # Failure path: processing must raise a ProcessingWarning, and the event is
    # then compared with the expected event after the warning was raised.
    @pytest.mark.parametrize("testcase, rule, event, expected", failure_test_cases)
    def test_testcases_failure_handling(self, testcase, rule, event, expected):
        self._load_specific_rule(rule)
        with pytest.raises(ProcessingWarning):
            self.object.process(event)
        assert event == expected, testcase
157 changes: 157 additions & 0 deletions tests/unit/processor/replacer/test_replacer_rule.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,157 @@
# pylint: disable=protected-access
# pylint: disable=missing-docstring
import pytest

from logprep.processor.base.exceptions import InvalidRuleDefinitionError
from logprep.processor.replacer.rule import ReplacerRule


class TestReplacerRule:
    # Tests for creation, config validation and equality of ReplacerRule.

    def test_create_from_dict_returns_replacer_rule(self):
        rule = {
            "filter": "message",
            "replacer": {
                "mapping": {"test": "this is %{replace this}"},
            },
        }
        rule_instance = ReplacerRule._create_from_dict(rule)
        assert isinstance(rule_instance, ReplacerRule)

    @pytest.mark.parametrize(
        ["rule", "error", "message"],
        [
            (
                {
                    "filter": "message",
                    "replacer": {
                        "mapping": {"test": "this is %{replace this}"},
                    },
                },
                None,
                None,
            ),
            (
                {
                    "filter": "message",
                    "replacer": {
                        "mapping": {"test": "this is %{replace this} and %{replace that}"},
                    },
                },
                None,
                None,
            ),
            (
                {
                    "filter": "message",
                    "replacer": {
                        "mapping": {},
                    },
                },
                ValueError,
                "Length of 'mapping' must be >= 1",
            ),
            (
                {
                    "filter": "message",
                    "replacer": {
                        "source_fields": ["test"],
                        "mapping": {"test": "this is %{replace this}"},
                    },
                },
                TypeError,
                "unexpected keyword argument 'source_fields'",
            ),
            (
                {
                    "filter": "message",
                    "replacer": {
                        "target_field": "test",
                        "mapping": {"test": "this is %{replace this}"},
                    },
                },
                TypeError,
                "unexpected keyword argument 'target_field'",
            ),
            (
                {
                    "filter": "message",
                    "replacer": {
                        "mapping": {"test": "missing replacement pattern"},
                    },
                },
                ValueError,
                "'mapping' must match regex",
            ),
        ],
    )
    def test_create_from_dict_validates_config(self, rule, error, message):
        # Cases with an error must fail on creation with a matching message;
        # all other cases must expose every configured key on the rule config.
        if error:
            with pytest.raises(error, match=message):
                ReplacerRule._create_from_dict(rule)
        else:
            rule_instance = ReplacerRule._create_from_dict(rule)
            assert hasattr(rule_instance, "_config")
            for key, value in rule.get("replacer").items():
                assert hasattr(rule_instance._config, key)
                assert value == getattr(rule_instance._config, key)

    @pytest.mark.parametrize(
        ["testcase", "rule1", "rule2", "equality"],
        [
            (
                "Two rules with same config",
                {
                    "filter": "message",
                    "replacer": {
                        "mapping": {"test": "this is %{replace this}"},
                    },
                },
                {
                    "filter": "message",
                    "replacer": {
                        "mapping": {"test": "this is %{replace this}"},
                    },
                },
                True,
            ),
            (
                "Different filter",
                {
                    "filter": "message",
                    "replacer": {
                        "mapping": {"test": "this is %{replace this}"},
                    },
                },
                {
                    "filter": "other-filter",
                    "replacer": {
                        "mapping": {"test": "this is %{replace this}"},
                    },
                },
                False,
            ),
            (
                "Different mapping",
                {
                    "filter": "message",
                    "replacer": {
                        "mapping": {"test": "this is %{replace this}"},
                    },
                },
                {
                    # Same filter as the first rule, so that only the mapping
                    # differs and the case really checks the mapping comparison.
                    "filter": "message",
                    "replacer": {
                        "mapping": {
                            "test": "this is %{replace this}",
                            "other": "this is %{replace this}",
                        },
                    },
                },
                False,
            ),
        ],
    )
    def test_equality(self, testcase, rule1, rule2, equality):
        rule1 = ReplacerRule._create_from_dict(rule1)
        rule2 = ReplacerRule._create_from_dict(rule2)
        assert (rule1 == rule2) == equality, testcase
Loading