From c67124c118ba2725e86dd3d03babf73aecdb859a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=B6rg=20Zimmermann?= <101292599+ekneg54@users.noreply.github.com> Date: Fri, 13 Oct 2023 09:57:41 +0200 Subject: [PATCH] add option to ignore missing fields in field_manager (#457) * Fix Changelog --------- Co-authored-by: dtrai2 <95028228+dtrai2@users.noreply.github.com> --- CHANGELOG.md | 16 ++--- logprep/abc/processor.py | 2 + logprep/processor/amides/rule.py | 3 + logprep/processor/calculator/rule.py | 4 ++ logprep/processor/concatenator/processor.py | 6 +- logprep/processor/datetime_extractor/rule.py | 2 + logprep/processor/dissector/processor.py | 8 ++- .../processor/domain_label_extractor/rule.py | 3 +- logprep/processor/domain_resolver/rule.py | 1 + logprep/processor/field_manager/processor.py | 7 +- logprep/processor/field_manager/rule.py | 7 ++ logprep/processor/geoip_enricher/rule.py | 1 + logprep/processor/grokker/processor.py | 8 ++- logprep/processor/ip_informer/processor.py | 2 + logprep/processor/key_checker/rule.py | 1 + logprep/processor/list_comparison/rule.py | 1 + logprep/processor/requester/rule.py | 1 + .../selective_extractor/processor.py | 1 + logprep/processor/selective_extractor/rule.py | 3 + logprep/processor/string_splitter/rule.py | 1 + logprep/processor/timestamp_differ/rule.py | 1 + logprep/processor/timestamper/rule.py | 1 + .../processor/calculator/test_calculator.py | 13 ++++ .../concatenator/test_concatenator.py | 21 ++++++ .../processor/dissector/test_dissector.py | 18 +++++ .../field_manager/test_field_manager.py | 70 +++++++++++++++++++ tests/unit/processor/grokker/test_grokker.py | 29 ++++++++ .../processor/ip_informer/test_ip_informer.py | 44 +++++++++++- .../test_selective_extractor.py | 37 ++++++++++ 29 files changed, 290 insertions(+), 22 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index ad6fae82d..83fe1a166 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,24 +11,14 @@ * add a preprocessor to enrich by systems env variables * add option to define rules inline in pipeline config under processor configs `generic_rules` or `specific_rules` - -### Improvements - -* `pre_detector` processor now adds the field `creation_timestamp` to pre-detections. -It contains the time at which a pre-detection was created by the processor. -* add `prometheus` and `grafana` to the quickstart setup to support development -* reimplemented kafka input connector - - move kafka config options to `kafka_config` dictionary - -### Features - +* add option to `field_manager` to ignore missing source fields to suppress warnings and failure tags +* add ignore_missing_source_fields behavior to `calculator`, `concatenator`, `dissector`, `grokker`, `ip_informer`, `selective_extractor` * kafka input connector - implemented manual commit behaviour if `enable.auto.commit: false` - implemented on_commit callback to check for errors during commit - implemented statistics callback to collect metrics from underlying librdkafka library - implemented per partition offset metrics - get logs and handle errors from underlying librdkafka library - * kafka output connector - implemented statistics callback to collect metrics from underlying librdkafka library - get logs and handle errors from underlying librdkafka library @@ -37,8 +27,10 @@ It contains the time at which a pre-detection was created by the processor. * `pre_detector` processor now adds the field `creation_timestamp` to pre-detections. It contains the time at which a pre-detection was created by the processor. +* add `prometheus` and `grafana` to the quickstart setup to support development * provide confluent kafka test setup to run tests against a real kafka cluster + ### Bugfix * fix CVE-2023-37920 Removal of e-Tugra root certificate diff --git a/logprep/abc/processor.py b/logprep/abc/processor.py index a06e8d9e7..997d74224 100644 --- a/logprep/abc/processor.py +++ b/logprep/abc/processor.py @@ -324,6 +324,8 @@ def _has_missing_values(self, event, rule, source_field_dict): dict(filter(lambda x: x[1] in [None, ""], source_field_dict.items())).keys() ) if missing_fields: + if rule.ignore_missing_fields: + return True error = BaseException(f"{self.name}: no value for fields: {missing_fields}") self._handle_warning_error(event, rule, error) return True diff --git a/logprep/processor/amides/rule.py b/logprep/processor/amides/rule.py index 242cde3f4..804d88d80 100644 --- a/logprep/processor/amides/rule.py +++ b/logprep/processor/amides/rule.py @@ -54,3 +54,6 @@ class Config(FieldManagerRule.Config): ) target_field: str = field(validator=validators.instance_of(str), default="amides") mapping: dict = field(default="", init=False, repr=False, eq=False) + ignore_missing_fields: bool = field( + init=False, repr=False, eq=False, default=False, validator=validators.instance_of(bool) + ) diff --git a/logprep/processor/calculator/rule.py b/logprep/processor/calculator/rule.py index 8d5c8639e..bb27c5870 100644 --- a/logprep/processor/calculator/rule.py +++ b/logprep/processor/calculator/rule.py @@ -112,6 +112,10 @@ class Config(FieldManagerRule.Config): """ timeout: int = field(validator=validators.instance_of(int), converter=int, default=1) """The maximum time in seconds for the calculation. Defaults to :code:`1`""" + ignore_missing_fields: bool = field(validator=validators.instance_of(bool), default=False) + """If set to :code:`True` missing fields will be ignored, no warning is logged, + and the event is not tagged with the a failure tag. As soon as one field is missing + no calculation is performed at all. Defaults to :code:`False`""" mapping: dict = field(default="", init=False, repr=False, eq=False) def __attrs_post_init__(self): diff --git a/logprep/processor/concatenator/processor.py b/logprep/processor/concatenator/processor.py index 9a7ba2b6d..3e50a6232 100644 --- a/logprep/processor/concatenator/processor.py +++ b/logprep/processor/concatenator/processor.py @@ -26,8 +26,8 @@ .. automodule:: logprep.processor.concatenator.rule """ -from logprep.abc.processor import Processor from logprep.processor.concatenator.rule import ConcatenatorRule +from logprep.processor.field_manager.processor import FieldManager from logprep.util.helper import get_dotted_field_value @@ -38,7 +38,7 @@ def __init__(self, name: str, message: str): super().__init__(f"Concatenator ({name}): {message}") -class Concatenator(Processor): +class Concatenator(FieldManager): """Concatenates a list of source fields into a new target field.""" rule_class = ConcatenatorRule @@ -56,11 +56,11 @@ def _apply_rules(self, event, rule: ConcatenatorRule): rule : Currently applied concatenator rule. """ - source_field_values = [] for source_field in rule.source_fields: field_value = get_dotted_field_value(event, source_field) source_field_values.append(field_value) + self._handle_missing_fields(event, rule, rule.source_fields, source_field_values) source_field_values = [field for field in source_field_values if field is not None] target_value = f"{rule.separator}".join(source_field_values) diff --git a/logprep/processor/datetime_extractor/rule.py b/logprep/processor/datetime_extractor/rule.py index 690d6e3c6..f52ad6a34 100644 --- a/logprep/processor/datetime_extractor/rule.py +++ b/logprep/processor/datetime_extractor/rule.py @@ -42,6 +42,7 @@ class Config(FieldManagerRule.Config): source_fields: list = field( validator=[ validators.instance_of(list), + validators.max_len(1), validators.deep_iterable(member_validator=validators.instance_of(str)), ], ) @@ -49,3 +50,4 @@ class Config(FieldManagerRule.Config): target_field: str = field(validator=validators.instance_of(str)) """The field where to write the processed values to. """ mapping: dict = field(default="", init=False, repr=False, eq=False) + ignore_missing_fields: bool = field(default=False, init=False, repr=False, eq=False) diff --git a/logprep/processor/dissector/processor.py b/logprep/processor/dissector/processor.py index da6e86065..9fcfb67c0 100644 --- a/logprep/processor/dissector/processor.py +++ b/logprep/processor/dissector/processor.py @@ -29,12 +29,12 @@ """ from typing import Callable, List, Tuple -from logprep.abc.processor import Processor from logprep.processor.dissector.rule import DissectorRule -from logprep.util.helper import get_dotted_field_value, add_field_to +from logprep.processor.field_manager.processor import FieldManager +from logprep.util.helper import add_field_to, get_dotted_field_value -class Dissector(Processor): +class Dissector(FieldManager): """A processor that tokenizes field values to new fields and converts datatypes""" rule_class = DissectorRule @@ -67,6 +67,8 @@ def _get_mappings(self, event, rule) -> List[Tuple[Callable, dict, str, str, str current_field = source_field loop_content = get_dotted_field_value(event, current_field) if loop_content is None: + if rule.ignore_missing_fields: + continue error = BaseException( f"dissector: mapping field '{source_field}' does not exist" ) diff --git a/logprep/processor/domain_label_extractor/rule.py b/logprep/processor/domain_label_extractor/rule.py index 858699c15..b9f311284 100644 --- a/logprep/processor/domain_label_extractor/rule.py +++ b/logprep/processor/domain_label_extractor/rule.py @@ -55,7 +55,7 @@ :inherited-members: :noindex: """ -from attr import field, validators, define +from attr import define, field, validators from logprep.processor.field_manager.rule import FieldManagerRule @@ -75,3 +75,4 @@ class Config(FieldManagerRule.Config): ) """The fields from where to get the values which should be processed.""" mapping: dict = field(default="", init=False, repr=False, eq=False) + ignore_missing_fields: bool = field(default=False, init=False, repr=False, eq=False) diff --git a/logprep/processor/domain_resolver/rule.py b/logprep/processor/domain_resolver/rule.py index 08e3f2f76..8daaf7ab9 100644 --- a/logprep/processor/domain_resolver/rule.py +++ b/logprep/processor/domain_resolver/rule.py @@ -45,3 +45,4 @@ class Config(FieldManagerRule.Config): ) """The field where to write the processor output to. Defaults to :code:`resovled_ip`""" mapping: dict = field(default="", init=False, repr=False, eq=False) + ignore_missing_fields: bool = field(default=False, init=False, repr=False, eq=False) diff --git a/logprep/processor/field_manager/processor.py b/logprep/processor/field_manager/processor.py index ed1c0a3fc..7933a970c 100644 --- a/logprep/processor/field_manager/processor.py +++ b/logprep/processor/field_manager/processor.py @@ -137,10 +137,15 @@ def _overwrite_with_list_from_source_field_values(self, *args): add_and_overwrite(event, target_field, target_field_value) def _handle_missing_fields(self, event, rule, source_fields, field_values): + if rule.ignore_missing_fields: + return False if None in field_values: error = self._get_missing_fields_error(source_fields, field_values) self._handle_warning_error( - event, rule, error, failure_tags=["_field_manager_missing_field_warning"] + event, + rule, + error, + failure_tags=[f"_{self.rule_class.rule_type}_missing_field_warning"], ) return True return False diff --git a/logprep/processor/field_manager/rule.py b/logprep/processor/field_manager/rule.py index 97133c3f4..9f68deee4 100644 --- a/logprep/processor/field_manager/rule.py +++ b/logprep/processor/field_manager/rule.py @@ -125,6 +125,9 @@ class Config(Rule.Config): If the target field does not exist, a new field will be added with the source field value as list. Defaults to :code:`False`. """ + ignore_missing_fields: bool = field(validator=validators.instance_of(bool), default=False) + """If set to :code:`True` missing fields will be ignored, no warning is logged and the event + is not tagged with the failure tag. Defaults to :code:`False`""" def __attrs_post_init__(self): # ensures no split operations during processing @@ -161,4 +164,8 @@ def overwrite_target(self): def extend_target_list(self): return self._config.extend_target_list + @property + def ignore_missing_fields(self): + return self._config.ignore_missing_fields + # pylint: enable=missing-function-docstring diff --git a/logprep/processor/geoip_enricher/rule.py b/logprep/processor/geoip_enricher/rule.py index fa1ed0448..475b8e75d 100644 --- a/logprep/processor/geoip_enricher/rule.py +++ b/logprep/processor/geoip_enricher/rule.py @@ -92,6 +92,7 @@ class Config(FieldManagerRule.Config): description: '...' """ mapping: dict = field(default="", init=False, repr=False, eq=False) + ignore_missing_fields: bool = field(default=False, init=False, repr=False, eq=False) @property def customize_target_subfields(self) -> dict: # pylint: disable=missing-function-docstring diff --git a/logprep/processor/grokker/processor.py b/logprep/processor/grokker/processor.py index 33a852ed3..c072c49f0 100644 --- a/logprep/processor/grokker/processor.py +++ b/logprep/processor/grokker/processor.py @@ -37,7 +37,11 @@ from attrs import define, field, validators from logprep.abc.processor import Processor -from logprep.processor.base.exceptions import FieldExistsWarning, ProcessingWarning, ProcessingError +from logprep.processor.base.exceptions import ( + FieldExistsWarning, + ProcessingError, + ProcessingWarning, +) from logprep.processor.grokker.rule import GrokkerRule from logprep.util.getter import GetterFactory from logprep.util.helper import add_field_to, get_dotted_field_value @@ -66,6 +70,8 @@ def _apply_rules(self, event: dict, rule: GrokkerRule): for dotted_field, grok in rule.actions.items(): field_value = get_dotted_field_value(event, dotted_field) if field_value is None: + if rule.ignore_missing_fields: + continue error = BaseException(f"{self.name}: missing source_field: '{dotted_field}'") self._handle_warning_error(event=event, rule=rule, error=error) continue diff --git a/logprep/processor/ip_informer/processor.py b/logprep/processor/ip_informer/processor.py index 4d213e3e6..d589eb40e 100644 --- a/logprep/processor/ip_informer/processor.py +++ b/logprep/processor/ip_informer/processor.py @@ -45,6 +45,8 @@ class IpInformer(FieldManager): rule_class = IpInformerRule def _apply_rules(self, event: dict, rule: IpInformerRule) -> None: + source_field_values = self._get_field_values(event, rule.source_fields) + self._handle_missing_fields(event, rule, rule.source_fields, source_field_values) self._processing_warnings = [] ip_address_list = self._get_flat_ip_address_list(event, rule) results = self._get_results(ip_address_list, rule) diff --git a/logprep/processor/key_checker/rule.py b/logprep/processor/key_checker/rule.py index 25a17eed5..ee1cc1340 100644 --- a/logprep/processor/key_checker/rule.py +++ b/logprep/processor/key_checker/rule.py @@ -70,3 +70,4 @@ class Config(FieldManagerRule.Config): target_field: str = field(validator=validators.instance_of(str)) """The field where to write the processed values to. """ mapping: dict = field(default="", init=False, repr=False, eq=False) + ignore_missing_fields: bool = field(default=False, init=False, repr=False, eq=False) diff --git a/logprep/processor/list_comparison/rule.py b/logprep/processor/list_comparison/rule.py index ff88b6f1b..fcb6df094 100644 --- a/logprep/processor/list_comparison/rule.py +++ b/logprep/processor/list_comparison/rule.py @@ -70,6 +70,7 @@ class Config(FieldManagerRule.Config): e.g., :code:`${}`. The special key :code:`${LOGPREP_LIST}` will be filled by this processor. """ mapping: dict = field(default="", init=False, repr=False, eq=False) + ignore_missing_fields: bool = field(default=False, init=False, repr=False, eq=False) def __init__(self, filter_rule: FilterExpression, config: dict): super().__init__(filter_rule, config) diff --git a/logprep/processor/requester/rule.py b/logprep/processor/requester/rule.py index a09514cfa..a2a68ad3b 100644 --- a/logprep/processor/requester/rule.py +++ b/logprep/processor/requester/rule.py @@ -175,6 +175,7 @@ class Config(FieldManagerRule.Config): cert: str = field(validator=validators.instance_of(str), default="") """(Optional) SSL client certificate as path to ssl client cert file (.pem).""" mapping: dict = field(default="", init=False, repr=False, eq=False) + ignore_missing_fields: bool = field(default=False, init=False, repr=False, eq=False) def __attrs_post_init__(self): url_fields = re.findall(FIELD_PATTERN, self.url) diff --git a/logprep/processor/selective_extractor/processor.py b/logprep/processor/selective_extractor/processor.py index 3c6888924..05d503f17 100644 --- a/logprep/processor/selective_extractor/processor.py +++ b/logprep/processor/selective_extractor/processor.py @@ -79,6 +79,7 @@ def _apply_rules(self, event: dict, rule: SelectiveExtractorRule): """ flattened_fields = get_source_fields_dict(event, rule) + self._handle_missing_fields(event, rule, flattened_fields.keys(), flattened_fields.values()) flattened_fields = { dotted_field: content for dotted_field, content in flattened_fields.items() diff --git a/logprep/processor/selective_extractor/rule.py b/logprep/processor/selective_extractor/rule.py index 54a159874..54f636372 100644 --- a/logprep/processor/selective_extractor/rule.py +++ b/logprep/processor/selective_extractor/rule.py @@ -155,6 +155,9 @@ class Config(FieldManagerRule.Config): extract_from_file: str = field(validator=validators.instance_of(str), default="", eq=False) """The path or url to a file with a flat list of fields to extract. For string format see :ref:`getters`.""" + ignore_missing_fields: bool = field(validator=validators.instance_of(bool), default=True) + """If set to :code:`True` missing fields will be ignored, no warning is logged and the event + is not tagged with the failure tag. Defaults to :code:`True`""" target_field: str = field(default="", init=False, repr=False, eq=False) overwrite_target: bool = field(default=False, init=False, repr=False, eq=False) extend_target_list: bool = field(default=False, init=False, repr=False, eq=False) diff --git a/logprep/processor/string_splitter/rule.py b/logprep/processor/string_splitter/rule.py index 48f031448..84e06e2a5 100644 --- a/logprep/processor/string_splitter/rule.py +++ b/logprep/processor/string_splitter/rule.py @@ -56,6 +56,7 @@ class Config(FieldManagerRule.Config): delimeter: str = field(validator=validators.instance_of(str), default=" ") """The delimeter for splitting. Defaults to whitespace""" mapping: dict = field(default="", init=False, repr=False, eq=False) + ignore_missing_fields: bool = field(default=False, init=False, repr=False, eq=False) @property def delimeter(self): diff --git a/logprep/processor/timestamp_differ/rule.py b/logprep/processor/timestamp_differ/rule.py index 4846f1666..738b0f77f 100644 --- a/logprep/processor/timestamp_differ/rule.py +++ b/logprep/processor/timestamp_differ/rule.py @@ -86,6 +86,7 @@ class Config(FieldManagerRule.Config): """(Optional) Specifies whether the unit (s, ms, ns) should be part of the output. Defaults to :code:`False`.""" mapping: dict = field(default="", init=False, repr=False, eq=False) + ignore_missing_fields: bool = field(default=False, init=False, repr=False, eq=False) def __attrs_post_init__(self): field_format_str = re.findall(FIELD_PATTERN, self.diff) diff --git a/logprep/processor/timestamper/rule.py b/logprep/processor/timestamper/rule.py index 2eae103bd..a58b62323 100644 --- a/logprep/processor/timestamper/rule.py +++ b/logprep/processor/timestamper/rule.py @@ -107,6 +107,7 @@ class Config(FieldManagerRule.Config): ) """ timezone for target_field. defaults to :code:`UTC`""" mapping: dict = field(default="", init=False, repr=False, eq=False) + ignore_missing_fields: bool = field(default=False, init=False, repr=False, eq=False) @property def source_format(self): diff --git a/tests/unit/processor/calculator/test_calculator.py b/tests/unit/processor/calculator/test_calculator.py index eee25ea37..f613bc6f4 100644 --- a/tests/unit/processor/calculator/test_calculator.py +++ b/tests/unit/processor/calculator/test_calculator.py @@ -158,6 +158,19 @@ {"duration": "0.01"}, {"duration": 10000.0}, ), + ( + "Ignore missing source fields", + { + "filter": "duration", + "calculator": { + "calc": "${missing_field} * 10e5", + "target_field": "duration", + "ignore_missing_fields": True, + }, + }, + {"duration": "0.01"}, + {"duration": "0.01"}, + ), ] diff --git a/tests/unit/processor/concatenator/test_concatenator.py b/tests/unit/processor/concatenator/test_concatenator.py index 6e613d6d5..3e6ad9979 100644 --- a/tests/unit/processor/concatenator/test_concatenator.py +++ b/tests/unit/processor/concatenator/test_concatenator.py @@ -56,6 +56,7 @@ def specific_rules_dirs(self): "separator": "-", "overwrite_target": False, "delete_source_fields": False, + "ignore_missing_fields": True, }, }, {"field": {"a": "first", "b": "second"}}, @@ -143,6 +144,26 @@ def specific_rules_dirs(self): }, {"field": {"c": "another one"}, "target_field": "first-second"}, ), + ( + "ignore missing fields", + { + "filter": "field.a", + "concatenator": { + "source_fields": ["field.a", "field.b", "other_field.c"], + "target_field": "target_field", + "separator": "-", + "overwrite_target": False, + "delete_source_fields": False, + "ignore_missing_fields": True, + }, + }, + {"field": {"a": "first"}, "other_field": {"c": "third"}}, + { + "field": {"a": "first"}, + "other_field": {"c": "third"}, + "target_field": "first-third", + }, + ), ], ) def test_for_expected_output(self, test_case, rule, document, expected_output): diff --git a/tests/unit/processor/dissector/test_dissector.py b/tests/unit/processor/dissector/test_dissector.py index d724de0fc..769d9a0d9 100644 --- a/tests/unit/processor/dissector/test_dissector.py +++ b/tests/unit/processor/dissector/test_dissector.py @@ -620,6 +620,24 @@ "sys_type": "system_monitor", }, ), + ( + "ignore missing fields", + { + "filter": "message", + "dissector": { + "mapping": { + "message": "%{sys_type}", + "does_not_exist": "%{sys_type}", + }, + "ignore_missing_fields": True, + }, + }, + {"message": "system_monitor"}, + { + "message": "system_monitor", + "sys_type": "system_monitor", + }, + ), ] failure_test_cases = [ # testcase, rule, event, expected ( diff --git a/tests/unit/processor/field_manager/test_field_manager.py b/tests/unit/processor/field_manager/test_field_manager.py index e8080190b..dd4ea91ca 100644 --- a/tests/unit/processor/field_manager/test_field_manager.py +++ b/tests/unit/processor/field_manager/test_field_manager.py @@ -402,6 +402,24 @@ "merged": ["a", "b"], }, ), + ( + "Ignore missing fields: No warning and no failure tag if source field is missing", + { + "filter": "field.a", + "field_manager": { + "mapping": { + "field.a": "target_field", + "does.not.exists": "target_field", + }, + "ignore_missing_fields": True, + }, + }, + {"field": {"a": "first", "b": "second"}}, + { + "field": {"a": "first", "b": "second"}, + "target_field": "first", + }, + ), ] failure_test_cases = [ @@ -543,3 +561,55 @@ def test_process_raises_processing_warning_with_missing_fields(self, caplog): assert re.match( r".*ProcessingWarning.*missing source_fields: \['does.not.exists'\]", caplog.text ) + + def test_process_raises_processing_warning_with_missing_fields_but_event_is_processed( + self, caplog + ): + rule = { + "filter": "field.a", + "field_manager": { + "mapping": { + "field.a": "target_field", + "does.not.exists": "target_field", + } + }, + } + self._load_specific_rule(rule) + document = {"field": {"a": "first", "b": "second"}} + expected = { + "field": {"a": "first", "b": "second"}, + "target_field": "first", + "tags": ["_field_manager_missing_field_warning"], + } + with caplog.at_level(logging.WARNING): + self.object.process(document) + assert re.match( + r".*ProcessingWarning.*missing source_fields: \['does.not.exists'\]", caplog.text + ) + assert document == expected + + def test_process_dos_not_raises_processing_warning_with_missing_fields_and_event_is_processed( + self, caplog + ): + rule = { + "filter": "field.a", + "field_manager": { + "mapping": { + "field.a": "target_field", + "does.not.exists": "target_field", + }, + "ignore_missing_fields": True, + }, + } + self._load_specific_rule(rule) + document = {"field": {"a": "first", "b": "second"}} + expected = { + "field": {"a": "first", "b": "second"}, + "target_field": "first", + } + with caplog.at_level(logging.WARNING): + self.object.process(document) + assert not re.match( + r".*ProcessingWarning.*missing source_fields: \['does.not.exists'\]", caplog.text + ) + assert document == expected diff --git a/tests/unit/processor/grokker/test_grokker.py b/tests/unit/processor/grokker/test_grokker.py index f010a7420..229334162 100644 --- a/tests/unit/processor/grokker/test_grokker.py +++ b/tests/unit/processor/grokker/test_grokker.py @@ -281,6 +281,35 @@ "action": "42", }, ), + ( + "ignore_missing_fields", + { + "filter": "winlog.event_id: 123456789", + "grokker": { + "mapping": { + "winlog.event_data.normalize me!": "%{IP:some_ip} %{NUMBER:port:int}", + "this_field_does_not_exist": "%{IP:some_ip} %{NUMBER:port:int}", + }, + "ignore_missing_fields": True, + }, + }, + { + "winlog": { + "api": "wineventlog", + "event_id": 123456789, + "event_data": {"normalize me!": "123.123.123.123 1234"}, + } + }, + { + "winlog": { + "api": "wineventlog", + "event_id": 123456789, + "event_data": {"normalize me!": "123.123.123.123 1234"}, + }, + "some_ip": "123.123.123.123", + "port": 1234, + }, + ), ] failure_test_cases = [ diff --git a/tests/unit/processor/ip_informer/test_ip_informer.py b/tests/unit/processor/ip_informer/test_ip_informer.py index 704a58f4a..ecff57fe4 100644 --- a/tests/unit/processor/ip_informer/test_ip_informer.py +++ b/tests/unit/processor/ip_informer/test_ip_informer.py @@ -226,6 +226,27 @@ }, }, ), + ( + "ignore missing fields", + { + "filter": "ip", + "ip_informer": { + "source_fields": ["ip", "does_not_exist"], + "target_field": "result", + "properties": ["teredo"], + "ignore_missing_fields": True, + }, + }, + {"ip": "192.168.5.1"}, + { + "ip": "192.168.5.1", + "result": { + "192.168.5.1": { + "teredo": False, + } + }, + }, + ), ] # testcase, rule, event, expected failure_test_cases = [ @@ -241,6 +262,27 @@ {"ip": "not an ip"}, {"ip": "not an ip", "tags": ["_ip_informer_failure"]}, ), + ( + "missing fields", + { + "filter": "ip", + "ip_informer": { + "source_fields": ["ip", "does_not_exist"], + "target_field": "result", + "properties": ["teredo"], + }, + }, + {"ip": "192.168.5.1"}, + { + "ip": "192.168.5.1", + "result": { + "192.168.5.1": { + "teredo": False, + } + }, + "tags": ["_ip_informer_missing_field_warning"], + }, + ), ( "single field is not an ip address and other field is an ip address", { @@ -324,7 +366,7 @@ { "filter": "ip", "ip_informer": { - "source_fields": ["ip", "notip"], + "source_fields": ["ip"], "target_field": "result", }, }, diff --git a/tests/unit/processor/selective_extractor/test_selective_extractor.py b/tests/unit/processor/selective_extractor/test_selective_extractor.py index 83b16768c..e292ca5ad 100644 --- a/tests/unit/processor/selective_extractor/test_selective_extractor.py +++ b/tests/unit/processor/selective_extractor/test_selective_extractor.py @@ -142,3 +142,40 @@ def test_process_clears_internal_filtered_events_list_before_every_event(self): assert len(self.object._extra_data) == 1 _ = self.object.process(document) assert len(self.object._extra_data) == 1 + + def test_process_extracts_dotted_fields_complains_on_missing_fields(self): + rule = { + "filter": "message", + "selective_extractor": { + "source_fields": ["other.message", "not.exists", "message"], + "outputs": [{"opensearch": "index"}], + "ignore_missing_fields": False, + }, + } + self._load_specific_rule(rule) + document = {"message": "test_message", "other": {"message": "my message value"}} + expected = { + "message": "test_message", + "other": {"message": "my message value"}, + "tags": ["_selective_extractor_missing_field_warning"], + } + self.object.process(document) + assert document == expected + + def test_process_extracts_dotted_fields_and_ignores_missing_fields(self): + rule = { + "filter": "message", + "selective_extractor": { + "source_fields": ["other.message", "message", "not.exists"], + "outputs": [{"opensearch": "index"}], + "ignore_missing_fields": True, + }, + } + self._load_specific_rule(rule) + document = {"message": "test_message", "other": {"message": "my message value"}} + expected = { + "message": "test_message", + "other": {"message": "my message value"}, + } + self.object.process(document) + assert document == expected