Skip to content

Commit

Permalink
Fix ReplaceStringTransformation and SigmaString plain string conversion
Browse files Browse the repository at this point in the history
* Reverted ReplaceStringTransformation to work on the whole SigmaString as plain string representation because backends/pipelines rely on this feature.
* The former ReplaceStringTransformation misbehavior of "consuming" backslashes is mitigated by doubling backslashes that have no escaping semantics.
* SigmaString.to_plain() now distinguishes between regex and string context and applies different escaping in each.
* SigmaDetectionItem.to_plain() now also distinguishes between strings and regex.
  • Loading branch information
thomaspatzke committed Sep 8, 2024
1 parent 6a1bd60 commit df1b714
Show file tree
Hide file tree
Showing 5 changed files with 79 additions and 11 deletions.
10 changes: 6 additions & 4 deletions sigma/processing/transformations.py
Original file line number Diff line number Diff line change
Expand Up @@ -762,7 +762,8 @@ def apply(
class ReplaceStringTransformation(StringValueTransformation):
"""
Replace string part matched by regular expression with replacement string that can reference
capture groups. It operates on the plain string parts of the SigmaString value.
capture groups. It operates on the whole string representation of the SigmaString value.
Therefore, it is able to replace special characters like wildcards too.
This is basically an interface to re.sub() and can use all features available there.
"""
Expand All @@ -781,9 +782,10 @@ def __post_init__(self):

def apply_string_value(self, field: str, val: SigmaString) -> SigmaString:
if isinstance(val, SigmaString):
return val.map_parts(
lambda s: self.re.sub(self.replacement, s), lambda p: isinstance(p, str)
)
sigma_string_plain = str(val)
replaced = self.re.sub(self.replacement, sigma_string_plain)
postprocessed_backslashes = re.sub(r"\\(?![*?])", r"\\\\", replaced)
return SigmaString(postprocessed_backslashes)


@dataclass
Expand Down
17 changes: 15 additions & 2 deletions sigma/rule.py
Original file line number Diff line number Diff line change
Expand Up @@ -432,9 +432,22 @@ def to_plain(self) -> Union[Dict[str, Union[str, int, None]], List[str]]:
)

if len(self.original_value) > 1:
value = [value.to_plain() for value in self.original_value]
value = [
(
value.to_plain(True)
if isinstance(value, SigmaString)
and SigmaRegularExpressionModifier in self.modifiers
else value.to_plain()
)
for value in self.original_value
]
else:
value = self.original_value[0].to_plain()
value = (
self.original_value[0].to_plain(True)
if isinstance(self.original_value[0], SigmaString)
and SigmaRegularExpressionModifier in self.modifiers
else self.original_value[0].to_plain()
)

if (
self.is_keyword() and len(self.modifiers) == 0
Expand Down
15 changes: 11 additions & 4 deletions sigma/types.py
Original file line number Diff line number Diff line change
Expand Up @@ -355,10 +355,17 @@ def __eq__(self, other: Union["SigmaString", str]) -> bool:
)

def __str__(self) -> str:
return self.to_plain()

def to_plain(self, regex: bool = False) -> str:
"""Generate string representation of SigmaString with or without regex escaping."""
rs = ""
for s in self.s:
if isinstance(s, str):
rs += s
if regex:
rs += s
else:
rs += s.replace("*", "\\*").replace("?", "\\?")
elif isinstance(s, SpecialChars):
rs += special_char_mapping[s]
elif isinstance(s, Placeholder):
Expand All @@ -372,9 +379,9 @@ def __str__(self) -> str:
def __repr__(self) -> str:
return str(f"SigmaString({self.s})")

def to_plain(self):
"""Return plain string representation of SigmaString, equivalent to converting it with str()."""
return str(self)
def to_plain_regex(self) -> str:
    """
    Return plain string representation of SigmaString with reduced escaping.

    This is a convenience wrapper for ``to_plain(regex=True)``: in regex context the
    plain string parts are emitted unchanged, whereas in string context literal ``*``
    and ``?`` characters are backslash-escaped.
    """
    # Delegate to to_plain(), which implements the regex/string distinction.
    # NOTE(review): the former call target `_stringify` is not defined in the visible
    # part of this class - confirm nothing else relies on that name.
    return self.to_plain(regex=True)

def __bytes__(self) -> bytes:
return str(self).encode()
Expand Down
34 changes: 33 additions & 1 deletion tests/test_processing_transformations.py
Original file line number Diff line number Diff line change
Expand Up @@ -1348,14 +1348,46 @@ def test_replace_string_specials(dummy_pipeline):
[
SigmaDetection(
[
SigmaDetectionItem("field1", [], [SigmaString("*/value")]),
SigmaDetectionItem("field1", [], [SigmaString("/value")]),
SigmaDetectionItem("field2", [], [SigmaNumber(123)]),
]
)
]
)


def test_replace_string_backslashes(dummy_pipeline):
    """Replacing "value" with "test" must leave backslashes and escaped wildcards intact."""
    field_values = {
        "field1": r"backslash\\value",
        "field2": r"backslash\\\\value",
        "field3": r"plainwildcard\*value",
    }
    rule = SigmaRule.from_dict(
        {
            "title": "Test",
            "logsource": {"category": "test"},
            "detection": {
                "test": [field_values],
                "condition": "test",
            },
        }
    )

    ReplaceStringTransformation("value", "test").apply(dummy_pipeline, rule)

    # Only the "value" suffix may change; every backslash sequence has to survive as-is.
    expected_items = [
        SigmaDetectionItem("field1", [], [SigmaString(r"backslash\\test")]),
        SigmaDetectionItem("field2", [], [SigmaString(r"backslash\\\\test")]),
        SigmaDetectionItem("field3", [], [SigmaString(r"plainwildcard\*test")]),
    ]
    expected_detection = SigmaDetection([SigmaDetection(expected_items)])
    assert rule.detection.detections["test"] == expected_detection


def test_replace_string_invalid():
with pytest.raises(SigmaRegularExpressionError, match="Regular expression.*invalid"):
ReplaceStringTransformation("*", "test")
Expand Down
14 changes: 14 additions & 0 deletions tests/test_types.py
Original file line number Diff line number Diff line change
Expand Up @@ -320,6 +320,20 @@ def test_strings_to_string():
assert str(SigmaString("test*?")) == "test*?"


def test_strings_with_plain_wildcards_to_string():
    """Escaped (plain) wildcards must survive a SigmaString -> str -> SigmaString round trip."""
    # Raw string literal: "\*" and "\?" are not valid escape sequences in a normal
    # string literal and make Python emit an invalid-escape-sequence warning. The raw
    # form has the identical value (backslash followed by the wildcard character).
    plain_s = r"value\*with\?plain*wild?cards"
    s = SigmaString(plain_s)

    # Escaped wildcards end up as literal characters in the plain parts, while the
    # unescaped ones are parsed into special characters.
    assert s.s == (
        "value*with?plain",
        SpecialChars.WILDCARD_MULTI,
        "wild",
        SpecialChars.WILDCARD_SINGLE,
        "cards",
    )
    # Converting back to a string has to re-escape the literal wildcards ...
    assert str(s) == plain_s
    # ... so that re-parsing the string yields an equal SigmaString.
    assert SigmaString(str(s)) == s


def test_strings_with_placeholder_to_string():
assert str(SigmaString("te%var%st").insert_placeholders()) == "te%var%st"

Expand Down

0 comments on commit df1b714

Please sign in to comment.