Skip to content

Commit

Permalink
fix(iast): add code to filter out ddtrace stuff from dir() on patched…
Browse files Browse the repository at this point in the history
… modules [backport 2.17] (#11504)

Backport a9a6ad7 from #11490 to 2.17.

Signed-off-by: Juanjo Alvarez <[email protected]>

## Description

While testing on dd-source CI we found an issue where a module was doing
a `dir(other_module)` and the changed results from the patched module
were breaking stuff (because our patching would add `ddtrace_aspects`,
`ddtrace_sink_points`, et cetera to the results and the original module
was expecting all `other_module` symbols to have some members like
`id`).

This PRs fixes this problem by:

- Creating a custom `__dir__` function (that will override any
pre-existing ones) removing from the results all the symbols that we add
ourselves while patching.
- Renaming all added `ddtrace` symbols to `__ddtrace`.

Also:
- Adds a `_DD_IAST_NO_DIR_PATCH` config var to disable the wrapping of
the patched module `__dir__` functions in case the user have some
side-effect problem.
- The return type of `visit_ast` has been fixed (it wrongly was `str`
while is in fact a `ast.Module` type).

## Checklist
- [X] PR author has checked that all the criteria below are met
- The PR description includes an overview of the change
- The PR description articulates the motivation for the change
- The change includes tests OR the PR description describes a testing
strategy
- The PR description notes risks associated with the change, if any
- Newly-added code is easy to change
- The change follows the [library release note
guidelines](https://ddtrace.readthedocs.io/en/stable/releasenotes.html)
- The change includes or references documentation updates if necessary
- Backport labels are set (if
[applicable](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting))

## Reviewer Checklist
- [x] Reviewer has checked that all the criteria below are met 
- Title is accurate
- All changes are related to the pull request's stated goal
- Avoids breaking
[API](https://ddtrace.readthedocs.io/en/stable/versioning.html#interfaces)
changes
- Testing strategy adequately addresses listed risks
- Newly-added code is easy to change
- Release note makes sense to a user of the library
- If necessary, author has acknowledged and discussed the performance
implications of this PR as reported in the benchmarks PR comment
- Backport labels are set in a manner that is consistent with the
[release branch maintenance
policy](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting)

Co-authored-by: Juanjo Alvarez Martinez <[email protected]>
  • Loading branch information
github-actions[bot] and juanjux authored Nov 25, 2024
1 parent f3c25f7 commit 665c44b
Show file tree
Hide file tree
Showing 12 changed files with 269 additions and 111 deletions.
2 changes: 2 additions & 0 deletions ddtrace/appsec/_constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -134,8 +134,10 @@ class IAST(metaclass=Constant_Class):
JSON: Literal["_dd.iast.json"] = "_dd.iast.json"
ENABLED: Literal["_dd.iast.enabled"] = "_dd.iast.enabled"
PATCH_MODULES: Literal["_DD_IAST_PATCH_MODULES"] = "_DD_IAST_PATCH_MODULES"
ENV_NO_DIR_PATCH: Literal["_DD_IAST_NO_DIR_PATCH"] = "_DD_IAST_NO_DIR_PATCH"
DENY_MODULES: Literal["_DD_IAST_DENY_MODULES"] = "_DD_IAST_DENY_MODULES"
SEP_MODULES: Literal[","] = ","
PATCH_ADDED_SYMBOL_PREFIX: Literal["_ddtrace_"] = "_ddtrace_"

METRICS_REPORT_LVLS = (
(TELEMETRY_DEBUG_VERBOSITY, TELEMETRY_DEBUG_NAME),
Expand Down
67 changes: 55 additions & 12 deletions ddtrace/appsec/_iast/_ast/ast_patching.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@
import os
import re
from sys import builtin_module_names
from sys import version_info
import textwrap
from types import ModuleType
from typing import Optional
from typing import Text
Expand All @@ -14,12 +16,14 @@
from ddtrace.appsec._python_info.stdlib import _stdlib_for_python_version
from ddtrace.internal.logger import get_logger
from ddtrace.internal.module import origin
from ddtrace.internal.utils.formats import asbool

from .visitor import AstVisitor


_VISITOR = AstVisitor()

_PREFIX = IAST.PATCH_ADDED_SYMBOL_PREFIX

# Prefixes for modules where IAST patching is allowed
IAST_ALLOWLIST: Tuple[Text, ...] = ("tests.appsec.iast.",)
Expand Down Expand Up @@ -278,6 +282,7 @@
"protobuf.",
"pycparser.", # this package is called when a module is imported, propagation is not needed
"pytest.", # Testing framework
"_pytest.",
"setuptools.",
"sklearn.", # Machine learning library
"sqlalchemy.orm.interfaces.", # Performance optimization
Expand Down Expand Up @@ -368,7 +373,7 @@ def visit_ast(
source_text: Text,
module_path: Text,
module_name: Text = "",
) -> Optional[str]:
) -> Optional[ast.Module]:
parsed_ast = ast.parse(source_text, module_path)
_VISITOR.update_location(filename=module_path, module_name=module_name)
modified_ast = _VISITOR.visit(parsed_ast)
Expand Down Expand Up @@ -401,23 +406,56 @@ def _remove_flask_run(text: Text) -> Text:
return new_text


def astpatch_module(module: ModuleType, remove_flask_run: bool = False) -> Tuple[str, str]:
_DIR_WRAPPER = textwrap.dedent(
f"""
def {_PREFIX}dir():
orig_dir = globals().get("{_PREFIX}orig_dir__")
if orig_dir:
# Use the original __dir__ method and filter the results
results = [name for name in orig_dir() if not name.startswith("{_PREFIX}")]
else:
# List names from the module's __dict__ and filter out the unwanted names
results = [
name for name in globals()
if not (name.startswith("{_PREFIX}") or name == "__dir__")
]
return results
def {_PREFIX}set_dir_filter():
if "__dir__" in globals():
# Store the original __dir__ method
globals()["{_PREFIX}orig_dir__"] = __dir__
# Replace the module's __dir__ with the custom one
globals()["__dir__"] = {_PREFIX}dir
{_PREFIX}set_dir_filter()
"""
)


def astpatch_module(module: ModuleType, remove_flask_run: bool = False) -> Tuple[str, Optional[ast.Module]]:
module_name = module.__name__

module_origin = origin(module)
if module_origin is None:
log.debug("astpatch_source couldn't find the module: %s", module_name)
return "", ""
return "", None

module_path = str(module_origin)
try:
if module_origin.stat().st_size == 0:
# Don't patch empty files like __init__.py
log.debug("empty file: %s", module_path)
return "", ""
return "", None
except OSError:
log.debug("astpatch_source couldn't find the file: %s", module_path, exc_info=True)
return "", ""
return "", None

# Get the file extension, if it's dll, os, pyd, dyn, dynlib: return
# If its pyc or pyo, change to .py and check that the file exists. If not,
Expand All @@ -427,30 +465,35 @@ def astpatch_module(module: ModuleType, remove_flask_run: bool = False) -> Tuple
if module_ext.lower() not in {".pyo", ".pyc", ".pyw", ".py"}:
# Probably native or built-in module
log.debug("extension not supported: %s for: %s", module_ext, module_path)
return "", ""
return "", None

with open(module_path, "r", encoding=get_encoding(module_path)) as source_file:
try:
source_text = source_file.read()
except UnicodeDecodeError:
log.debug("unicode decode error for file: %s", module_path, exc_info=True)
return "", ""
return "", None

if len(source_text.strip()) == 0:
# Don't patch empty files like __init__.py
log.debug("empty file: %s", module_path)
return "", ""
return "", None

if remove_flask_run:
source_text = _remove_flask_run(source_text)

new_source = visit_ast(
if not asbool(os.environ.get(IAST.ENV_NO_DIR_PATCH, "false")) and version_info > (3, 7):
# Add the dir filter so __ddtrace stuff is not returned by dir(module)
# does not work in 3.7 because it enters into infinite recursion
source_text += _DIR_WRAPPER

new_ast = visit_ast(
source_text,
module_path,
module_name=module_name,
)
if new_source is None:
if new_ast is None:
log.debug("file not ast patched: %s", module_path)
return "", ""
return "", None

return module_path, new_source
return module_path, new_ast
117 changes: 62 additions & 55 deletions ddtrace/appsec/_iast/_ast/visitor.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
from typing import Text
from typing import Tuple # noqa:F401

from ..._constants import IAST
from .._metrics import _set_metric_iast_instrumented_propagation
from ..constants import DEFAULT_PATH_TRAVERSAL_FUNCTIONS
from ..constants import DEFAULT_WEAK_RANDOMNESS_FUNCTIONS
Expand All @@ -22,11 +23,12 @@
PY38_PLUS = sys.version_info >= (3, 8, 0)
PY39_PLUS = sys.version_info >= (3, 9, 0)

_PREFIX = IAST.PATCH_ADDED_SYMBOL_PREFIX
CODE_TYPE_FIRST_PARTY = "first_party"
CODE_TYPE_DD = "datadog"
CODE_TYPE_SITE_PACKAGES = "site_packages"
CODE_TYPE_STDLIB = "stdlib"
TAINT_SINK_FUNCTION_REPLACEMENT = "ddtrace_taint_sinks.ast_function"
TAINT_SINK_FUNCTION_REPLACEMENT = _PREFIX + "taint_sinks.ast_function"


def _mark_avoid_convert_recursively(node):
Expand All @@ -38,71 +40,71 @@ def _mark_avoid_convert_recursively(node):

_ASPECTS_SPEC: Dict[Text, Any] = {
"definitions_module": "ddtrace.appsec._iast._taint_tracking.aspects",
"alias_module": "ddtrace_aspects",
"alias_module": _PREFIX + "aspects",
"functions": {
"StringIO": "ddtrace_aspects.stringio_aspect",
"BytesIO": "ddtrace_aspects.bytesio_aspect",
"str": "ddtrace_aspects.str_aspect",
"bytes": "ddtrace_aspects.bytes_aspect",
"bytearray": "ddtrace_aspects.bytearray_aspect",
"ddtrace_iast_flask_patch": "ddtrace_aspects.empty_func", # To avoid recursion
"StringIO": _PREFIX + "aspects.stringio_aspect",
"BytesIO": _PREFIX + "aspects.bytesio_aspect",
"str": _PREFIX + "aspects.str_aspect",
"bytes": _PREFIX + "aspects.bytes_aspect",
"bytearray": _PREFIX + "aspects.bytearray_aspect",
"ddtrace_iast_flask_patch": _PREFIX + "aspects.empty_func", # To avoid recursion
},
"stringalike_methods": {
"StringIO": "ddtrace_aspects.stringio_aspect",
"BytesIO": "ddtrace_aspects.bytesio_aspect",
"decode": "ddtrace_aspects.decode_aspect",
"join": "ddtrace_aspects.join_aspect",
"encode": "ddtrace_aspects.encode_aspect",
"extend": "ddtrace_aspects.bytearray_extend_aspect",
"upper": "ddtrace_aspects.upper_aspect",
"lower": "ddtrace_aspects.lower_aspect",
"replace": "ddtrace_aspects.replace_aspect",
"swapcase": "ddtrace_aspects.swapcase_aspect",
"title": "ddtrace_aspects.title_aspect",
"capitalize": "ddtrace_aspects.capitalize_aspect",
"casefold": "ddtrace_aspects.casefold_aspect",
"translate": "ddtrace_aspects.translate_aspect",
"format": "ddtrace_aspects.format_aspect",
"format_map": "ddtrace_aspects.format_map_aspect",
"zfill": "ddtrace_aspects.zfill_aspect",
"ljust": "ddtrace_aspects.ljust_aspect",
"split": "ddtrace_aspects.split_aspect", # Both regular split and re.split
"rsplit": "ddtrace_aspects.rsplit_aspect",
"splitlines": "ddtrace_aspects.splitlines_aspect",
"StringIO": _PREFIX + "aspects.stringio_aspect",
"BytesIO": _PREFIX + "aspects.bytesio_aspect",
"decode": _PREFIX + "aspects.decode_aspect",
"join": _PREFIX + "aspects.join_aspect",
"encode": _PREFIX + "aspects.encode_aspect",
"extend": _PREFIX + "aspects.bytearray_extend_aspect",
"upper": _PREFIX + "aspects.upper_aspect",
"lower": _PREFIX + "aspects.lower_aspect",
"replace": _PREFIX + "aspects.replace_aspect",
"swapcase": _PREFIX + "aspects.swapcase_aspect",
"title": _PREFIX + "aspects.title_aspect",
"capitalize": _PREFIX + "aspects.capitalize_aspect",
"casefold": _PREFIX + "aspects.casefold_aspect",
"translate": _PREFIX + "aspects.translate_aspect",
"format": _PREFIX + "aspects.format_aspect",
"format_map": _PREFIX + "aspects.format_map_aspect",
"zfill": _PREFIX + "aspects.zfill_aspect",
"ljust": _PREFIX + "aspects.ljust_aspect",
"split": _PREFIX + "aspects.split_aspect", # Both regular split and re.split
"rsplit": _PREFIX + "aspects.rsplit_aspect",
"splitlines": _PREFIX + "aspects.splitlines_aspect",
# re module and re.Match methods
"findall": "ddtrace_aspects.re_findall_aspect",
"finditer": "ddtrace_aspects.re_finditer_aspect",
"fullmatch": "ddtrace_aspects.re_fullmatch_aspect",
"expand": "ddtrace_aspects.re_expand_aspect",
"group": "ddtrace_aspects.re_group_aspect",
"groups": "ddtrace_aspects.re_groups_aspect",
"match": "ddtrace_aspects.re_match_aspect",
"search": "ddtrace_aspects.re_search_aspect",
"sub": "ddtrace_aspects.re_sub_aspect",
"subn": "ddtrace_aspects.re_subn_aspect",
"findall": _PREFIX + "aspects.re_findall_aspect",
"finditer": _PREFIX + "aspects.re_finditer_aspect",
"fullmatch": _PREFIX + "aspects.re_fullmatch_aspect",
"expand": _PREFIX + "aspects.re_expand_aspect",
"group": _PREFIX + "aspects.re_group_aspect",
"groups": _PREFIX + "aspects.re_groups_aspect",
"match": _PREFIX + "aspects.re_match_aspect",
"search": _PREFIX + "aspects.re_search_aspect",
"sub": _PREFIX + "aspects.re_sub_aspect",
"subn": _PREFIX + "aspects.re_subn_aspect",
},
# Replacement function for indexes and ranges
"slices": {
"index": "ddtrace_aspects.index_aspect",
"slice": "ddtrace_aspects.slice_aspect",
"index": _PREFIX + "aspects.index_aspect",
"slice": _PREFIX + "aspects.slice_aspect",
},
# Replacement functions for modules
"module_functions": {
"os.path": {
"basename": "ddtrace_aspects.ospathbasename_aspect",
"dirname": "ddtrace_aspects.ospathdirname_aspect",
"join": "ddtrace_aspects.ospathjoin_aspect",
"normcase": "ddtrace_aspects.ospathnormcase_aspect",
"split": "ddtrace_aspects.ospathsplit_aspect",
"splitext": "ddtrace_aspects.ospathsplitext_aspect",
"basename": _PREFIX + "aspects.ospathbasename_aspect",
"dirname": _PREFIX + "aspects.ospathdirname_aspect",
"join": _PREFIX + "aspects.ospathjoin_aspect",
"normcase": _PREFIX + "aspects.ospathnormcase_aspect",
"split": _PREFIX + "aspects.ospathsplit_aspect",
"splitext": _PREFIX + "aspects.ospathsplitext_aspect",
}
},
"operators": {
ast.Add: "ddtrace_aspects.add_aspect",
"INPLACE_ADD": "ddtrace_aspects.add_inplace_aspect",
"FORMAT_VALUE": "ddtrace_aspects.format_value_aspect",
ast.Mod: "ddtrace_aspects.modulo_aspect",
"BUILD_STRING": "ddtrace_aspects.build_string_aspect",
ast.Add: _PREFIX + "aspects.add_aspect",
"INPLACE_ADD": _PREFIX + "aspects.add_inplace_aspect",
"FORMAT_VALUE": _PREFIX + "aspects.format_value_aspect",
ast.Mod: _PREFIX + "aspects.modulo_aspect",
"BUILD_STRING": _PREFIX + "aspects.build_string_aspect",
},
"excluded_from_patching": {
# Key: module being patched
Expand All @@ -123,6 +125,8 @@ def _mark_avoid_convert_recursively(node):
},
"django.utils.html": {"": ("format_html", "format_html_join")},
"sqlalchemy.sql.compiler": {"": ("_requires_quotes",)},
# Our added functions
"": {"": (f"{_PREFIX}dir", f"{_PREFIX}set_dir_filter")},
},
# This is a set since all functions will be replaced by taint_sink_functions
"taint_sinks": {
Expand All @@ -149,10 +153,10 @@ def _mark_avoid_convert_recursively(node):


if sys.version_info >= (3, 12):
_ASPECTS_SPEC["module_functions"]["os.path"]["splitroot"] = "ddtrace_aspects.ospathsplitroot_aspect"
_ASPECTS_SPEC["module_functions"]["os.path"]["splitroot"] = _PREFIX + "aspects.ospathsplitroot_aspect"

if sys.version_info >= (3, 12) or os.name == "nt":
_ASPECTS_SPEC["module_functions"]["os.path"]["splitdrive"] = "ddtrace_aspects.ospathsplitdrive_aspect"
_ASPECTS_SPEC["module_functions"]["os.path"]["splitdrive"] = _PREFIX + "aspects.ospathsplitdrive_aspect"


class AstVisitor(ast.NodeTransformer):
Expand All @@ -163,7 +167,7 @@ def __init__(
):
self._sinkpoints_spec = {
"definitions_module": "ddtrace.appsec._iast.taint_sinks",
"alias_module": "ddtrace_taint_sinks",
"alias_module": _PREFIX + "taint_sinks",
"functions": {},
}
self._sinkpoints_functions = self._sinkpoints_spec["functions"]
Expand Down Expand Up @@ -458,6 +462,9 @@ def visit_FunctionDef(self, def_node: ast.FunctionDef) -> Any:
Special case for some tests which would enter in a patching
loop otherwise when visiting the check functions
"""
if f"{_PREFIX}dir" in def_node.name or f"{_PREFIX}set_dir_filter" in def_node.name:
return def_node

self.replacements_disabled_for_functiondef = def_node.name in self.dont_patch_these_functionsdefs

if hasattr(def_node.args, "vararg") and def_node.args.vararg:
Expand Down
10 changes: 5 additions & 5 deletions ddtrace/appsec/_iast/_loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,19 +12,19 @@


def _exec_iast_patched_module(module_watchdog, module):
patched_source = None
patched_ast = None
compiled_code = None
if IS_IAST_ENABLED:
try:
module_path, patched_source = astpatch_module(module)
module_path, patched_ast = astpatch_module(module)
except Exception:
log.debug("Unexpected exception while AST patching", exc_info=True)
patched_source = None
patched_ast = None

if patched_source:
if patched_ast:
try:
# Patched source is compiled in order to execute it
compiled_code = compile(patched_source, module_path, "exec")
compiled_code = compile(patched_ast, module_path, "exec")
except Exception:
log.debug("Unexpected exception while compiling patched code", exc_info=True)
compiled_code = None
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
---
fixes:
- |
Code Security: patch the module dir function so original pre-patch results are not changed.
1 change: 1 addition & 0 deletions tests/appsec/appsec_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,7 @@ def appsec_application_server(
env[IAST.ENV] = iast_enabled
env[IAST.ENV_REQUEST_SAMPLING] = "100"
env["_DD_APPSEC_DEDUPLICATION_ENABLED"] = "false"
env[IAST.ENV_NO_DIR_PATCH] = "false"
if assert_debug:
env["_" + IAST.ENV_DEBUG] = iast_enabled
env["_" + IAST.ENV_PROPAGATION_DEBUG] = iast_enabled
Expand Down
Loading

0 comments on commit 665c44b

Please sign in to comment.