Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

display analysis information to users #2111

Closed
wants to merge 6 commits into from
Closed
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions capa/capabilities/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,8 @@ def has_file_limitation(rules: RuleSet, capabilities: MatchResults, is_standalon
for line in file_limitation_rule.meta.get("description", "").split("\n"):
logger.warning(" %s", line)
logger.warning(" Identified via rule: %s", file_limitation_rule.name)
# TODO(s-ff): remove is_standalone flag as it is no longer needed
# #2111
if is_standalone:
pass
logger.warning("-" * 80)
Expand Down
43 changes: 33 additions & 10 deletions capa/capabilities/static.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,9 @@

logger = logging.getLogger(__name__)

MIN_LIB_FUNCS_RATIO = 0.4
MIN_API_CALLS = 10


def find_instruction_capabilities(
ruleset: RuleSet, extractor: StaticFeatureExtractor, f: FunctionHandle, bb: BBHandle, insn: InsnHandle
Expand Down Expand Up @@ -97,7 +100,7 @@ def find_basic_block_capabilities(

def find_code_capabilities(
ruleset: RuleSet, extractor: StaticFeatureExtractor, fh: FunctionHandle
) -> Tuple[MatchResults, MatchResults, MatchResults, int]:
) -> Tuple[MatchResults, MatchResults, MatchResults, FeatureSet]:
"""
find matches for the given rules within the given function.

Expand All @@ -119,9 +122,6 @@ def find_code_capabilities(
features, bmatches, imatches = find_basic_block_capabilities(ruleset, extractor, fh, bb)
for feature, vas in features.items():
function_features[feature].update(vas)
if isinstance(feature, API):
# declare a global variable (a set) and append to it here?
pass

for rule_name, res in bmatches.items():
bb_matches[rule_name].extend(res)
Expand All @@ -133,7 +133,7 @@ def find_code_capabilities(
function_features[feature].add(va)

_, function_matches = ruleset.match(Scope.FUNCTION, function_features, fh.address)
return function_matches, bb_matches, insn_matches, len(function_features)
return function_matches, bb_matches, insn_matches, function_features


def find_static_capabilities(
Expand All @@ -144,7 +144,9 @@ def find_static_capabilities(
all_insn_matches: MatchResults = collections.defaultdict(list)

feature_counts = rdoc.StaticFeatureCounts(file=0, functions=())
n_funcs: int = 0
library_functions: Tuple[rdoc.LibraryFunction, ...] = ()
api_calls: int = 0

assert isinstance(extractor, StaticFeatureExtractor)
with redirecting_print_to_tqdm(disable_progress):
Expand Down Expand Up @@ -184,12 +186,24 @@ def pbar(s, *args, **kwargs):
pb.set_postfix_str(f"skipped {n_libs} library functions ({percentage}%)")
continue

function_matches, bb_matches, insn_matches, feature_count = find_code_capabilities(
function_matches, bb_matches, insn_matches, function_features = find_code_capabilities(
ruleset, extractor, f
)
feature_count = len(function_features)
feature_counts.functions += (
rdoc.FunctionFeatureCount(address=frz.Address.from_capa(f.address), count=feature_count),
)

# for each function, count the number of API features,
# and add it to the cumulative count of API calls made
call_addresses = {
addr
for feature, addresses in function_features.items()
if isinstance(feature, API)
for addr in addresses
}
api_calls += len(call_addresses)
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

in addition we should count/report the imports

and it would be neat to generate some stats for various features / test samples

I


t1 = time.time()

match_count = 0
Expand Down Expand Up @@ -217,6 +231,18 @@ def pbar(s, *args, **kwargs):
for rule_name, res in insn_matches.items():
all_insn_matches[rule_name].extend(res)

if n_funcs:
lib_ratio = len(library_functions) / n_funcs
if lib_ratio < MIN_LIB_FUNCS_RATIO:
logger.info(
"Few library functions (%.2f%% of all functions) recognized by FLIRT signatures, results may contain false positives",
lib_ratio * 100,
)

if api_calls < MIN_API_CALLS:
logger.info(
"The analyzed sample reports very few API calls, this could indicate that it is packed, corrupted, or tiny"
)
# collection of features that captures the rule matches within function, BB, and instruction scopes.
# mapping from feature (matched rule) to set of addresses at which it matched.
function_and_lower_features: FeatureSet = collections.defaultdict(set)
Expand All @@ -242,9 +268,6 @@ def pbar(s, *args, **kwargs):
)
)

meta = {
"feature_counts": feature_counts,
"library_functions": library_functions,
}
meta = {"feature_counts": feature_counts, "library_functions": library_functions}

return matches, meta
10 changes: 0 additions & 10 deletions capa/render/default.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@
from capa.render.utils import StringIO

tabulate.PRESERVE_WHITESPACE = True
MIN_LIBFUNCS_COUNT = 5


def width(s: str, character_count: int) -> str:
Expand All @@ -30,15 +29,6 @@ def width(s: str, character_count: int) -> str:


def render_meta(doc: rd.ResultDocument, ostream: StringIO):
# check if the analysis is a static analysis to inform users about
# potential false positives due to a low number of library functions
if isinstance(doc.meta.analysis, rd.StaticAnalysis):
n_libs: int = len(doc.meta.analysis.library_functions)
if n_libs <= MIN_LIBFUNCS_COUNT:
ostream.write(
"Few library functions recognized by FLIRT signatures, results may contain false positives\n\n"
)

rows = [
(width("md5", 22), width(doc.meta.sample.md5, 82)),
("sha1", doc.meta.sample.sha1),
Expand Down