Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

display analysis information to users #2111

Closed
wants to merge 6 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
- add function in capa/helpers to load plain and compressed JSON reports #1883 @Rohit1123
- document Antivirus warnings and VirusTotal false positive detections #2028 @RionEV @mr-tz
- replace Halo spinner with Rich #2086 @s-ff
- display analysis information for user #857 @s-ff

### Breaking Changes

Expand Down
5 changes: 3 additions & 2 deletions capa/capabilities/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,9 +51,10 @@ def has_file_limitation(rules: RuleSet, capabilities: MatchResults, is_standalon
for line in file_limitation_rule.meta.get("description", "").split("\n"):
logger.warning(" %s", line)
logger.warning(" Identified via rule: %s", file_limitation_rule.name)
# TODO(s-ff): remove is_standalone flag as it is no longer needed
# #2111
if is_standalone:
logger.warning(" ")
logger.warning(" Use -v or -vv if you really want to see the capabilities identified by capa.")
pass
fariss marked this conversation as resolved.
Show resolved Hide resolved
logger.warning("-" * 80)

# bail on first file limitation
Expand Down
41 changes: 34 additions & 7 deletions capa/capabilities/static.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,11 +21,15 @@
from capa.rules import Scope, RuleSet
from capa.engine import FeatureSet, MatchResults
from capa.helpers import redirecting_print_to_tqdm
from capa.features.insn import API
from capa.capabilities.common import find_file_capabilities
from capa.features.extractors.base_extractor import BBHandle, InsnHandle, FunctionHandle, StaticFeatureExtractor

logger = logging.getLogger(__name__)

MIN_LIB_FUNCS_RATIO = 0.4
MIN_API_CALLS = 10


def find_instruction_capabilities(
ruleset: RuleSet, extractor: StaticFeatureExtractor, f: FunctionHandle, bb: BBHandle, insn: InsnHandle
Expand Down Expand Up @@ -96,7 +100,7 @@ def find_basic_block_capabilities(

def find_code_capabilities(
ruleset: RuleSet, extractor: StaticFeatureExtractor, fh: FunctionHandle
) -> Tuple[MatchResults, MatchResults, MatchResults, int]:
) -> Tuple[MatchResults, MatchResults, MatchResults, FeatureSet]:
"""
find matches for the given rules within the given function.

Expand Down Expand Up @@ -129,7 +133,7 @@ def find_code_capabilities(
function_features[feature].add(va)

_, function_matches = ruleset.match(Scope.FUNCTION, function_features, fh.address)
return function_matches, bb_matches, insn_matches, len(function_features)
return function_matches, bb_matches, insn_matches, function_features


def find_static_capabilities(
Expand All @@ -140,7 +144,9 @@ def find_static_capabilities(
all_insn_matches: MatchResults = collections.defaultdict(list)

feature_counts = rdoc.StaticFeatureCounts(file=0, functions=())
n_funcs: int = 0
library_functions: Tuple[rdoc.LibraryFunction, ...] = ()
api_calls: int = 0

assert isinstance(extractor, StaticFeatureExtractor)
with redirecting_print_to_tqdm(disable_progress):
Expand Down Expand Up @@ -180,12 +186,24 @@ def pbar(s, *args, **kwargs):
pb.set_postfix_str(f"skipped {n_libs} library functions ({percentage}%)")
continue

function_matches, bb_matches, insn_matches, feature_count = find_code_capabilities(
function_matches, bb_matches, insn_matches, function_features = find_code_capabilities(
ruleset, extractor, f
)
feature_count = len(function_features)
feature_counts.functions += (
rdoc.FunctionFeatureCount(address=frz.Address.from_capa(f.address), count=feature_count),
)

# for each function, count the number of API features,
# and add that count to the running total of API calls made
call_addresses = {
addr
for feature, addresses in function_features.items()
if isinstance(feature, API)
for addr in addresses
}
api_calls += len(call_addresses)
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

in addition we should count/report the imports

and it would be neat to generate some stats for various features / test samples

I


t1 = time.time()

match_count = 0
Expand Down Expand Up @@ -213,6 +231,18 @@ def pbar(s, *args, **kwargs):
for rule_name, res in insn_matches.items():
all_insn_matches[rule_name].extend(res)

if n_funcs:
lib_ratio = len(library_functions) / n_funcs
if lib_ratio < MIN_LIB_FUNCS_RATIO:
logger.info(
"Few library functions (%.2f%% of all functions) recognized by FLIRT signatures, results may contain false positives",
lib_ratio * 100,
)

if api_calls < MIN_API_CALLS:
logger.info(
"The analyzed sample reports very few API calls, this could indicate that it is packed, corrupted, or tiny"
)
# collection of features that captures the rule matches within function, BB, and instruction scopes.
# mapping from feature (matched rule) to set of addresses at which it matched.
function_and_lower_features: FeatureSet = collections.defaultdict(set)
Expand All @@ -238,9 +268,6 @@ def pbar(s, *args, **kwargs):
)
)

meta = {
"feature_counts": feature_counts,
"library_functions": library_functions,
}
meta = {"feature_counts": feature_counts, "library_functions": library_functions}

return matches, meta
17 changes: 2 additions & 15 deletions capa/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@
FORMAT_RESULT,
)
from capa.capabilities.common import find_capabilities, has_file_limitation, find_file_capabilities
from capa.features.extractors.base_extractor import FeatureExtractor, StaticFeatureExtractor, DynamicFeatureExtractor
from capa.features.extractors.base_extractor import FeatureExtractor, DynamicFeatureExtractor

RULES_PATH_DEFAULT_STRING = "(embedded rules)"
SIGNATURES_PATH_DEFAULT_STRING = "(embedded signatures)"
Expand Down Expand Up @@ -666,16 +666,9 @@ def find_file_limitations_from_cli(args, rules: RuleSet, file_extractors: List[F
except (ELFError, OverflowError) as e:
logger.error("Input file '%s' is not a valid ELF file: %s", args.input_file, str(e))
raise ShouldExitError(E_CORRUPT_FILE) from e

# file limitations that rely on non-file scope won't be detected here.
# nor on FunctionName features, because pefile doesn't support this.
found_file_limitation = has_file_limitation(rules, pure_file_capabilities)
if found_file_limitation:
# bail if capa encountered file limitation e.g. a packed binary
# do show the output in verbose mode, though.
if not (args.verbose or args.vverbose or args.json):
logger.debug("file limitation short circuit, won't analyze fully.")
raise ShouldExitError(E_FILE_LIMITATION)
return found_file_limitation


Expand Down Expand Up @@ -804,7 +797,7 @@ def main(argv: Optional[List[str]] = None):
input_format = get_input_format_from_cli(args)
rules = get_rules_from_cli(args)
file_extractors = get_file_extractors_from_cli(args, input_format)
found_file_limitation = find_file_limitations_from_cli(args, rules, file_extractors)
_ = find_file_limitations_from_cli(args, rules, file_extractors)
Comment on lines 799 to +800
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The only reason I am keeping find_file_limitations_from_cli is that it prints a warning to the user if the sample is packed.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'll have to do more thinking/testing on how to handle this. On the one hand, the limitations are very valid; on the other hand, users often still want to see the results. Ideally, we find a solution that helps with both.

except ShouldExitError as e:
return e.status_code

Expand Down Expand Up @@ -837,12 +830,6 @@ def main(argv: Optional[List[str]] = None):
meta = capa.loader.collect_metadata(argv, args.input_file, input_format, os_, args.rules, extractor, counts)
meta.analysis.layout = capa.loader.compute_layout(rules, extractor, capabilities)

if isinstance(extractor, StaticFeatureExtractor) and found_file_limitation:
# bail if capa's static feature extractor encountered file limitation e.g. a packed binary
# do show the output in verbose mode, though.
if not (args.verbose or args.vverbose or args.json):
return E_FILE_LIMITATION

if args.json:
print(capa.render.json.render(meta, rules, capabilities))
elif args.vverbose:
Expand Down