From fe55d3ad371fec56e36268f4fe52ec12a5aafd59 Mon Sep 17 00:00:00 2001
From: Soufiane Fariss <soufiane.fariss@um5s.net.ma>
Date: Thu, 30 May 2024 04:04:51 +0200
Subject: [PATCH 1/6] display capabilities for file limitations

This commit:
- displays the capabilities for files matching a file limitation rule
- informs users about potential false positives when few library
  functions are recognized
- wip: reports the number of API calls made, and informs the user if the
  number is low
---
 capa/capabilities/common.py |  3 +--
 capa/capabilities/static.py |  4 ++++
 capa/main.py                | 17 ++---------------
 capa/render/default.py      | 10 ++++++++++
 4 files changed, 17 insertions(+), 17 deletions(-)

diff --git a/capa/capabilities/common.py b/capa/capabilities/common.py
index a73f40afe..d71d2a12f 100644
--- a/capa/capabilities/common.py
+++ b/capa/capabilities/common.py
@@ -52,8 +52,7 @@ def has_file_limitation(rules: RuleSet, capabilities: MatchResults, is_standalon
             logger.warning(" %s", line)
         logger.warning(" Identified via rule: %s", file_limitation_rule.name)
         if is_standalone:
-            logger.warning(" ")
-            logger.warning(" Use -v or -vv if you really want to see the capabilities identified by capa.")
+            pass
         logger.warning("-" * 80)
 
         # bail on first file limitation
diff --git a/capa/capabilities/static.py b/capa/capabilities/static.py
index 4f3b3b6a1..01bbd4f7d 100644
--- a/capa/capabilities/static.py
+++ b/capa/capabilities/static.py
@@ -21,6 +21,7 @@
 from capa.rules import Scope, RuleSet
 from capa.engine import FeatureSet, MatchResults
 from capa.helpers import redirecting_print_to_tqdm
+from capa.features.insn import API
 from capa.capabilities.common import find_file_capabilities
 from capa.features.extractors.base_extractor import BBHandle, InsnHandle, FunctionHandle, StaticFeatureExtractor
 
@@ -118,6 +119,9 @@ def find_code_capabilities(
         features, bmatches, imatches = find_basic_block_capabilities(ruleset, extractor, fh, bb)
         for feature, vas in features.items():
             function_features[feature].update(vas)
+            if isinstance(feature, API):
+                # delcare a global variable (a set) and append to it here?
+                pass
 
         for rule_name, res in bmatches.items():
             bb_matches[rule_name].extend(res)
diff --git a/capa/main.py b/capa/main.py
index eb43769d2..8d2beca7c 100644
--- a/capa/main.py
+++ b/capa/main.py
@@ -75,7 +75,7 @@
     FORMAT_RESULT,
 )
 from capa.capabilities.common import find_capabilities, has_file_limitation, find_file_capabilities
-from capa.features.extractors.base_extractor import FeatureExtractor, StaticFeatureExtractor, DynamicFeatureExtractor
+from capa.features.extractors.base_extractor import FeatureExtractor, DynamicFeatureExtractor
 
 RULES_PATH_DEFAULT_STRING = "(embedded rules)"
 SIGNATURES_PATH_DEFAULT_STRING = "(embedded signatures)"
@@ -666,16 +666,9 @@ def find_file_limitations_from_cli(args, rules: RuleSet, file_extractors: List[F
         except (ELFError, OverflowError) as e:
             logger.error("Input file '%s' is not a valid ELF file: %s", args.input_file, str(e))
             raise ShouldExitError(E_CORRUPT_FILE) from e
-
         # file limitations that rely on non-file scope won't be detected here.
         # nor on FunctionName features, because pefile doesn't support this.
         found_file_limitation = has_file_limitation(rules, pure_file_capabilities)
-        if found_file_limitation:
-            # bail if capa encountered file limitation e.g. a packed binary
-            # do show the output in verbose mode, though.
-            if not (args.verbose or args.vverbose or args.json):
-                logger.debug("file limitation short circuit, won't analyze fully.")
-                raise ShouldExitError(E_FILE_LIMITATION)
     return found_file_limitation
 
 
@@ -804,7 +797,7 @@ def main(argv: Optional[List[str]] = None):
         input_format = get_input_format_from_cli(args)
         rules = get_rules_from_cli(args)
         file_extractors = get_file_extractors_from_cli(args, input_format)
-        found_file_limitation = find_file_limitations_from_cli(args, rules, file_extractors)
+        _ = find_file_limitations_from_cli(args, rules, file_extractors)
     except ShouldExitError as e:
         return e.status_code
 
@@ -837,12 +830,6 @@ def main(argv: Optional[List[str]] = None):
         meta = capa.loader.collect_metadata(argv, args.input_file, input_format, os_, args.rules, extractor, counts)
         meta.analysis.layout = capa.loader.compute_layout(rules, extractor, capabilities)
 
-        if isinstance(extractor, StaticFeatureExtractor) and found_file_limitation:
-            # bail if capa's static feature extractor encountered file limitation e.g. a packed binary
-            # do show the output in verbose mode, though.
-            if not (args.verbose or args.vverbose or args.json):
-                return E_FILE_LIMITATION
-
     if args.json:
         print(capa.render.json.render(meta, rules, capabilities))
     elif args.vverbose:
diff --git a/capa/render/default.py b/capa/render/default.py
index 2e5064740..0c3390980 100644
--- a/capa/render/default.py
+++ b/capa/render/default.py
@@ -18,6 +18,7 @@
 from capa.render.utils import StringIO
 
 tabulate.PRESERVE_WHITESPACE = True
+MIN_LIBFUNCS_COUNT = 5
 
 
 def width(s: str, character_count: int) -> str:
@@ -29,6 +30,15 @@ def width(s: str, character_count: int) -> str:
 
 
 def render_meta(doc: rd.ResultDocument, ostream: StringIO):
+    # check if analysis is Static analysis to inform users about
+    # potential false postive due to low number of library functions
+    if isinstance(doc.meta.analysis, rd.StaticAnalysis):
+        n_libs: int = len(doc.meta.analysis.library_functions)
+        if n_libs <= MIN_LIBFUNCS_COUNT:
+            ostream.write(
+                "Few library functions recognized by FLIRT signatures, results may contain false positives\n\n"
+            )
+
     rows = [
         (width("md5", 22), width(doc.meta.sample.md5, 82)),
         ("sha1", doc.meta.sample.sha1),

From 9a8a18eddf1c9482bc1f240e134ddafe7f9d43f0 Mon Sep 17 00:00:00 2001
From: Soufiane Fariss <soufiane.fariss@um5s.net.ma>
Date: Thu, 30 May 2024 04:13:23 +0200
Subject: [PATCH 2/6] changelog: display analysis information to user

---
 CHANGELOG.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 99f92a8bf..6746f80fa 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -8,6 +8,7 @@
 - add function in capa/helpers to load plain and compressed JSON reports #1883 @Rohit1123
 - document Antivirus warnings and VirusTotal false positive detections #2028 @RionEV @mr-tz
 - replace Halo spinner with Rich #2086 @s-ff
+- display analysis information for user #857 @s-ff
 
 ### Breaking Changes
 

From 2597c84ffefb36077483f56a0385e0176c97e977 Mon Sep 17 00:00:00 2001
From: Soufiane Fariss <rev.fariss@gmail.com>
Date: Thu, 6 Jun 2024 16:32:08 +0200
Subject: [PATCH 3/6] print few library function found banner

---
 capa/capabilities/static.py    |  6 ++----
 capa/loader.py                 |  1 +
 capa/main.py                   |  1 +
 capa/render/default.py         | 10 +++++++---
 capa/render/result_document.py |  1 +
 5 files changed, 12 insertions(+), 7 deletions(-)

diff --git a/capa/capabilities/static.py b/capa/capabilities/static.py
index 01bbd4f7d..d81988942 100644
--- a/capa/capabilities/static.py
+++ b/capa/capabilities/static.py
@@ -144,6 +144,7 @@ def find_static_capabilities(
     all_insn_matches: MatchResults = collections.defaultdict(list)
 
     feature_counts = rdoc.StaticFeatureCounts(file=0, functions=())
+    n_funcs: int = 0
     library_functions: Tuple[rdoc.LibraryFunction, ...] = ()
 
     assert isinstance(extractor, StaticFeatureExtractor)
@@ -242,9 +243,6 @@ def pbar(s, *args, **kwargs):
         )
     )
 
-    meta = {
-        "feature_counts": feature_counts,
-        "library_functions": library_functions,
-    }
+    meta = {"feature_counts": feature_counts, "library_functions": library_functions, "function_count": n_funcs}
 
     return matches, meta
diff --git a/capa/loader.py b/capa/loader.py
index 8e91fae0f..b970700bd 100644
--- a/capa/loader.py
+++ b/capa/loader.py
@@ -349,6 +349,7 @@ def get_sample_analysis(format_, arch, os_, extractor, rules_path, counts):
                 # "functions": { 0x401000: { "matched_basic_blocks": [ 0x401000, 0x401005, ... ] }, ... }
             ),
             feature_counts=counts["feature_counts"],
+            function_count=counts["function_count"],
             library_functions=counts["library_functions"],
         )
     elif isinstance(extractor, DynamicFeatureExtractor):
diff --git a/capa/main.py b/capa/main.py
index 8d2beca7c..6a22d6aa2 100644
--- a/capa/main.py
+++ b/capa/main.py
@@ -876,6 +876,7 @@ def ida_main():
     capabilities, counts = find_capabilities(rules, capa.features.extractors.ida.extractor.IdaFeatureExtractor())
 
     meta.analysis.feature_counts = counts["feature_counts"]
+    meta.analysis.function_count = counts["function_count"]
     meta.analysis.library_functions = counts["library_functions"]
 
     if has_file_limitation(rules, capabilities, is_standalone=False):
diff --git a/capa/render/default.py b/capa/render/default.py
index 0c3390980..16cfafc76 100644
--- a/capa/render/default.py
+++ b/capa/render/default.py
@@ -18,7 +18,7 @@
 from capa.render.utils import StringIO
 
 tabulate.PRESERVE_WHITESPACE = True
-MIN_LIBFUNCS_COUNT = 5
+MIN_LIB_FUNCS_PERCENTAGE = 30
 
 
 def width(s: str, character_count: int) -> str:
@@ -34,9 +34,13 @@ def render_meta(doc: rd.ResultDocument, ostream: StringIO):
     # potential false postive due to low number of library functions
     if isinstance(doc.meta.analysis, rd.StaticAnalysis):
         n_libs: int = len(doc.meta.analysis.library_functions)
-        if n_libs <= MIN_LIBFUNCS_COUNT:
+        n_funcs: int = doc.meta.analysis.function_count
+        lib_percentage = round(100 * (n_libs / n_funcs), 2)
+        if lib_percentage <= MIN_LIB_FUNCS_PERCENTAGE:
             ostream.write(
-                "Few library functions recognized by FLIRT signatures, results may contain false positives\n\n"
+                rutils.warn(
+                    f"Few library functions (%{lib_percentage} of all functions) recognized by FLIRT signatures, results may contain false positives\n\n",
+                )
             )
 
     rows = [
diff --git a/capa/render/result_document.py b/capa/render/result_document.py
index d2de49d73..595c6e4e6 100644
--- a/capa/render/result_document.py
+++ b/capa/render/result_document.py
@@ -114,6 +114,7 @@ class StaticAnalysis(Model):
     layout: StaticLayout
     feature_counts: StaticFeatureCounts
     library_functions: Tuple[LibraryFunction, ...]
+    function_count: int
 
 
 class DynamicAnalysis(Model):

From e8ccd2a8b41704816cb880d6ba6d0040af10c284 Mon Sep 17 00:00:00 2001
From: Soufiane Fariss <rev.fariss@gmail.com>
Date: Thu, 6 Jun 2024 23:29:37 +0200
Subject: [PATCH 4/6] display analysis information to users

- Inform users if few library functions were recognized by FLIRT
signatures (library functions are expected to make up roughly 40-50% of
all functions; the warning is emitted below 40%)

- Inform users if there are very few API calls (<10); this could
indicate that the sample is packed, corrupted, or tiny
---
 capa/capabilities/static.py    | 44 ++++++++++++++++++++++++++++++----
 capa/loader.py                 |  1 -
 capa/main.py                   |  1 -
 capa/render/default.py         | 14 -----------
 capa/render/result_document.py |  1 -
 5 files changed, 40 insertions(+), 21 deletions(-)

diff --git a/capa/capabilities/static.py b/capa/capabilities/static.py
index d81988942..53d86c826 100644
--- a/capa/capabilities/static.py
+++ b/capa/capabilities/static.py
@@ -16,6 +16,7 @@
 import tqdm.contrib.logging
 
 import capa.perf
+import capa.render.utils as rutils
 import capa.features.freeze as frz
 import capa.render.result_document as rdoc
 from capa.rules import Scope, RuleSet
@@ -27,6 +28,9 @@
 
 logger = logging.getLogger(__name__)
 
+MIN_LIB_FUNCS_RATIO = 0.4
+MIN_API_CALLS = 10
+
 
 def find_instruction_capabilities(
     ruleset: RuleSet, extractor: StaticFeatureExtractor, f: FunctionHandle, bb: BBHandle, insn: InsnHandle
@@ -97,7 +101,7 @@ def find_basic_block_capabilities(
 
 def find_code_capabilities(
     ruleset: RuleSet, extractor: StaticFeatureExtractor, fh: FunctionHandle
-) -> Tuple[MatchResults, MatchResults, MatchResults, int]:
+) -> Tuple[MatchResults, MatchResults, MatchResults, FeatureSet]:
     """
     find matches for the given rules within the given function.
 
@@ -133,7 +137,7 @@ def find_code_capabilities(
         function_features[feature].add(va)
 
     _, function_matches = ruleset.match(Scope.FUNCTION, function_features, fh.address)
-    return function_matches, bb_matches, insn_matches, len(function_features)
+    return function_matches, bb_matches, insn_matches, function_features
 
 
 def find_static_capabilities(
@@ -146,6 +150,7 @@ def find_static_capabilities(
     feature_counts = rdoc.StaticFeatureCounts(file=0, functions=())
     n_funcs: int = 0
     library_functions: Tuple[rdoc.LibraryFunction, ...] = ()
+    api_calls: int = 0
 
     assert isinstance(extractor, StaticFeatureExtractor)
     with redirecting_print_to_tqdm(disable_progress):
@@ -185,12 +190,24 @@ def pbar(s, *args, **kwargs):
                         pb.set_postfix_str(f"skipped {n_libs} library functions ({percentage}%)")
                     continue
 
-                function_matches, bb_matches, insn_matches, feature_count = find_code_capabilities(
+                function_matches, bb_matches, insn_matches, function_features = find_code_capabilities(
                     ruleset, extractor, f
                 )
+                feature_count = len(function_features)
                 feature_counts.functions += (
                     rdoc.FunctionFeatureCount(address=frz.Address.from_capa(f.address), count=feature_count),
                 )
+
+                # for each function, count the addresses of its API features,
+                # and add that count to the running total of API calls made
+                call_addresses = {
+                    addr
+                    for feature, addresses in function_features.items()
+                    if isinstance(feature, API)
+                    for addr in addresses
+                }
+                api_calls += len(call_addresses)
+
                 t1 = time.time()
 
                 match_count = 0
@@ -218,6 +235,25 @@ def pbar(s, *args, **kwargs):
                 for rule_name, res in insn_matches.items():
                     all_insn_matches[rule_name].extend(res)
 
+    # inform users if few library functions are recognized
+    # via FLIRT signatures, results may contain false positives
+    # from library code
+    if n_funcs:
+        lib_ratio = len(library_functions) / n_funcs
+        if lib_ratio < MIN_LIB_FUNCS_RATIO:
+            print(
+                rutils.warn(
+                    "Few library functions (%.2f%% of all functions) recognized by FLIRT signatures, results may contain false positives"
+                    % (lib_ratio * 100)
+                )
+            )
+
+    if api_calls < MIN_API_CALLS:
+        print(
+            rutils.warn(
+                "The analyzed sample reports very few API calls, this could indicate that it is packed, corrupted, or tiny"
+            )
+        )
     # collection of features that captures the rule matches within function, BB, and instruction scopes.
     # mapping from feature (matched rule) to set of addresses at which it matched.
     function_and_lower_features: FeatureSet = collections.defaultdict(set)
@@ -243,6 +279,6 @@ def pbar(s, *args, **kwargs):
         )
     )
 
-    meta = {"feature_counts": feature_counts, "library_functions": library_functions, "function_count": n_funcs}
+    meta = {"feature_counts": feature_counts, "library_functions": library_functions}
 
     return matches, meta
diff --git a/capa/loader.py b/capa/loader.py
index b970700bd..8e91fae0f 100644
--- a/capa/loader.py
+++ b/capa/loader.py
@@ -349,7 +349,6 @@ def get_sample_analysis(format_, arch, os_, extractor, rules_path, counts):
                 # "functions": { 0x401000: { "matched_basic_blocks": [ 0x401000, 0x401005, ... ] }, ... }
             ),
             feature_counts=counts["feature_counts"],
-            function_count=counts["function_count"],
             library_functions=counts["library_functions"],
         )
     elif isinstance(extractor, DynamicFeatureExtractor):
diff --git a/capa/main.py b/capa/main.py
index 6a22d6aa2..8d2beca7c 100644
--- a/capa/main.py
+++ b/capa/main.py
@@ -876,7 +876,6 @@ def ida_main():
     capabilities, counts = find_capabilities(rules, capa.features.extractors.ida.extractor.IdaFeatureExtractor())
 
     meta.analysis.feature_counts = counts["feature_counts"]
-    meta.analysis.function_count = counts["function_count"]
     meta.analysis.library_functions = counts["library_functions"]
 
     if has_file_limitation(rules, capabilities, is_standalone=False):
diff --git a/capa/render/default.py b/capa/render/default.py
index 16cfafc76..2e5064740 100644
--- a/capa/render/default.py
+++ b/capa/render/default.py
@@ -18,7 +18,6 @@
 from capa.render.utils import StringIO
 
 tabulate.PRESERVE_WHITESPACE = True
-MIN_LIB_FUNCS_PERCENTAGE = 30
 
 
 def width(s: str, character_count: int) -> str:
@@ -30,19 +29,6 @@ def width(s: str, character_count: int) -> str:
 
 
 def render_meta(doc: rd.ResultDocument, ostream: StringIO):
-    # check if analysis is Static analysis to inform users about
-    # potential false postive due to low number of library functions
-    if isinstance(doc.meta.analysis, rd.StaticAnalysis):
-        n_libs: int = len(doc.meta.analysis.library_functions)
-        n_funcs: int = doc.meta.analysis.function_count
-        lib_percentage = round(100 * (n_libs / n_funcs), 2)
-        if lib_percentage <= MIN_LIB_FUNCS_PERCENTAGE:
-            ostream.write(
-                rutils.warn(
-                    f"Few library functions (%{lib_percentage} of all functions) recognized by FLIRT signatures, results may contain false positives\n\n",
-                )
-            )
-
     rows = [
         (width("md5", 22), width(doc.meta.sample.md5, 82)),
         ("sha1", doc.meta.sample.sha1),
diff --git a/capa/render/result_document.py b/capa/render/result_document.py
index 595c6e4e6..d2de49d73 100644
--- a/capa/render/result_document.py
+++ b/capa/render/result_document.py
@@ -114,7 +114,6 @@ class StaticAnalysis(Model):
     layout: StaticLayout
     feature_counts: StaticFeatureCounts
     library_functions: Tuple[LibraryFunction, ...]
-    function_count: int
 
 
 class DynamicAnalysis(Model):

From a5e5167e2e4fef8c765afa9aef4240191e3df449 Mon Sep 17 00:00:00 2001
From: Soufiane Fariss <rev.fariss@gmail.com>
Date: Thu, 6 Jun 2024 23:47:39 +0200
Subject: [PATCH 5/6] code style: do not use %

---
 capa/capabilities/static.py | 18 +++++-------------
 1 file changed, 5 insertions(+), 13 deletions(-)

diff --git a/capa/capabilities/static.py b/capa/capabilities/static.py
index 53d86c826..84da874c9 100644
--- a/capa/capabilities/static.py
+++ b/capa/capabilities/static.py
@@ -16,7 +16,6 @@
 import tqdm.contrib.logging
 
 import capa.perf
-import capa.render.utils as rutils
 import capa.features.freeze as frz
 import capa.render.result_document as rdoc
 from capa.rules import Scope, RuleSet
@@ -235,24 +234,17 @@ def pbar(s, *args, **kwargs):
                 for rule_name, res in insn_matches.items():
                     all_insn_matches[rule_name].extend(res)
 
-    # inform users if few library functions are recognized
-    # via FLIRT signatures, results may contain false positives
-    # from library code
     if n_funcs:
         lib_ratio = len(library_functions) / n_funcs
         if lib_ratio < MIN_LIB_FUNCS_RATIO:
-            print(
-                rutils.warn(
-                    "Few library functions (%.2f%% of all functions) recognized by FLIRT signatures, results may contain false positives"
-                    % (lib_ratio * 100)
-                )
+            logger.info(
+                "Few library functions (%.2f%% of all functions) recognized by FLIRT signatures, results may contain false positives",
+                lib_ratio * 100,
             )
 
     if api_calls < MIN_API_CALLS:
-        print(
-            rutils.warn(
-                "The analyzed sample reports very few API calls, this could indicate that it is packed, corrupted, or tiny"
-            )
+        logger.info(
+            "The analyzed sample reports very few API calls, this could indicate that it is packed, corrupted, or tiny"
         )
     # collection of features that captures the rule matches within function, BB, and instruction scopes.
     # mapping from feature (matched rule) to set of addresses at which it matched.

From ae226e626de118b7152d862b8f46fbdaa77c62bf Mon Sep 17 00:00:00 2001
From: Soufiane Fariss <rev.fariss@gmail.com>
Date: Fri, 7 Jun 2024 00:00:05 +0200
Subject: [PATCH 6/6] start using todos

---
 capa/capabilities/common.py | 2 ++
 capa/capabilities/static.py | 3 ---
 2 files changed, 2 insertions(+), 3 deletions(-)

diff --git a/capa/capabilities/common.py b/capa/capabilities/common.py
index d71d2a12f..f3833096f 100644
--- a/capa/capabilities/common.py
+++ b/capa/capabilities/common.py
@@ -51,6 +51,8 @@ def has_file_limitation(rules: RuleSet, capabilities: MatchResults, is_standalon
         for line in file_limitation_rule.meta.get("description", "").split("\n"):
             logger.warning(" %s", line)
         logger.warning(" Identified via rule: %s", file_limitation_rule.name)
+        # TODO(s-ff): remove the is_standalone flag as it is no longer needed
+        # #2111
         if is_standalone:
             pass
         logger.warning("-" * 80)
diff --git a/capa/capabilities/static.py b/capa/capabilities/static.py
index 84da874c9..73dcb7074 100644
--- a/capa/capabilities/static.py
+++ b/capa/capabilities/static.py
@@ -122,9 +122,6 @@ def find_code_capabilities(
         features, bmatches, imatches = find_basic_block_capabilities(ruleset, extractor, fh, bb)
         for feature, vas in features.items():
             function_features[feature].update(vas)
-            if isinstance(feature, API):
-                # delcare a global variable (a set) and append to it here?
-                pass
 
         for rule_name, res in bmatches.items():
             bb_matches[rule_name].extend(res)