Skip to content

Commit e3a1dbf

Browse files
authored
Merge pull request #2537 from mandiant/fix/vmray-improvements
VMRay and dynamic improvements
2 parents 1a82b9d + 51d606b commit e3a1dbf

File tree

4 files changed

+31
-12
lines changed

4 files changed

+31
-12
lines changed

CHANGELOG.md

+3
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,9 @@
1212

1313
### Bug Fixes
1414

15+
- vmray: load more analysis archives @mr-tz
16+
- dynamic: only check file limitations for static file formats @mr-tz
17+
1518
### capa Explorer Web
1619

1720
### capa Explorer IDA Pro plugin

capa/features/extractors/vmray/__init__.py

+17-3
Original file line numberDiff line numberDiff line change
@@ -34,9 +34,10 @@ class VMRayMonitorProcess:
3434
pid: int # process ID assigned by OS
3535
ppid: int # parent process ID assigned by OS
3636
monitor_id: int # unique ID assigned to process by VMRay
37+
origin_monitor_id: int # unique VMRay ID of parent process
3738
image_name: str
38-
filename: str
39-
cmd_line: str
39+
filename: Optional[str] = ""
40+
cmd_line: Optional[str] = ""
4041

4142

4243
class VMRayAnalysis:
@@ -165,6 +166,7 @@ def _compute_monitor_processes(self):
165166
process.os_pid,
166167
ppid,
167168
process.monitor_id,
169+
process.origin_monitor_id,
168170
process.image_name,
169171
process.filename,
170172
process.cmd_line,
@@ -176,6 +178,7 @@ def _compute_monitor_processes(self):
176178
monitor_process.os_pid,
177179
monitor_process.os_parent_pid,
178180
monitor_process.process_id,
181+
monitor_process.parent_id,
179182
monitor_process.image_name,
180183
monitor_process.filename,
181184
monitor_process.cmd_line,
@@ -185,7 +188,18 @@ def _compute_monitor_processes(self):
185188
self.monitor_processes[monitor_process.process_id] = vmray_monitor_process
186189
else:
187190
# we expect monitor processes recorded in both SummaryV2.json and flog.xml to be equal
188-
assert self.monitor_processes[monitor_process.process_id] == vmray_monitor_process
191+
# to ensure this, we compare the pid, monitor_id, and origin_monitor_id
192+
# for the other fields we've observed cases with slight deviations, e.g.,
193+
# the ppid for a process in flog.xml is not set correctly while all other data is equal
194+
sv2p = self.monitor_processes[monitor_process.process_id]
195+
if self.monitor_processes[monitor_process.process_id] != vmray_monitor_process:
196+
logger.debug("processes differ: %s (sv2) vs. %s (flog)", sv2p, vmray_monitor_process)
197+
198+
assert (sv2p.pid, sv2p.monitor_id, sv2p.origin_monitor_id) == (
199+
vmray_monitor_process.pid,
200+
vmray_monitor_process.monitor_id,
201+
vmray_monitor_process.origin_monitor_id,
202+
)
189203

190204
def _compute_monitor_threads(self):
191205
for monitor_thread in self.flog.analysis.monitor_threads:

capa/features/extractors/vmray/models.py

+4-3
Original file line numberDiff line numberDiff line change
@@ -276,7 +276,7 @@ class ElfFileHeader(BaseModel):
276276

277277
class ElfFile(BaseModel):
278278
# file_header: ElfFileHeader
279-
sections: list[ElfFileSection]
279+
sections: list[ElfFileSection] = []
280280

281281

282282
class StaticData(BaseModel):
@@ -314,10 +314,11 @@ class Process(BaseModel):
314314
# is_ioc: bool
315315
monitor_id: int
316316
# monitor_reason: str
317+
origin_monitor_id: int # VMRay ID of parent process
317318
os_pid: int
318-
filename: SanitizedString
319+
filename: Optional[SanitizedString] = ""
319320
image_name: str
320-
cmd_line: SanitizedString
321+
cmd_line: Optional[SanitizedString] = ""
321322
ref_parent_process: Optional[GenericReference] = None
322323

323324

capa/main.py

+7-6
Original file line numberDiff line numberDiff line change
@@ -748,15 +748,13 @@ def find_file_limitations_from_cli(args, rules: RuleSet, file_extractors: list[F
748748
args:
749749
args: The parsed command line arguments from `install_common_args`.
750750
751+
Dynamic feature extractors can handle packed samples and do not need to be considered here.
752+
751753
raises:
752754
ShouldExitError: if the program is invoked incorrectly and should exit.
753755
"""
754756
found_file_limitation = False
755757
for file_extractor in file_extractors:
756-
if isinstance(file_extractor, DynamicFeatureExtractor):
757-
# Dynamic feature extractors can handle packed samples
758-
continue
759-
760758
try:
761759
pure_file_capabilities, _ = find_file_capabilities(rules, file_extractor, {})
762760
except PEFormatError as e:
@@ -962,8 +960,11 @@ def main(argv: Optional[list[str]] = None):
962960
ensure_input_exists_from_cli(args)
963961
input_format = get_input_format_from_cli(args)
964962
rules = get_rules_from_cli(args)
965-
file_extractors = get_file_extractors_from_cli(args, input_format)
966-
found_file_limitation = find_file_limitations_from_cli(args, rules, file_extractors)
963+
found_file_limitation = False
964+
if input_format in STATIC_FORMATS:
965+
# only static extractors have file limitations
966+
file_extractors = get_file_extractors_from_cli(args, input_format)
967+
found_file_limitation = find_file_limitations_from_cli(args, rules, file_extractors)
967968
except ShouldExitError as e:
968969
return e.status_code
969970

0 commit comments

Comments
 (0)