Skip to content

Commit 65b19ad

Browse files
add initial script parsing VMRay flog.txt files
ref #2452
1 parent 6a12ab8 commit 65b19ad

File tree

1 file changed

+291
-0
lines changed

1 file changed

+291
-0
lines changed

scripts/parse-vmray-flog.py

+291
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,291 @@
1+
import sys
2+
import logging
3+
from typing import Any, Literal, Optional
4+
from pathlib import Path
5+
6+
from pydantic import BeforeValidator
7+
from typing_extensions import Annotated
8+
from pydantic.dataclasses import dataclass
9+
10+
# Field types that normalize the raw property strings found in flog.txt
# before pydantic validates them.

# int written in base 16; any leading/trailing double quotes are stripped
# first, and `int(..., 0x10)` accepts the value with or without a `0x` prefix.
HexInt = Annotated[int, BeforeValidator(lambda v: int(v.strip('"'), 0x10))]
# int written in decimal; any leading/trailing double quotes are stripped first.
QuotedInt = Annotated[int, BeforeValidator(lambda v: int(v.strip('"')))]
# str with any leading/trailing double quotes stripped; the contents are not
# otherwise unescaped or decoded.
QuotedString = Annotated[str, BeforeValidator(lambda v: v.strip('"'))]


logger = logging.getLogger("vmray.flog")
16+
17+
18+
@dataclass
class Region:
    """One memory region described by a `Region:` section of a flog.

    Instances are built from the raw `key = value` strings of the section;
    the annotated field types convert those strings during construction.
    """

    # decimal identifier (quotes, if present, are stripped).
    id: QuotedInt
    # bounds of the region, both parsed as base 16.
    start_va: HexInt
    end_va: HexInt
    # written as `1` in the sample record; conversion to bool is left to pydantic.
    monitored: bool
    entry_point: HexInt
    region_type: Literal["private", "mapped_file", "pagefile_backed"]
    name: QuotedString
    # kept verbatim, including any surrounding double quotes.
    filename: str
28+
29+
30+
@dataclass
class Event:
    """A single logged API call, as produced by `parse_event`."""

    # the two integers of the bracketed `[major.minor]` prefix.
    timestamp: tuple[int, int]
    # API name, e.g. `GetCurrentProcess`.
    api: str
    # unparsed text found between the outermost parentheses.
    args: str
    # return value, or None when the line has no `returned` clause.
    rv: int | None
36+
37+
38+
@dataclass
class Thread:
    """One `Thread:` section of a flog together with its logged API calls."""

    # decimal identifier (quotes, if present, are stripped).
    id: QuotedInt
    # thread id parsed as base 16, e.g. `0x117c`.
    os_tid: HexInt
    # events in the order their lines appear within the section.
    events: list[Event]
43+
44+
45+
@dataclass
class Process:
    """One `Process:` section of a flog, with its regions and threads.

    The scalar fields come from the quoted `key = "value"` lines at the top
    of the section; the annotated field types strip the quotes and convert
    numbers during construction.
    """

    id: QuotedInt
    image_name: QuotedString
    filename: QuotedString
    page_root: HexInt
    os_pid: HexInt
    os_integrity_level: HexInt
    os_privileges: HexInt
    # the accepted values include the surrounding double quotes, because the
    # raw property text is compared without being unquoted first.
    monitor_reason: Literal['"analysis_target"', '"rpc_server"']
    # NOTE(review): `id` is parsed as decimal but `parent_id` as base 16, and
    # the sample shows `parent_id = "0"` without a `0x` prefix - confirm which
    # base the log uses, otherwise parent ids >= 10 will not match `id`.
    parent_id: HexInt
    os_parent_pid: HexInt
    cmd_line: str  # TODO: json decode str
    cur_dir: str  # TODO: json decode str
    os_username: str  # TODO: json decode str
    bitness: QuotedInt  # TODO: enum 32 or 64
    os_groups: str  # TODO: list of str
    regions: list[Region]
    threads: list[Thread]
64+
65+
66+
@dataclass
class Flog:
    """A parsed flog.txt: every process plus lookup tables keyed by id."""

    # processes in the order they appear in the file.
    processes: list[Process]

    # indexes built by `parse_flog`; regions and threads are gathered across
    # all processes, so a later entry replaces an earlier one with the same id.
    processes_by_id: dict[int, Process]
    regions_by_id: dict[int, Region]
    threads_by_id: dict[int, Thread]
73+
74+
75+
def parse_properties(txt: str) -> dict[str, Any]:
    """Collect the leading `key = value` lines of a section into a dict.

    Only the text before the first blank line is inspected. Each line is
    split at its first ` = `; leading whitespace is dropped from the line,
    while the value is kept verbatim (quotes and trailing spaces included).

    Args:
        txt: section text that starts with its property lines.

    Returns:
        Mapping of property name to the raw, unconverted value string.
        Lines without the ` = ` separator, such as a `Region:` header, are
        ignored rather than being recorded as keys with an empty value.
    """
    head, _, _ = txt.partition("\n\n")

    properties = {}
    for line in head.splitlines():
        key, separator, value = line.lstrip().partition(" = ")
        if not separator:
            # not a property line; recording it would later surface as an
            # unexpected keyword argument when the dict is splatted.
            continue
        properties[key] = value

    return properties
82+
83+
84+
def parse_region(txt: str) -> Region:
    """Build a Region from the text that follows a `Region:` header.

    Args:
        txt: the body of one region section.

    Returns:
        Region constructed from the section's `key = value` properties.
    """
    # the section looks like:
    #
    #   Region:
    #       id = 125
    #       start_va = 0x10000
    #       end_va = 0x2ffff
    #       monitored = 1
    #       entry_point = 0x0
    #       region_type = private
    #       name = "private_0x0000000000010000"
    #       filename = ""
    return Region(**parse_properties(txt))
98+
99+
100+
def parse_event(line: str) -> Event:
    """Parse one API call line from a thread's event log.

    Expected shape, where the `returned` clause is optional:

        [0066.433] CoInitializeEx (pvReserved=0x0, dwCoInit=0x2) returned 0x0
        [0072.750] GetCurrentProcess () returned 0xffffffffffffffff
        [0083.567] CoTaskMemFree (pv=0x746aa0)

    Args:
        line: a single event line; leading whitespace is ignored.

    Returns:
        Event holding the timestamp as a `(major, minor)` int pair, the API
        name, the unparsed argument text between the outermost parentheses,
        and the return value (None when there is no `returned` clause).

    Raises:
        ValueError: if the timestamp or the return value is not numeric.
    """
    # drop the opening `[`, then split the timestamp from the call text.
    numbers, _, rest = line.lstrip()[1:].partition("] ")
    major, _, minor = numbers.partition(".")
    # int() accepts zero-padded decimals such as `0072`; a missing component
    # counts as zero.
    timestamp = (int(major or "0"), int(minor or "0"))

    api, _, rest = rest.partition(" (")
    # split at the *last* `)` because struct arguments nest parentheses.
    args, _, rest = rest.rpartition(")")

    rv = None
    if " returned " in rest:
        _, _, rvs = rest.partition(" returned ")
        rvs = rvs.strip()
        if rvs.lower().lstrip("+-").startswith("0x"):
            rv = int(rvs, 0x10)
        else:
            # values without a `0x` prefix are decimal (e.g. `returned 1`);
            # reading them as base 16 would turn `10` into 16.
            try:
                rv = int(rvs)
            except ValueError:
                # keep accepting bare hex digits, as earlier versions did.
                rv = int(rvs, 0x10)

    return Event(
        timestamp=timestamp,
        api=api,
        args=args,
        rv=rv,
    )
127+
128+
129+
def parse_thread(txt: str) -> Thread:
    """Build a Thread from the text that follows a `Thread:` header.

    Args:
        txt: the body of one thread section: property lines, a blank line,
            then the event lines.

    Returns:
        Thread with its properties and every parsed event, in file order.
    """
    # the section looks like:
    #
    #   Thread:
    #       id = 1
    #       os_tid = 0x117c
    #
    #       [0066.433] CoInitializeEx (pvReserved=0x0, dwCoInit=0x2) returned 0x0
    #       [0071.184] RegisterClipboardFormatW (lpszFormat="WM_GETCONTROLTYPE") returned 0xc1dc
    #       [0072.750] GetCurrentProcess () returned 0xffffffffffffffff
    properties = parse_properties(txt)

    # event lines are exactly those that start with a tab followed by `[`.
    events = [parse_event(entry) for entry in txt.splitlines() if entry.startswith("\t[")]

    return Thread(events=events, **properties)
152+
153+
154+
def parse_process(txt: str) -> Process:
    """Build a Process from the text that follows a `Process:` header.

    Args:
        txt: the body of one process section, including its nested
            `Region:` and `Thread:` sections.

    Returns:
        Process with its properties, regions and threads, in file order.
    """
    # properties look like:
    #
    #   id = "1"
    #   image_name = "svchost.exe"
    #   filename = "c:\\users\\rdhj0cnfevzx\\desktop\\svchost.exe"
    #   page_root = "0x751fc000"
    #   os_pid = "0x118c"
    #   os_integrity_level = "0x3000"
    #   os_privileges = "0x60800000"
    #   monitor_reason = "analysis_target"
    #   parent_id = "0"
    #   os_parent_pid = "0x7d8"
    #   cmd_line = "\"c:\\users\\rdhj0cnfevzx\\desktop\\svchost.exe\" "
    #   cur_dir = "c:\\users\\rdhj0cnfevzx\\desktop\\"
    #   os_username = "xc64zb\\rdhj0cnfevzx"
    #   bitness = "32"
    #   os_groups = "xc64zb\\domain users" [0x7], "everyone" [0x7], ...
    properties = parse_properties(txt)

    # the first piece of each split precedes the first header, so drop it.
    region_sections = txt.split("\nRegion:\n")[1:]
    regions = [parse_region(section) for section in region_sections]

    thread_sections = txt.split("\nThread:\n")[1:]
    threads = [parse_thread(section) for section in thread_sections]

    return Process(regions=regions, threads=threads, **properties)
187+
188+
189+
def parse_processes(txt: str) -> list[Process]:
    """Parse every `Process:` section found in a flog.

    Args:
        txt: the complete flog text.

    Returns:
        The processes in file order; empty when no section header is found.
    """
    # whatever precedes the first `Process:` header (the file preamble) is skipped.
    sections = txt.split("\nProcess:\n")[1:]
    return [parse_process(section) for section in sections]
194+
195+
196+
def parse_flog(txt: str) -> Flog:
    """Parse the full text of a VMRay flog.txt file.

    Args:
        txt: decoded file contents.

    Returns:
        Flog with all processes and id-keyed indexes of the processes,
        regions and threads.

    Raises:
        ValueError: if the text is empty or its first line does not carry
            the `# Flog Txt Version 1` marker.
    """
    # the header probably fits within this size
    header_lines = txt[:512].splitlines()

    # substring match, because the file may start with a BOM: | ef bb bf |
    # raise explicitly: an `assert` would be stripped under `python -O`, and
    # empty input would otherwise fail with an unrelated IndexError.
    if not header_lines or "# Flog Txt Version 1" not in header_lines[0]:
        raise ValueError("unsupported flog: missing '# Flog Txt Version 1' header")

    for line in header_lines[1:]:
        line = line.strip()
        if not line.startswith("#"):
            break

        # metadata lines, like:
        #
        #   Flog Txt Version 1
        #   Analyzer Version: 2024.4.1
        #   Analyzer Build Date: Sep  2 2024 06:30:10
        #   Log Creation Date: 08.10.2024 18:12:03.945c
        logger.debug("%s", line)

    processes = parse_processes(txt)
    processes_by_id = {process.id: process for process in processes}
    regions_by_id = {region.id: region for process in processes for region in process.regions}
    threads_by_id = {thread.id: thread for process in processes for thread in process.threads}

    return Flog(
        processes=processes,
        processes_by_id=processes_by_id,
        regions_by_id=regions_by_id,
        threads_by_id=threads_by_id,
    )
227+
228+
229+
if __name__ == "__main__":
    logging.basicConfig(level=logging.DEBUG)

    # the first command-line argument is the path of the flog.txt to summarize.
    contents = Path(sys.argv[1]).read_text(encoding="utf-8")
    flog = parse_flog(contents)

    # the loop variable names are part of the output: `{expr=}` echoes `expr`.
    for process in flog.processes:
        print(f"{process.id=} {len(process.regions)=} {len(process.threads)=}")

        for region in process.regions:
            print(f" {region.id=} {region.name}")

        for thread in process.threads:
            print(f" {thread.id=} {len(thread.events)=}")
243+
244+
245+
def test_event_timestamp():
    """The bracketed `major.minor` prefix is parsed into a pair of ints."""
    line = " [0072.750] GetCurrentProcess () returned 0xffffffffffffffff"
    assert parse_event(line).timestamp == (72, 750)
248+
249+
250+
def test_event_api():
    """The API name is the text between the timestamp and the ` (`."""
    line = " [0072.750] GetCurrentProcess () returned 0xffffffffffffffff"
    assert parse_event(line).api == "GetCurrentProcess"
253+
254+
255+
def test_event_empty_args():
    """A call written with `()` yields an empty argument string."""
    line = " [0072.750] GetCurrentProcess () returned 0xffffffffffffffff"
    parsed = parse_event(line)
    assert not parsed.args
258+
259+
260+
# single arg
261+
# [0074.875] GetSystemMetrics (nIndex=75) returned 1
262+
263+
# no return value
264+
# [0083.567] CoTaskMemFree (pv=0x746aa0)
265+
266+
# two args
267+
# [0085.491] GetWindowLongPtrW (hWnd=0x401f0, nIndex=-16) returned 0x6c10000
268+
269+
# in/out
270+
# [0086.848] GetClientRect (in: hWnd=0x401f0, lpRect=0x14d0c0 | out: lpRect=0x14d0c0) returned 1
271+
272+
# string
273+
# [0102.753] FindAtomW (lpString="GDI+Atom_4492_1") returned 0xc000
274+
275+
# int (hex)
276+
# [0102.756] GdipDeleteFont (font=0x1c504e00) returned 0x0
277+
278+
# int (decimal)
279+
# [0074.875] GetSystemMetrics (nIndex=75) returned 1
280+
281+
# int (negative)
282+
# [0085.491] GetWindowLongPtrW (hWnd=0x401f0, nIndex=-16) returned 0x6c10000
283+
284+
# struct
285+
# [0067.024] GetVersionExW (in: lpVersionInformation=0x14e3f0*(dwOSVersionInfoSize=0x114, dwMajorVersion=0x0, dwMinorVersion=0x0, dwBuildNumber=0x0, dwPlatformId=0x0, szCSDVersion="") | out: lpVersionInformation=0x14e3f0*(dwOSVersionInfoSize=0x114, dwMajorVersion=0x6, dwMinorVersion=0x2, dwBuildNumber=0x23f0, dwPlatformId=0x2, szCSDVersion="")) returned 1
286+
287+
# nested struct
288+
# [0111.527] CoCreateGuid (in: pguid=0x14c910 | out: pguid=0x14c910*(Data1=0x63ac5b46, Data2=0xc417, Data3=0x49b0, Data4=([0]=0xac, [1]=0xbf, [2]=0xb8, [3]=0xf3, [4]=0x8b, [5]=0x1a, [6]=0x51, [7]=0x78))) returned 0x0
289+
290+
# bytes
291+
# [0111.527] CoCreateGuid (in: pguid=0x14c910 | out: pguid=0x14c910*(Data1=0x63ac5b46, Data2=0xc417, Data3=0x49b0, Data4=([0]=0xac, [1]=0xbf, [2]=0xb8, [3]=0xf3, [4]=0x8b, [5]=0x1a, [6]=0x51, [7]=0x78))) returned 0x0

0 commit comments

Comments
 (0)