|
| 1 | +import sys |
| 2 | +import logging |
| 3 | +from typing import Any, Literal, Optional |
| 4 | +from pathlib import Path |
| 5 | + |
| 6 | +from pydantic import BeforeValidator |
| 7 | +from typing_extensions import Annotated |
| 8 | +from pydantic.dataclasses import dataclass |
| 9 | + |
def _unquote(value: str) -> str:
    """Return *value* with any double quotes removed from both ends."""
    return value.strip('"')


def _parse_quoted_hex(value: str) -> int:
    """Parse an optionally quoted base-16 number such as `"0x118c"`."""
    return int(_unquote(value), 0x10)


def _parse_quoted_int(value: str) -> int:
    """Parse an optionally quoted base-10 number such as `"32"`."""
    return int(_unquote(value))


# Field types for flog property values: the raw text is cleaned up by the
# validator before pydantic checks it against the target type.
HexInt = Annotated[int, BeforeValidator(_parse_quoted_hex)]
QuotedInt = Annotated[int, BeforeValidator(_parse_quoted_int)]
QuotedString = Annotated[str, BeforeValidator(_unquote)]


logger = logging.getLogger("vmray.flog")
| 16 | + |
| 17 | + |
@dataclass
class Region:
    """One `Region:` section of a process, built from its ` = ` property lines.

    Values arrive as raw text from the flog and are converted by the field
    types: `QuotedInt` is decimal, `HexInt` is base 16, and `QuotedString`
    drops surrounding double quotes.
    """

    id: QuotedInt
    start_va: HexInt
    end_va: HexInt
    monitored: bool
    entry_point: HexInt
    region_type: Literal["private", "mapped_file", "pagefile_backed"]
    name: QuotedString
    # The flog writes this value quoted (e.g. `filename = ""`), so it is
    # unquoted the same way as `name` instead of keeping the literal quotes.
    filename: QuotedString
| 28 | + |
| 29 | + |
| 30 | +@dataclass |
| 31 | +class Event: |
| 32 | + timestamp: tuple[int, int] |
| 33 | + api: str |
| 34 | + args: str |
| 35 | + rv: Optional[int] |
| 36 | + |
| 37 | + |
| 38 | +@dataclass |
| 39 | +class Thread: |
| 40 | + id: QuotedInt |
| 41 | + os_tid: HexInt |
| 42 | + events: list[Event] |
| 43 | + |
| 44 | + |
| 45 | +@dataclass |
| 46 | +class Process: |
| 47 | + id: QuotedInt |
| 48 | + image_name: QuotedString |
| 49 | + filename: QuotedString |
| 50 | + page_root: HexInt |
| 51 | + os_pid: HexInt |
| 52 | + os_integrity_level: HexInt |
| 53 | + os_privileges: HexInt |
| 54 | + monitor_reason: Literal['"analysis_target"'] | Literal['"rpc_server"'] |
| 55 | + parent_id: HexInt |
| 56 | + os_parent_pid: HexInt |
| 57 | + cmd_line: str # TODO: json decode str |
| 58 | + cur_dir: str # TODO: json decode str |
| 59 | + os_username: str # TODO: json decode str |
| 60 | + bitness: QuotedInt # TODO: enum 32 or 64 |
| 61 | + os_groups: str # TODO: list of str |
| 62 | + regions: list[Region] |
| 63 | + threads: list[Thread] |
| 64 | + |
| 65 | + |
| 66 | +@dataclass |
| 67 | +class Flog: |
| 68 | + processes: list[Process] |
| 69 | + |
| 70 | + processes_by_id: dict[int, Process] |
| 71 | + regions_by_id: dict[int, Region] |
| 72 | + threads_by_id: dict[int, Thread] |
| 73 | + |
| 74 | + |
def parse_properties(txt: str) -> dict[str, Any]:
    """Collect the `key = value` lines from the first paragraph of *txt*.

    Only the text before the first blank line (`"\\n\\n"`) is considered.
    Leading whitespace on each line is ignored, and each line is split at its
    first ` = `, so values may themselves contain ` = `. Values are returned
    as the raw text; quotes are not removed here.

    Lines that do not contain ` = ` are skipped. Without this, such a line
    (for example a whitespace-only line) would be stored as a key with an
    empty value and later be rejected as an unexpected keyword argument.

    Args:
        txt: text of a section, starting at its property lines.

    Returns:
        Mapping from property name to raw value text; a later duplicate key
        overwrites an earlier one.
    """
    header, _, _ = txt.partition("\n\n")

    properties: dict[str, Any] = {}
    for line in header.splitlines():
        key, separator, value = line.lstrip().partition(" = ")
        if not separator:
            # not a property line
            continue
        properties[key] = value

    return properties
| 82 | + |
| 83 | + |
def parse_region(txt: str) -> Region:
    """Build a Region from the property lines at the start of *txt*."""
    # The text of a region section looks like:
    #
    #   Region:
    #     id = 125
    #     start_va = 0x10000
    #     end_va = 0x2ffff
    #     monitored = 1
    #     entry_point = 0x0
    #     region_type = private
    #     name = "private_0x0000000000010000"
    #     filename = ""
    return Region(**parse_properties(txt))
| 98 | + |
| 99 | + |
def parse_event(line: str) -> Event:
    """Parse one API-call line of a thread section into an Event.

    Example lines:

        [0066.433] CoInitializeEx (pvReserved=0x0, dwCoInit=0x2) returned 0x0
        [0071.184] RegisterClipboardFormatW (lpszFormat="WM_GETCONTROLTYPE") returned 0xc1dc
        [0072.750] GetCurrentProcess () returned 0xffffffffffffffff
        [0074.875] GetSystemMetrics (nIndex=75) returned 1
        [0083.567] CoTaskMemFree (pv=0x746aa0)

    Args:
        line: the event line; leading whitespace is ignored.

    Returns:
        Event with the timestamp as a pair of ints, the API name, the raw
        argument text between the outer parentheses, and the return value
        (None when the line has no ` returned ` suffix).

    Raises:
        ValueError: if the timestamp or the return value is not an integer.
    """
    # drop the leading "[" and split "0072.750" from the rest of the line
    stamp, _, rest = line.lstrip()[1:].partition("] ")
    major, _, minor = stamp.partition(".")
    timestamp = (
        int(major.lstrip("0") or "0"),
        int(minor.lstrip("0") or "0"),
    )

    api, _, rest = rest.partition(" (")
    # the arguments may contain nested parentheses, so cut at the last ")"
    args, _, rest = rest.rpartition(")")

    rv = None
    if " returned " in rest:
        _, _, rv_text = rest.partition(" returned ")
        rv_text = rv_text.strip()
        if rv_text.lower().startswith(("0x", "-0x", "+0x")):
            rv = int(rv_text, 0x10)
        else:
            # Values without a 0x prefix are decimal ("returned 10" is ten,
            # not sixteen). Bare hex digits are still accepted as a fallback.
            try:
                rv = int(rv_text)
            except ValueError:
                rv = int(rv_text, 0x10)

    return Event(
        timestamp=timestamp,
        api=api,
        args=args,
        rv=rv,
    )
| 127 | + |
| 128 | + |
def parse_thread(txt: str) -> Thread:
    """Build a Thread from its property lines and its API-call lines."""
    # The text of a thread section looks like:
    #
    #   Thread:
    #     id = 1
    #     os_tid = 0x117c
    #
    #     [0066.433] CoInitializeEx (pvReserved=0x0, dwCoInit=0x2) returned 0x0
    #     [0071.184] RegisterClipboardFormatW (lpszFormat="WM_GETCONTROLTYPE") returned 0xc1dc
    #     [0072.750] GetCurrentProcess () returned 0xffffffffffffffff
    properties = parse_properties(txt)

    # event lines are the ones that begin with a tab followed by "["
    parsed_events = [parse_event(row) for row in txt.splitlines() if row.startswith("\t[")]

    return Thread(events=parsed_events, **properties)
| 152 | + |
| 153 | + |
def parse_process(txt: str) -> Process:
    """Build a Process from its properties and its Region/Thread sections."""
    # The properties at the start of a process section look like:
    #
    #   id = "1"
    #   image_name = "svchost.exe"
    #   filename = "c:\\users\\rdhj0cnfevzx\\desktop\\svchost.exe"
    #   page_root = "0x751fc000"
    #   os_pid = "0x118c"
    #   os_integrity_level = "0x3000"
    #   os_privileges = "0x60800000"
    #   monitor_reason = "analysis_target"
    #   parent_id = "0"
    #   os_parent_pid = "0x7d8"
    #   cmd_line = "\"c:\\users\\rdhj0cnfevzx\\desktop\\svchost.exe\" "
    #   cur_dir = "c:\\users\\rdhj0cnfevzx\\desktop\\"
    #   os_username = "xc64zb\\rdhj0cnfevzx"
    #   bitness = "32"
    #   os_groups = "xc64zb\\domain users" [0x7], "everyone" [0x7], ...
    properties = parse_properties(txt)

    # whatever precedes the first section marker is not a section; skip it
    region_chunks = txt.split("\nRegion:\n")[1:]
    thread_chunks = txt.split("\nThread:\n")[1:]

    parsed_regions = [parse_region(chunk) for chunk in region_chunks]
    parsed_threads = [parse_thread(chunk) for chunk in thread_chunks]

    return Process(regions=parsed_regions, threads=parsed_threads, **properties)
| 187 | + |
| 188 | + |
def parse_processes(txt: str) -> list[Process]:
    """Parse every `Process:` section of *txt*, in file order."""
    # text before the first "Process:" marker is not a process section
    sections = txt.split("\nProcess:\n")[1:]
    return [parse_process(section) for section in sections]
| 194 | + |
| 195 | + |
def parse_flog(txt: str) -> Flog:
    """Parse the full text of a flog file into a Flog.

    The first line must contain `# Flog Txt Version 1`. The following `#`
    lines in the first 512 characters are logged at debug level. The whole
    text is then parsed into processes, and id-keyed lookup tables are built
    for processes, regions and threads.

    Args:
        txt: complete text of the flog file.

    Returns:
        The parsed Flog.

    Raises:
        ValueError: if *txt* is empty or its first line lacks the version
            marker. This is raised explicitly rather than asserted so that
            the check still runs under `python -O`.
    """
    # the header probably fits within this size
    header_lines = txt[:512].splitlines()

    # file may start with: | ef bb bf |
    # so look for the marker anywhere in the first line, not only at its start
    if not header_lines or "# Flog Txt Version 1" not in header_lines[0]:
        raise ValueError("unsupported flog: expected '# Flog Txt Version 1' on the first line")

    for line in header_lines[1:]:
        line = line.strip()
        if not line.startswith("#"):
            break

        # metadata lines, like:
        #
        #   Flog Txt Version 1
        #   Analyzer Version: 2024.4.1
        #   Analyzer Build Date: Sep 2 2024 06:30:10
        #   Log Creation Date: 08.10.2024 18:12:03.945c
        logger.debug("%s", line)

    processes = parse_processes(txt)

    # if an id occurs more than once, the later object replaces the earlier one
    processes_by_id = {process.id: process for process in processes}
    regions_by_id = {region.id: region for process in processes for region in process.regions}
    threads_by_id = {thread.id: thread for process in processes for thread in process.threads}

    return Flog(
        processes=processes,
        processes_by_id=processes_by_id,
        regions_by_id=regions_by_id,
        threads_by_id=threads_by_id,
    )
| 227 | + |
| 228 | + |
if __name__ == "__main__":
    # Script entry point: parse the flog file named by the first command-line
    # argument and print a per-process summary of its regions and threads.
    logging.basicConfig(level=logging.DEBUG)

    flog_text = Path(sys.argv[1]).read_text(encoding="utf-8")
    flog = parse_flog(flog_text)

    # The loop variable names appear in the output via the `{expr=}` f-string
    # form, so they are part of what gets printed.
    for process in flog.processes:
        print(f"{process.id=} {len(process.regions)=} {len(process.threads)=}")

        for region in process.regions:
            print(f" {region.id=} {region.name}")

        for thread in process.threads:
            print(f" {thread.id=} {len(thread.events)=}")
| 243 | + |
| 244 | + |
def test_event_timestamp():
    """The `[0072.750]` prefix is parsed into the integer pair (72, 750)."""
    line = " [0072.750] GetCurrentProcess () returned 0xffffffffffffffff"
    parsed = parse_event(line)
    assert parsed.timestamp == (72, 750)
| 248 | + |
| 249 | + |
def test_event_api():
    """The API name is the text between the timestamp and the ` (`."""
    line = " [0072.750] GetCurrentProcess () returned 0xffffffffffffffff"
    parsed = parse_event(line)
    assert parsed.api == "GetCurrentProcess"
| 253 | + |
| 254 | + |
def test_event_empty_args():
    """An empty `()` argument list yields an empty argument string."""
    line = " [0072.750] GetCurrentProcess () returned 0xffffffffffffffff"
    parsed = parse_event(line)
    assert len(parsed.args) == 0
| 258 | + |
| 259 | + |
| 260 | +# single arg |
| 261 | +# [0074.875] GetSystemMetrics (nIndex=75) returned 1 |
| 262 | + |
| 263 | +# no return value |
| 264 | +# [0083.567] CoTaskMemFree (pv=0x746aa0) |
| 265 | + |
| 266 | +# two args |
| 267 | +# [0085.491] GetWindowLongPtrW (hWnd=0x401f0, nIndex=-16) returned 0x6c10000 |
| 268 | + |
| 269 | +# in/out |
| 270 | +# [0086.848] GetClientRect (in: hWnd=0x401f0, lpRect=0x14d0c0 | out: lpRect=0x14d0c0) returned 1 |
| 271 | + |
| 272 | +# string |
| 273 | +# [0102.753] FindAtomW (lpString="GDI+Atom_4492_1") returned 0xc000 |
| 274 | + |
| 275 | +# int (hex) |
| 276 | +# [0102.756] GdipDeleteFont (font=0x1c504e00) returned 0x0 |
| 277 | + |
| 278 | +# int (decimal) |
| 279 | +# [0074.875] GetSystemMetrics (nIndex=75) returned 1 |
| 280 | + |
| 281 | +# int (negative) |
| 282 | +# [0085.491] GetWindowLongPtrW (hWnd=0x401f0, nIndex=-16) returned 0x6c10000 |
| 283 | + |
| 284 | +# struct |
| 285 | +# [0067.024] GetVersionExW (in: lpVersionInformation=0x14e3f0*(dwOSVersionInfoSize=0x114, dwMajorVersion=0x0, dwMinorVersion=0x0, dwBuildNumber=0x0, dwPlatformId=0x0, szCSDVersion="") | out: lpVersionInformation=0x14e3f0*(dwOSVersionInfoSize=0x114, dwMajorVersion=0x6, dwMinorVersion=0x2, dwBuildNumber=0x23f0, dwPlatformId=0x2, szCSDVersion="")) returned 1 |
| 286 | + |
| 287 | +# nested struct |
| 288 | +# [0111.527] CoCreateGuid (in: pguid=0x14c910 | out: pguid=0x14c910*(Data1=0x63ac5b46, Data2=0xc417, Data3=0x49b0, Data4=([0]=0xac, [1]=0xbf, [2]=0xb8, [3]=0xf3, [4]=0x8b, [5]=0x1a, [6]=0x51, [7]=0x78))) returned 0x0 |
| 289 | + |
| 290 | +# bytes |
| 291 | +# [0111.527] CoCreateGuid (in: pguid=0x14c910 | out: pguid=0x14c910*(Data1=0x63ac5b46, Data2=0xc417, Data3=0x49b0, Data4=([0]=0xac, [1]=0xbf, [2]=0xb8, [3]=0xf3, [4]=0x8b, [5]=0x1a, [6]=0x51, [7]=0x78))) returned 0x0 |
0 commit comments