-
Notifications
You must be signed in to change notification settings - Fork 566
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
add initial script parsing VMRay flog.txt files
ref #2452
- Loading branch information
1 parent
6a12ab8
commit 4261ee5
Showing
1 changed file
with
178 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,178 @@ | ||
import sys | ||
import logging | ||
from typing import Literal, Optional | ||
from pathlib import Path | ||
|
||
from pydantic import BeforeValidator | ||
from typing_extensions import Annotated | ||
from pydantic.dataclasses import dataclass | ||
|
||
def _unquote(value):
    """Strip surrounding double quotes from a raw flog value.

    Non-string inputs are returned unchanged so that already-typed values
    (e.g. when constructing a record by hand) reach pydantic's normal
    validation instead of failing with AttributeError on `.strip`.
    """
    if isinstance(value, str):
        return value.strip('"')
    return value


def _quoted_int(base: int):
    """Build a validator that parses an optionally quoted integer in `base`."""

    def validate(value):
        if isinstance(value, str):
            return int(value.strip('"'), base)
        # not raw text: let pydantic validate/coerce it as an int itself
        return value

    return validate


# integer written in hexadecimal, optionally wrapped in double quotes
HexInt = Annotated[int, BeforeValidator(_quoted_int(0x10))]
# integer written in decimal, optionally wrapped in double quotes
QuotedInt = Annotated[int, BeforeValidator(_quoted_int(10))]
# string with any surrounding double quotes removed
QuotedString = Annotated[str, BeforeValidator(_unquote)]


logger = logging.getLogger("vmray.flog")
|
||
|
||
@dataclass
class Region:
    """One `Region:` section of a process, built from its raw `key = value` lines."""

    # decimal identifier; surrounding quotes are stripped before parsing
    id: QuotedInt
    # start and end addresses, parsed as hexadecimal
    start_va: HexInt
    end_va: HexInt
    # raw text is coerced to bool by pydantic
    monitored: bool
    entry_point: HexInt
    region_type: Literal["private", "mapped_file", "pagefile_backed"]
    name: QuotedString
    # kept exactly as it appears in the log (quotes are not stripped)
    filename: str
|
||
|
||
@dataclass
class Event:
    """A single logged call taken from one event line of a thread section."""

    # the two numbers inside the leading `[major.minor]` marker
    timestamp: tuple[int, int]
    # text before the first " (" on the event line
    api: str
    # raw, unparsed text between the parentheses
    args: str
    # value after " returned ", parsed as hexadecimal; None when absent
    rv: int | None
|
||
|
||
@dataclass
class Thread:
    """One `Thread:` section of a process together with its parsed events."""

    # decimal identifier; surrounding quotes are stripped before parsing
    id: QuotedInt
    # parsed as hexadecimal
    os_tid: HexInt
    # events in the order their lines appear in the thread section
    events: list[Event]
|
||
|
||
@dataclass
class Process:
    """One `Process:` section, with the regions and threads found inside it."""

    # decimal identifier; surrounding quotes are stripped before parsing
    id: QuotedInt
    image_name: QuotedString
    filename: QuotedString
    page_root: HexInt
    os_pid: HexInt
    os_integrity_level: HexInt
    os_privileges: HexInt
    # the raw value is matched with its double quotes still attached
    monitor_reason: Literal['"analysis_target"', '"rpc_server"']
    # NOTE(review): parsed as hex although `id` is parsed as decimal - confirm
    parent_id: HexInt
    os_parent_pid: HexInt
    cmd_line: str  # TODO: json decode str
    cur_dir: str  # TODO: json decode str
    os_username: str  # TODO: json decode str
    bitness: QuotedInt  # TODO: enum 32 or 64
    os_groups: str  # TODO: list of str
    regions: list[Region]
    threads: list[Thread]
|
||
|
||
logging.basicConfig(level=logging.DEBUG)

# Exit with a usage hint rather than a bare IndexError when no path is given.
if len(sys.argv) < 2:
    sys.exit(f"usage: {sys.argv[0]} <path/to/flog.txt>")

flog_path = Path(sys.argv[1])
# The file may start with a UTF-8 BOM (ef bb bf); "utf-8-sig" drops it when
# present and otherwise decodes exactly like "utf-8".
flog = flog_path.read_text(encoding="utf-8-sig")

lines = flog.splitlines()

# Validate explicitly: an `assert` is stripped under `python -O`, and indexing
# `lines[0]` on an empty file would raise an unrelated IndexError.
if not lines or "# Flog Txt Version 1" not in lines[0]:
    raise ValueError(f"{flog_path}: missing '# Flog Txt Version 1' header")

# Log the remaining metadata lines, like:
#
#   # Flog Txt Version 1
#   # Analyzer Version: 2024.4.1
#   # Analyzer Build Date: Sep 2 2024 06:30:10
#   # Log Creation Date: 08.10.2024 18:12:03.945
for line in lines[1:]:
    line = line.strip()
    if not line.startswith("#"):
        # first non-comment line ends the header
        break

    logger.debug("%s", line)
|
||
|
||
def _parse_fields(section: str) -> dict[str, str]:
    """Collect the leading `key = value` lines of a section.

    Parsing stops at the first empty (or whitespace-only) line. Values are
    returned as raw text; quote stripping and numeric conversion are left to
    the record types the fields are passed to.
    """
    fields = {}
    for line in section.splitlines():
        # parse until the first empty line
        if not line.strip():
            break

        key, _, value = line.lstrip().partition(" = ")
        fields[key] = value
    return fields


def _parse_event(line: str) -> Event:
    """Parse one event line shaped like `[major.minor] api (args) returned hex`.

    The ` returned hex` suffix is optional; without it `rv` is None.
    Raises ValueError when a timestamp part or the return value is not numeric.
    """
    numbers, _, rest = line.lstrip()[1:].partition("] ")
    major, _, minor = numbers.partition(".")
    # int() accepts leading zeros; a missing part counts as 0
    timestamp = (int(major or "0"), int(minor or "0"))

    api, _, rest = rest.partition(" (")
    # split on the LAST ")" so parentheses inside the arguments stay in `args`
    args, _, rest = rest.rpartition(")")

    _, found, rv_text = rest.partition(" returned ")
    rv = int(rv_text, 0x10) if found else None

    return Event(timestamp=timestamp, api=api, args=args, rv=rv)


processes = []
# Everything before the first "Process:" marker is header text, hence [1:].
for process in flog.split("\nProcess:\n")[1:]:
    regions = [
        Region(**_parse_fields(region))
        for region in process.split("\nRegion:\n")[1:]
    ]

    threads = []
    for thread in process.split("\nThread:\n")[1:]:
        # event lines are the tab-indented ones that open with "["
        events = [
            _parse_event(line)
            for line in thread.splitlines()
            if line.startswith("\t[")
        ]
        threads.append(Thread(events=events, **_parse_fields(thread)))

    processes.append(
        Process(
            regions=regions,
            threads=threads,
            **_parse_fields(process),
        )
    )
|
||
|
||
# Print a summary: one line per process, then one per region and per thread.
# The loop variable names are part of the output (f-string `=` specifier),
# so they must stay as they are.
for process in processes:
    process_summary = f"{process.id=} {len(process.regions)=} {len(process.threads)=}"
    print(process_summary)

    for region in process.regions:
        region_summary = f" {region.id=} {region.name}"
        print(region_summary)

    for thread in process.threads:
        thread_summary = f" {thread.id=} {len(thread.events)=}"
        print(thread_summary)