Skip to content

Commit

Permalink
Improve vmdk extent descriptor parsing (2) (#47)
Browse files Browse the repository at this point in the history
  • Loading branch information
JSCU-CNI authored Nov 28, 2024
1 parent 50464c1 commit 54a733b
Show file tree
Hide file tree
Showing 2 changed files with 270 additions and 34 deletions.
123 changes: 90 additions & 33 deletions dissect/hypervisor/disk/vmdk.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,14 @@
from __future__ import annotations

import ctypes
import io
import logging
import os
import re
import textwrap
import zlib
from bisect import bisect_right
from dataclasses import dataclass
from functools import lru_cache
from pathlib import Path

Expand Down Expand Up @@ -59,13 +63,13 @@ def __init__(self, fh):
if self.descriptor.attr["parentCID"] != "ffffffff":
self.parent = open_parent(path.parent, self.descriptor.attr["parentFileNameHint"])

for _, size, extent_type, filename in self.descriptor.extents:
if extent_type in ["SPARSE", "VMFSSPARSE", "SESPARSE"]:
sdisk_fh = path.with_name(filename).open("rb")
for extent in self.descriptor.extents:
if extent.type in ["SPARSE", "VMFSSPARSE", "SESPARSE"]:
sdisk_fh = path.with_name(extent.filename).open("rb")
self.disks.append(SparseDisk(sdisk_fh, parent=self.parent))
elif extent_type in ["VMFS", "FLAT"]:
rdisk_fh = path.with_name(filename).open("rb")
self.disks.append(RawDisk(rdisk_fh, size * SECTOR_SIZE))
elif extent.type in ["VMFS", "FLAT"]:
rdisk_fh = path.with_name(extent.filename).open("rb")
self.disks.append(RawDisk(rdisk_fh, extent.sectors * SECTOR_SIZE))

elif magic in (COWD_MAGIC, VMDK_MAGIC, SESPARSE_MAGIC):
sparse_disk = SparseDisk(fh)
Expand Down Expand Up @@ -398,18 +402,69 @@ def __getattr__(self, attr):
return getattr(self.hdr, attr)


# Matches a single extent line of a VMDK descriptor, for example:
#
#   RW 4192256 SPARSE "disk-s001.vmdk"
#   RW 4192256 FLAT "disk-flat.vmdk" 0
#   RDONLY 0 ZERO
#
# Only the access mode, sector count and extent type are mandatory; the
# filename, start sector, partition UUID and device identifier are optional
# and show up as ``None`` in ``match.groupdict()`` when absent.
#
# ``SESPARSE`` is accepted as an extent type because the VMDK opening code
# treats it as a sparse extent; without it those lines would be rejected.
RE_EXTENT_DESCRIPTOR = re.compile(
    r"""
    ^
    (?P<access_mode>RW|RDONLY|NOACCESS)\s
    (?P<sectors>\d+)\s
    # Longer alternatives first so the intent is clear; the trailing `$`
    # anchor already prevents `VMFS` from winning over `VMFSSPARSE`.
    (?P<type>SESPARSE|VMFSSPARSE|VMFSRDM|VMFSRAW|VMFS|SPARSE|ZERO|FLAT)
    # The filename keeps its surrounding double quotes and may contain spaces.
    (\s(?P<filename>\".+\"))?
    (\s(?P<start_sector>\d+))?
    (\s(?P<partition_uuid>\S+))?
    (\s(?P<device_identifier>\S+))?
    $
    """,
    re.VERBOSE,
)


@dataclass
class ExtentDescriptor:
    """One extent line of a VMDK disk descriptor, split into its fields.

    Instances are normally built from the named groups of the extent
    descriptor regex, so every field may arrive as a string (or ``None`` for
    an absent optional field). ``__post_init__`` normalises the numeric
    fields to ``int`` and removes the quotes enclosing the filename.

    Attributes are documented inline below.
    """

    # The original, unmodified descriptor line; used for ``str()``/``repr()``.
    raw: str
    # Access mode keyword of the extent (first token of the line).
    access_mode: str
    # Size of the extent in sectors; converted to ``int`` after init.
    sectors: int
    # Extent type keyword (third token of the line).
    type: str
    # Extent filename without its enclosing double quotes, if present.
    filename: str | None = None
    # Optional start sector; converted to ``int`` after init when given.
    start_sector: int | None = None
    partition_uuid: str | None = None
    device_identifier: str | None = None

    def __post_init__(self) -> None:
        """Normalise field values that may have been passed in as strings."""
        self.sectors = int(self.sectors)

        name = self.filename
        # Remove only the single enclosing pair of quotes. ``str.strip('"')``
        # would also eat quotes that are part of the filename itself.
        if name and len(name) >= 2 and name[0] == '"' and name[-1] == '"':
            self.filename = name[1:-1]

        # A truthiness check is sufficient here: the string "0" is truthy and
        # gets converted, while an integer 0 is already in its final form.
        if self.start_sector:
            self.start_sector = int(self.start_sector)

    def __repr__(self) -> str:
        return f"<ExtentDescriptor {self.raw}>"

    def __str__(self) -> str:
        """Return the original descriptor line unchanged."""
        return self.raw


class DiskDescriptor:
def __init__(self, attr, extents, disk_db, sectors, raw_config=None):
def __init__(
self, attr: dict, extents: list[ExtentDescriptor], disk_db: dict, sectors: int, raw_config: str | None = None
):
self.attr = attr
self.extents = extents
self.ddb = disk_db
self.sectors = sectors
self.raw = raw_config

@classmethod
def parse(cls, vmdk_config):
def parse(cls, vmdk_config: str) -> DiskDescriptor:
"""Return :class:`DiskDescriptor` based on the provided ``vmdk_config``.
Resources:
- https://github.com/libyal/libvmdk/blob/main/documentation/VMWare%20Virtual%20Disk%20Format%20(VMDK).asciidoc
""" # noqa: E501

descriptor_settings = {}
extents = []
extents: list[ExtentDescriptor] = []
disk_db = {}
sectors = 0

Expand All @@ -420,11 +475,15 @@ def parse(cls, vmdk_config):
continue

if line.startswith("RW ") or line.startswith("RDONLY ") or line.startswith("NOACCESS "):
access_type, size, extent_type, filename = line.split(" ", 3)
filename = filename.strip('"')
size = int(size)
sectors += size
extents.append([access_type, size, extent_type, filename])
match = RE_EXTENT_DESCRIPTOR.search(line)

if not match:
log.warning("Unexpected ExtentDescriptor format in vmdk config: %s, ignoring", line)
continue

extent = ExtentDescriptor(raw=line, **match.groupdict())
sectors += extent.sectors
extents.append(extent)
continue

setting, _, value = line.partition("=")
Expand All @@ -438,35 +497,33 @@ def parse(cls, vmdk_config):

return cls(descriptor_settings, extents, disk_db, sectors, vmdk_config)

def __str__(self):
str_template = """\
# Disk DescriptorFile
version=1
{}
def __str__(self) -> str:
str_template = textwrap.dedent(
"""\
# Disk DescriptorFile
version=1
{}
# Extent Description
{}
# Extent Description
{}
# The Disk Data Base
#DDB
# The Disk Data Base
#DDB
{}"""
)

{}"""
str_template = textwrap.dedent(str_template)
descriptor_settings = []
for setting, value in self.attr.items():
if setting == "version":
continue
descriptor_settings.append("{}={}".format(setting, value))
if setting != "version":
descriptor_settings.append(f"{setting}={value}")
descriptor_settings = "\n".join(descriptor_settings)

extents = []
for access_type, size, extent_type, filename in self.extents:
extents.append('{} {} {} "{}"'.format(access_type, size, extent_type, filename))
extents = "\n".join(extents)
extents = "\n".join(map(str, self.extents))

disk_db = []
for setting, value in self.ddb.items():
disk_db.append('{} = "{}"'.format(setting, value))
disk_db.append(f'{setting} = "{value}"')
disk_db = "\n".join(disk_db)

return str_template.format(descriptor_settings, extents, disk_db)
Expand Down
181 changes: 180 additions & 1 deletion tests/test_vmdk.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
import pytest

from dissect.hypervisor.disk.c_vmdk import c_vmdk
from dissect.hypervisor.disk.vmdk import VMDK
from dissect.hypervisor.disk.vmdk import VMDK, DiskDescriptor, ExtentDescriptor


def test_vmdk_sesparse(sesparse_vmdk):
Expand All @@ -18,3 +20,180 @@ def test_vmdk_sesparse(sesparse_vmdk):
assert header.version == 0x200000001

assert vmdk.read(0x1000000) == b"a" * 0x1000000


@pytest.mark.parametrize(
"extent_description, expected_extents",
[
(
'RW 123456789 SPARSE "disk.vmdk"',
[
ExtentDescriptor(
raw='RW 123456789 SPARSE "disk.vmdk"',
access_mode="RW",
sectors=123456789,
type="SPARSE",
filename='"disk.vmdk"',
start_sector=None,
partition_uuid=None,
device_identifier=None,
),
],
),
(
'RW 123456789 FLAT "disk-flat.vmdk" 0',
[
ExtentDescriptor(
raw='RW 123456789 FLAT "disk-flat.vmdk" 0',
access_mode="RW",
sectors=123456789,
type="FLAT",
filename='"disk-flat.vmdk"',
start_sector=0,
partition_uuid=None,
device_identifier=None,
)
],
),
(
"RDONLY 0 ZERO",
[
ExtentDescriptor(
raw="RDONLY 0 ZERO",
access_mode="RDONLY",
sectors=0,
type="ZERO",
),
],
),
(
'NOACCESS 123456789 SPARSE "disk-sparse.vmdk" 123 partition-uuid device-id',
[
ExtentDescriptor(
raw='NOACCESS 123456789 SPARSE "disk-sparse.vmdk" 123 partition-uuid device-id',
access_mode="NOACCESS",
sectors=123456789,
type="SPARSE",
filename='"disk-sparse.vmdk"',
start_sector=123,
partition_uuid="partition-uuid",
device_identifier="device-id",
),
],
),
("RW 1234567890", []),
('RDONLY "file.vmdk"', []),
("NOACCESS", []),
(
'RW 1234567890 SPARSE "disk with spaces.vmdk"',
[
ExtentDescriptor(
raw='RW 1234567890 SPARSE "disk with spaces.vmdk"',
access_mode="RW",
sectors=1234567890,
type="SPARSE",
filename='"disk with spaces.vmdk"',
start_sector=None,
partition_uuid=None,
device_identifier=None,
)
],
),
(
'RW 1234567890 SPARSE "disk with spaces.vmdk" 123',
[
ExtentDescriptor(
raw='RW 1234567890 SPARSE "disk with spaces.vmdk" 123',
access_mode="RW",
sectors=1234567890,
type="SPARSE",
filename='"disk with spaces.vmdk"',
start_sector=123,
partition_uuid=None,
device_identifier=None,
)
],
),
(
'RW 1234567890 SPARSE "disk with spaces.vmdk" 123 part-uuid',
[
ExtentDescriptor(
raw='RW 1234567890 SPARSE "disk with spaces.vmdk" 123 part-uuid',
access_mode="RW",
sectors=1234567890,
type="SPARSE",
filename='"disk with spaces.vmdk"',
start_sector=123,
partition_uuid="part-uuid",
device_identifier=None,
)
],
),
(
'RW 1234567890 SPARSE "disk with spaces.vmdk" 123 part-uuid device-id',
[
ExtentDescriptor(
raw='RW 1234567890 SPARSE "disk with spaces.vmdk" 123 part-uuid device-id',
access_mode="RW",
sectors=1234567890,
type="SPARSE",
filename='"disk with spaces.vmdk"',
start_sector=123,
partition_uuid="part-uuid",
device_identifier="device-id",
)
],
),
(
r'RW 16777216 SPARSE "this is an example "\' diskëäô:)\\\'`\foo.vmdk" 123',
[
ExtentDescriptor(
raw=r'RW 16777216 SPARSE "this is an example "\' diskëäô:)\\\'`\foo.vmdk" 123',
access_mode="RW",
sectors=16777216,
type="SPARSE",
filename=r'"this is an example "\' diskëäô:)\\\'`\foo.vmdk"',
start_sector=123,
partition_uuid=None,
device_identifier=None,
)
],
),
(
r'RW 13371337 SPARSE "🦊 🦊 🦊.vmdk"',
[
ExtentDescriptor(
raw=r'RW 13371337 SPARSE "🦊 🦊 🦊.vmdk"',
access_mode="RW",
sectors=13371337,
type="SPARSE",
filename='"🦊 🦊 🦊.vmdk"',
)
],
),
],
ids=(
"sparse",
"flat",
"zero",
"sparse-ids",
"bad-1",
"bad-2",
"bad-3",
"spaces-four-parts",
"spaces-five-parts",
"spaces-six-parts",
"spaces-seven-parts",
"specials-five-parts",
"emoji-four-parts",
),
)
def test_vmdk_extent_description(extent_description: str, expected_extents: list) -> None:
    """Check that ``DiskDescriptor.parse`` produces the expected extents for one extent line.

    Each parametrized case supplies a single extent description line and the
    list of ``ExtentDescriptor`` objects it should parse into. Malformed lines
    are expected to yield an empty list.

    Resources:
        - https://github.com/libyal/libvmdk/blob/main/documentation/VMWare%20Virtual%20Disk%20Format%20(VMDK).asciidoc#22-extent-descriptions
    """ # noqa: E501

    # The extent line is passed on its own as the complete descriptor config.
    descriptor = DiskDescriptor.parse(extent_description)
    assert descriptor.extents == expected_extents

0 comments on commit 54a733b

Please sign in to comment.