diff --git a/dissect/hypervisor/__init__.py b/dissect/hypervisor/__init__.py
index 6716621..2fbf5fd 100644
--- a/dissect/hypervisor/__init__.py
+++ b/dissect/hypervisor/__init__.py
@@ -1,4 +1,4 @@
-from dissect.hypervisor.backup import vma, xva
+from dissect.hypervisor.backup import vbk, vma, xva
 from dissect.hypervisor.descriptor import hyperv, ovf, pvs, vbox, vmx
 from dissect.hypervisor.disk import hdd, qcow2, vdi, vhd, vhdx, vmdk
 from dissect.hypervisor.util import envelope, vmtar
@@ -10,6 +10,7 @@
     "ovf",
     "pvs",
     "qcow2",
+    "vbk",
     "vbox",
     "vdi",
     "vhd",
diff --git a/dissect/hypervisor/backup/c_vbk.py b/dissect/hypervisor/backup/c_vbk.py
new file mode 100644
index 0000000..c27e90e
--- /dev/null
+++ b/dissect/hypervisor/backup/c_vbk.py
@@ -0,0 +1,287 @@
+from dissect.cstruct import cstruct
+
+vbk_def = """
+#define PAGE_SIZE 4096
+
+/* Storage header */
+
+struct StorageHeader {
+    uint32 FormatVersion;               /* 0x0000 */
+    uint32 Initialized;                 /* 0x0004 */
+    uint32 DigestTypeLength;            /* 0x0008 */
+    char DigestType[251];               /* 0x000C */
+    uint32 SnapshotSlotFormat;          /* 0x0107 format > 5 -> crc32c */
+    uint32 StandardBlockSize;           /* 0x010B */
+    uint8 ClusterAlign;                 /* 0x010F */
+    char Unk0[16];                      /* 0x0110 */
+    char ExternalStorageId[16];         /* 0x0120 */
+};
+
+/* Snapshot header */
+
+struct SnapshotSlotHeader {
+    uint32 CRC;
+    uint32 ContainsSnapshot;
+};
+
+struct DirectoryRootRecord {
+    int64 RootPage;                     /* Root page of the directory */
+    uint64 Count;                       /* Number of children */
+};
+
+struct BlocksStoreHeader {
+    int64 RootPage;                     /* Root of the blocks store */
+    uint64 Count;                       /* Number of blocks store entries */
+    int64 FreeRootPage;                 /* Root of the free blocks tree */
+    int64 DeduplicationRootPage;        /* Root of the deduplication tree */
+    int64 Unk0;
+    int64 Unk1;
+};
+
+struct CryptoStoreRecord {
+    int64 RootPage;                     /* Root of the crypto store */
+};
+
+struct SnapshotDescriptor {
+    uint64 Version;                     /* Acts as a sequence number, highest is active slot */
+    uint64 StorageEOF;                  /* End of file, aka file size */
+    uint32 BanksCount;                  /* Number of banks */
+    DirectoryRootRecord DirectoryRoot;  /* Directory root record */
+    BlocksStoreHeader BlocksStore;      /* Blocks store header */
+    CryptoStoreRecord CryptoStore;      /* Crypto store record */
+    uint64 Unk0;
+    uint64 Unk1;
+};
+
+struct BankDescriptor {
+    uint32 CRC;
+    uint64 Offset;
+    uint32 Size;
+};
+
+struct BanksGrain {
+    uint32 MaxBanks;
+    uint32 StoredBanks;
+    // BankDescriptor Banks[StoredBanks];
+};
+
+/* Block headers */
+
+struct BankHeader {
+    uint16 PageCount;
+    uint16 Flags;
+    char Unk0[3064];
+    uint64 Unk1;
+    char Unk2[1020];
+};
+
+struct BankHeaderV71 {
+    uint16 PageCount;
+    uint16 Flags;                       /* 2 == encrypted */
+    char Unk0[3072];
+    char KeySetId[16];
+    char Unk1[16];
+    char Unk2[16];
+    uint32 Unk3;
+    char Unk4[968];
+};
+
+struct MetaBlobHeader {
+    int64 NextPage;
+    int32 Unk0;
+};
+
+struct Lz4BlockHeader {
+    uint32 Magic;                       /* 0xF800000F */
+    uint32 CRC;                         /* CRC32C of the compressed data */
+    uint32 SourceSize;
+};
+
+/* DirItem headers */
+
+struct BlocksVectorHeader {
+    uint64 RootPage;
+    uint64 Count;
+};
+
+struct SubFolderHeader {
+    uint64 RootPage;                    /* 0x94 */
+    uint32 Count;                       /* 0x9C */
+    char Data[32];                      /* 0xA0 */
+};                                      /* 0xC0 */
+
+struct ExtFibHeader {
+    uint16 UpdateInProgress;            /* 0x94 */
+    uint8 Unk3;                         /* 0x96 */
+    uint8 Format;                       /* 0x97 Bit 3 == 1 */
+    BlocksVectorHeader BlocksVector;    /* 0x98 */
+    uint64 FibSize;                     /* 0xA8 */
+    uint64 Size;                        /* 0xB0 */
+    uint8 FsObjAttachState;             /* 0xB8 */
+    char Data[7];                       /* 0xB9 */
+};                                      /* 0xC0 */
+
+struct IntFibHeader {
+    uint16 UpdateInProgress;            /* 0x94 */
+    uint8 Unk3;                         /* 0x96 */
+    uint8 Format;                       /* 0x97 Bit 3 == 1 */
+    BlocksVectorHeader BlocksVector;    /* 0x98 */
+    uint64 FibSize;                     /* 0xA8 */
+    uint64 Size;                        /* 0xB0 */
+    uint8 FsObjAttachState;             /* 0xB8 */
+    char Data[7];                       /* 0xB9 */
+};                                      /* 0xC0 */
+
+struct PatchHeader {
+    uint32 Unk0;                        /* 0x94 */
+    BlocksVectorHeader BlocksVector;    /* 0x98 */
+    uint64 FibSize;                     /* 0xA8 Source file size */
+    uint64 Unk4;                        /* 0xB0 */
+    char Data[8];                       /* 0xB8 */
+};                                      /* 0xC0 */
+
+struct IncrementHeader {
+    uint32 Unk0;                        /* 0x94 */
+    BlocksVectorHeader BlocksVector;    /* 0x98 */
+    uint64 FibSize;                     /* 0xA8 Original FIB size */
+    uint64 Unk4;                        /* 0xB0 */
+    char Data[8];                       /* 0xB8 */
+};                                      /* 0xC0 */
+
+enum DirItemType : uint32 {
+    None = 0,
+    SubFolder = 1,
+    ExtFib = 2,
+    IntFib = 3,
+    Patch = 4,
+    Increment = 5,
+};
+
+struct DirItemRecord {
+    DirItemType Type;                   /* 0x00 */
+    uint32 NameLength;                  /* 0x04 */
+    char Name[128];                     /* 0x08 */
+    int64 PropsRootPage;                /* 0x88 */
+    uint32 Unk1;                        /* 0x90 */
+    union {                             /* 0x94 */
+        char Data[44];
+        SubFolderHeader SubFolder;
+        ExtFibHeader ExtFib;
+        IntFibHeader IntFib;
+        PatchHeader Patch;
+        IncrementHeader Increment;
+    };
+};
+
+/* Block descriptors */
+
+flag BlockFlags : uint8 {
+    None = 0x00,
+    Updated = 0x01,
+    CommitInProgress = 0x02,
+};
+
+enum BlockLocationType : uint8 {
+    Normal = 0x00,
+    Sparse = 0x01,
+    Reserved = 0x02,
+    Archived = 0x03,                    /* CompressedSize | (CompressionType << 32) */
+    BlockInBlob = 0x04,                 /* BlockId? & 0x3FFFFFF | (BlobId << 26) | ((Offset >> 9) << 42) */
+    BlockInBlobReserved = 0x05,         /* BlockId? | 0xFFFFFFFFFC000000 */
+};
+
+enum CompressionType : int8 {
+    Plain = -1,
+    RL = 2,
+    ZLH = 3,
+    ZLL = 4,
+    LZ4 = 7,
+};
+
+struct MetaTableDescriptor {
+    int64 RootPage;
+    uint64 BlockSize;
+    uint64 Count;
+};
+
+struct StgBlockDescriptor {
+    uint8 Format;                       /* Format != 4 == legacy */
+    uint32 UsageCounter;
+    uint64 Offset;
+    uint32 AllocatedSize;
+    uint8 Deduplication;
+    char Digest[16];
+    CompressionType CompressionType;
+    uint8 Unk0;
+    uint32 CompressedSize;
+    uint32 SourceSize;
+};
+
+struct StgBlockDescriptorV7 {
+    uint8 Format;                       /* Format != 4 == legacy */
+    uint32 UsageCounter;
+    uint64 Offset;
+    uint32 AllocatedSize;
+    uint8 Deduplication;
+    char Digest[16];
+    CompressionType CompressionType;
+    uint8 Unk0;
+    uint32 CompressedSize;
+    uint32 SourceSize;
+    char KeySetId[16];
+};
+
+struct FibBlockDescriptor {
+    uint32 BlockSize;
+    BlockLocationType Type;
+    char Digest[16];
+    // union {
+    //     struct {
+    //         uint32 ArchiveUsedSize;
+    //         uint8 ArchiveCompressionType;
+    //         uint8 Unk3;
+    //         uint16 Unk4;
+    //     } Archived;
+    //     uint64 Offset;
+    // };
+    uint64 BlockId;                     /* For performance reasons we just put a uint64 here, but this is actually a union */
+    BlockFlags Flags;
+};
+
+struct FibBlockDescriptorV7 {
+    uint32 BlockSize;
+    BlockLocationType Type;
+    char Digest[16];
+    // union {
+    //     struct {
+    //         uint32 ArchiveUsedSize;
+    //         uint8 ArchiveCompressionType;
+    //         uint8 Unk3;
+    //         uint16 Unk4;
+    //     } Archived;
+    //     uint64 Offset;
+    // };
+    uint64 BlockId;                     /* For performance reasons we just put a uint64 here, but this is actually a union */
+    BlockFlags Flags;
+    char KeySetId[16];
+};
+
+struct PatchBlockDescriptor {
+};
+
+struct PatchBlockDescriptorV7 {
+};
+
+/* Property dictionary */
+
+enum PropertyType : int32 {
+    UInt32 = 1,
+    UInt64 = 2,
+    AString = 3,
+    WString = 4,
+    Binary = 5,
+    Boolean = 6,
+    End = -1,
+};
+"""  # noqa: E501
+
+c_vbk = cstruct()
+c_vbk.load(vbk_def)
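Since c_vbk is a plain cstruct instance, the definitions above can be exercised directly; a minimal sketch parsing a StorageHeader straight from a file handle, the same pattern vbk.py uses (the path is hypothetical):

    from dissect.hypervisor.backup.c_vbk import c_vbk

    with open("backup.vbk", "rb") as fh:
        header = c_vbk.StorageHeader(fh)
        print(header.FormatVersion, header.StandardBlockSize)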
diff --git a/dissect/hypervisor/backup/vbk.py b/dissect/hypervisor/backup/vbk.py
new file mode 100644
index 0000000..f533eec
--- /dev/null
+++ b/dissect/hypervisor/backup/vbk.py
@@ -0,0 +1,1044 @@
+# References:
+# - Veeam extract utility
+# - Veeam agent
+from __future__ import annotations
+
+from functools import cached_property, lru_cache
+from io import BytesIO
+from typing import BinaryIO, Generic, Iterator, Optional, TypeVar
+from zlib import crc32
+
+from dissect.cstruct import Structure
+from dissect.util.crc32c import crc32c
+from dissect.util.stream import AlignedStream
+from dissect.util.xmemoryview import xmemoryview
+
+try:
+    from lz4.block import decompress as lz4_decompress
+except ImportError:
+    from dissect.util.compression.lz4 import decompress as lz4_decompress
+
+from dissect.hypervisor.backup.c_vbk import c_vbk
+from dissect.hypervisor.exceptions import Error
+
+PAGE_SIZE = 4096
+"""VBK page size."""
+
+
+class VBKError(Error):
+    pass
+
+
+class NotAFileError(VBKError):
+    pass
+
+
+class NotADirectoryError(VBKError):
+    pass
+
+
+class VBK:
+    """Veeam Backup (VBK) file implementation.
+
+    Args:
+        fh: The file handle of the VBK file to read.
+        verify: Whether to verify checksums.
+
+    References:
+        - CMeta
+        - CStgFormat
+
+    Notes:
+        - **TODO**: Encryption
+        - **TODO**: Incremental backups
+    """
+
+    def __init__(self, fh: BinaryIO, verify: bool = True):
+        self.fh = fh
+
+        fh.seek(0)
+        self.header = c_vbk.StorageHeader(fh)
+
+        self.format_version = self.header.FormatVersion
+        self.block_size = self.header.StandardBlockSize
+
+        # First slot starts at PAGE_SIZE because StorageHeader is considered to be PAGE_SIZE large
+        self.slot1 = SnapshotSlot(self, PAGE_SIZE)
+        # Second slot starts at PAGE_SIZE + slot1 size
+        self.slot2 = SnapshotSlot(self, PAGE_SIZE + self.slot1.size)
+
+        populated_slots = filter(lambda slot: slot.header.ContainsSnapshot, (self.slot1, self.slot2))
+
+        if verify:
+            populated_slots = filter(lambda slot: slot.verify(), populated_slots)
+
+        if not (active_slot := max(populated_slots, key=lambda slot: slot.descriptor.Version, default=None)):
+            raise VBKError("No active VBK metadata slot found")
+
+        self.active_slot: SnapshotSlot = active_slot
+
+        self.root = RootDirectory(
+            self,
+            self.active_slot.descriptor.DirectoryRoot.RootPage,
+            self.active_slot.descriptor.DirectoryRoot.Count,
+        )
+        self.block_store = MetaVector(
+            self,
+            StgBlockDescriptorV7 if self.is_v7() else StgBlockDescriptor,
+            self.active_slot.descriptor.BlocksStore.RootPage,
+            self.active_slot.descriptor.BlocksStore.Count,
+        )
+
+    def is_v7(self) -> bool:
+        return self.format_version == 7 or self.format_version == 0x10008 or self.format_version >= 9
+
+    def page(self, idx: int) -> bytes:
+        """Read a page from the VBK file.
+
+        Args:
+            idx: The index of the page to read.
+        """
+        return self.active_slot.page(idx)
+
+    def read_meta_blob(self, page: int) -> bytes:
+        """Read a meta blob from the VBK file.
+
+        Args:
+            page: The starting page number of the meta blob to read.
+        """
+        return self.active_slot._get_meta_blob(page)
+
+    def get(self, path: str, item: Optional[DirItem] = None) -> DirItem:
+        """Get a directory item from the VBK file."""
+        item = item or self.root
+
+        for part in path.split("/"):
+            if not part:
+                continue
+
+            for entry in item.iterdir():
+                if entry.name == part:
+                    item = entry
+                    break
+            else:
+                raise FileNotFoundError(f"File not found: {path}")
+
+        return item
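A minimal usage sketch of the class above (the backup path and entry path are hypothetical):

    from dissect.hypervisor.backup.vbk import VBK

    with open("backup.vbk", "rb") as fh:
        vbk = VBK(fh)

        for item in vbk.root.iterdir():  # top-level directory items
            print(item.name, item.is_dir())

        entry = vbk.get("some-folder/DEV__dev_sda")
        if entry.is_internal_file():
            with entry.open() as stream:  # FibStream over the file's blocks
                head = stream.read(512)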
+
+
+class SnapshotSlot:
+    """A snapshot slot in the VBK file.
+
+    Args:
+        vbk: The VBK object that the snapshot slot is part of.
+        offset: The offset of the snapshot slot in the file.
+
+    References:
+        - CSlotHdr
+        - SSnapshotDescriptor
+        - CSnapshotSlot
+        - CMetaStore
+        - CMetaObjs
+        - SMetaObjRefs
+        - SDirRootRec
+        - SBlocksStoreHdr
+
+    Notes:
+        - **TODO**: Free blocks index (CFreeBlocksIndex, SFreeBlockIndexItem)
+        - **TODO**: Deduplication index (CDedupIndex, SDedupIndexItem)
+        - **TODO**: Crypto store (CCryptoStore, SCryptoStoreRec)
+    """
+
+    def __init__(self, vbk: VBK, offset: int):
+        self.vbk = vbk
+        self.offset = offset
+
+        self.vbk.fh.seek(offset)
+        self.header = c_vbk.SnapshotSlotHeader(vbk.fh)
+        self.descriptor = None
+        self.grain = None
+        self.banks = []
+
+        if self.header.ContainsSnapshot:
+            self.descriptor = c_vbk.SnapshotDescriptor(vbk.fh)
+            self.grain = c_vbk.BanksGrain(vbk.fh)
+
+            valid_max_banks = 0xF8 if self.vbk.header.SnapshotSlotFormat == 0 else 0x7F00
+
+            if self.grain.MaxBanks > valid_max_banks:
+                raise VBKError("Invalid SnapshotSlot: MaxBanks is not valid")
+            if self.grain.StoredBanks > self.grain.MaxBanks:
+                raise VBKError("Invalid SnapshotSlot: StoredBanks is greater than MaxBanks")
+
+            self.banks = [
+                Bank(self.vbk, entry.Offset, entry.Size)
+                for entry in c_vbk.BankDescriptor[self.grain.StoredBanks](vbk.fh)
+            ]
+
+    def __repr__(self) -> str:
+        return f"<SnapshotSlot offset={self.offset:#x}>"
+
+    @cached_property
+    def size(self) -> int:
+        """The size of the snapshot slot in the file."""
+        slot_size = len(c_vbk.SnapshotSlotHeader) + len(c_vbk.SnapshotDescriptor)
+        if self.header.ContainsSnapshot:
+            slot_size += self.grain.MaxBanks * len(c_vbk.BankDescriptor)
+        else:
+            slot_size += (0xF8 if self.vbk.header.SnapshotSlotFormat == 0 else 0x7F00) * len(c_vbk.BankDescriptor)
+
+        if slot_size & 0xFFF:
+            # Round to next page boundary
+            slot_size = (slot_size & ~0xFFF) + PAGE_SIZE
+
+        return slot_size
+
+    def verify(self) -> bool:
+        """Verify the snapshot slot's CRC against the CRC stored in its header."""
+        if not self.header.ContainsSnapshot:
+            return False
+
+        crc = crc32c if self.vbk.header.SnapshotSlotFormat > 5 else crc32
+
+        # Remainder of SnapshotSlotHeader + SnapshotDescriptor + BanksGrain
+        length = 4 + len(c_vbk.SnapshotDescriptor) + 8 + self.grain.MaxBanks * len(c_vbk.BankDescriptor)
+
+        self.vbk.fh.seek(self.offset + 4)  # Skip CRC
+        return crc(self.vbk.fh.read(length)) == self.header.CRC
+
+    def page(self, page: int) -> bytes:
+        """Read a page from the snapshot slot.
+
+        Args:
+            page: The page number to read.
+        """
+        return self.banks[page >> 32].page(page & 0xFFFFFFFF)
+
+    def _get_meta_blob(self, page: int) -> bytes:
+        """Read a meta blob from the snapshot slot.
+
+        A meta blob is a list of pages that are linked together. Each page has a header (``MetaBlobHeader``) with
+        a ``NextPage`` field that points to the next page in the blob. The last page has a ``NextPage`` field of -1.
+
+        Args:
+            page: The page number of the first page in the meta blob.
+
+        References:
+            - CMetaBlobRW
+        """
+        result = []
+
+        while page != -1:
+            buf = self.page(page)
+            result.append(buf)
+
+            # Read the next page from the header
+            page = int.from_bytes(buf[:8], "little", signed=True)
+
+        return b"".join(result)
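SnapshotSlot.page() shows how 64-bit page numbers are addressed: the high 32 bits select a bank, the low 32 bits a page within that bank. The same split as a standalone sketch (the helper function is ours, for illustration):

    def split_page_number(page: int) -> tuple[int, int]:
        # High 32 bits: bank index; low 32 bits: page within the bank
        return page >> 32, page & 0xFFFFFFFF

    assert split_page_number(0x0000_0002_0000_0003) == (2, 3)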
+
+
+class Bank:
+    """A bank in the snapshot slot. A bank is a collection of pages.
+
+    Args:
+        vbk: The VBK object that the bank is part of.
+        offset: The offset of the bank in the file.
+        size: The size of the bank in the file.
+
+    References:
+        - SBankHdr
+        - CBankHdrPage
+    """
+
+    def __init__(self, vbk: VBK, offset: int, size: int):
+        self.vbk = vbk
+        self.offset = offset
+        self.size = size
+
+        self.vbk.fh.seek(offset)
+        self.header = c_vbk.BankHeader(vbk.fh)
+
+        self.page = lru_cache(128)(self.page)
+
+    def __repr__(self) -> str:
+        return f"<Bank offset={self.offset:#x} size={self.size:#x}>"
+
+    def verify(self, crc: int) -> bool:
+        """Verify the bank's CRC.
+
+        Args:
+            crc: The CRC to verify against.
+        """
+        _crc = crc32c if self.vbk.format_version >= 12 and self.vbk.format_version != 0x10008 else crc32
+
+        self.vbk.fh.seek(self.offset)
+        return _crc(self.vbk.fh.read(self.size)) == crc
+
+    def page(self, page: int) -> bytes:
+        """Read a page from the bank.
+
+        Args:
+            page: The page number to read.
+        """
+        # Data starts at PAGE_SIZE from bank offset
+        self.vbk.fh.seek(self.offset + PAGE_SIZE + (page * PAGE_SIZE))
+        return memoryview(self.vbk.fh.read(PAGE_SIZE))
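Note how Bank.__init__ rebinds page through lru_cache, giving each Bank its own page cache that is released together with the instance. The idiom in isolation (Reader is a made-up class):

    from functools import lru_cache

    class Reader:
        def __init__(self) -> None:
            # Per-instance cache of up to 128 pages; avoids a class-level
            # lru_cache keeping every instance alive via its bound method
            self.page = lru_cache(128)(self.page)

        def page(self, idx: int) -> bytes:
            return b"\x00" * 4096  # stand-in for an expensive disk read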
directory") + + def open(self) -> BinaryIO: + """Open the file for reading.""" + raise NotAFileError(f"{self!r} is not a file") + + +class RootDirectory(DirItem): + """Special directory item for the root directory. Does not actually exist in the VBK file.""" + + def __init__(self, vbk: VBK, page: int, count: int): + super().__init__(vbk, b"\x00" * len(c_vbk.DirItemRecord)) + self.name = "/" + self.root = page + self.count = count + + def __repr__(self) -> str: + return f"" + + def iterdir(self) -> Iterator[DirItem]: + yield from MetaVector(self.vbk, DirItem, self.root, self.count) + + +class SubFolderItem(DirItem): + """Directory item for a subfolder (directory type). + + References: + - CSubFolderHdr + - CFolderMeta + """ + + def __init__(self, vbk: VBK, buf: bytes): + super().__init__(vbk, buf) + self.root = self.entry.SubFolder.RootPage + self.count = self.entry.SubFolder.Count + + def __repr__(self) -> str: + return f"" + + def iterdir(self) -> Iterator[DirItem]: + yield from MetaVector(self.vbk, DirItem, self.root, self.count) + + +class ExtFibItem(DirItem): + """Directory item for an external file. + + References: + - SFibHdr + - CExtFibMeta + """ + + def __repr__(self) -> str: + return f"" + + @cached_property + def size(self) -> int: + return self.entry.ExtFib.FibSize + + +class IntFibItem(DirItem): + """Directory item for an internal file. + + References: + - SFibHdr + - CIntFibMeta + """ + + def __init__(self, vbk: VBK, buf: bytes): + super().__init__(vbk, buf) + + def __repr__(self) -> str: + return f"" + + @cached_property + def size(self) -> int: + return self.entry.IntFib.FibSize + + def open(self) -> FibStream: + return FibStream( + self.vbk, + self.entry.IntFib.BlocksVector.RootPage, + self.entry.IntFib.BlocksVector.Count, + self.size, + ) + + +class PatchItem(DirItem): + """Directory item for a patch. + + Notes: + - **TODO**: SPatchHdr + - **TODO**: CPatchMeta + """ + + @cached_property + def size(self) -> int: + return self.entry.Patch.FibSize + + def __repr__(self) -> str: + return f"" + + +class IncrementItem(DirItem): + """Directory item for an increment. + + Notes: + - **TODO**: SIncrementHdr + - **TODO**: CIncrementMeta + """ + + @cached_property + def size(self) -> int: + return self.entry.Increment.FibSize + + def __repr__(self) -> str: + return f"" + + +class MetaTableDescriptor(MetaItem): + """A descriptor for a meta table in the VBK file. + + References: + - SMetaTableDescriptor + """ + + __struct__ = c_vbk.MetaTableDescriptor + + def __repr__(self) -> str: + return f"" + + @cached_property + def page(self) -> int: + """The page number of the first page in the meta table.""" + return self.entry.RootPage + + @cached_property + def block_size(self) -> int: + """The block size of the meta table.""" + return self.entry.BlockSize + + @cached_property + def count(self) -> int: + """The number of entries in the meta table.""" + return self.entry.Count + + +class FibBlockDescriptor(MetaItem): + """A descriptor for a FIB (File In Backup) block in the VBK file. 
+
+
+class FibBlockDescriptor(MetaItem):
+    """A descriptor for a FIB (File In Backup) block in the VBK file.
+
+    References:
+        - SFibBlockDescriptor
+    """
+
+    __struct__ = c_vbk.FibBlockDescriptor
+
+    def __repr__(self) -> str:
+        return f"<FibBlockDescriptor block_size={self.block_size:#x} type={self.type} block_id={self.block_id:#x}>"
+
+    def is_normal(self) -> bool:
+        """Return whether the block is a normal block."""
+        return self.type == c_vbk.BlockLocationType.Normal
+
+    def is_sparse(self) -> bool:
+        """Return whether the block is a sparse block."""
+        return self.type == c_vbk.BlockLocationType.Sparse
+
+    def is_reserved(self) -> bool:
+        """Return whether the block is a reserved block."""
+        return self.type == c_vbk.BlockLocationType.Reserved
+
+    def is_archived(self) -> bool:
+        """Return whether the block is an archived block.
+
+        If the block is archived, the compressed size and compression type are stored in the block ID::
+
+            BlockId = CompressedSize | (CompressionType << 32)
+
+        Notes:
+            - **TODO**: Verify the above
+        """
+        return self.type == c_vbk.BlockLocationType.Archived
+
+    def is_block_in_blob(self) -> bool:
+        """Return whether the block is a block in a blob.
+
+        If the block is in a blob, the block ID, blob ID and offset are stored in the block ID::
+
+            BlockId = BlockId? & 0x3FFFFFF | (BlobId << 26) | ((Offset >> 9) << 42)
+
+        Notes:
+            - **TODO**: Verify the above
+        """
+        return self.type == c_vbk.BlockLocationType.BlockInBlob
+
+    def is_block_in_blob_reserved(self) -> bool:
+        """Return whether the block is a reserved block in a blob.
+
+        If the block is a reserved block in a blob, the block ID is stored in the block ID::
+
+            BlockId = BlockId? | 0xFFFFFFFFFC000000
+
+        Notes:
+            - **TODO**: Verify the above
+        """
+        return self.type == c_vbk.BlockLocationType.BlockInBlobReserved
+
+    @cached_property
+    def block_size(self) -> int:
+        """The size of the block."""
+        return self.entry.BlockSize
+
+    @cached_property
+    def type(self) -> c_vbk.BlockLocationType:
+        """The type of the block."""
+        return self.entry.Type
+
+    @cached_property
+    def digest(self) -> bytes:
+        """The digest of the block."""
+        return self.entry.Digest
+
+    @cached_property
+    def block_id(self) -> int:
+        """The ID of the block."""
+        return self.entry.BlockId
+
+    @cached_property
+    def flags(self) -> c_vbk.BlockFlags:
+        """The flags of the block."""
+        return self.entry.Flags
+
+
+class FibBlockDescriptorV7(FibBlockDescriptor):
+    """A descriptor for a FIB (File In Backup) block in the VBK file. Version 7.
+
+    References:
+        - SFibBlockDescriptorV7
+    """
+
+    __struct__ = c_vbk.FibBlockDescriptorV7
+
+    def __repr__(self) -> str:
+        return f"<FibBlockDescriptorV7 block_size={self.block_size:#x} type={self.type} block_id={self.block_id:#x}>"
+
+    @cached_property
+    def keyset_id(self) -> bytes:
+        return self.entry.KeySetId
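The is_archived() docstring above describes how the BlockId union is overloaded; the source marks that layout as unverified, so take this decoding sketch as an assumption rather than a confirmed format:

    def decode_archived_block_id(block_id: int) -> tuple[int, int]:
        # BlockId = CompressedSize | (CompressionType << 32), per the comment above
        return block_id & 0xFFFFFFFF, (block_id >> 32) & 0xFF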
+ """ + return self.deduplication != 0 + + def is_compressed(self) -> bool: + """Return whether the block is compressed.""" + return self.compression_type != c_vbk.CompressionType.Plain + + @cached_property + def format(self) -> int: + """The format of the block.""" + return self.entry.Format + + @cached_property + def usage_counter(self) -> int: + """The usage counter of the block.""" + return self.entry.UsageCounter + + @cached_property + def offset(self) -> int: + """The offset of the block.""" + return self.entry.Offset + + @cached_property + def allocated_size(self) -> int: + """The allocated size of the block.""" + return self.entry.AllocatedSize + + @cached_property + def deduplication(self) -> int: + """The deduplication of the block.""" + return self.entry.Deduplication + + @cached_property + def digest(self) -> bytes: + """The digest of the block.""" + return self.entry.Digest + + @cached_property + def compression_type(self) -> c_vbk.CompressionType: + """The compression type of the block.""" + return self.entry.CompressionType + + @cached_property + def compressed_size(self) -> int: + """The compressed size of the block.""" + return self.entry.CompressedSize + + @cached_property + def source_size(self) -> int: + """The source size of the block.""" + return self.entry.SourceSize + + +class StgBlockDescriptorV7(StgBlockDescriptor): + """A descriptor for a storage block in the VBK file. Version 7. + + References: + - SStgBlockDescriptorV7 + """ + + __struct__ = c_vbk.StgBlockDescriptorV7 + + def __repr__(self) -> str: + return ( + f"" + ) + + @cached_property + def keyset_id(self) -> bytes: + """The keyset ID of the block.""" + return self.entry.KeySetId + + +class PropertiesDictionary(dict): + """A dictionary of properties in the VBK file. + + Args: + vbk: The VBK object that the properties dictionary is part of. + page: The page number of the meta blob of the properties dictionary. + + References: + - CPropsDictionary + - CDirElemPropsRW + """ + + def __init__(self, vbk: VBK, page: int): + self.vbk = vbk + self.page = page + + buf = BytesIO(self.vbk.read_meta_blob(page)) + buf.seek(len(c_vbk.MetaBlobHeader)) + + while True: + value_type = c_vbk.PropertyType(buf) + if value_type == c_vbk.PropertyType.End: + break + + name_length = c_vbk.uint32(buf) + name = buf.read(name_length).decode("utf-8") + + if value_type == c_vbk.PropertyType.UInt32: + value = c_vbk.uint32(buf) + elif value_type == c_vbk.PropertyType.UInt64: + value = c_vbk.uint64(buf) + elif value_type == c_vbk.PropertyType.AString: + value = buf.read(c_vbk.uint32(buf)).decode("utf-8") + elif value_type == c_vbk.PropertyType.WString: + value = buf.read(c_vbk.uint32(buf)).decode("utf-16-le") + elif value_type == c_vbk.PropertyType.Binary: + value = buf.read(c_vbk.uint32(buf)) + elif value_type == c_vbk.PropertyType.Boolean: + value = bool(c_vbk.uint32(buf)) + else: + raise VBKError(f"Unsupported property type: {value_type}") + + self[name] = value + + +T = TypeVar("T", bound=MetaItem) + + +class MetaVector(Generic[T]): + """A vector of meta items in the VBK file. + + Args: + vbk: The VBK object that the vector is part of. + type_: The type of the items in the vector. + page: The page number of the first page in the vector. + count: The number of items in the vector. 
+
+
+T = TypeVar("T", bound=MetaItem)
+
+
+class MetaVector(Generic[T]):
+    """A vector of meta items in the VBK file.
+
+    Args:
+        vbk: The VBK object that the vector is part of.
+        type_: The type of the items in the vector.
+        page: The page number of the first page in the vector.
+        count: The number of items in the vector.
+
+    References:
+        - CMetaVec
+    """
+
+    def __new__(cls, vbk: VBK, *args, **kwargs):
+        if vbk.format_version >= 12 and vbk.format_version != 0x10008:
+            cls = MetaVector2
+        return super().__new__(cls)
+
+    def __init__(self, vbk: VBK, type_: type[T], page: int, count: int):
+        self.vbk = vbk
+        self.type = type_
+        self.page = page
+        self.count = count
+
+        self._entry_size = len(self.type.__struct__)
+        self._entries_per_page = PAGE_SIZE // self._entry_size
+        self._pages = []
+
+        while page != -1:
+            self._pages.append(page)
+
+            buf = self.vbk.page(page)
+            page = int.from_bytes(buf[:8], "little", signed=True)
+
+        self.get = lru_cache(128)(self.get)
+
+    def __repr__(self) -> str:
+        return f"<{self.__class__.__name__} type={self.type.__name__} count={self.count}>"
+
+    def __iter__(self) -> Iterator[T]:
+        return (self.get(i) for i in range(self.count))
+
+    def data(self, idx: int) -> bytes:
+        """Read the data for an entry in the vector.
+
+        Args:
+            idx: The index of the entry to read.
+        """
+        page_id, offset = divmod(idx, self._entries_per_page)
+        page = self._pages[page_id]
+        offset = (offset * self._entry_size) + 8
+
+        buf = self.vbk.active_slot.page(page)
+        return buf[offset : offset + self._entry_size]
+
+    def get(self, idx: int) -> T:
+        """Get an entry from the vector.
+
+        Args:
+            idx: The index of the entry to get.
+        """
+        if idx >= self.count:
+            raise IndexError("MetaVector index out of range")
+        return self.type.from_bytes(self.vbk, self.data(idx))
+
+
+class MetaVector2(MetaVector[T]):
+    """A vector of meta items in the VBK file. Version 2.
+
+    Args:
+        vbk: The VBK object that the vector is part of.
+        type_: The type of the items in the vector.
+        page: The page number of the first page in the vector.
+        count: The number of items in the vector.
+
+    References:
+        - CMetaVec2
+    """
+
+    # MetaVector2 is essentially a table of page numbers that contain the vector entries
+    # The table pages of a MetaVector2 have a 8-16 byte header, so we can hold a maximum of 510-511 entries per page
+    # Read the comments in _lookup_page for more information
+    MAX_TABLE_ENTRIES_PER_PAGE = (PAGE_SIZE - 8) // 8
+
+    def __init__(self, vbk: VBK, type_: type[T], page: int, count: int):
+        super().__init__(vbk, type_, page, count)
+
+        # It's not actually a meta blob, but the same mechanism is used (next page pointer in the header)
+        # The table itself is essentially a big array of 64 bit integers, so cast it to a memoryview of that
+        self._table = xmemoryview(self.vbk.read_meta_blob(page), "<q")
+
+    def _lookup_page(self, idx: int) -> int:
+        """Look up the page number for an entry in the vector.
+
+        Args:
+            idx: The page index to lookup the page number for.
+ """ + page_id = idx + 1 + + # MetaVec2 pages are a little special + # The first page has a 16 byte header: + # - 8 bytes for the next page number + # - 8 bytes for the root page number + # The second page has a 32 byte header: + # - 8 bytes for the next page number + # - 8 bytes for the second page number (this page) + # - 8 bytes for the third page number + # - 8 bytes for the fourth page number + # The third and fourth pages only have a 8 byte header: + # - 8 bytes for the next page number + # The fifth page has a 32 byte header again containing the next 3 page numbers + # We've not seen a table large enough to see this repeat more than once, but presumably it does + # + # This means that the first page can hold 510 entries, the second 508, and the third and fourth 511 each + # Reverse engineering gives us this funny looking loop, but it works, so use it for now + if page_id > self.MAX_TABLE_ENTRIES_PER_PAGE - 1: + page_id_offset = 1 + while True: + prev_page_id_offset = page_id_offset + page_id_offset *= 4 + + if (4 * (self.MAX_TABLE_ENTRIES_PER_PAGE - 1)) * prev_page_id_offset >= page_id: + break + + page_id = page_id_offset + idx + + table_id, page_id_in_table = divmod(page_id, self.MAX_TABLE_ENTRIES_PER_PAGE) + return self._table[table_id * (PAGE_SIZE // 8) + 1 + page_id_in_table] + + def data(self, idx: int) -> bytes: + """Read the data for an entry in the vector. + + Args: + idx: The index of the entry to read. + """ + page_idx, offset = divmod(idx, self._entries_per_page) + offset *= self._entry_size + + page_no = self._lookup_page(page_idx) + return self.vbk.active_slot.page(page_no)[offset : offset + self._entry_size] + + +class FibMetaSparseTable: + """A sparse table of FIB (File In Backup) blocks in the VBK file. + + Args: + vbk: The VBK object that the sparse table is part of. + page: The page number of the first page in the table. + count: The number of entries in the table. + + References: + - CFibMetaSparseTable + """ + + # This seems hardcoded? Probably calculated from something but unknown for now + MAX_ENTRIES_PER_TABLE = 1088 + + def __init__(self, vbk: VBK, page: int, count: int): + self.vbk = vbk + self.page = page + self.count = count + + self.type = FibBlockDescriptorV7 if self.vbk.is_v7() else FibBlockDescriptor + self._fake_sparse = self.type( + self.vbk, + self.type.__struct__( + BlockSize=self.vbk.block_size, + Type=c_vbk.BlockLocationType.Sparse, + ).dumps(), + ) + + self._table_count = (count + self.MAX_ENTRIES_PER_TABLE - 1) // self.MAX_ENTRIES_PER_TABLE + self._vec = MetaVector(vbk, MetaTableDescriptor, page, self._table_count) + + self._open_table = lru_cache(128)(self._open_table) + + def _open_table(self, page: int, count: int) -> MetaVector2[FibBlockDescriptor | FibBlockDescriptorV7]: + return MetaVector(self.vbk, self.type, page, count) + + def get(self, idx: int) -> FibBlockDescriptor | FibBlockDescriptorV7: + """Get a block descriptor from the sparse table. + + Args: + idx: The index of the block descriptor to get. + """ + if idx >= self.count: + raise IndexError("MetaSparseTable index out of range") + + table_idx, entry_idx = divmod(idx, self.MAX_ENTRIES_PER_TABLE) + + table_entry = self._vec.get(table_idx) + if table_entry.page == -1: + return self._fake_sparse + + return self._open_table(table_entry.page, table_entry.count).get(entry_idx) + + +class FibStream(AlignedStream): + """A stream for reading FIB (File In Backup) blocks in the VBK file. + + Args: + vbk: The VBK object that the stream is part of. 
+ page: The page number of the :class:`FibMetaSparseTable`. + count: The number of entries in the meta sparse table. + size: The size of the stream. + """ + + def __init__(self, vbk: VBK, page: int, count: int, size: int): + self.vbk = vbk + self.page = page + self.count = count + + self.mt = FibMetaSparseTable(vbk, page, count) + + super().__init__(size, align=vbk.block_size) + + def _read(self, offset: int, length: int) -> bytes: + result = [] + # TODO: Can the block size change per file? + block_size = self.vbk.block_size + + while length > 0: + block_idx = offset // block_size + offset_in_block = offset % block_size + + read_size = min(length, block_size - offset_in_block) + + block_desc = self.mt.get(block_idx) + + if block_desc.is_normal(): + block = self.vbk.block_store.get(block_desc.block_id) + + self.vbk.fh.seek(block.offset) + buf = self.vbk.fh.read(block.compressed_size) + + if block.is_compressed(): + if block.compression_type == c_vbk.CompressionType.LZ4: + # First 12 bytes are Lz4BlockHeader + buf = lz4_decompress(memoryview(buf)[12:], block.source_size) + else: + raise VBKError(f"Unsupported compression type: {block.compression_type}") + + result.append(buf[offset_in_block : offset_in_block + read_size]) + elif block_desc.is_sparse(): + result.append(b"\x00" * read_size) + else: + raise VBKError(f"Unsupported block type: {block_desc.type}") + + offset += read_size + length -= read_size + + return b"".join(result) diff --git a/dissect/hypervisor/backup/vma.py b/dissect/hypervisor/backup/vma.py index 9a733d0..3a7e413 100644 --- a/dissect/hypervisor/backup/vma.py +++ b/dissect/hypervisor/backup/vma.py @@ -21,18 +21,18 @@ class VMA: Parse and provide a readable object for devices in a Proxmox VMA backup file. VMA is designed to be streamed for extraction, so we need to do some funny stuff to create a readable object from it. Performance is not optimal, so it's generally advised to extract a VMA instead. - The vma-extract utility can be used for that. + The ``vma-extract`` utility can be used for that. 
""" def __init__(self, fh): self.fh = fh - offset = fh.tell() + fh.seek(0) self.header = c_vma.VmaHeader(fh) if self.header.magic != VMA_MAGIC: raise InvalidHeaderError("Invalid VMA header magic") - fh.seek(offset) + fh.seek(0) header_data = bytearray(fh.read(self.header.header_size)) header_data[32:48] = b"\x00" * 16 if hashlib.md5(header_data).digest() != self.header.md5sum: diff --git a/dissect/hypervisor/tools/backup.py b/dissect/hypervisor/tools/backup.py new file mode 100644 index 0000000..25654fd --- /dev/null +++ b/dissect/hypervisor/tools/backup.py @@ -0,0 +1,206 @@ +import argparse +import logging +import sys +from pathlib import Path + +from dissect.hypervisor.backup.c_vma import c_vma +from dissect.hypervisor.backup.vbk import VBK +from dissect.hypervisor.backup.vma import VMA, _iter_mask + +try: + from rich.logging import RichHandler + from rich.progress import ( + BarColumn, + DownloadColumn, + Progress, + TextColumn, + TimeRemainingColumn, + TransferSpeedColumn, + ) + + progress = Progress( + TextColumn("[bold blue]{task.fields[filename]}", justify="right"), + BarColumn(bar_width=None), + "[progress.percentage]{task.percentage:>3.1f}%", + "•", + DownloadColumn(), + "•", + TransferSpeedColumn(), + "•", + TimeRemainingColumn(), + transient=True, + ) +except ImportError: + RichHandler = logging.StreamHandler + + class Progress: + def __init__(self): + self.filename = None + self.total = None + + self._task_id = 0 + self._info = {} + + def __enter__(self): + pass + + def __exit__(self, *args, **kwargs) -> None: + sys.stderr.write("\n") + sys.stderr.flush() + + def add_task(self, name: str, filename: str, total: int, **kwargs) -> int: + task_id = self._task_id + self._task_id += 1 + + self._info[task_id] = {"filename": filename, "total": total, "position": 0} + + return task_id + + def update(self, task_id: int, advance: int) -> None: + self._info[task_id]["position"] += advance + self.draw() + + def draw(self) -> None: + infos = [] + for info in self._info.values(): + infos.append(f"{info['filename']} {(info['position'] / info['total']) * 100:0.2f}%") + sys.stderr.write("\r" + " | ".join(infos)) + sys.stderr.flush() + + progress = Progress() + + +log = logging.getLogger(__name__) + + +def setup_logging(logger: logging.Logger, verbosity: int) -> None: + if verbosity == 1: + level = logging.ERROR + elif verbosity == 2: + level = logging.WARNING + elif verbosity == 3: + level = logging.INFO + elif verbosity >= 4: + level = logging.DEBUG + else: + level = logging.CRITICAL + + handler = RichHandler() + handler.setFormatter(logging.Formatter("%(message)s")) + handler.setLevel(level) + logger.addHandler(handler) + logger.setLevel(level) + + +def extract_vma(vma: VMA, out_dir: Path) -> None: + log.info("Extracting config files") + for config_name, config_data in vma.configs().items(): + out_file = out_dir.joinpath(config_name) + + log.info("%s -> %s (%d bytes)", config_name, out_file, len(config_data)) + out_file.write_bytes(config_data) + + log.info("Extracting device data") + tasks = {} + handles = {} + for device in vma.devices(): + task_id = progress.add_task("extract", filename=device.name, total=device.size) + tasks[device.id] = task_id + handles[device.id] = out_dir.joinpath(device.name).open("wb") + + with progress: + try: + for extent in vma.extents(): + vma.fh.seek(extent.data_offset) + for block_info in extent.header.blockinfo: + cluster_num = block_info & 0xFFFFFFFF + dev_id = (block_info >> 32) & 0xFF + mask = block_info >> (32 + 16) + + if dev_id == 0: + continue + + 
fh_out = handles[dev_id] + fh_out.seek(cluster_num * c_vma.VMA_CLUSTER_SIZE) + + if mask == 0xFFFF: + fh_out.write(vma.fh.read(c_vma.VMA_CLUSTER_SIZE)) + elif mask == 0: + fh_out.write(b"\x00" * c_vma.VMA_CLUSTER_SIZE) + else: + for allocated, count in _iter_mask(mask, 16): + if allocated: + fh_out.write(vma.fh.read(count * c_vma.VMA_BLOCK_SIZE)) + else: + fh_out.write(b"\x00" * count * c_vma.VMA_BLOCK_SIZE) + + progress.update(tasks[dev_id], advance=c_vma.VMA_CLUSTER_SIZE) + except Exception as e: + log.exception("Exception during extraction") + log.debug("", exc_info=e) + finally: + for handle in handles.values(): + handle.close() + + +def extract_vbk(vbk: VBK, out_dir: Path) -> None: + with progress: + try: + root_dir = next(vbk.get("/").iterdir()) + out_dir = out_dir.joinpath(root_dir.name) + out_dir.mkdir(exist_ok=True) + + for entry in root_dir.iterdir(): + out_file = out_dir.joinpath(entry.name) + task_id = progress.add_task("extract", filename=entry.name, total=entry.size) + + with entry.open() as fh_in, out_file.open("wb") as fh_out: + for chunk in iter(lambda: fh_in.read(vbk.block_size), b""): + fh_out.write(chunk) + progress.update(task_id, advance=len(chunk)) + except Exception as e: + log.exception("Exception during extraction") + log.debug("", exc_info=e) + + +def main() -> None: + parser = argparse.ArgumentParser(description="Hypervisor backup extractor") + parser.add_argument("input", type=Path, help="path to backup file") + parser.add_argument("-o", "--output", type=Path, required=True, help="path to output directory") + parser.add_argument("-v", "--verbose", action="count", default=3, help="increase output verbosity") + args = parser.parse_args() + + setup_logging(log, args.verbose) + + in_file = args.input.resolve() + if not in_file.exists(): + log.error("Input file does not exist: %s", in_file) + parser.exit() + + out_dir = args.output.resolve() + if not out_dir.exists(): + log.error("Output path does not exist: %s", out_dir) + parser.exit() + + if not out_dir.is_dir(): + log.error("Output path is not a directory: %s", out_dir) + parser.exit() + + with in_file.open("rb") as fh: + for klass, extract in ((VMA, extract_vma), (VBK, extract_vbk)): + try: + backup = klass(fh) + extract(backup, out_dir) + break + except Exception as e: + log.debug("Failed to extract using %s", klass.__name__, exc_info=e) + else: + log.error("Unknown backup format") + parser.exit() + + +if __name__ == "__main__": + try: + sys.exit(main()) + except KeyboardInterrupt: + pass diff --git a/dissect/hypervisor/tools/vma.py b/dissect/hypervisor/tools/vma.py deleted file mode 100644 index 781d556..0000000 --- a/dissect/hypervisor/tools/vma.py +++ /dev/null @@ -1,173 +0,0 @@ -import argparse -import logging -import sys -from pathlib import Path - -from dissect.hypervisor.backup.c_vma import c_vma -from dissect.hypervisor.backup.vma import VMA, _iter_mask - -try: - from rich.logging import RichHandler - from rich.progress import ( - BarColumn, - DownloadColumn, - Progress, - TextColumn, - TimeRemainingColumn, - TransferSpeedColumn, - ) - - progress = Progress( - TextColumn("[bold blue]{task.fields[filename]}", justify="right"), - BarColumn(bar_width=None), - "[progress.percentage]{task.percentage:>3.1f}%", - "•", - DownloadColumn(), - "•", - TransferSpeedColumn(), - "•", - TimeRemainingColumn(), - transient=True, - ) -except ImportError: - RichHandler = logging.StreamHandler - - class Progress: - def __init__(self): - self.filename = None - self.total = None - - self._task_id = 0 - self._info = {} - - 
def __enter__(self): - pass - - def __exit__(self, *args, **kwargs): - sys.stderr.write("\n") - sys.stderr.flush() - - def add_task(self, name, filename, total, **kwargs): - task_id = self._task_id - self._task_id += 1 - - self._info[task_id] = {"filename": filename, "total": total, "position": 0} - - return task_id - - def update(self, task_id, advance): - self._info[task_id]["position"] += advance - self.draw() - - def draw(self): - infos = [] - for info in self._info.values(): - infos.append(f"{info['filename']} {(info['position'] / info['total']) * 100:0.2f}%") - sys.stderr.write("\r" + " | ".join(infos)) - sys.stderr.flush() - - progress = Progress() - - -log = logging.getLogger(__name__) - - -def setup_logging(logger, verbosity): - if verbosity == 1: - level = logging.ERROR - elif verbosity == 2: - level = logging.WARNING - elif verbosity == 3: - level = logging.INFO - elif verbosity >= 4: - level = logging.DEBUG - else: - level = logging.CRITICAL - - handler = RichHandler() - handler.setFormatter(logging.Formatter("%(message)s")) - handler.setLevel(level) - logger.addHandler(handler) - logger.setLevel(level) - - -def main(): - parser = argparse.ArgumentParser(description="VMA extractor") - parser.add_argument("input", type=Path, help="path to vma file") - parser.add_argument("-o", "--output", type=Path, required=True, help="path to output directory") - parser.add_argument("-v", "--verbose", action="count", default=3, help="increase output verbosity") - args = parser.parse_args() - - setup_logging(log, args.verbose) - - in_file = args.input.resolve() - if not in_file.exists(): - log.error("Input file does not exist: %s", in_file) - parser.exit() - - out_dir = args.output.resolve() - if not out_dir.exists(): - log.error("Output path does not exist: %s", out_dir) - parser.exit() - - if not out_dir.is_dir(): - log.error("Output path is not a directory: %s", out_dir) - parser.exit() - - with in_file.open("rb") as fh: - vma = VMA(fh) - - log.info("Extracting config files") - for config_name, config_data in vma.configs().items(): - out_file = out_dir.joinpath(config_name) - - log.info("%s -> %s (%d bytes)", config_name, out_file, len(config_data)) - out_file.write_bytes(config_data) - - log.info("Extracting device data") - tasks = {} - handles = {} - for device in vma.devices(): - task_id = progress.add_task("extract", filename=device.name, total=device.size) - tasks[device.id] = task_id - handles[device.id] = out_dir.joinpath(device.name).open("wb") - - with progress: - try: - for extent in vma.extents(): - vma.fh.seek(extent.data_offset) - for block_info in extent.header.blockinfo: - cluster_num = block_info & 0xFFFFFFFF - dev_id = (block_info >> 32) & 0xFF - mask = block_info >> (32 + 16) - - if dev_id == 0: - continue - - fh_out = handles[dev_id] - fh_out.seek(cluster_num * c_vma.VMA_CLUSTER_SIZE) - - if mask == 0xFFFF: - fh_out.write(vma.fh.read(c_vma.VMA_CLUSTER_SIZE)) - elif mask == 0: - fh_out.write(b"\x00" * c_vma.VMA_CLUSTER_SIZE) - else: - for allocated, count in _iter_mask(mask, 16): - if allocated: - fh_out.write(vma.fh.read(count * c_vma.VMA_BLOCK_SIZE)) - else: - fh_out.write(b"\x00" * count * c_vma.VMA_BLOCK_SIZE) - - progress.update(tasks[dev_id], advance=c_vma.VMA_CLUSTER_SIZE) - except Exception: - log.exception("Exception during extraction") - finally: - for handle in handles.values(): - handle.close() - - -if __name__ == "__main__": - try: - sys.exit(main()) - except KeyboardInterrupt: - pass diff --git a/pyproject.toml b/pyproject.toml index 035ed61..93c27bc 100644 
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -43,7 +43,9 @@ full = [
 ]
 
 [project.scripts]
-vma-extract = "dissect.hypervisor.tools.vma:main"
+vma-extract = "dissect.hypervisor.tools.backup:main"
+vbk-extract = "dissect.hypervisor.tools.backup:main"
+backup-extract = "dissect.hypervisor.tools.backup:main"
 envelope-decrypt = "dissect.hypervisor.tools.envelope:main"
 
 [tool.black]
diff --git a/tests/conftest.py b/tests/conftest.py
index 054a07c..bc45684 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -97,3 +97,13 @@ def keystore() -> Iterator[TextIO]:
 @pytest.fixture
 def vgz() -> Iterator[BinaryIO]:
     yield from open_file("data/test.vgz")
+
+
+@pytest.fixture
+def vbk9() -> Iterator[BinaryIO]:
+    yield from open_file_gz("data/test9.vbk.gz")
+
+
+@pytest.fixture
+def vbk13() -> Iterator[BinaryIO]:
+    yield from open_file_gz("data/test13.vbk.gz")
diff --git a/tests/data/test13.vbk.gz b/tests/data/test13.vbk.gz
new file mode 100644
index 0000000..d1ec07b
Binary files /dev/null and b/tests/data/test13.vbk.gz differ
diff --git a/tests/data/test9.vbk.gz b/tests/data/test9.vbk.gz
new file mode 100644
index 0000000..c1a3f46
Binary files /dev/null and b/tests/data/test9.vbk.gz differ
diff --git a/tests/test_vbk.py b/tests/test_vbk.py
new file mode 100644
index 0000000..f3f8984
--- /dev/null
+++ b/tests/test_vbk.py
@@ -0,0 +1,72 @@
+import hashlib
+from typing import BinaryIO
+
+from dissect.hypervisor.backup.vbk import VBK, MetaVector, MetaVector2
+
+
+def test_vbk_version_9(vbk9: BinaryIO) -> None:
+    vbk = VBK(vbk9)
+
+    assert vbk.is_v7()
+    assert isinstance(vbk.block_store, MetaVector)
+
+    assert vbk.root.is_dir()
+    assert not vbk.root.is_file()
+    assert list(vbk.get("/").listdir().keys()) == [
+        "6745a759-2205-4cd2-b172-8ec8f7e60ef8 (78a5467d-87f5-8540-9a84-7569ae2849ad_2d1bb20f-49c1-485d-a689-696693713a5a)"  # noqa: E501
+    ]
+
+    entry = vbk.get(
+        "6745a759-2205-4cd2-b172-8ec8f7e60ef8 (78a5467d-87f5-8540-9a84-7569ae2849ad_2d1bb20f-49c1-485d-a689-696693713a5a)"  # noqa: E501
+    )
+    assert entry.is_dir()
+    assert not entry.is_file()
+    assert list(entry.listdir().keys()) == [
+        "DEV__dev_nvme1n1",
+        "summary.xml",
+    ]
+
+    entry = vbk.get("DEV__dev_nvme1n1", entry)
+    assert not entry.is_dir()
+    assert entry.is_file()
+    assert entry.is_internal_file()
+    assert not entry.properties
+    assert entry.size == 0x400000
+
+    with entry.open() as fh:
+        digest = hashlib.sha256(fh.read()).hexdigest()
+        assert digest == "337350cac29d2ed34c23ce9fc675950badf85fd2b694791abe6999d36f0dc1b3"
+
+
+def test_vbk_version_13(vbk13: BinaryIO) -> None:
+    vbk = VBK(vbk13)
+
+    assert isinstance(vbk.block_store, MetaVector2)
+    assert list(vbk.get("/").listdir().keys()) == [
+        "6745a759-2205-4cd2-b172-8ec8f7e60ef8 (3c834d56-37ac-8bd3-b946-30113c55c4b5)"
+    ]
+
+    entry = vbk.get("6745a759-2205-4cd2-b172-8ec8f7e60ef8 (3c834d56-37ac-8bd3-b946-30113c55c4b5)")
+    assert entry.is_dir()
+    assert not entry.is_file()
+    assert list(entry.listdir().keys()) == [
+        "digest_47d9f323-442b-433d-bd4f-1ecb3fa97351",
+        "8b14f74c-360d-4d7a-98f7-7f4c5e737eb7",
+        "GuestMembers.xml",
+        "BackupComponents.xml",
+        "summary.xml",
+    ]
+
+    entry = vbk.get(
+        "6745a759-2205-4cd2-b172-8ec8f7e60ef8 (3c834d56-37ac-8bd3-b946-30113c55c4b5)/8b14f74c-360d-4d7a-98f7-7f4c5e737eb7"  # noqa: E501
+    )
+    assert not entry.is_dir()
+    assert entry.is_file()
+    assert entry.is_internal_file()
+    assert "DefinedBlocksMask" in entry.properties
+    assert len(entry.properties["DefinedBlocksMask"]) == 35
+    assert entry.size == 0x314200
+
+    with entry.open() as fh:
+        digest = hashlib.sha256(fh.read()).hexdigest()
+        assert digest == "e9ed281cf9c2fe1745e4eb9c926c1a64bd47569c48be511c5fdf6fd5793e5a77"
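Mirroring the tests above, a file in a VBK can also be hashed in block-sized chunks instead of one large read() (the file path and entry name are hypothetical):

    import hashlib

    from dissect.hypervisor.backup.vbk import VBK

    with open("test.vbk", "rb") as fh:
        vbk = VBK(fh)
        entry = vbk.get("some-guid/DEV__dev_nvme1n1")

        digest = hashlib.sha256()
        with entry.open() as stream:
            for chunk in iter(lambda: stream.read(vbk.block_size), b""):
                digest.update(chunk)

        print(digest.hexdigest())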