Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

refactor: Drop dependency on fact_helper_file #1140

Merged
merged 2 commits into from
Sep 12, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion docsrc/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,6 @@
'docker',
'docker-compose',
'email-validator',
'fact_helper_file',
'flaky',
'flask',
'flask_login',
Expand Down
46 changes: 46 additions & 0 deletions src/helperFunctions/magic.py
maringuu marked this conversation as resolved.
Show resolved Hide resolved
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
"""This is a wrapper around pymagic.
It aims to provide the same API, with the added ability to load multiple magic
files through the default API.
"""
from __future__ import annotations

import os
from os import PathLike

import magic as pymagic

from helperFunctions.fileSystem import get_src_dir

# On Ubuntu, this is provided by the libmagic-mgc package
maringuu marked this conversation as resolved.
Show resolved Hide resolved
# System-wide magic database; can be overridden through the MAGIC environment variable
_default_magic = os.environ.get('MAGIC', '/usr/lib/file/magic.mgc')
# Magic files located in the bin directory below the source directory
_fact_magic = f'{get_src_dir()}/bin/firmware'
_internal_symlink_magic = f'{get_src_dir()}/bin/internal_symlink_magic'
# Colon-separated list of all magic files; used as the default ``magic_file``
_magic_file = ':'.join((_internal_symlink_magic, _fact_magic, _default_magic))

# Cache of ``pymagic.Magic`` instances, keyed by the sorted tuple of the
# keyword arguments they were created with
_instances = {}


def _get_magic_instance(**kwargs):
    """Return a cached instance of ``pymagic.Magic`` for the given keyword arguments.

    The instance is created with ``pymagic.Magic(**kwargs)`` on the first call
    for a set of keyword arguments and reused on every later call with the
    same arguments.

    :param kwargs: Keyword arguments passed on to ``pymagic.Magic``.
        All values must be hashable.
    :return: The ``pymagic.Magic`` instance belonging to ``kwargs``.
    """
    # Dicts are not hashable, but a sorted tuple of their items is.
    # The tuple itself is the key (not its hash value): different argument sets
    # can share a hash, and only the dict's equality check tells them apart.
    key = tuple(sorted(kwargs.items()))
    instance = _instances.get(key)
    if instance is None:
        instance = _instances[key] = pymagic.Magic(**kwargs)
    return instance


def from_file(filename: bytes | str | PathLike, magic_file: str | None = _magic_file, **kwargs) -> str:
    """Identify the file at ``filename``, mirroring pymagic's ``magic.from_file``.

    Unlike pymagic's function, all keyword arguments that ``magic.Magic``
    accepts may be given. ``magic_file`` defaults to the module's combined
    list of magic files.
    """
    return _get_magic_instance(magic_file=magic_file, **kwargs).from_file(filename)


def from_buffer(buf: bytes | str, magic_file: str | None = _magic_file, **kwargs) -> str:
    """Identify the content of ``buf``, mirroring pymagic's ``magic.from_buffer``.

    Unlike pymagic's function, all keyword arguments that ``magic.Magic``
    accepts may be given. ``magic_file`` defaults to the module's combined
    list of magic files.
    """
    return _get_magic_instance(magic_file=magic_file, **kwargs).from_buffer(buf)
6 changes: 6 additions & 0 deletions src/install/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,12 @@ def main(distribution):

BIN_DIR.mkdir(exist_ok=True)

run_cmd_with_logging(
f'wget -O {BIN_DIR / "firmware.xz"} https://github.com/fkie-cad/firmware-magic-database/releases/download/v0.2.1/firmware.xz'
)
run_cmd_with_logging(f'unxz --force {BIN_DIR / "firmware.xz"}')
run_cmd_with_logging(f'cp --force {INSTALL_DIR / "internal_symlink_magic"} {BIN_DIR}')

apt_packages_path = INSTALL_DIR / 'apt-pkgs-common.txt'
dnf_packages_path = INSTALL_DIR / 'dnf-pkgs-common.txt'

Expand Down
6 changes: 6 additions & 0 deletions src/install/internal_symlink_magic
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# ====================== fact internal ======================

# ---- fact internal link representation ----
0 string symbolic\ link\ -> symbolic link
>17 string x to '%s'
!:mime inode/symlink
2 changes: 0 additions & 2 deletions src/install/requirements_common.txt
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,6 @@ pydantic==2.4.0
# Config parsing
toml==0.10.2

git+https://github.com/fkie-cad/fact_helper_file.git

# Common code modules
git+https://github.com/fkie-cad/common_helper_files.git
git+https://github.com/fkie-cad/common_helper_filter.git
Expand Down
8 changes: 3 additions & 5 deletions src/plugins/analysis/file_type/code/file_type.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,11 @@
from typing import List

import pydantic
from fact_helper_file import get_file_type_from_path
from pydantic import Field

from analysis.plugin import AnalysisPluginV0
from analysis.plugin.compat import AnalysisBasePluginAdapterMixin
from helperFunctions import magic

if typing.TYPE_CHECKING:
import io
Expand Down Expand Up @@ -39,9 +39,7 @@ def summarize(self, result: Schema) -> List[str]:
def analyze(self, file_handle: io.FileIO, virtual_file_path: str, analyses: dict) -> Schema:
del virtual_file_path, analyses

file_dict = get_file_type_from_path(file_handle.name)

return AnalysisPlugin.Schema(
mime=file_dict['mime'],
full=file_dict['full'],
mime=magic.from_file(file_handle.name, mime=True),
full=magic.from_file(file_handle.name, mime=False),
)
7 changes: 5 additions & 2 deletions src/plugins/analysis/qemu_exec/code/qemu_exec.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,11 +16,11 @@
from common_helper_files import get_binary_from_file, safe_rglob
from docker.errors import DockerException
from docker.types import Mount
from fact_helper_file import get_file_type_from_path
from requests.exceptions import ReadTimeout

import config
from analysis.PluginBase import AnalysisBasePlugin
from helperFunctions import magic
from helperFunctions.docker import run_docker_container
from helperFunctions.tag import TagColor
from helperFunctions.uid import create_uid
Expand Down Expand Up @@ -125,7 +125,10 @@ def _find_relevant_files(self, extracted_files_dir: Path):
result = []
for path in safe_rglob(extracted_files_dir):
if path.is_file() and not path.is_symlink():
file_type = get_file_type_from_path(path.absolute())
file_type = {
'full': magic.from_file(path.absolute(), mime=False),
'mime': magic.from_file(path.absolute(), mime=True),
}
if self._has_relevant_type(file_type):
result.append((f'/{path.relative_to(Path(self.root_path))}', file_type['full']))
return result
Expand Down
4 changes: 2 additions & 2 deletions src/test/acceptance/test_io_routes.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import pytest
from fact_helper_file import get_file_type_from_binary

from helperFunctions import magic
from storage.db_interface_comparison import ComparisonDbInterface
from test.common_helper import create_test_firmware

Expand Down Expand Up @@ -68,4 +68,4 @@ def test_pdf_download(self, test_client, backend_db):
assert response.status_code == 200, 'pdf download failed' # noqa: PLR2004
device = self.test_fw.device_name.replace(' ', '_')
assert response.headers['Content-Disposition'] == f'attachment; filename={device}_analysis_report.pdf'
assert get_file_type_from_binary(response.data)['mime'] == 'application/pdf'
assert magic.from_buffer(response.data, mime=True) == 'application/pdf'
5 changes: 2 additions & 3 deletions src/test/integration/helperFunctions/test_pdf.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,7 @@
import os
from pathlib import Path

from fact_helper_file import get_file_type_from_binary

from helperFunctions import magic
from helperFunctions.pdf import build_pdf_report
from test.common_helper import TEST_FW

Expand All @@ -21,5 +20,5 @@ def test_build_pdf_report():

pdf_path = build_pdf_report(TEST_FW, docker_mount_base_dir)

assert get_file_type_from_binary(pdf_path.read_bytes())['mime'] == 'application/pdf'
assert magic.from_buffer(pdf_path.read_bytes(), mime=True) == 'application/pdf'
assert pdf_path.name == f"{TEST_FW.device_name.replace(' ', '_')}_analysis_report.pdf"
13 changes: 13 additions & 0 deletions src/test/unit/helperFunctions/test_magic.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
from helperFunctions import magic


def test_internal_magic():
    """A buffer in the internal symlink representation gets the MIME type ``inode/symlink``."""
    mime_type = magic.from_buffer('symbolic link -> /foo/bar', mime=True)
    assert mime_type == 'inode/symlink'


def test_firmware_magic():
    """The buffer ``BOOTLOADER!`` is described as a Mediatek bootloader."""
    description = magic.from_buffer('BOOTLOADER!', mime=False)
    assert description == 'Mediatek bootloader'


def test_magic_from_file():
    """``from_file`` reports ``/dev/null`` as a character device."""
    mime_type = magic.from_file('/dev/null', mime=True)
    assert mime_type == 'inode/chardevice'
6 changes: 3 additions & 3 deletions src/unpacker/unpack.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,9 @@
from time import time
from typing import TYPE_CHECKING, Optional

from fact_helper_file import get_file_type_from_path

import config
from analysis.PluginBase import sanitize_processed_analysis
from helperFunctions import magic
from helperFunctions.fileSystem import file_is_empty, get_relative_object_path
from helperFunctions.tag import TagColor
from objects.file import FileObject
Expand Down Expand Up @@ -94,7 +93,8 @@ def generate_objects_and_store_files(
continue
current_file = FileObject(file_path=str(path))
current_virtual_path = get_relative_object_path(path, extraction_dir)
current_file.temporary_data['parent_fo_type'] = get_file_type_from_path(parent.file_path)['mime']
current_file.temporary_data['parent_fo_type'] = magic.from_file(parent.file_path, mime=True)

if current_file.uid not in extracted_files:
# the same file can be contained multiple times in one archive -> only the VFP needs an update
self.unpacking_locks.set_unpacking_lock(current_file.uid)
Expand Down
4 changes: 2 additions & 2 deletions src/web_interface/components/io_routes.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,10 @@
from time import sleep

import requests
from fact_helper_file import get_file_type_from_binary
from flask import Response, make_response, redirect, render_template, request

import config
from helperFunctions import magic
from helperFunctions.database import get_shared_session
from helperFunctions.pdf import build_pdf_report
from helperFunctions.task_conversion import check_for_errors, convert_analysis_task_to_fw_obj, create_analysis_task
Expand Down Expand Up @@ -82,7 +82,7 @@ def _prepare_file_download(self, uid: str, packed: bool = False) -> str | Respon
def _get_file_download_mime(self, binary: bytes, uid: str) -> str:
type_analysis = self.db.frontend.get_analysis(uid, 'file_type')
mime = type_analysis.get('mime') if type_analysis is not None else None
return mime or get_file_type_from_binary(binary)['mime']
return mime or magic.from_buffer(binary, mime=True)

@roles_accepted(*PRIVILEGES['download'])
@AppRoute('/ida-download/<compare_id>', GET)
Expand Down