From 769c5111667c69b504016e062a04a7584ca56db1 Mon Sep 17 00:00:00 2001 From: DC3-TSD <12175126+DC3-DCCI@users.noreply.github.com> Date: Tue, 9 Aug 2022 12:51:14 -0400 Subject: [PATCH] General updates and bugfixes --- CHANGELOG.md | 67 ++++++++++++ CHANGELOG.rst | 60 ----------- README.rst => README.md | 102 +++++++++--------- docs/api.rst | 6 -- docs/changelog.md | 2 + docs/changelog.rst | 2 - docs/conf.py | 13 ++- docs/differences.md | 27 +++++ docs/differences.rst | 30 ------ docs/examples/flowcharts.rst | 28 ++--- docs/examples/functions.rst | 44 +++++++- docs/examples/index.rst | 3 +- docs/examples/lines.rst | 2 + docs/examples/references.rst | 38 +++---- docs/examples/segments.rst | 2 +- docs/examples/variables.rst | 49 +++++++++ docs/index.md | 18 ++++ docs/index.rst | 14 --- docs/install.md | 41 +++++++ docs/install.rst | 54 ---------- dragodis/ghidra/disassembler.py | 4 +- dragodis/ghidra/flat.py | 10 +- dragodis/ghidra/function.py | 12 ++- dragodis/ghidra/function_signature.py | 25 +++++ dragodis/ghidra/instruction.py | 52 +++++++-- dragodis/ida/data_type.py | 7 +- dragodis/ida/flat.py | 11 +- dragodis/ida/flowchart.py | 12 +-- dragodis/ida/function.py | 15 +-- dragodis/ida/function_argument_location.py | 16 ++- dragodis/ida/function_signature.py | 56 +++++++++- dragodis/ida/instruction.py | 43 ++++---- dragodis/ida/line.py | 7 +- dragodis/ida/memory.py | 1 - dragodis/ida/operand.py | 38 +++---- dragodis/ida/operand_value.py | 21 ++-- dragodis/ida/reference.py | 20 ++-- dragodis/ida/sdk/ida_helpers.py | 6 +- dragodis/ida/segment.py | 16 ++- dragodis/interface/__init__.py | 1 + dragodis/interface/data_type.py | 6 ++ dragodis/interface/flat.py | 6 ++ dragodis/interface/flowchart.py | 15 ++- dragodis/interface/function.py | 22 ++-- .../interface/function_argument_location.py | 30 ++++++ dragodis/interface/function_signature.py | 42 +++++++- dragodis/interface/instruction.py | 28 ++++- dragodis/interface/line.py | 3 + dragodis/interface/memory.py | 3 + 
dragodis/interface/operand.py | 5 +- dragodis/interface/operand_value.py | 57 ++++++++-- dragodis/interface/reference.py | 6 ++ dragodis/interface/segment.py | 5 +- dragodis/interface/stack.py | 6 ++ dragodis/interface/symbol.py | 14 +++ dragodis/interface/variable.py | 18 ++++ noxfile.py | 1 + setup.cfg | 28 +++++ setup.py | 39 +------ tests/test_function_signature.py | 36 +++++++ tests/test_instruction.py | 18 +++- 61 files changed, 901 insertions(+), 462 deletions(-) create mode 100644 CHANGELOG.md delete mode 100644 CHANGELOG.rst rename README.rst => README.md (51%) delete mode 100644 docs/api.rst create mode 100644 docs/changelog.md delete mode 100644 docs/changelog.rst create mode 100644 docs/differences.md delete mode 100644 docs/differences.rst create mode 100644 docs/examples/variables.rst create mode 100644 docs/index.md delete mode 100644 docs/index.rst create mode 100644 docs/install.md delete mode 100644 docs/install.rst diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 0000000..5d9cda5 --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,67 @@ +# Changelog + + +## [Unreleased] + +- Fixed getting non-user defined exports in Ghidra. +- Fixed issue getting KeyError if Ghidra isn't setup. +- Updated documentation. +- Added `FunctionSignature.calling_convention` get/set property. +- Added `FunctionSignature.return_type` get/set property. +- Fixed issue with `ida_hexrays.DecompilationFailure` getting thrown. Switched to logging warning instead. +- Fixed issue with incorrect immediate operand value being produced with IDA, sometimes causing an OverflowError. +- Added `Instruction.rep` property for x86 instructions. +- Fixed issue with incorrectly getting NotExistError in IDA when base address is zero. + + +## [0.4.0] - 2022-06-28 + +- Added `Symbol.references_to` to get references to imports or exports. +- Added `Disassembler.get_import()` and `Disassembler.get_export()` functions. +- Added `BACKEND_GHIDRA` and `BACKEND_IDA` constants. 
+- Miscellaneous bugfixes for Ghidra support. + + +## [0.3.0] - 2022-06-01 + +- Fixed connection issues with running IDA disassembler in Linux. +- Add auto detection of 64bit size for IDA. +- Changed `Function.instructions()` implementation to use flowchart. +- Added `Function.lines()` function. +- Added `Disassembler.instructions()` function. +- Added `Disassembler.find_bytes()` function. +- Added ability to use dragodis locally in underlying disassembler. +- Added `Disassembler.teleport()` function to run a function within the underlying disassembler. + + +## [0.2.0] - 2022-02-03 + +- Updated IDA disassembler to use [rpyc](https://rpyc.readthedocs.io/en/latest). +- Updated support to IDA 7.7 +- Updated Ghidra disassembler to use [pyhidra](https://github.com/dod-cyber-crime-center/pyhidra). +- Added proper handling when a disassembler isn't setup/installed. +- Renamed `dragodis.open()` to `dragodis.open_program()` +- Updated README +- Interface has been completely refactored. +- Added support for: + : - Flowcharts + - Function Signatures + - Instructions + - Memory + - Operands + - Operand value types + - References + - Imports/Export symbols + - Stack/Global variables + - Segments + + +## 0.1.0 - 2020-11-25 + +- Initial release + + +[Unreleased]: https://github.com/dod-cyber-crime-center/dragodis/compare/0.4.0...HEAD +[0.4.0]: https://github.com/dod-cyber-crime-center/dragodis/compare/0.3.0...0.4.0 +[0.3.0]: https://github.com/dod-cyber-crime-center/dragodis/compare/0.2.0...0.3.0 +[0.2.0]: https://github.com/dod-cyber-crime-center/dragodis/compare/0.1.0...0.2.0 diff --git a/CHANGELOG.rst b/CHANGELOG.rst deleted file mode 100644 index 42f1fca..0000000 --- a/CHANGELOG.rst +++ /dev/null @@ -1,60 +0,0 @@ - -Changelog -========= - - -`0.4.0`_ - 2022-06-28 ---------------------- - -- Added ``Symbol.references_to`` to get references to imports or exports. -- Added ``Disassembler.get_import()`` and ``Disassembler.get_export()`` functions.
-- Added ``BACKEND_GHIDRA`` and ``BACKEND_IDA`` constants. -- Miscellaneous bugfixes for Ghidra support. - - -`0.3.0`_ - 2022-06-01 ---------------------- - -- Fixed connection issues with running IDA disassembler in Linux. -- Add auto detection of 64bit size for IDA. -- Changed ``Function.instructions()`` implementation to use flowchart. -- Added ``Function.lines()`` function. -- Added ``Disassembler.instructions()`` function. -- Added ``Disassembler.find_bytes()`` function. -- Added ability to use dragodis locally in underlying disassembler. -- Added ``Disassembler.teleport()`` function to run a function within the underlying disassembler. - - -`0.2.0`_ - 2022-02-03 ---------------------- - -- Updated IDA disassembler to use `rpyc `_. -- Updated support to IDA 7.7 -- Updated Ghidra disassembler to use `pyhidra `_. -- Added proper handling when a disassembler isn't setup/installed. -- Renamed ``dragodis.open()`` to ``dragodis.open_program()`` -- Updated README -- Interface has been completely refactored. -- Added support for: - - Flowcharts - - Function Signatures - - Insturctions - - Memory - - Operands - - Operand value types - - References - - Imports/Export symbols - - Stack/Global variables - - Segments - - -0.1.0 - 2020-11-25 ------------------- - -- Initial release - - -.. _Unreleased: https://github.com/dod-cyber-crime-center/dragodis/compare/0.4.0...HEAD -.. _0.4.0: https://github.com/dod-cyber-crime-center/dragodis/compare/0.3.0...0.4.0 -.. _0.3.0: https://github.com/dod-cyber-crime-center/dragodis/compare/0.2.0...0.3.0 -.. _0.2.0: https://github.com/dod-cyber-crime-center/dragodis/compare/0.1.0...0.2.0 diff --git a/README.rst b/README.md similarity index 51% rename from README.rst rename to README.md index 5e45d70..58d17d7 100644 --- a/README.rst +++ b/README.md @@ -1,6 +1,4 @@ -******** -Dragodis -******** +# Dragodis Dragodis is a Python framework which allows for the creation of universal disassembler scripts. 
Dragodis currently only supports @@ -21,83 +19,79 @@ of scripts between users of different disassemblers. Dragodis also aims to provi a cleaner and easier to use API than those provided by other disassemblers. -Usage -===== +## Usage -To use Dragodis, simply pass in the path to your input binary file into either the ``IDA`` or ``Ghidra`` class. +To use Dragodis, simply pass in the path to your input binary file into either the `IDA` or `Ghidra` class. This will create an instance of the disassembler with the given input file analyzed. -.. code-block:: python +```python +import dragodis - import dragodis +with dragodis.Ghidra(r"C:\strings.exe") as ghidra: + print(ghidra.get_dword(0x401000)) +``` - with dragodis.Ghidra(r"C:\strings.exe") as ghidra: - print(ghidra.get_dword(0x401000)) - - -.. code-block:: python - - import dragodis - - with dragodis.IDA(r"C:\strings.exe") as ida: - print(ida.get_dword(0x401000)) +```python +import dragodis +with dragodis.IDA(r"C:\strings.exe") as ida: + print(ida.get_dword(0x401000)) +``` A disassembler can also be run without using a context manager using the `start()` and `stop()` functions. -.. code-block:: python - - import dragodis - - ghidra = dragodis.Ghidra(r"C:\strings.exe") - ghidra.start() - ghidra.get_dword(0x401000) - ghidra.stop() +```python +import dragodis +ghidra = dragodis.Ghidra(r"C:\strings.exe") +ghidra.start() +ghidra.get_dword(0x401000) +ghidra.stop() +``` -Alternatively, you can use ``open_program()`` to choose the disassembler more dynamically by providing -the disassembler name in the ``disassembler`` parameter or by setting the ``DRAGODIS_DISASSEMBLER`` +Alternatively, you can use `open_program()` to choose the disassembler more dynamically by providing +the disassembler name in the `disassembler` parameter or by setting the `DRAGODIS_DISASSEMBLER` environment variable. -.. 
code-block:: python +```python +import dragodis - import dragodis +with dragodis.open_program(r"C:\strings.exe", disassembler="ida") as ida: + print(ida.get_dword(0x401000)) +``` - with dragodis.open_program(r"C:\strings.exe", disassembler="ida") as ida: - print(ida.get_dword(0x401000)) - - -It is highly recommended to use the ``DRAGODIS_DISASSEMBLER`` environment variable to ensure your scripts +It is highly recommended to use the `DRAGODIS_DISASSEMBLER` environment variable to ensure your scripts are cross compatible without any modification. As well, to give the user the power to choose which disassembler they would like to use. -*NOTE: A ``dragodis.NotInstalledError`` will be thrown if the disassembler chosen is not properly installed.* - -.. code-block:: python - import os - os.environ["DRAGODIS_DISASSEMBLER"] = "ida" +```{note} +A "NotInstalledError" will be thrown if the disassembler chosen is not properly installed. +``` - import dragodis +```python +import os +os.environ["DRAGODIS_DISASSEMBLER"] = "ida" - with dragodis.open_program(r"C:\strings.exe") as dis: - print(f"Disassembler used: {dis.name}") - print(dis.get_dword(0x401000)) +import dragodis +with dragodis.open_program(r"C:\strings.exe") as dis: + print(f"Disassembler used: {dis.name}") + print(dis.get_dword(0x401000)) +``` If you are locally within the disassembler's interpreter (the output window for IDA or pyhidraw interpreter in Ghidra) then you can initialize a disassembler object by directly acccessing the object: -.. code-block:: python - - # If in IDA - import dragodis - dis = dragodis.IDA() - - # If in Ghidra - import dragodis - dis = dragodis.Ghidra() +```python +# If in IDA +import dragodis +dis = dragodis.IDA() +# If in Ghidra +import dragodis +dis = dragodis.Ghidra() +``` -We can also directly call scripts that using the ``open_program()`` function locally in the disassembler. +We can also directly call scripts that use the `open_program()` function locally in the disassembler.
When this happens, the input file path provided must match the detected input file path by the disassembler. diff --git a/docs/api.rst b/docs/api.rst deleted file mode 100644 index 92888d3..0000000 --- a/docs/api.rst +++ /dev/null @@ -1,6 +0,0 @@ -API -=== - -.. toctree:: - - api/interface \ No newline at end of file diff --git a/docs/changelog.md b/docs/changelog.md new file mode 100644 index 0000000..8261b35 --- /dev/null +++ b/docs/changelog.md @@ -0,0 +1,2 @@ +```{include} ../CHANGELOG.md +``` \ No newline at end of file diff --git a/docs/changelog.rst b/docs/changelog.rst deleted file mode 100644 index 7a5c44b..0000000 --- a/docs/changelog.rst +++ /dev/null @@ -1,2 +0,0 @@ - -.. include:: ../CHANGELOG.rst diff --git a/docs/conf.py b/docs/conf.py index 4a806c8..296dc90 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -4,6 +4,9 @@ # list see the documentation: # https://www.sphinx-doc.org/en/master/usage/configuration.html +# Pull dragodis version before we mess with the path. +from dragodis import __version__ + # -- Path setup -------------------------------------------------------------- # If extensions (or modules to document with autodoc) are in another directory, @@ -18,11 +21,12 @@ # -- Project information ----------------------------------------------------- project = 'Dragodis' -copyright = '2021, DC3' +copyright = '2022, DC3' author = 'DC3' # The full version, including alpha/beta/rc tags -release = '0.2.0' +version = __version__ +release = version # -- General configuration --------------------------------------------------- @@ -30,7 +34,10 @@ # Add any Sphinx extension module names here, as strings. They can be # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom # ones. -extensions = ["sphinx.ext.autodoc", "sphinx_rtd_theme" +extensions = [ + "sphinx.ext.autodoc", + "sphinx_rtd_theme", + "myst_parser", ] # Add any paths that contain templates here, relative to this directory. 
diff --git a/docs/differences.md b/docs/differences.md new file mode 100644 index 0000000..233f070 --- /dev/null +++ b/docs/differences.md @@ -0,0 +1,27 @@ +# Differences Between Disassemblers + +Given the nature of the project, there is bound to be some variance +in the way that different disassemblers handle certain situations. +Some of these situations may be simple to work around, while others may require +more effort to work around. Understanding these differences will aid in creating +better scripts that won't stop working when switching from one disassembler to another. + +Some of the differences that have been found so far include: +- When it comes to alignment, IDA combines all of the bytes into one line, Ghidra separates each byte into its own line. +- There may be differences between the min and max addresses of binaries between Ghidra and IDA. +- The initial current address may be different in Ghidra and IDA. +- IDA and Ghidra have different naming conventions for various components of the disassembly such as + functions. IDA names functions `sub_XXXXXX` by default, while Ghidra names functions + `FUN_00XXXXXX` by default. + +If you **do** need to write disassembler specific code, you can check the `.name` attribute of the +disassembler. + +```python +if dis.name == "IDA": + # do IDA specific thing +elif dis.name == "Ghidra": + # do Ghidra specific thing +else: + raise ValueError(f"{dis.name} disassembler is not supported.") +``` diff --git a/docs/differences.rst b/docs/differences.rst deleted file mode 100644 index e28cc3a..0000000 --- a/docs/differences.rst +++ /dev/null @@ -1,30 +0,0 @@ -Differences Between Disassemblers -================================= - -Given the nature of the project, there is bound to be some variance -in the way that different disassemblers handle certain situations. -Some of these situations may be simple to work around, while others may require -more effort to work around. 
Understanding these differences will aid in creating -better scripts that won't stop working when switching from one disassembler to another. - -Some of the differences that have been found so far include, - * When it comes to alignment, IDA combines all of the bytes into one line, Ghidra separates each byte - into its own line. - * There may be differences between the min and max addresses of binaries between Ghidra and IDA. - * The initial current address may be different in Ghidra and IDA. - * IDA and Ghidra have different naming conventions for various components of the disassembly such as - functions. IDA names functions ``sub_XXXXXX`` by default, while Ghidra names functions - ``FUN_00XXXXXX`` by default. - -If you **do** need to write disassembler specific code, you can check the ``.name`` attribute of the -disassembler. - -.. code-block:: python - - if dis.name == "IDA": - # do IDA specific thing - elif dis.name == "Ghidra": - # do Ghidra specific thing - else: - raise ValueError(f"{dis.name} disassembler is not supported.") - diff --git a/docs/examples/flowcharts.rst b/docs/examples/flowcharts.rst index 718e8cf..00b9d8e 100644 --- a/docs/examples/flowcharts.rst +++ b/docs/examples/flowcharts.rst @@ -9,17 +9,17 @@ A *Flowchart* is simply a collection of *BasicBlock* objects that can be obtaine A *BasicBlock* contains properties and methods for obtaining the start/end address, flow type, the lines within, as well as other basic blocks that come into or out of the block. -.. code: python +.. 
code:: python >>> flowchart = dis.get_flowchart(0x40100A) >>> print(flowchart) - + flowchart[0x00401000] >>> print("\n".join(map(str, flowchart.blocks))) - 0x00401003> - 0x0040100d> - 0x00401029> - 0x0040102b> + block[0x00401000 --> 0x00401003] + block[0x00401003 --> 0x0040100d] + block[0x0040100d --> 0x00401029] + block[0x00401029 --> 0x0040102b] >>> block = list(flowchart.blocks)[1] >>> print(hex(block.start)) @@ -29,19 +29,19 @@ flow type, the lines within, as well as other basic blocks that come into or out >>> print(block.flow_type) FlowType.conditional_jump >>> print("\n".join(map(str, block.lines()))) - > - > - > - > + 0x00401003: mov eax, [ebp+arg_0] + 0x00401006: movsx ecx, byte ptr [eax] + 0x00401009: test ecx, ecx + 0x0040100b: jz short loc_401029 >>> block2 = flowchart.get_block(block.start) >>> print(block == block2) True >>> print("\n".join(map(str, block.blocks_to))) - 0x00401003> - 0x00401029> + block[0x00401000 --> 0x00401003] + block[0x0040100d --> 0x00401029] >>> print("\n".join(map(str, block.blocks_from))) - 0x00401029> - 0x0040102b> + block[0x0040100d --> 0x00401029] + block[0x00401029 --> 0x0040102b] diff --git a/docs/examples/functions.rst b/docs/examples/functions.rst index 46c2b89..897b8ed 100644 --- a/docs/examples/functions.rst +++ b/docs/examples/functions.rst @@ -8,7 +8,47 @@ an easy way to interact with the various attributes of functions. >>> func = dis.get_function(0x40100A) >>> print(func) - + sub_401000() + >>> signature = func.signature + >>> print(signature) + _BYTE *__cdecl sub_401000(_BYTE *a1, char a2); + >>> print(signature.return_type) + byte * + >>> orig_type = signature.return_type + >>> signature.return_type = "int" + >>> print(signature) + INT __cdecl sub_401000(_BYTE *a1, char a2); + >>> signature.return_type = orig_type + >>> print(signature.calling_convention) + __cdecl + >>> for param in signature.parameters: + ... 
print(param) + stack[0x0]: _BYTE * a1 + stack[0x4]: char a2 + + >>> # Changing the calling convention also updates parameter locations. + >>> signature.calling_convention = "fastcall" + >>> for param in signature.parameters: + ... print(param) + ecx: _BYTE * a1 + dl: char a2 + >>> signature.calling_convention = "cdecl" + + >>> print(func.source_code) + _BYTE *__cdecl sub_401000(_BYTE *a1, char a2) + { + _BYTE *result; // eax + + while ( 1 ) + { + result = a1; + if ( !*a1 ) + break; + *a1++ ^= a2; + } + return result; + } + >>> print(hex(func.start)) 0x401000 @@ -18,7 +58,7 @@ an easy way to interact with the various attributes of functions. >>> # Give function a custom name. >>> func.name = "get_key" >>> print(func) - + get_key() >>> print(func.name) get_key diff --git a/docs/examples/index.rst b/docs/examples/index.rst index 1d96c70..af74389 100644 --- a/docs/examples/index.rst +++ b/docs/examples/index.rst @@ -12,4 +12,5 @@ Examples references flowcharts structures - segments \ No newline at end of file + segments + variables diff --git a/docs/examples/lines.rst b/docs/examples/lines.rst index 08e5fe8..84c2467 100644 --- a/docs/examples/lines.rst +++ b/docs/examples/lines.rst @@ -7,6 +7,8 @@ Lines can be either code or data. .. code:: python >>> line = dis.get_line(0x401014) + >>> print(line) + 0x00401014: movsx ecx, byte ptr [eax] >>> print(hex(line.address)) 0x401014 diff --git a/docs/examples/references.rst b/docs/examples/references.rst index c92b648..7086a78 100644 --- a/docs/examples/references.rst +++ b/docs/examples/references.rst @@ -8,25 +8,25 @@ in a disassembler. >>> refs = dis.references_to(0x401000) >>> for ref in refs: - ... 
print(f"{hex(ref.from_address)}, {ref.is_code}, {ref.is_data}") - 0x40103a, True, False - 0x401049, True, False - 0x401058, True, False - 0x401067, True, False - 0x401076, True, False - 0x401085, True, False - 0x401094, True, False - 0x4010a3, True, False - 0x4010b2, True, False - 0x4010c1, True, False - 0x4010d0, True, False - 0x4010df, True, False - 0x4010ee, True, False - 0x4010fd, True, False - 0x40110c, True, False - 0x40111b, True, False - 0x40112a, True, False - 0x401139, True, False + ... print(ref) + code_call: 0x0040103a --> 0x00401000 + code_call: 0x00401049 --> 0x00401000 + code_call: 0x00401058 --> 0x00401000 + code_call: 0x00401067 --> 0x00401000 + code_call: 0x00401076 --> 0x00401000 + code_call: 0x00401085 --> 0x00401000 + code_call: 0x00401094 --> 0x00401000 + code_call: 0x004010a3 --> 0x00401000 + code_call: 0x004010b2 --> 0x00401000 + code_call: 0x004010c1 --> 0x00401000 + code_call: 0x004010d0 --> 0x00401000 + code_call: 0x004010df --> 0x00401000 + code_call: 0x004010ee --> 0x00401000 + code_call: 0x004010fd --> 0x00401000 + code_call: 0x0040110c --> 0x00401000 + code_call: 0x0040111b --> 0x00401000 + code_call: 0x0040112a --> 0x00401000 + code_call: 0x00401139 --> 0x00401000 References can be obtained through the ``.references_to(address)`` and ``.references_from(address)`` functions found in the flat API. 
*Function* objects diff --git a/docs/examples/segments.rst b/docs/examples/segments.rst index 7df0efc..2b737a7 100644 --- a/docs/examples/segments.rst +++ b/docs/examples/segments.rst @@ -19,7 +19,7 @@ A *Memory* object for the underlying data can be obtained using the ``.open()`` >>> segment = dis.get_segment(".text") >>> print(segment) - 0x00409c00> + .text: 0x00401000 --> 0x00409c00 >>> segment.get_bytes(0x00401141, 4) b']\xc3\xcc\xcc' diff --git a/docs/examples/variables.rst b/docs/examples/variables.rst new file mode 100644 index 0000000..921efd7 --- /dev/null +++ b/docs/examples/variables.rst @@ -0,0 +1,49 @@ +Variables +========= + +A Dragodis *Variable* represents any global or stack based labeled data. +A variable can be pulled by address or from another object such as a function or operand. + +.. code:: python + + >>> var = dis.get_variable(0x40C000) + >>> print(var) + 0x0040c000: char aIdmmnVnsme + >>> print(hex(var.address)) + 0x40c000 + >>> print(var.name) + aIdmmnVnsme + >>> print(var.size) + 13 + >>> print(var.data_type) + char + >>> print(var.data_type.size) + 1 + + >>> insn = dis.get_instruction(0x401035) + >>> print(insn) + push offset aIdmmnVnsme; "Idmmn!Vnsme " + >>> print(insn.operands[0].variable) + 0x0040c000: char aIdmmnVnsme + + >>> func = dis.get_function(0x401030) + >>> for var in func.variables: + ... 
print(var) + 0x0040c000: char aIdmmnVnsme + 0x0040c010: char aVgqvQvpkleUkvj + 0x0040c02c: char aWkfRvjHAqltmEl + 0x0040c05c: char aKeoMwWpvkjcEjE + 0x0040c080: char aDflaGpwkvMjiVL + 0x0040c0a0: char aEgruGhbBiauCge + 0x0040c0c4: byte unk_40C0C4 + 0x0040c0f0: byte unk_40C0F0 + 0x0040c114: char asc_40C114 + 0x0040c120: char aQfbwfsqlFppb + 0x0040c130: char aTsudfs + 0x0040c138: byte unk_40C138 + 0x0040c140: byte unk_40C140 + 0x0040c15c: char aAkjdgbaKjgdbjk + 0x0040c174: byte unk_40C174 + 0x0040c19c: byte unk_40C19C + 0x0040c1c4: byte unk_40C1C4 + 0x0040c1f8: char aLmfoghknlmgfoh diff --git a/docs/index.md b/docs/index.md new file mode 100644 index 0000000..c1cba0f --- /dev/null +++ b/docs/index.md @@ -0,0 +1,18 @@ + +```{include} ../README.md +``` + + +# Table of Contents + +```{toctree} +--- +maxdepth: 2 +--- + +install +examples/index +differences +API +changelog +``` diff --git a/docs/index.rst b/docs/index.rst deleted file mode 100644 index 9b82f93..0000000 --- a/docs/index.rst +++ /dev/null @@ -1,14 +0,0 @@ -.. include:: ../README.rst - - -Table of Contents -================= - -.. toctree:: - :maxdepth: 2 - - install - examples/index - differences - api - changelog diff --git a/docs/install.md b/docs/install.md new file mode 100644 index 0000000..8bdfa6c --- /dev/null +++ b/docs/install.md @@ -0,0 +1,41 @@ +# Installation + +Getting Dragodis set up is simple, but varies slightly depending on which +disassembler(s) you plan to use. + +First install dragodis like normal: + +```bash +pip install dragodis +``` + +Then follow one or more of the following instructions to setup your favorite disassembler. + +## IDA + +1. Download and install [IDA Pro 7.\*](https://www.hex-rays.com) with Python 3 mode. (Tested on version 7.4, 7.5, and 7.7) Make sure to run IDA at least once to accept the EULA. +2. Set the `IDA_INSTALL_DIR` environment variable to point to the directory where IDA is installed. (e.g. `C:\Program Files\IDA Pro 7.5`) +3. 
Dragodis uses [rpyc](https://rpyc.readthedocs.io/en/latest) to communicate with IDA. + This is installed automatically when you install Dragodis. However, if you are using a different python + environment than IDA, you can manually install the library in the IDA environment using the `--target` flag. + + ```bash + py -3.8 -m pip install rpyc --target="%IDA_INSTALL_DIR%\python\3" + ``` + +4. **WINDOWS**: If you are on Windows, you'll also need to install `pywin32` in the IDA interpreter. + + ```bash + py -3.8 -m pip install pywin32 --target="%IDA_INSTALL_DIR%\python\3" + ``` + +## Ghidra + +1. Download and install [Ghidra](https://ghidra-sre.org) to a desired location. +2. Set the `GHIDRA_INSTALL_DIR` environment variable to point to the directory where Ghidra is installed. + (e.g. `C:\Tools\ghidra_9.1.2_PUBLIC`) + +## Set Preferred Disassembler + +To set a preferred disassembler for when a script does not explicitly define one, set the `DRAGODIS_DISASSEMBLER` environment +variable to either `ida` or `ghidra`. diff --git a/docs/install.rst b/docs/install.rst deleted file mode 100644 index b482fe9..0000000 --- a/docs/install.rst +++ /dev/null @@ -1,54 +0,0 @@ -Installation -============ - -Getting Dragodis set up is simple, but varies slightly depending on which -disassembler(s) you plan to use. - -First install dragodis like normal: - -.. code-block:: bash - - pip install dragodis - - -Then follow one or more of the following instructions to setup your favorite disassembler. - - -IDA -*** - - #. Download and install `IDA Pro 7.* `_ with Python 3 mode. - (Tested on version 7.4, 7.5, and 7.7) Make sure to run IDA at least once to accept the EULA. - - #. Set the ``IDA_INSTALL_DIR`` environment variable to point to the directory where IDA is installed. - (e.g. ``C:\Program Files\IDA Pro 7.5``) - - #. Dragodis uses `rpyc `_ to communicate with IDA. - This is installed automatically when you install Dragodis. 
However, if you are using a different python - environment than IDA, you can manually install the library in the IDA environment using the ``--target`` flag. - - .. code-block:: bash - - py -3.7 -m pip install rpyc --target="%IDA_INSTALL_DIR%\python\3" - - #. **WINDOWS**: If you are on Windows, you'll also need to install ``pywin32`` in the IDA interpreter. - - .. code-block:: bash - - py -3.7 -m pip install pywin32 --target="%IDA_INSTALL_DIR%\python\3" - - -Ghidra -****** - - #. Download and install `Ghidra `_ to a desired location. - - #. Set the ``GHIDRA_INSTALL_DIR`` environment variable to point to the directory where Ghidra is installed. - (e.g. ``C:\Tools\ghidra_9.1.2_PUBLIC``) - - -Set Preferred Disassembler -************************** - -To set a preferred disassembler for when a script does not explicitly define one, set the ``DRAGODIS_DISASSEMBLER`` environment -variable to either ``ida`` or ``ghidra``. diff --git a/dragodis/ghidra/disassembler.py b/dragodis/ghidra/disassembler.py index 823e699..d1c2f47 100644 --- a/dragodis/ghidra/disassembler.py +++ b/dragodis/ghidra/disassembler.py @@ -4,8 +4,6 @@ import pathlib from typing import TYPE_CHECKING, Optional -import pyhidra - from dragodis import utils from dragodis.exceptions import NotInstalledError from dragodis.interface import BackendDisassembler @@ -90,6 +88,8 @@ def start(self): raise ValueError(f"Ghidra disassembler already running.") logger.debug(f"Starting pyhidra connection to {self.input_path}") + # Importing here since pyhidra requires environment variables to be setup during import. 
+ import pyhidra self._bridge = pyhidra.open_program(self.input_path) self._flatapi = self._bridge.__enter__() self._program = self._flatapi.getCurrentProgram() diff --git a/dragodis/ghidra/flat.py b/dragodis/ghidra/flat.py index ca0d354..ac99a0e 100644 --- a/dragodis/ghidra/flat.py +++ b/dragodis/ghidra/flat.py @@ -22,6 +22,7 @@ if TYPE_CHECKING: from ghidra.program.model.address import Address + import ghidra.program.model.listing class Ghidra(FlatAPI, GhidraDisassembler): @@ -32,7 +33,10 @@ def _to_addr(self, addr: int) -> "Address": :raises NotExistError: If overflow error occurs. """ try: - return self._flatapi.toAddr(hex(addr)) + address = self._flatapi.toAddr(hex(addr)) + if address is not None: + return address + raise NotExistError(f"Invalid address {hex(addr)}") except OverflowError: raise NotExistError(f"Invalid address {hex(addr)}. Expect 32 bit integer, got {addr.bit_length()}") @@ -257,11 +261,11 @@ def imports(self) -> Iterable[GhidraImport]: def exports(self) -> Iterable[GhidraExport]: symbol_table = self._program.getSymbolTable() for address in symbol_table.getExternalEntryPointIterator(): - symbol = list(symbol_table.getUserSymbols(address))[0] + symbol = symbol_table.getPrimarySymbol(address) yield GhidraExport(self, symbol) @cached_property - def _static_functions(self) -> List["ghidra.program.database.function.FunctionDB"]: + def _static_functions(self) -> List[ghidra.program.model.listing.Function]: """ Obtains the static functions defined by the FID service. 
""" diff --git a/dragodis/ghidra/function.py b/dragodis/ghidra/function.py index 4520f97..73d25a5 100644 --- a/dragodis/ghidra/function.py +++ b/dragodis/ghidra/function.py @@ -4,6 +4,7 @@ from typing import TYPE_CHECKING, Optional, Iterable from dragodis.ghidra.flowchart import GhidraFlowchart +from dragodis.ghidra.function_signature import GhidraFunctionSignature from dragodis.ghidra.instruction import GhidraInstruction from dragodis.ghidra.reference import GhidraReference from dragodis.ghidra.stack import GhidraStackFrame @@ -28,10 +29,6 @@ def __contains__(self, addr: int) -> bool: def _body(self) -> "ghidra.program.model.address.AddressSetView": return self._function.getBody() - @property - def _source_type(self) -> "ghidra.program.model.symbol.SourceType": - return self._function.getSignatureSource() - @property def start(self) -> int: return self._function.getEntryPoint().getOffset() @@ -60,7 +57,8 @@ def name(self) -> str: @name.setter def name(self, value: Optional[str]): - self._function.setName(value, self._source_type) + from ghidra.program.model.symbol import SourceType + self._function.setName(value, SourceType.USER_DEFINED) def set_comment(self, comment: Optional[str], comment_type=CommentType.plate): if comment_type in (CommentType.anterior, CommentType.plate): @@ -81,6 +79,10 @@ def source_code(self) -> Optional[str]: def stack_frame(self) -> GhidraStackFrame: return GhidraStackFrame(self._ghidra, self._function.getStackFrame()) + @property + def signature(self) -> GhidraFunctionSignature: + return GhidraFunctionSignature(self._ghidra, self._function) + @property def is_library(self) -> bool: return self._function in self._ghidra._static_functions diff --git a/dragodis/ghidra/function_signature.py b/dragodis/ghidra/function_signature.py index a83b4fe..c109de4 100644 --- a/dragodis/ghidra/function_signature.py +++ b/dragodis/ghidra/function_signature.py @@ -30,6 +30,31 @@ def declaration(self) -> str: # Including calling convention to be consistent 
with IDA. return self._function.getSignature().getPrototypeString(True) + @property + def calling_convention(self) -> str: + return self._function.getCallingConvention().getName() + + @calling_convention.setter + def calling_convention(self, name: str): + if not name.startswith("__"): + name = f"__{name}" + from ghidra.util.exception import InvalidInputException + try: + self._function.setCallingConvention(name.lower()) + except InvalidInputException as e: + raise ValueError(e) + + @property + def return_type(self) -> GhidraDataType: + return GhidraDataType(self._function.getReturnType()) + + @return_type.setter + def return_type(self, data_type: Union[GhidraDataType, str]): + from ghidra.program.model.symbol import SourceType + if isinstance(data_type, str): + data_type = self._ghidra.get_data_type(data_type) + self._function.setReturnType(data_type._data_type, SourceType.USER_DEFINED) + @property def parameters(self) -> List[FunctionParameter]: return [ diff --git a/dragodis/ghidra/instruction.py b/dragodis/ghidra/instruction.py index d666713..f92a5c5 100644 --- a/dragodis/ghidra/instruction.py +++ b/dragodis/ghidra/instruction.py @@ -1,4 +1,5 @@ from __future__ import annotations +import logging from typing import List, TYPE_CHECKING from dragodis.exceptions import NotExistError @@ -12,6 +13,9 @@ import ghidra +logger = logging.getLogger(__name__) + + class GhidraInstruction(Instruction): _Operand = GhidraOperand @@ -71,6 +75,11 @@ def root_mnemonic(self) -> str: @property def operands(self) -> List[GhidraOperand]: + # HACK: Ignore if operands are implied based on mnemonic. + # (This is done to better match how IDA does it.) 
+ if "ES:" in self.text: + logger.debug(f"Ignoring implied operands at 0x%X: %s", self.address, self.text) + return [] return [ self._Operand(self._ghidra, self, index) for index in range(self._instruction.getNumOperands()) @@ -93,13 +102,25 @@ def stack_depth(self) -> int: @property def stack_delta(self) -> int: - from ghidra.app.cmd.function import CallDepthChangeInfo - addr = self._instruction.getAddress() - func = self._ghidra._listing.getFunctionContaining(addr) - info = CallDepthChangeInfo(func) - delta = info.getInstructionStackDepthChange(self._instruction) - if delta == func.UNKNOWN_STACK_DEPTH_CHANGE: - delta = 0 + delta = 0 + flowType = self._instruction.getFlowType() + if flowType.isCall() and flowType.isUnConditional(): + # the delta we are looking for is actually stored in the function and not the instruction + flows = self._instruction.getFlows() + if len(flows) == 1: + func = self._ghidra._flatapi.getFunctionAt(flows[0]) + delta = func.getStackPurgeSize() + else: + logger.warn(f'unexpected number of flows: {flows}') + else: + from ghidra.app.cmd.function import CallDepthChangeInfo + addr = self._instruction.getAddress() + func = self._ghidra._listing.getFunctionContaining(addr) + info = CallDepthChangeInfo(func) + delta = info.getInstructionStackDepthChange(self._instruction) + if delta == func.UNKNOWN_STACK_DEPTH_CHANGE: + delta = 0 + logger.debug(f'stack delta at {self._instruction.getAddress()}: {delta}') return delta @@ -110,6 +131,23 @@ class GhidraARMInstruction(GhidraInstruction, ARMInstruction): class Ghidrax86Instruction(GhidraInstruction, x86Instruction): _Operand = Ghidrax86Operand + @property + def mnemonic(self) -> str: + mnemonic = super().mnemonic + # Strip off .rep* prefix if there. 
+ mnemonic, _, _ = mnemonic.partition(".rep") + return mnemonic + + @property + def rep(self) -> Optional[str]: + if self.data[0] in (0xF2, 0xF3): + mnemonic = str(self._instruction.getMnemonicString()).lower() + if ".rep" not in mnemonic: + raise AssertionError(f"Expected .rep suffix instruction at 0x{self.address:08x}") + _, _, rep = mnemonic.partition(".") + return rep + return None + GhidraInstruction._ARMInstruction = GhidraARMInstruction GhidraInstruction._x86Instruction = Ghidrax86Instruction diff --git a/dragodis/ida/data_type.py b/dragodis/ida/data_type.py index cdd1dc6..ede6b5b 100644 --- a/dragodis/ida/data_type.py +++ b/dragodis/ida/data_type.py @@ -1,8 +1,5 @@ from __future__ import annotations - -# from functools import cached_property -cached_property = property # FIXME: cached property disabled for now. from typing import TYPE_CHECKING from dragodis.interface.data_type import DataType @@ -26,7 +23,7 @@ def __init__( self._tinfo = tinfo self._address = address - @cached_property + @property def name(self) -> str: if self._tinfo: return str(self._tinfo).lower().strip("_") @@ -49,7 +46,7 @@ def name(self) -> str: flags &= self._ida._ida_bytes.DT_TYPE return TYPE_MAP[flags] - @cached_property + @property def size(self) -> int: if self._tinfo: return self._tinfo.get_size() diff --git a/dragodis/ida/flat.py b/dragodis/ida/flat.py index 1a2ce77..cbc9423 100644 --- a/dragodis/ida/flat.py +++ b/dragodis/ida/flat.py @@ -1,11 +1,10 @@ from __future__ import annotations -from functools import cached_property, lru_cache +from functools import lru_cache from .. import utils -cached_property = property # FIXME: cached property disabled for now. 
from typing import Iterable, Union from dragodis.interface.flat import FlatAPI @@ -29,14 +28,14 @@ class IDA(FlatAPI, IDADisassembler): - @cached_property + @property def _cached_memory(self): return CachedMemory(self) def _bytes_loaded(self, addr: int, num_bytes: int) -> bool: return self._ida_helpers.is_loaded(addr, num_bytes) - @cached_property + @property def bit_size(self) -> int: # IDA 7.6 adds ida_ida.inf_get_app_bitness() if self._idaapi.IDA_SDK_VERSION >= 760: @@ -49,7 +48,7 @@ def bit_size(self) -> int: else: return 16 - @cached_property + @property def is_big_endian(self) -> bool: return self._ida_ida.inf_is_be() @@ -233,7 +232,7 @@ def min_address(self) -> int: def open_memory(self, start: int, end: int) -> IDAMemory: return IDAMemory(self, start, end) - @cached_property + @property def processor_name(self) -> str: proc = self._ida_ida.inf_get_procname() # Switching "metapc" to "x86" to match Ghidra. diff --git a/dragodis/ida/flowchart.py b/dragodis/ida/flowchart.py index 95284a8..3b3ad7e 100644 --- a/dragodis/ida/flowchart.py +++ b/dragodis/ida/flowchart.py @@ -1,11 +1,7 @@ from __future__ import annotations - -from functools import cached_property -cached_property = property # FIXME: cached property disabled for now. from typing import Iterable, TYPE_CHECKING -from dragodis.ida.line import IDALine from dragodis.interface import Flowchart, BasicBlock, FlowType if TYPE_CHECKING: @@ -20,15 +16,15 @@ def __init__(self, ida: IDA, block: "ida_gdl.BasicBlock"): self._ida = ida self._block = block - @cached_property + @property def start(self) -> int: return self._block.start_ea - @cached_property + @property def end(self) -> int: return self._block.end_ea - @cached_property + @property def flow_type(self) -> FlowType: # IDA leaves self._block.type much to be desired, # so we'll just look at the last instruction instead. 
@@ -36,7 +32,7 @@ def flow_type(self) -> FlowType: return line.instruction.flow_type raise ValueError(f"Block at {hex(self.start)} has no instructions.") - @cached_property + @property def flowchart(self) -> "IDAFlowchart": return IDAFlowchart(self._ida, self._block._fc) diff --git a/dragodis/ida/function.py b/dragodis/ida/function.py index 8f228b8..69ca5ca 100644 --- a/dragodis/ida/function.py +++ b/dragodis/ida/function.py @@ -1,10 +1,5 @@ from __future__ import annotations - -from functools import cached_property -cached_property = property # FIXME: cached property disabled for now. - import logging -import re from typing import Optional, TYPE_CHECKING from dragodis.exceptions import * @@ -14,8 +9,6 @@ if TYPE_CHECKING: import ida_funcs - import ida_hexrays - import ida_typeinf from dragodis.ida.flat import IDA @@ -45,15 +38,15 @@ def __contains__(self, addr: int) -> bool: # return func is self # TODO: Should be able to do this when reuse caching is on return func.start == self.start - @cached_property + @property def start(self) -> int: return self._func_t.start_ea - @cached_property + @property def end(self) -> int: return self._func_t.end_ea - @cached_property + @property def flowchart(self) -> IDAFlowchart: return IDAFlowchart(self._ida, self._ida._ida_gdl.FlowChart(self._func_t)) @@ -93,7 +86,7 @@ def set_comment(self, comment: str, comment_type=CommentType.anterior): else: raise ValueError(f"Invalid comment type for function: {repr(comment_type)}") - @cached_property + @property def source_code(self) -> Optional[str]: decompiled_code = self._ida._ida_helpers.decompiled_code(self.start) if decompiled_code: diff --git a/dragodis/ida/function_argument_location.py b/dragodis/ida/function_argument_location.py index ec3c941..8d35c71 100644 --- a/dragodis/ida/function_argument_location.py +++ b/dragodis/ida/function_argument_location.py @@ -1,9 +1,5 @@ from __future__ import annotations - -from functools import cached_property -cached_property = property # FIXME: 
cached property disabled for now. - from typing import Tuple, TYPE_CHECKING from dragodis.ida.operand_value import IDARegister @@ -27,21 +23,21 @@ def __init__(self, ida: IDA, argloc: "ida_typeinf.argloc_t", size: int): class IDAStackLocation(StackLocation, IDAArgumentLocation): - @cached_property + @property def stack_offset(self) -> int: return self._argloc.stkoff() class IDARegisterLocation(RegisterLocation, IDAArgumentLocation): - @cached_property + @property def register(self) -> IDARegister: return IDARegister(self._ida, self._argloc.reg1(), self._size) class IDARegisterPairLocation(RegisterPairLocation, IDAArgumentLocation): - @cached_property + @property def registers(self) -> Tuple[IDARegister, IDARegister]: # Size is the combination of both registers. size = self._size // 2 @@ -53,12 +49,12 @@ def registers(self) -> Tuple[IDARegister, IDARegister]: class IDARelativeRegisterLocation(RelativeRegisterLocation, IDARegisterLocation): - @cached_property + @property def register(self) -> IDARegister: rrel = self._argloc.get_rrel() return IDARegister(self._ida, rrel.reg, self._size) - @cached_property + @property def offset(self) -> int: rrel = self._argloc.get_rrel() return rrel.off @@ -66,7 +62,7 @@ def offset(self) -> int: class IDAStaticLocation(StaticLocation, IDAArgumentLocation): - @cached_property + @property def address(self) -> int: return self._argloc.get_ea() diff --git a/dragodis/ida/function_signature.py b/dragodis/ida/function_signature.py index 96492c1..530ad48 100644 --- a/dragodis/ida/function_signature.py +++ b/dragodis/ida/function_signature.py @@ -2,18 +2,15 @@ from __future__ import annotations import logging import re -from typing import TYPE_CHECKING, List, Union, Optional, Tuple +from typing import TYPE_CHECKING, List, Union, Optional from dragodis.ida.data_type import IDADataType from dragodis.ida.function_argument_location import ( IDAArgumentLocation, IDAStaticLocation, IDARegisterLocation, IDAStackLocation, IDARegisterPairLocation, 
IDARelativeRegisterLocation ) -from dragodis.ida.operand import IDAOperand from dragodis.interface.function_signature import FunctionSignature, FunctionParameter from dragodis.exceptions import NotExistError -from dragodis.utils import cached_property -cached_property = property # FIXME: cached property disabled for now. if TYPE_CHECKING: import ida_typeinf @@ -27,6 +24,21 @@ class IDAFunctionSignature(FunctionSignature): # Caches function types. _func_types = set() + # pulled from ida_typeinf.CM_CC_* constants + _cc_map = { + 0x10: "__unknown", + 0x20: "__voidarg", + 0x30: "__cdecl", + 0x40: "__ellipsis", + 0x50: "__stdcall", + 0x60: "__Pascal", + 0x70: "__fastcall", + 0x80: "__thiscall", + 0xB0: "__golang", + 0xF0: "__usercall", + } + _cc_map_inv = {name: opcode for opcode, name in _cc_map.items()} + def __init__(self, ida: IDA, address: int): self._address = address self._ida = ida @@ -64,6 +76,40 @@ def declaration(self) -> str: # function typing to still work. return re.sub(r'\(', f' {self.name or "no_name"}(', f'{str(self._tif)};') + @property + def calling_convention(self) -> str: + cc = self._func_type_data.cc & self._ida._ida_typeinf.CM_CC_MASK + try: + return self._cc_map[cc] + except KeyError: + raise RuntimeError(f"{self} has unexpected calling convention: {hex(cc)}") + + @calling_convention.setter + def calling_convention(self, name: str): + if not name.startswith("__"): + name = f"__{name}" + name = name.lower() + try: + cc = self._cc_map_inv[name] + except KeyError: + raise ValueError(f"Invalid calling convention name: {name}") + # Set calling convention part of cm_t flags. 
+ cc |= self._func_type_data.cc & (self._ida._ida_typeinf.CM_CC_MASK ^ 0xff) + self._func_type_data.cc = cc + self._apply() + self._parameters = None + + @property + def return_type(self) -> IDADataType: + return IDADataType(self._ida, self._func_type_data.rettype) + + @return_type.setter + def return_type(self, data_type: Union[IDADataType, str]): + if isinstance(data_type, str): + data_type = self._ida.get_data_type(data_type) + self._func_type_data.rettype = data_type._tinfo + self._apply() + @property def parameters(self) -> List[IDAFunctionParameter]: if self._parameters is None: @@ -203,7 +249,7 @@ def ordinal(self) -> int: except ValueError: raise NotExistError(f"Parameter has been removed from function signature.") - @cached_property + @property def data_type(self) -> IDADataType: return IDADataType(self._ida, self._funcarg.type) diff --git a/dragodis/ida/instruction.py b/dragodis/ida/instruction.py index 8a1d2c9..540a43a 100644 --- a/dragodis/ida/instruction.py +++ b/dragodis/ida/instruction.py @@ -1,5 +1,5 @@ -from __future__ import annotations +from __future__ import annotations from typing import List, TYPE_CHECKING from dragodis.exceptions import NotExistError @@ -8,17 +8,10 @@ from dragodis.interface.instruction import ( Instruction, x86Instruction, ARMInstruction, ARMConditionCode ) -from dragodis.utils import cached_property -cached_property = property # FIXME: cached property disabled for now. if TYPE_CHECKING: from dragodis.ida.flat import IDA -# Used for typing. -# noinspection PyUnreachableCode -if False: - import ida_ua - # TODO: Perhaps have a local helper utility that pulls and caches all the instruction # objects for the function, ready to be accessed? 
@@ -38,7 +31,7 @@ def __init__(self, ida: IDA, addr: int): def address(self): return self._addr - @cached_property + @property def flow_type(self) -> FlowType: if self._ida._ida_idp.is_call_insn(self._insn_t): return FlowType.call @@ -58,30 +51,30 @@ def flow_type(self) -> FlowType: else: return FlowType.unconditional_jump - @cached_property + @property def mnemonic(self) -> str: return (self._ida._ida_ua.ua_mnem(self.address) or "").lower() - @cached_property + @property def root_mnemonic(self) -> str: return self._insn_t.get_canon_mnem().lower() - @cached_property + @property def operands(self) -> List[IDAOperand]: return [ self._Operand(self, self._ida, self.address, index, op) for index, op in self._ida._ida_helpers.get_operands(self.address) ] - @cached_property + @property def text(self) -> str: return self._ida._idc.GetDisasm(self.address) - @cached_property + @property def stack_depth(self) -> int: return self._ida._idc.get_spd(self.address) - @cached_property + @property def stack_delta(self) -> int: # NOTE: IDA gives the delta in relation to the previous instruction, # but we want the delta that this instructions applies. 
@@ -94,20 +87,30 @@ def stack_delta(self) -> int: class IDAx86Instruction(IDAInstruction, x86Instruction): _Operand = IDAx86Operand + @property + def rep(self) -> Optional[str]: + if self.data[0] in (0xF2, 0xF3): + text = self.text.lower() + if not text.startswith("rep"): + raise AssertionError(f"Expected instruction to start with rep: {text}") + rep, _, _ = text.partition(" ") + return rep + return None + class IDAARMInstruction(IDAInstruction, ARMInstruction): _Operand = IDAARMOperand - @cached_property + @property def update_flags(self) -> bool: return bool(self._insn_t.auxpref & self._ida._ida_arm.aux_cond) - @cached_property + @property def condition_code(self) -> ARMConditionCode: condition = self._ida._ida_arm.get_cond(self._insn_t) return ARMConditionCode(condition) - @cached_property + @property def writeback(self) -> bool: return bool( self._insn_t.auxpref & ( @@ -118,11 +121,11 @@ def writeback(self) -> bool: or self.mnemonic in ("push", "pop") ) - @cached_property + @property def pre_indexed(self) -> bool: return self.writeback and not self._insn_t.auxpref & self._ida._ida_arm.aux_postidx - @cached_property + @property def post_indexed(self) -> bool: return self.writeback and bool(self._insn_t.auxpref & self._ida._ida_arm.aux_postidx) diff --git a/dragodis/ida/line.py b/dragodis/ida/line.py index 35434e9..93bd3d9 100644 --- a/dragodis/ida/line.py +++ b/dragodis/ida/line.py @@ -23,12 +23,13 @@ class IDALine(Line): def __init__(self, ida: IDA, addr: int): super().__init__(ida) self._ida = ida + if addr < 0: + raise NotExistError(f"Got negative address: {addr}") # IDA has no concept of a "line", so we'll keep track of the start address, # which IDA refers to as the "head". 
start_addr = self._ida._ida_bytes.get_item_head(addr) - # If ida returns the unsigned value of -1 for some bit length - if (start_addr - (1 << start_addr.bit_length())) == -1: - raise NotExistError(f"Line at {hex(addr)} does not exist") + if start_addr == self._ida._BADADDR: + raise NotExistError(f"Line at {hex(addr)} does not exist.") self._addr = start_addr self._name = None diff --git a/dragodis/ida/memory.py b/dragodis/ida/memory.py index 89ff465..1be1762 100644 --- a/dragodis/ida/memory.py +++ b/dragodis/ida/memory.py @@ -117,7 +117,6 @@ def read(self, size: int = None) -> bytes: return b"" address = self.start + self._offset - # data = self._ida._ida_helpers.get_bytes(address, size) data = self._cache.get(address, size) self._offset += len(data) diff --git a/dragodis/ida/operand.py b/dragodis/ida/operand.py index 0abd2b6..bbb0ddf 100644 --- a/dragodis/ida/operand.py +++ b/dragodis/ida/operand.py @@ -1,5 +1,5 @@ -from __future__ import annotations +from __future__ import annotations from typing import Optional, TYPE_CHECKING, Union, Tuple from dragodis.exceptions import NotExistError @@ -12,17 +12,12 @@ IDAARMPhrase, IDAx86Phrase, IDARegisterList, ) from dragodis.interface.types import ARMShiftType -from dragodis.utils import cached_property if TYPE_CHECKING: + import ida_ua from dragodis.ida.flat import IDA from dragodis.ida.instruction import IDAInstruction -# Used for typing. -# noinspection PyUnreachableCode -if False: - import ida_ua - # TODO: Cache this operand to return same object for same address. class IDAOperand(Operand): @@ -53,23 +48,24 @@ def address(self) -> int: def index(self) -> int: return self._index - @cached_property + @property def text(self) -> str: # Get operand text and then remove the color tags. 
# (Doing same thing as idc.print_operand()) text = self._ida._ida_ua.print_operand(self.address, self.index) return self._ida._ida_lines.tag_remove(text) - @cached_property + @property def type(self) -> OperandType: - # Equivalent to idc.get_operand_type(), but using already cached op_t object. - op_type = self._op_t.type + # NOTE: Getting operand type using get_operand_type() instead of _op_t.type because + # there are strange issues where `_op_t.type` will sometimes give us something completely wrong. + op_type = self._ida._idc.get_operand_type(self._addr, self._index) try: return self._type_map[op_type] except KeyError: raise RuntimeError(f"Unexpected operand type: {op_type}") - @cached_property + @property def value(self) -> OperandValue: operand_type = self.type # TODO: Should we be recording both .value and .addr in the MemoryReference object? @@ -78,17 +74,21 @@ def value(self) -> OperandValue: elif operand_type == OperandType.register: # o_reg return IDARegister(self._ida, self._op_t.reg, self.width) elif operand_type == OperandType.immediate: # o_imm - return IDAImmediate(self._op_t.value) + value = self._op_t.value + # Need to mask off the value based on width since IDA will sometimes + # include ff bytes in the front causing the value to be incorrect. + value &= (1 << (8 * self.width)) - 1 + return IDAImmediate(value) # Architecture specific operands types like phrase should be handled by the # appropriate subclass. 
raise ValueError(f"Invalid operand type: {operand_type!r} @ {hex(self.address)}:{self.index}") - @cached_property + @property def width(self) -> int: return self._ida._ida_ua.get_dtype_size(self._op_t.dtype) - @cached_property + @property def variable(self) -> Optional[IDAVariable]: value = self.value if isinstance(value, Phrase): @@ -120,7 +120,7 @@ class IDAARMOperand(IDAOperand, ARMOperand): 9: OperandType.register_list, # ida_arm.o_reglist } - @cached_property + @property def shift(self) -> Tuple[ARMShiftType, Union[int, IDARegister]]: if self._op_t.type == self._ida._ida_ua.o_phrase: # For a phrase, shift count is in op.value @@ -137,7 +137,7 @@ def shift(self) -> Tuple[ARMShiftType, Union[int, IDARegister]]: return ARMShiftType.LSL, 0 # not shifted - @cached_property + @property def value(self) -> OperandValue: # Get value for ARM specific types. operand_type = self.type @@ -159,7 +159,7 @@ def value(self) -> OperandValue: class IDAx86Operand(IDAOperand, x86Operand): - @cached_property + @property def type(self) -> OperandType: # For x86, there is a weird corner case, where we could have something like # dword_40DC20[eax*4], which really is closer to a phrase, without @@ -176,7 +176,7 @@ def type(self) -> OperandType: pass return type - @cached_property + @property def value(self) -> OperandValue: # Get value for x86 specific types. operand_type = self.type diff --git a/dragodis/ida/operand_value.py b/dragodis/ida/operand_value.py index c996838..97ef41c 100644 --- a/dragodis/ida/operand_value.py +++ b/dragodis/ida/operand_value.py @@ -1,14 +1,11 @@ from __future__ import annotations - -from typing import TYPE_CHECKING, Union, List, Optional +from typing import TYPE_CHECKING, Union, Optional from dragodis.interface.operand_value import ( OperandValue, Immediate, MemoryReference, Register, RegisterList, Phrase, ) -from dragodis.utils import cached_property -cached_property = property # FIXME: cached property disabled for now. 
if TYPE_CHECKING: import ida_ua @@ -40,11 +37,11 @@ def __eq__(self, other: "IDARegister"): return self._reg == other._reg and self._width == other._width return False - @cached_property + @property def bit_width(self) -> int: return self._width * 8 - @cached_property + @property def name(self) -> str: return self._ida._ida_idp.get_reg_name(self._reg, self._width).lower() @@ -67,7 +64,7 @@ def __init__(self, ida: IDA, insn_t: "ida_ua.insn_t", op_t: "ida_ua.op_t"): self._op_t = op_t self._width = ida.bit_size // 8 - @cached_property + @property def base(self) -> IDARegister: """ The base register @@ -89,7 +86,7 @@ def scale(self) -> int: """ return 1 - @cached_property + @property def offset(self) -> Union[IDARegister, int]: """ The offset or displacement. @@ -130,7 +127,7 @@ def __init__(self, ida: IDA, insn_t: "ida_ua.insn_t", op_t: "ida_ua.op_t"): self._op_t = op_t self._width = ida.bit_size // 8 - @cached_property + @property def base(self) -> Optional[IDARegister]: """ The base register. @@ -142,7 +139,7 @@ def base(self) -> Optional[IDARegister]: return None return IDARegister(self._ida, base_reg, self._width) - @cached_property + @property def index(self) -> Optional[IDARegister]: """ The index register @@ -154,7 +151,7 @@ def index(self) -> Optional[IDARegister]: return None return IDARegister(self._ida, index_reg, self._width) - @cached_property + @property def scale(self) -> int: """ The scaling factor for the index. @@ -165,7 +162,7 @@ def scale(self) -> int: """ return 1 << self._ida._ida_intel.sib_scale(self._op_t) - @cached_property + @property def offset(self) -> int: """ The offset or displacement. diff --git a/dragodis/ida/reference.py b/dragodis/ida/reference.py index 43f7994..e3b0062 100644 --- a/dragodis/ida/reference.py +++ b/dragodis/ida/reference.py @@ -2,21 +2,13 @@ Interface for cross references. 
""" from __future__ import annotations - -from functools import cached_property -cached_property = property # FIXME: cached property disabled for now. - from typing import TYPE_CHECKING from dragodis.interface import Reference, ReferenceType if TYPE_CHECKING: - from dragodis.ida.flat import IDA - -# Used for typing -# noinspection PyUnreachableCode -if False: import ida_xref + from dragodis.ida.flat import IDA # noinspection PyPropertyAccess @@ -42,23 +34,23 @@ def __init__(self, ida: IDA, xref: "ida_xref.xrefblk_t"): self._ida = ida self._xref = xref - @cached_property + @property def from_address(self) -> int: return self._xref.frm - @cached_property + @property def is_code(self) -> bool: return bool(self._xref.iscode) - @cached_property + @property def is_data(self) -> bool: return self.type.name.startswith("data") # TODO: confirm - @cached_property + @property def to_address(self) -> int: return self._xref.to - @cached_property + @property def type(self) -> ReferenceType: try: return self._type_map[self._xref.type] diff --git a/dragodis/ida/sdk/ida_helpers.py b/dragodis/ida/sdk/ida_helpers.py index 679bef4..21e6977 100644 --- a/dragodis/ida/sdk/ida_helpers.py +++ b/dragodis/ida/sdk/ida_helpers.py @@ -321,7 +321,11 @@ def decompiled_code(address: int, _visited=None) -> Optional[ida_hexrays.cfuncpt logger.debug("Unable to load Hexrays decompiler.") return None fail_obj = ida_hexrays.hexrays_failure_t() - code = ida_hexrays.decompile(address, fail_obj) + try: + code = ida_hexrays.decompile(address, fail_obj) + except ida_hexrays.DecompilationFailure as e: + logger.warning(f"Failed to decompile function: {e}") + return None if code and not fail_obj.code: return code diff --git a/dragodis/ida/segment.py b/dragodis/ida/segment.py index a6b6e4e..eb2aa8e 100644 --- a/dragodis/ida/segment.py +++ b/dragodis/ida/segment.py @@ -1,14 +1,10 @@ from __future__ import annotations - from typing import TYPE_CHECKING, Iterable -from dragodis.exceptions import UnsupportedError 
from dragodis.ida.line import IDALine from dragodis.ida.memory import IDAMemory -from dragodis.interface import Segment, SegmentType, SegmentPermission -from dragodis.utils import cached_property -cached_property = property # FIXME: cached property disabled for now. +from dragodis.interface import Segment, SegmentPermission if TYPE_CHECKING: import ida_segment @@ -22,15 +18,15 @@ def __init__(self, ida: IDA, segment_t: "ida_segment.segment_t"): self._segment_t = segment_t self._end = None # caching for end address. - @cached_property + @property def name(self) -> str: return self._ida._ida_segment.get_segm_name(self._segment_t) - @cached_property + @property def start(self) -> int: return self._segment_t.start_ea - @cached_property + @property def end(self) -> int: if self._end is None: # Exclude any overlay of uninitialized bytes from the segment. @@ -43,11 +39,11 @@ def end(self) -> int: self._end = end return self._end - @cached_property + @property def bit_size(self) -> int: return self._segment_t.abits() - @cached_property + @property def permissions(self) -> SegmentPermission: perm = self._segment_t.perm ret = SegmentPermission(0) diff --git a/dragodis/interface/__init__.py b/dragodis/interface/__init__.py index 87df5e3..a2b11d3 100644 --- a/dragodis/interface/__init__.py +++ b/dragodis/interface/__init__.py @@ -19,5 +19,6 @@ from .reference import Reference from .segment import Segment from .stack import StackFrame +from .symbol import Symbol from .types import * from .variable import * diff --git a/dragodis/interface/data_type.py b/dragodis/interface/data_type.py index 8b00af5..cc3de2a 100644 --- a/dragodis/interface/data_type.py +++ b/dragodis/interface/data_type.py @@ -10,6 +10,12 @@ class DataType(metaclass=abc.ABCMeta): def __str__(self) -> str: return self.name + def __repr__(self) -> str: + return f"" + + def __eq__(self, other): + return isinstance(other, DataType) and self.name == other.name and self.size == other.size + @property def name(self) -> 
str: """ diff --git a/dragodis/interface/flat.py b/dragodis/interface/flat.py index 01c59d9..f823f97 100644 --- a/dragodis/interface/flat.py +++ b/dragodis/interface/flat.py @@ -41,6 +41,12 @@ def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) # forward to Disassembler class self.__capstone_dis = None + def __repr__(self): + return ( + f"" + ) + @property def _capstone_dis(self) -> capstone.Cs: """ diff --git a/dragodis/interface/flowchart.py b/dragodis/interface/flowchart.py index 2ce1421..27ac85a 100644 --- a/dragodis/interface/flowchart.py +++ b/dragodis/interface/flowchart.py @@ -16,8 +16,11 @@ class BasicBlock(metaclass=abc.ABCMeta): def __init__(self, api: FlatAPI): self._api = api - def __repr__(self): - return f" 0x{self.end:08x}>" + def __str__(self) -> str: + return f"block[0x{self.start:08x} --> 0x{self.end:08x}]" + + def __repr__(self) -> str: + return f" 0x{self.end:08x}>" def __hash__(self): return hash(self.start) @@ -161,11 +164,15 @@ def __eq__(self, other): return False return all(b1 == b2 for b1, b2 in zip(self.blocks, other.blocks)) - def __repr__(self): + def __str__(self) -> str: # Get first block to indicate address. start = self.start start = f"0x{start:08x}" if start is not None else "Empty" - return f"" + return f"flowchart[{start}]" + + def __repr__(self): + blocks = "\t\n".join(map(repr, self.blocks)) + return f"" # TODO: This function should enforce that the order of the blocks is by address? # I.E. Whatever the order IDA does. 
diff --git a/dragodis/interface/function.py b/dragodis/interface/function.py index 49acde8..9b65cee 100644 --- a/dragodis/interface/function.py +++ b/dragodis/interface/function.py @@ -9,7 +9,7 @@ from dragodis.interface.types import ReferenceType if TYPE_CHECKING: - from dragodis.interface import Reference, Instruction, Flowchart, FlatAPI + from dragodis.interface import Reference, Instruction, Flowchart, FlatAPI, Variable from dragodis.interface.line import CommentType, Line @@ -29,12 +29,11 @@ def __contains__(self, addr: int) -> bool: """ return self.start <= addr < self.end + def __str__(self) -> str: + return f"{self.name}()" + def __repr__(self): - return ( - f"" - ) + return f"" @property @abc.abstractmethod @@ -82,6 +81,9 @@ def lines(self, start: int = None, end: int = None, reverse=False) -> Iterable[L NOTE: This is BFS using the flowchart. If you need something simpler you can use .lines() directly: + + .. code:: python + lines = dis.lines(func.start, func.end) """ for line in self.flowchart.lines(start, reverse=reverse): @@ -98,6 +100,14 @@ def instructions(self, start: int = None, end: int = None, reverse=False) -> Ite if insn: yield insn + @property + def variables(self) -> Iterable[Variable]: + """ + Iterates the variables in the function. 
+ """ + for insn in self.instructions(): + yield from insn.variables + @property @abc.abstractmethod def name(self) -> str: diff --git a/dragodis/interface/function_argument_location.py b/dragodis/interface/function_argument_location.py index 418eaef..9f7979f 100644 --- a/dragodis/interface/function_argument_location.py +++ b/dragodis/interface/function_argument_location.py @@ -13,6 +13,12 @@ class ArgumentLocation(metaclass=abc.ABCMeta): class StackLocation(ArgumentLocation): + def __str__(self) -> str: + return f"stack[0x{self.stack_offset:x}]" + + def __repr__(self) -> str: + return f"" + @property @abc.abstractmethod def stack_offset(self) -> int: @@ -28,6 +34,12 @@ def stack_offset(self) -> int: class RegisterLocation(ArgumentLocation): + def __str__(self) -> str: + return self.register.name + + def __repr__(self) -> str: + return f"" + @property @abc.abstractmethod def register(self) -> Register: @@ -38,6 +50,12 @@ def register(self) -> Register: class RegisterPairLocation(ArgumentLocation): + def __str__(self) -> str: + return f"({', '.join(reg.name for reg in self.registers)})" + + def __repr__(self) -> str: + return f"" + @property @abc.abstractmethod def registers(self) -> Tuple[Register, Register]: @@ -48,6 +66,12 @@ def registers(self) -> Tuple[Register, Register]: class RelativeRegisterLocation(RegisterLocation): + def __str__(self) -> str: + return f"{self.register.name}[0x{self.offset:x}]" + + def __repr__(self) -> str: + return f"" + @property @abc.abstractmethod def offset(self) -> int: @@ -58,6 +82,12 @@ def offset(self) -> int: class StaticLocation(ArgumentLocation): + def __str__(self) -> str: + return f"0x{self.address:08x}" + + def __repr__(self) -> str: + return f"" + @property @abc.abstractmethod def address(self) -> int: diff --git a/dragodis/interface/function_signature.py b/dragodis/interface/function_signature.py index 561c503..8b6ba26 100644 --- a/dragodis/interface/function_signature.py +++ b/dragodis/interface/function_signature.py @@ 
-8,12 +8,17 @@ from dragodis.interface.function_argument_location import ArgumentLocation -# TODO: Add support for getting return type. class FunctionSignature(metaclass=abc.ABCMeta): """ Interface for a function signature. """ + def __str__(self) -> str: + return self.declaration + + def __repr__(self) -> str: + return f"" + @property @abc.abstractmethod def name(self) -> str: @@ -29,6 +34,35 @@ def declaration(self) -> str: """ @property + @abc.abstractmethod + def calling_convention(self) -> str: + """ + The calling convention used in the function signature. + """ + + @calling_convention.setter + @abc.abstractmethod + def calling_convention(self, name: str): + """ + Sets the calling convention for the function signature. + """ + + @property + @abc.abstractmethod + def return_type(self) -> DataType: + """ + The return type of the function. + """ + + @return_type.setter + @abc.abstractmethod + def return_type(self, data_type: Union[DataType, str]): + """ + Sets the return type of the function with given data type. + """ + + @property + @abc.abstractmethod def parameters(self) -> List[FunctionParameter]: """ List of parameters in the function signature. @@ -90,6 +124,12 @@ class FunctionParameter(metaclass=abc.ABCMeta): def __init__(self, signature: FunctionSignature): self.signature = signature + def __str__(self) -> str: + return f"{self.location}: {self.declaration}" + + def __repr__(self) -> str: + return f"" + # TODO: Provide ability to change name? 
@property @abc.abstractmethod diff --git a/dragodis/interface/instruction.py b/dragodis/interface/instruction.py index c5b0f14..4072c24 100644 --- a/dragodis/interface/instruction.py +++ b/dragodis/interface/instruction.py @@ -10,7 +10,7 @@ from dragodis.interface.operand import Operand, ARMOperand, x86Operand if TYPE_CHECKING: - from dragodis.interface import Reference, FlatAPI, Line + from dragodis.interface import Reference, FlatAPI, Line, Variable class Instruction(metaclass=abc.ABCMeta): @@ -201,6 +201,15 @@ def stack_delta(self) -> int: The change in stack depth if the instruction was applied. """ + @property + def variables(self) -> Iterable[Variable]: + """ + Iterates the variables in the instruction. + """ + for operand in self.operands: + if variable := operand.variable: + yield variable + class ARMConditionCode(IntEnum): INVALID = -1 @@ -260,6 +269,9 @@ def pre_indexed(self) -> bool: """ Whether the instruction has a pre-indexed writeback. Ie, the register is updated before evaluation: + + .. code:: + [R1, 8]! """ return self.writeback and "!" in self.text @@ -269,6 +281,9 @@ def post_indexed(self) -> bool: """ Whether the instruction has a post-indexed writeback. Ie, the register is updated after evaluation: + + .. code:: + [R1], 8 """ return self.writeback and "!" not in self.text @@ -277,6 +292,17 @@ def post_indexed(self) -> bool: class x86Instruction(Instruction): _Operand = x86Operand + @property + @abc.abstractmethod + def rep(self) -> Optional[str]: + """ + Rep prefix applied to instruction if provided. + + .. 
code:: + + rep + repne + """ Instruction._ARMInstruction = ARMInstruction diff --git a/dragodis/interface/line.py b/dragodis/interface/line.py index d99764d..b35fd8b 100644 --- a/dragodis/interface/line.py +++ b/dragodis/interface/line.py @@ -23,6 +23,9 @@ def __len__(self): """Here for convenience""" return self.size + def __str__(self): + return f"0x{self.address:08x}: {self.value}" + def __repr__(self): return f"" diff --git a/dragodis/interface/memory.py b/dragodis/interface/memory.py index bc356b0..1e4dc31 100644 --- a/dragodis/interface/memory.py +++ b/dragodis/interface/memory.py @@ -22,6 +22,9 @@ def __init__(self, start: int, end: int): self.end = end self._offset = 0 + def __repr__(self) -> str: + return f"" + def __enter__(self): return self diff --git a/dragodis/interface/operand.py b/dragodis/interface/operand.py index 2f430d9..fb6ffe3 100644 --- a/dragodis/interface/operand.py +++ b/dragodis/interface/operand.py @@ -27,9 +27,12 @@ def __init__(self, instruction: Instruction): """ self.instruction = instruction - def __str__(self): + def __str__(self) -> str: return self.text + def __repr__(self) -> str: + return f"" + @property def _capstone_op(self) -> Union[capstone.arm.ArmOp, capstone.x86.X86Op]: return self.instruction._capstone_insn.operands[self.index] diff --git a/dragodis/interface/operand_value.py b/dragodis/interface/operand_value.py index 74018ab..ea14b54 100644 --- a/dragodis/interface/operand_value.py +++ b/dragodis/interface/operand_value.py @@ -21,6 +21,12 @@ class Immediate(int, OperandValue): Defines an immediate or constant used in an operand. """ + def __str__(self) -> str: + return str(int(self)) + + def __repr__(self) -> str: + return f"" + class MemoryReference(int, OperandValue): """ @@ -28,15 +34,13 @@ class MemoryReference(int, OperandValue): item in the disassembler. """ - @property - @abc.abstractmethod - def name(self) -> str: - """ - The referenced name of the defined memory reference. 
- """ + def __str__(self) -> str: + return f"0x{self:08x}" + + def __repr__(self) -> str: + return f"" -# TODO: Should Register be of type str? class Register(OperandValue, metaclass=abc.ABCMeta): """ Register objects represent the register components of operands. @@ -46,7 +50,7 @@ def __str__(self): return self.name def __repr__(self): - return f"" + return f"" @abc.abstractmethod def __eq__(self, register: "Register"): @@ -66,27 +70,60 @@ def name(self) -> str: class RegisterList(List[Register], OperandValue): """ Defines a list of registers used as an operand. - e.g. + + .. code:: + {R4-R10,LR} """ + def __str__(self) -> str: + return f"{{{','.join(self)}}}" + + def __repr__(self) -> str: + return f"" + class Phrase(OperandValue): """ Defines an operand phrase of one of the following forms: + + .. code:: + [base + index * scale + offset] [base + offset] """ # TODO: For capstone, x86 had "segment" and ARM had "lshift" + def __str__(self) -> str: + segments = [] + if (base := self.base) is not None: + segments.append(str(base)) + if (index := self.index) is not None: + segments.append(f"{index}*0x{self.scale:x}") + if offset := self.offset: + if isinstance(offset, int): + segments.append(f"0x{offset:x}") + else: + segments.append(str(offset)) + return f"[{' + '.join(segments)}]" + + def __repr__(self) -> str: + offset = self.offset + if isinstance(offset, int): + offset = f"0x{offset:x}" + return f"" + @property @abc.abstractmethod def base(self) -> Optional[Register]: """ The base register if operand is a phrase. May be None if there is no base: - e.g. dword ptr [EAX*0x4 + DAT_0040dc20] + + .. 
code:: + + dword ptr [EAX*0x4 + DAT_0040dc20] """ @property diff --git a/dragodis/interface/reference.py b/dragodis/interface/reference.py index d02504c..fc87d83 100644 --- a/dragodis/interface/reference.py +++ b/dragodis/interface/reference.py @@ -12,6 +12,12 @@ class Reference(metaclass=abc.ABCMeta): References represent the references to or from any address or function found in a disassembler. """ + def __str__(self) -> str: + return f"{self.type.name}: 0x{self.from_address:08x} --> 0x{self.to_address:08x}" + + def __repr__(self) -> str: + return f" 0x{self.to_address:08x}>" + @property @abc.abstractmethod def from_address(self) -> int: diff --git a/dragodis/interface/segment.py b/dragodis/interface/segment.py index 29b3209..0faa6ed 100644 --- a/dragodis/interface/segment.py +++ b/dragodis/interface/segment.py @@ -15,8 +15,11 @@ class Segment(metaclass=abc.ABCMeta): Interface for accessing segment information. (Sometimes referred to as 'sections') """ + def __str__(self) -> str: + return f"{self.name}: 0x{self.start:08x} --> 0x{self.end:08x}" + def __repr__(self): - return f" 0x{self.end:08x}>" + return f"" def __contains__(self, addr: int) -> bool: """ diff --git a/dragodis/interface/stack.py b/dragodis/interface/stack.py index ec8687c..a22b7c6 100644 --- a/dragodis/interface/stack.py +++ b/dragodis/interface/stack.py @@ -12,6 +12,12 @@ class StackFrame(MutableMapping, metaclass=abc.ABCMeta): """Function Stack Frame""" + def __str__(self) -> str: + return str(dict(self)) + + def __repr__(self): + return f"" + @abc.abstractmethod def __getitem__(self, name_or_offset: Union[str, int]) -> StackVariable: """ diff --git a/dragodis/interface/symbol.py b/dragodis/interface/symbol.py index fd927bb..308e699 100644 --- a/dragodis/interface/symbol.py +++ b/dragodis/interface/symbol.py @@ -16,6 +16,12 @@ class Symbol(metaclass=abc.ABCMeta): Symbols match a specific address to a string name. 
""" + def __str__(self) -> str: + return f"{self.name}: 0x{self.address:08x}" + + def __repr__(self) -> str: + return f"" + @property @abc.abstractmethod def address(self) -> int: @@ -43,6 +49,12 @@ class Import(Symbol): Imports are a type of Symbol which have an external source or module. """ + def __str__(self) -> str: + return f"{self.namespace}/{self.name}: 0x{self.address:08x}" + + def __repr__(self) -> str: + return f"" + @property @abc.abstractmethod def namespace(self) -> Optional[str]: @@ -57,3 +69,5 @@ class Export(Symbol): Exports are a type of Symbol which are declared entry points to the binary. """ + def __repr__(self) -> str: + return f"" diff --git a/dragodis/interface/variable.py b/dragodis/interface/variable.py index 6bd170f..7abf5c3 100644 --- a/dragodis/interface/variable.py +++ b/dragodis/interface/variable.py @@ -9,6 +9,12 @@ class Variable(metaclass=ABCMeta): """Function Local/Global Variable""" + def __str__(self) -> str: + return f"{self.data_type} {self.name}" + + def __repr__(self) -> str: + return f"" + @property @abstractmethod def name(self) -> str: @@ -38,6 +44,12 @@ def data_type(self) -> DataType: class GlobalVariable(Variable, metaclass=ABCMeta): """Global variable usually defined in the .data section.""" + def __str__(self) -> str: + return f"0x{self.address:08x}: {super().__str__()}" + + def __repr__(self) -> str: + return f"" + @property @abstractmethod def address(self) -> int: @@ -49,6 +61,12 @@ def address(self) -> int: class StackVariable(Variable, metaclass=ABCMeta): """Function Stack/Local Variable""" + def __str__(self) -> str: + return f"stack[0x{self.stack_offset:x}]: {super().__str__()}" + + def __repr__(self) -> str: + return f"" + @property @abstractmethod def stack_offset(self) -> int: diff --git a/noxfile.py b/noxfile.py index f8da7de..f02b9a6 100644 --- a/noxfile.py +++ b/noxfile.py @@ -32,6 +32,7 @@ def doc(session): shutil.rmtree("dist/docs", ignore_errors=True) session.install("sphinx") 
session.install("sphinx-rtd-theme") + session.install("myst-parser") session.install("-e", ".") # Autodoc diff --git a/setup.cfg b/setup.cfg index 72ea54b..65bbe51 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,8 +1,36 @@ [metadata] +name = dragodis version = attr:dragodis.__version__ +author = DC3 +license = MIT +url = https://github.com/dod-cyber-crime-center/dragodis description = A universal interface for running scripts under multiple disassemblers. long-description-content-type = text/markdown long-description = file:README.md +keywords = malware, ida, idapro, ghidra, disassembler +classifiers = + Development Status :: 4 - Beta + Intended Audience :: Developers + License :: OSI Approved :: MIT License + Programming Language :: Python :: 3 + Programming Language :: Python :: 3.8 + +[options] +include_package_data = True +packages = find: +python_requires = >=3.8 +install_requires = + bytesparse + capstone + rpyc + pyhidra>=0.1.4 + pywin32; platform_system == 'Windows' + pefile + pyelftools + +[options.extras_require] +testing = + pytest>=3.0.0 [tool:pytest] testpaths = diff --git a/setup.py b/setup.py index e912223..d67e563 100644 --- a/setup.py +++ b/setup.py @@ -1,37 +1,6 @@ -#!/usr/bin/python -# -*- coding: utf-8 -*- -import sys -from setuptools import setup, find_packages +from setuptools import setup -setup( - name="dragodis", - author="DC3", - url="https://github.com/Defense-Cyber-Crime-Center/Dragodis", - keywords=["malware", "ida", "idapro", "ghidra", "disassembler"], - classifiers=[ - "Development Status :: 3 - Alpha", - "Intended Audience :: Developers", - "License :: OSI Approved :: MIT License", - "Programming Language :: Python :: 3", - "Programming Language :: Python :: 3.8", - ], - packages=find_packages(), - include_package_data=True, - license="MIT", - python_requires=">=3.8", - install_requires=[ - "bytesparse", - "capstone", - "rpyc", - "pyhidra>=0.1.4", - "pywin32; platform_system == 'Windows'", - "pefile", - "pyelftools", - ], - 
extras_require={ - "testing": [ - "pytest", - ] - } -) + +if __name__ == "__main__": + setup() diff --git a/tests/test_function_signature.py b/tests/test_function_signature.py index 9745197..60e82ad 100644 --- a/tests/test_function_signature.py +++ b/tests/test_function_signature.py @@ -43,6 +43,42 @@ def test_signature_declaration_ghidra(disassembler, address, declaration): assert signature.declaration == declaration +@pytest.mark.parametrize("address,calling_convention", [ + (0x401000, "__cdecl"), + (0x40a0c4, "__stdcall"), +]) +def test_calling_convention(disassembler, address, calling_convention): + signature = disassembler.get_function_signature(address) + # test getting calling convention + assert signature.calling_convention == calling_convention + # Ghidra doesn't include the "__" in declaration + assert calling_convention.lstrip("_") in signature.declaration + # test changing calling convention + signature.calling_convention = "fastcall" + assert signature.calling_convention == "__fastcall" + assert "fastcall" in signature.declaration + # reset + signature.calling_convention = calling_convention + assert signature.calling_convention == calling_convention + + +@pytest.mark.parametrize("address,return_types", [ + (0x401150, ("int", "undefined4")), + (0x40a0c4, ("lpvoid", "LPVOID")), +]) +def test_return_type(disassembler, address, return_types): + signature = disassembler.get_function_signature(address) + # test getting the return type + orig_type = signature.return_type + assert str(orig_type) in return_types + # test setting the return type + signature.return_type = "char *" + assert str(signature.return_type) == "char *" + # reset and test setting with DataType object. 
+ signature.return_type = orig_type + assert signature.return_type == orig_type + + def test_parameters(disassembler): signature = disassembler.get_function_signature(0x401000) assert len(signature.parameters) == 2 diff --git a/tests/test_instruction.py b/tests/test_instruction.py index 417ae5c..33c2e53 100644 --- a/tests/test_instruction.py +++ b/tests/test_instruction.py @@ -62,10 +62,16 @@ def test_flow_type(disassembler, address, flow_type): @pytest.mark.parametrize("address,root_mnem", [ (0x40100D, "movsx"), + # original has "rep movsd" or "MOVSD.REP" + # NOTE: not requiring the "d" to be removed since that is not possible for Ghidra to detect. + (0x405c5a, ("movsd", "movs")), ]) def test_root_mnemonic_x86(disassembler, address, root_mnem): instruction = disassembler.get_instruction(address) - assert instruction.root_mnemonic == root_mnem + if isinstance(root_mnem, tuple): + assert instruction.root_mnemonic in root_mnem + else: + assert instruction.root_mnemonic == root_mnem @pytest.mark.parametrize("address,root_mnem", [ @@ -77,6 +83,16 @@ def test_root_mnemonic_arm(disassembler, address, root_mnem): assert instruction.root_mnemonic == root_mnem +@pytest.mark.parametrize("address,rep", [ + (0x4047aa, "rep"), # rep movsd + (0x408590, "rep"), # rep stosd + (0x40858a, None), # mov +]) +def test_rep_x86(disassembler, address, rep): + instruction = disassembler.get_instruction(address) + assert instruction.rep == rep + + @pytest.mark.parametrize("address,condition_code", [ (0x106cc, ARMConditionCode.NE), # bne (0x103e0, ARMConditionCode.NE), # ldmiane (popne for IDA)