Skip to content

Commit

Permalink
Made the project installable with pip
Browse files Browse the repository at this point in the history
  • Loading branch information
bontchev committed Feb 9, 2018
1 parent b9821b0 commit ce62bb5
Show file tree
Hide file tree
Showing 6 changed files with 227 additions and 13 deletions.
91 changes: 91 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
# Created by .ignore support plugin (hsz.mobi)
### Python template
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
env/
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
*.egg-info/
.installed.cfg
*.egg

# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*,cover
.hypothesis/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
target/

# IPython Notebook
.ipynb_checkpoints

# pyenv
.python-version

# celery beat schedule file
celerybeat-schedule

# dotenv
.env

# virtualenv
venv/
ENV/

# Spyder project settings
.spyderproject

# Rope project settings
.ropeproject
9 changes: 8 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
#pcodedmp.py - A VBA p-code disassembler
# pcodedmp.py - A VBA p-code disassembler

## Introduction

Expand Down Expand Up @@ -79,10 +79,14 @@ For reference, it is the result of compiling the following VBA code:

- Office 2016 64-bit only: The `Declare` part of external function declarations (e.g., `Private Declare PtrSafe Function SomeFunc Lib "SomeLib" Alias "SomeName" () As Long`) is not disassembled.

- Office 2000 and higher: The type of a subroutine or function argument of type `ParamArray` is not disassembled correctly. For instance, `Sub Foo (ParamArray arg())` will be disassembled as `Sub Foo (arg)`.

- All versions of Office: The `Alias "SomeName"` part of external function declarations (e.g., `Private Declare PtrSafe Function SomeFunc Lib "SomeLib" Alias "SomeName" () As Long`) is not disassembled.

- All versions of Office: The `Public` property of custom type definitions (e.g., `Public Type SomeType`) is not disassembled.

- All versions of Office: The custom type of a subroutine or function argument is not disassembled correctly and `CustomType` is used instead. For instance, `Sub Foo (arg As Bar)` will be disassembled as `Sub Foo (arg As CustomType)`.

I do not have access to 64-bit Office 2016 and the few samples of documents, generated by this version of Office, that I have, have been insufficient for me to figure out where the corresponding information resides. I know where it resides in the other versions of Office, but it has been moved elsewhere in 64-bit Office 2016 and the old algorithms no longer work.

## To do
Expand All @@ -104,3 +108,6 @@ Version 1.2.0: Disassembling the various declarations (`New`, `Type`, `Dim`, `Re
Version 1.2.1: Now runs under Python 3.x too. Improved support of 64-bit Office documents. Implemented support of some VBA7-specific features (`Friend`, `PtrSafe`, `LongPtr`). Improved the disassembling of `Dim` declarations.

Version 1.2.2: Implemented handling of documents saved in Open XML format (which is the default format of Office 2007 and higher) - `.docm`, `.xlsm`, `.pptm`.

Version 1.2.3: Fixed a few crashes and better documented some disassembly failures. Converted the script into a package that can be installed with `pip`. Use the command `pip install pcodedmp`.

Empty file added pcodedmp/__init__.py
Empty file.
33 changes: 21 additions & 12 deletions pcodedmp.py → pcodedmp/pcodedmp.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,12 @@ def decode(x):
def decode(x):
return x

__author__ = 'Vesselin Bontchev <[email protected]>'
__description__ = 'A VBA p-code disassembler'
__license__ = 'GPL'
__VERSION__ = '1.2.2'
__uri__ = 'https://github.com/bontchev/pcodedmp'
__VERSION__ = '1.2.3'
__author__ = 'Vesselin Bontchev'
__email__ = '[email protected]'

def hexdump(buffer, length=16):
theHex = lambda data: ' '.join('{:02X}'.format(ord(i)) for i in data)
Expand Down Expand Up @@ -850,16 +853,18 @@ def disasmArg(indirectTable, identifiers, argOffset, endian, vbaVer, is64bit):
argName = 'ByRef ' + argName
if (argOpts & 0x0200):
argName = 'Optional ' + argName
if ((flags & 0x0040) == 0):
argName = 'ParamArray ' + argName + '()'
# TODO - ParamArray arguments aren't disassembled properly
#if ((flags & 0x0040) == 0):
# argName = 'ParamArray ' + argName + '()'
if (flags & 0x0020):
argName += ' As '
argTypeName = ''
if ((argType & 0xFFFF0000) == 0xFFFF0000):
if (argType & 0xFFFF0000):
argTypeID = argType & 0x000000FF
argTypeName = getTypeName(argTypeID)
else:
argTypeName = getName(indirectTable, identifiers, argType + 6, endian, vbaVer, is64bit)
# TODO - Custom type arguments aren't disassembled properly
#else:
# argTypeName = getName(indirectTable, identifiers, argType + 6, endian, vbaVer, is64bit)
argName += argTypeName
return argName

Expand Down Expand Up @@ -917,7 +922,7 @@ def disasmFunc(indirectTable, declarationTable, identifiers, dword, opType, endi
funcDecl += ' Lib "' + libName + '" '
argList = []
while ((argOffset != 0xFFFFFFFF) and (argOffset != 0) and (argOffset + 26 < len(indirectTable))):
argName = disasmArg(indirectTable,identifiers, argOffset, endian, vbaVer, is64bit)
argName = disasmArg(indirectTable, identifiers, argOffset, endian, vbaVer, is64bit)
argList.append(argName)
argOffset = getDWord(indirectTable, argOffset + 20, endian)
funcDecl += '(' + ', '.join(argList) + ')'
Expand Down Expand Up @@ -1070,7 +1075,7 @@ def pcodeDump(moduleData, vbaProjectData, dirData, identifiers, is64bit, verbose
if (verbose):
print('Internal Office version: 0x{:04X}.'.format(version))
# Office 2010 is 0x0097; Office 2013 is 0x00A3;
# Office 2016 PC 32-bt is 0x00B2, 64-bit is 0x00D7, Mac is 0x00D9
# Office 2016 PC 32-bit is 0x00B2, 64-bit is 0x00D7, Mac is 0x00D9
if (version >= 0x6B):
if (version >= 0x97):
vbaVer = 7
Expand Down Expand Up @@ -1170,7 +1175,6 @@ def processProject(vbaParser, verbose, disasmOnly):
i += 1
print('')
print('_VBA_PROJECT parsing done.')
if (not disasmOnly):
print('-' * 79)
print('Module streams:')
for module in codeModules:
Expand All @@ -1185,6 +1189,7 @@ def processFile(fileName, verbose, disasmOnly):
# TODO:
# - Handle VBA3 documents
print('Processing file: {}'.format(fileName))
vbaParser = None
try:
vbaParser = VBA_Parser(fileName)
if (vbaParser.ole_file is None):
Expand All @@ -1194,9 +1199,10 @@ def processFile(fileName, verbose, disasmOnly):
processProject(vbaParser, verbose, disasmOnly)
except Exception as e:
print('Error: {}.'.format(e), file=sys.stderr)
vbaParser.close()
if (vbaParser):
vbaParser.close()

if __name__ == '__main__':
def main():
parser = argparse.ArgumentParser(description='Dumps the p-code of VBA-containing documents.')
parser.add_argument('-v', '--version', action='version',
version='%(prog)s version {}'.format(__VERSION__))
Expand Down Expand Up @@ -1226,3 +1232,6 @@ def processFile(fileName, verbose, disasmOnly):
print('Error: {}.'.format(e), file=sys.stderr)
sys.exit(-1)
sys.exit(0)

if __name__ == '__main__':
main()
5 changes: 5 additions & 0 deletions setup.cfg
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
[bdist_wheel]
universal = 1

[metadata]
license_file = LICENSE
102 changes: 102 additions & 0 deletions setup.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
#!/usr/bin/env python

import codecs
import os
import re

try:
from setuptools import setup
except ImportError:
from distutils.core import setup

try:
    # Newer pypandoc releases expose convert_file() and no longer ship the
    # old convert() helper; prefer the new name and fall back to the old one
    # so that both old and new installations keep working.
    try:
        from pypandoc import convert_file as convert
    except ImportError:
        from pypandoc import convert

    def read_md(f):
        """Return the Markdown file *f* converted to reStructuredText."""
        return convert(f, 'rst', format='md')

except ImportError:
    print("warning: pypandoc module not found, "
          "could not convert Markdown to RST")

    def read_md(f):
        """Return the unconverted contents of the Markdown file *f*.

        The file is decoded as UTF-8, the same encoding read() assumes.
        """
        # A context manager closes the handle deterministically instead of
        # leaving it to the garbage collector, and the explicit encoding
        # keeps the result independent of the build machine's locale.
        with codecs.open(f, 'r', 'utf-8') as md_file:
            return md_file.read()

###################################################################

# Project name; also used as the package directory and the module file name.
NAME = 'pcodedmp'

# Search keywords and runtime dependencies handed to setup().
KEYWORDS = ['vba', 'p-code', 'disassembler']
INSTALL_REQUIRES = ['oletools>=0.50']

# The distribution contains a single package named after the project.
PACKAGES = [NAME]

# Relative path of the module whose __dunder__ strings hold the metadata.
META_PATH = os.path.join(NAME, NAME + '.py')

# Trove classifiers describing the project.
CLASSIFIERS = [
    'Development Status :: 5 - Production/Stable',
    'Environment :: Console',
    'Intended Audience :: Developers',
    'License :: OSI Approved :: GNU General Public License v3 (GPLv3)',
    'Natural Language :: English',
    'Operating System :: OS Independent',
    'Programming Language :: Python',
    'Programming Language :: Python :: 2',
    'Programming Language :: Python :: 2.7',
    'Programming Language :: Python :: 3',
    'Programming Language :: Python :: 3.3',
    'Programming Language :: Python :: 3.4',
    'Programming Language :: Python :: 3.5',
    'Programming Language :: Python :: 3.6',
    'Topic :: Security',
    'Topic :: Software Development :: Disassemblers',
    'Topic :: Utilities',
]

###################################################################

HERE = os.path.abspath(os.path.dirname(__file__))


def read(*parts):
    """
    Return the text of the file located by joining *parts* onto HERE.

    The file is decoded as UTF-8.
    """
    target = os.path.join(HERE, *parts)
    with codecs.open(target, mode='r', encoding='utf-8') as handle:
        contents = handle.read()
    return contents


META_FILE = read(META_PATH)


def find_meta(meta, source=None):
    """
    Extract __*meta*__ from META_FILE.

    *meta* is the bare name without the surrounding double underscores
    (e.g. 'VERSION'). *source* optionally supplies the text to search
    instead of META_FILE. Returns the quoted string assigned to the name;
    raises RuntimeError when no such assignment is found.
    """
    if source is None:
        source = META_FILE
    # The backreference makes the closing quote match the opening one, so a
    # value such as "Bob's tool" is returned whole instead of being cut off
    # at the first quote character of either kind. re.escape() keeps regex
    # metacharacters in *meta* from being interpreted as pattern syntax.
    pattern = r"^__{meta}__ = (['\"])(.*?)\1".format(meta=re.escape(meta))
    meta_match = re.search(pattern, source, re.M)
    if meta_match:
        return meta_match.group(2)
    raise RuntimeError('Unable to find __{meta}__ string.'.format(meta=meta))

# Console script specification in the form
# "<command>=<package>.<module>:<function>"; the command, the package and
# the module all share the project name.
entry_points = {}
entry_points['console_scripts'] = [NAME + '=' + NAME + '.' + NAME + ':main']

# Run the packaging step only when this file is executed as a script.
if __name__ == '__main__':
    setup(
        name=NAME,
        # The find_meta() values are read from the __dunder__ strings in
        # META_FILE, so the metadata is maintained in one place only.
        description=find_meta('description'),
        license=find_meta('license'),
        url=find_meta('uri'),
        version=find_meta('VERSION'),
        author=find_meta('author'),
        author_email=find_meta('email'),
        # The maintainer fields reuse the author values.
        maintainer=find_meta('author'),
        maintainer_email=find_meta('email'),
        keywords=KEYWORDS,
        # README.md is converted to RST when pypandoc is importable,
        # otherwise its raw contents are used.
        long_description=read_md('README.md'),
        packages=PACKAGES,
        classifiers=CLASSIFIERS,
        install_requires=INSTALL_REQUIRES,
        entry_points=entry_points,
    )

0 comments on commit ce62bb5

Please sign in to comment.