From 834c8d7144f84fe110c163c2522fa5f80264ec15 Mon Sep 17 00:00:00 2001 From: Vesselin Bontchev Date: Fri, 23 Sep 2016 14:31:39 +0300 Subject: [PATCH 01/11] Implemented VBA7 support --- pcodedmp.py | 669 +++++++++++++++++----------------------------------- 1 file changed, 216 insertions(+), 453 deletions(-) diff --git a/pcodedmp.py b/pcodedmp.py index a2570d7..fa0aa96 100644 --- a/pcodedmp.py +++ b/pcodedmp.py @@ -10,7 +10,7 @@ __author__ = 'Vesselin Bontchev ' __license__ = 'GPL' -__VERSION__ = '1.00' +__VERSION__ = '1.01' def getWord(buffer, offset, endian): return unpack_from(endian + 'H', buffer, offset)[0] @@ -120,12 +120,12 @@ def processDir(vbaParser, dirPath, verbose, disasmonly): dirDataCompressed = vbaParser.ole_file.openstream(dirPath).read() dirData = decompress_stream(dirDataCompressed) streamSize = len(dirData) - if (disasmonly): - return dirData - print('%d bytes' % streamSize) - if (verbose): - print(hexdump3(dirData, length=16)) - print('dir stream parsed:') + codeModules = [] + if (not disasmonly): + print('%d bytes' % streamSize) + if (verbose): + print(hexdump3(dirData, length=16)) + print('dir stream parsed:') offset = 0 # The "dir" stream is ALWAYS in little-endian format, even on a Mac while offset < streamSize: @@ -143,17 +143,21 @@ def processDir(vbaParser, dirPath, verbose, disasmonly): tagName = 'UNKNOWN' else: tagName = tags[tag] - print('%08X: %s' % (offset, tagName), end='') + if (not disasmonly): + print('%08X: %s' % (offset, tagName), end='') offset += 6 if (wLength): - print(':') - print(hexdump3(dirData[offset:offset + wLength], length=16)) + if (not disasmonly): + print(':') + print(hexdump3(dirData[offset:offset + wLength], length=16)) + if (tagName == 'MOD_STREAM'): + codeModules.append(dirData[offset:offset + wLength]) offset += wLength - else: + elif (not disasmonly): print('') except: break - return dirData + return dirData, codeModules def process_VBA_PROJECT(vbaParser, vbaProjectPath, verbose, disasmonly): vbaProjectData = 
vbaParser.ole_file.openstream(vbaProjectPath).read() @@ -186,7 +190,9 @@ def getTheIdentifiers(vbaProjectData): offset += 2 for ref in range(numRefs): offset, refLength = getVar(vbaProjectData, offset, endian, False) - if (refLength): + if (refLength == 0): + offset += 6 + else: if ((unicodeRef and (refLength < 5)) or ((not unicodeRef) and (refLength < 3))): offset += refLength else: @@ -197,9 +203,7 @@ def getTheIdentifiers(vbaProjectData): offset += refLength if (c in ['C', 'D']): offset = skipStructure(vbaProjectData, offset, endian, False, 1, False) - offset += 10 - else: - offset += 16 + offset += 10 offset, word = getVar(vbaProjectData, offset, endian, False) if (word): offset = skipStructure(vbaProjectData, offset, endian, False, 1, False) @@ -280,294 +284,11 @@ def getTheIdentifiers(vbaProjectData): print('Error: %s.' % e, file=sys.stderr) return identifiers -def getTheCodeModuleNames(projectData): - # AFAIK, the contents of this stream is ignored. - # The names of the code modules should be obtained from the "dir" stream; - # from the contents of the records of type MOD_STREAM - codeModules = [] - for line in projectData.splitlines(): - line = line.strip() - module = None - if '=' in line: - # split line at the 1st equal sign: - name, value = line.split('=', 1) - # looking for code modules - if name == 'Document': - # split value at the 1st slash, keep 1st part: - value = value.split('/', 1)[0] - module = value - elif name in ('Module', 'Class', 'BaseClass'): - module = value - if module: - codeModules.append(module) - return codeModules - -# TODO: -# - Populate the VBA3 table - -opcodes3 = { -} - #'name', '0x', 'imp_', 'func_', 'var_', 'rec_', 'type_', 'context_' # 2, 2, 2, 4, 4, 4, 4, 4 -opcodes5 = { - 0 : { 'mnem' : 'Imp', 'args' : [], 'varg' : False }, - 1 : { 'mnem' : 'Eqv', 'args' : [], 'varg' : False }, - 2 : { 'mnem' : 'Xor', 'args' : [], 'varg' : False }, - 3 : { 'mnem' : 'Or', 'args' : [], 'varg' : False }, - 4 : { 'mnem' : 'And', 'args' : [], 
'varg' : False }, - 5 : { 'mnem' : 'Eq', 'args' : [], 'varg' : False }, - 6 : { 'mnem' : 'Ne', 'args' : [], 'varg' : False }, - 7 : { 'mnem' : 'Le', 'args' : [], 'varg' : False }, - 8 : { 'mnem' : 'Ge', 'args' : [], 'varg' : False }, - 9 : { 'mnem' : 'Lt', 'args' : [], 'varg' : False }, - 10 : { 'mnem' : 'Gt', 'args' : [], 'varg' : False }, - 11 : { 'mnem' : 'Add', 'args' : [], 'varg' : False }, - 12 : { 'mnem' : 'Sub', 'args' : [], 'varg' : False }, - 13 : { 'mnem' : 'Mod', 'args' : [], 'varg' : False }, - 14 : { 'mnem' : 'IDiv', 'args' : [], 'varg' : False }, - 15 : { 'mnem' : 'Mul', 'args' : [], 'varg' : False }, - 16 : { 'mnem' : 'Div', 'args' : [], 'varg' : False }, - 17 : { 'mnem' : 'Concat', 'args' : [], 'varg' : False }, - 18 : { 'mnem' : 'Like', 'args' : [], 'varg' : False }, - 19 : { 'mnem' : 'Pwr', 'args' : [], 'varg' : False }, - 20 : { 'mnem' : 'Is', 'args' : [], 'varg' : False }, - 21 : { 'mnem' : 'Not', 'args' : [], 'varg' : False }, - 22 : { 'mnem' : 'UMi', 'args' : [], 'varg' : False }, - 23 : { 'mnem' : 'FnAbs', 'args' : [], 'varg' : False }, - 24 : { 'mnem' : 'FnFix', 'args' : [], 'varg' : False }, - 25 : { 'mnem' : 'FnInt', 'args' : [], 'varg' : False }, - 26 : { 'mnem' : 'FnSgn', 'args' : [], 'varg' : False }, - 27 : { 'mnem' : 'FnLen', 'args' : [], 'varg' : False }, - 28 : { 'mnem' : 'FnLenB', 'args' : [], 'varg' : False }, - 29 : { 'mnem' : 'Paren', 'args' : [], 'varg' : False }, - 30 : { 'mnem' : 'Sharp', 'args' : [], 'varg' : False }, - 31 : { 'mnem' : 'LdLHS', 'args' : [], 'varg' : False }, - 32 : { 'mnem' : 'Ld', 'args' : ['name'], 'varg' : False }, - 33 : { 'mnem' : 'MemLd', 'args' : ['name'], 'varg' : False }, - 34 : { 'mnem' : 'DictLd', 'args' : ['name'], 'varg' : False }, - 35 : { 'mnem' : 'IndexLd', 'args' : ['0x'], 'varg' : False }, - 36 : { 'mnem' : 'ArgsLd', 'args' : ['name', '0x'], 'varg' : False }, - 37 : { 'mnem' : 'ArgsMemLd', 'args' : ['name', '0x'], 'varg' : False }, - 38 : { 'mnem' : 'ArgsDictLd', 'args' : ['name', '0x'], 
'varg' : False }, - 39 : { 'mnem' : 'St', 'args' : ['name'], 'varg' : False }, - 40 : { 'mnem' : 'MemSt', 'args' : ['name'], 'varg' : False }, - 41 : { 'mnem' : 'DictSt', 'args' : ['name'], 'varg' : False }, - 42 : { 'mnem' : 'IndexSt', 'args' : ['name'], 'varg' : False }, - 43 : { 'mnem' : 'ArgsSt', 'args' : ['name', '0x'], 'varg' : False }, - 44 : { 'mnem' : 'ArgsMemSt', 'args' : ['name', '0x'], 'varg' : False }, - 45 : { 'mnem' : 'ArgsDictSt', 'args' : ['name', '0x'], 'varg' : False }, - 46 : { 'mnem' : 'set', 'args' : ['name'], 'varg' : False }, - 47 : { 'mnem' : 'Memset', 'args' : ['name'], 'varg' : False }, - 48 : { 'mnem' : 'Dictset', 'args' : ['name'], 'varg' : False }, - 49 : { 'mnem' : 'Indexset', 'args' : ['name'], 'varg' : False }, - 50 : { 'mnem' : 'ArgsSet', 'args' : ['name', '0x'], 'varg' : False }, - 51 : { 'mnem' : 'ArgsMemSet', 'args' : ['name', '0x'], 'varg' : False }, - 52 : { 'mnem' : 'ArgsDictSet', 'args' : ['name', '0x'], 'varg' : False }, - 53 : { 'mnem' : 'MemLdWith', 'args' : ['name'], 'varg' : False }, - 54 : { 'mnem' : 'DictLdWith', 'args' : ['name'], 'varg' : False }, - 55 : { 'mnem' : 'ArgsMemLdWith', 'args' : ['name', '0x'], 'varg' : False }, - 56 : { 'mnem' : 'ArgsDictLdWith', 'args' : ['name', '0x'], 'varg' : False }, - 57 : { 'mnem' : 'MemStWith', 'args' : ['name'], 'varg' : False }, - 58 : { 'mnem' : 'DictStWith', 'args' : ['name'], 'varg' : False }, - 59 : { 'mnem' : 'ArgsMemStWith', 'args' : ['name', '0x'], 'varg' : False }, - 60 : { 'mnem' : 'ArgsDictStWith', 'args' : ['name', '0x'], 'varg' : False }, - 61 : { 'mnem' : 'MemSetWith', 'args' : ['name'], 'varg' : False }, - 62 : { 'mnem' : 'DictSetWith', 'args' : ['name'], 'varg' : False }, - 63 : { 'mnem' : 'ArgsMemSetWith', 'args' : ['name', '0x'], 'varg' : False }, - 64 : { 'mnem' : 'ArgsDictSetWith', 'args' : ['name', '0x'], 'varg' : False }, - 65 : { 'mnem' : 'ArgsCall', 'args' : ['name', '0x'], 'varg' : False }, - 66 : { 'mnem' : 'ArgsMemCall', 'args' : ['name', '0x'], 
'varg' : False }, - 67 : { 'mnem' : 'ArgsMemCallWith', 'args' : ['name', '0x'], 'varg' : False }, - 68 : { 'mnem' : 'ArgsArray', 'args' : ['name', '0x'], 'varg' : False }, - 69 : { 'mnem' : 'Bos', 'args' : ['0x'], 'varg' : False }, - 70 : { 'mnem' : 'BosImplicit', 'args' : [], 'varg' : False }, - 71 : { 'mnem' : 'Bol', 'args' : [], 'varg' : False }, - 72 : { 'mnem' : 'Case', 'args' : [], 'varg' : False }, - 73 : { 'mnem' : 'CaseTo', 'args' : [], 'varg' : False }, - 74 : { 'mnem' : 'CaseGt', 'args' : [], 'varg' : False }, - 75 : { 'mnem' : 'CaseLt', 'args' : [], 'varg' : False }, - 76 : { 'mnem' : 'CaseGe', 'args' : [], 'varg' : False }, - 77 : { 'mnem' : 'CaseLe', 'args' : [], 'varg' : False }, - 78 : { 'mnem' : 'CaseNe', 'args' : [], 'varg' : False }, - 79 : { 'mnem' : 'CaseEq', 'args' : [], 'varg' : False }, - 80 : { 'mnem' : 'CaseElse', 'args' : [], 'varg' : False }, - 81 : { 'mnem' : 'CaseDone', 'args' : [], 'varg' : False }, - 82 : { 'mnem' : 'Circle', 'args' : ['0x'], 'varg' : False }, - 83 : { 'mnem' : 'Close', 'args' : ['0x'], 'varg' : False }, - 84 : { 'mnem' : 'CloseAll', 'args' : [], 'varg' : False }, - 85 : { 'mnem' : 'Coerce', 'args' : [], 'varg' : False }, - 86 : { 'mnem' : 'CoerceVar', 'args' : [], 'varg' : False }, - 87 : { 'mnem' : 'Context', 'args' : ['context_'], 'varg' : False }, - 88 : { 'mnem' : 'Debug', 'args' : [], 'varg' : False }, - 89 : { 'mnem' : 'DefType', 'args' : ['0x', '0x'], 'varg' : False }, - 90 : { 'mnem' : 'Dim', 'args' : [], 'varg' : False }, - 91 : { 'mnem' : 'DimImplicit', 'args' : [], 'varg' : False }, - 92 : { 'mnem' : 'Do', 'args' : [], 'varg' : False }, - 93 : { 'mnem' : 'DoEvents', 'args' : [], 'varg' : False }, - 94 : { 'mnem' : 'DoUnitil', 'args' : [], 'varg' : False }, - 95 : { 'mnem' : 'DoWhile', 'args' : [], 'varg' : False }, - 96 : { 'mnem' : 'Else', 'args' : [], 'varg' : False }, - 97 : { 'mnem' : 'ElseBlock', 'args' : [], 'varg' : False }, - 98 : { 'mnem' : 'ElseIfBlock', 'args' : [], 'varg' : False }, - 99 : { 
'mnem' : 'ElseIfTypeBlock', 'args' : [], 'varg' : False }, -100 : { 'mnem' : 'End', 'args' : [], 'varg' : False }, -101 : { 'mnem' : 'EndContext', 'args' : [], 'varg' : False }, -102 : { 'mnem' : 'EndFunc', 'args' : [], 'varg' : False }, -103 : { 'mnem' : 'EndIf', 'args' : [], 'varg' : False }, -104 : { 'mnem' : 'EndIfBlock', 'args' : [], 'varg' : False }, -105 : { 'mnem' : 'EndImmediate', 'args' : [], 'varg' : False }, -106 : { 'mnem' : 'EndProp', 'args' : [], 'varg' : False }, -107 : { 'mnem' : 'EndSelect', 'args' : [], 'varg' : False }, -108 : { 'mnem' : 'EndSub', 'args' : [], 'varg' : False }, -109 : { 'mnem' : 'EndType', 'args' : [], 'varg' : False }, -110 : { 'mnem' : 'EndWith', 'args' : [], 'varg' : False }, -111 : { 'mnem' : 'Erase', 'args' : ['0x'], 'varg' : False }, -112 : { 'mnem' : 'Error', 'args' : [], 'varg' : False }, -113 : { 'mnem' : 'ExitDo', 'args' : [], 'varg' : False }, -114 : { 'mnem' : 'ExitFor', 'args' : [], 'varg' : False }, -115 : { 'mnem' : 'ExitFunc', 'args' : [], 'varg' : False }, -116 : { 'mnem' : 'ExitProp', 'args' : [], 'varg' : False }, -117 : { 'mnem' : 'ExitSub', 'args' : [], 'varg' : False }, -118 : { 'mnem' : 'FnCurDir', 'args' : [], 'varg' : False }, -119 : { 'mnem' : 'FnDir', 'args' : [], 'varg' : False }, -120 : { 'mnem' : 'Empty0', 'args' : [], 'varg' : False }, -121 : { 'mnem' : 'Empty1', 'args' : [], 'varg' : False }, -122 : { 'mnem' : 'FnError', 'args' : [], 'varg' : False }, -123 : { 'mnem' : 'FnFormat', 'args' : [], 'varg' : False }, -124 : { 'mnem' : 'FnFreeFile', 'args' : [], 'varg' : False }, -125 : { 'mnem' : 'FnInStr', 'args' : [], 'varg' : False }, -126 : { 'mnem' : 'FnInStr3', 'args' : [], 'varg' : False }, -127 : { 'mnem' : 'FnInStr4', 'args' : [], 'varg' : False }, -128 : { 'mnem' : 'FnInStrB', 'args' : [], 'varg' : False }, -129 : { 'mnem' : 'FnInStrB3', 'args' : [], 'varg' : False }, -130 : { 'mnem' : 'FnInStrB4', 'args' : [], 'varg' : False }, -131 : { 'mnem' : 'FnLBound', 'args' : ['0x'], 'varg' : False }, 
-132 : { 'mnem' : 'FnMid', 'args' : [], 'varg' : False }, -133 : { 'mnem' : 'FnMidB', 'args' : [], 'varg' : False }, -134 : { 'mnem' : 'FnStrComp', 'args' : [], 'varg' : False }, -135 : { 'mnem' : 'FnStrComp3', 'args' : [], 'varg' : False }, -136 : { 'mnem' : 'FnStringVar', 'args' : [], 'varg' : False }, -137 : { 'mnem' : 'FnStringStr', 'args' : [], 'varg' : False }, -138 : { 'mnem' : 'FnUBound', 'args' : ['0x'], 'varg' : False }, -139 : { 'mnem' : 'For', 'args' : [], 'varg' : False }, -140 : { 'mnem' : 'ForEach', 'args' : [], 'varg' : False }, -141 : { 'mnem' : 'ForEachAs', 'args' : [], 'varg' : False }, -142 : { 'mnem' : 'ForStep', 'args' : [], 'varg' : False }, -143 : { 'mnem' : 'FuncDefn', 'args' : ['func_'], 'varg' : False }, -144 : { 'mnem' : 'FuncDefnSave', 'args' : ['func_'], 'varg' : False }, -145 : { 'mnem' : 'GetRec', 'args' : [], 'varg' : False }, -146 : { 'mnem' : 'GoSub', 'args' : ['name'], 'varg' : False }, -147 : { 'mnem' : 'GoTo', 'args' : ['name'], 'varg' : False }, -148 : { 'mnem' : 'If', 'args' : [], 'varg' : False }, -149 : { 'mnem' : 'IfBlock', 'args' : [], 'varg' : False }, -150 : { 'mnem' : 'TypeOf', 'args' : ['imp_'], 'varg' : False }, -151 : { 'mnem' : 'IfTypeBlock', 'args' : [], 'varg' : False }, -152 : { 'mnem' : 'Input', 'args' : [], 'varg' : False }, -153 : { 'mnem' : 'InputDone', 'args' : [], 'varg' : False }, -154 : { 'mnem' : 'InputItem', 'args' : [], 'varg' : False }, -155 : { 'mnem' : 'Label', 'args' : ['name'], 'varg' : False }, -156 : { 'mnem' : 'Let', 'args' : [], 'varg' : False }, -157 : { 'mnem' : 'Line', 'args' : ['0x'], 'varg' : False }, -158 : { 'mnem' : 'LineCont', 'args' : [], 'varg' : True }, -159 : { 'mnem' : 'LineInput', 'args' : [], 'varg' : False }, -160 : { 'mnem' : 'LineNum', 'args' : ['name'], 'varg' : False }, -161 : { 'mnem' : 'LitCy', 'args' : ['0x', '0x', '0x', '0x'], 'varg' : False }, -162 : { 'mnem' : 'LitDate', 'args' : ['0x', '0x', '0x', '0x'], 'varg' : False }, -163 : { 'mnem' : 'LitDefault', 'args' : 
[], 'varg' : False }, -164 : { 'mnem' : 'LitDI2', 'args' : ['0x'], 'varg' : False }, -165 : { 'mnem' : 'LitDI4', 'args' : ['0x', '0x'], 'varg' : False }, -166 : { 'mnem' : 'LitHI2', 'args' : ['0x'], 'varg' : False }, -167 : { 'mnem' : 'LitHI4', 'args' : ['0x', '0x'], 'varg' : False }, -168 : { 'mnem' : 'LitNothing', 'args' : [], 'varg' : False }, -169 : { 'mnem' : 'LitOI2', 'args' : ['0x'], 'varg' : False }, -170 : { 'mnem' : 'LitOI4', 'args' : ['0x', '0x'], 'varg' : False }, -171 : { 'mnem' : 'LitR4', 'args' : ['0x', '0x'], 'varg' : False }, -172 : { 'mnem' : 'LitR8', 'args' : ['0x', '0x', '0x', '0x'], 'varg' : False }, -173 : { 'mnem' : 'LitSmallI2', 'args' : [], 'varg' : False }, -174 : { 'mnem' : 'LitStr', 'args' : [], 'varg' : True }, -175 : { 'mnem' : 'LitVarSpecial', 'args' : [], 'varg' : False }, -176 : { 'mnem' : 'Lock', 'args' : [], 'varg' : False }, -177 : { 'mnem' : 'Loop', 'args' : [], 'varg' : False }, -178 : { 'mnem' : 'LoopUntil', 'args' : [], 'varg' : False }, -179 : { 'mnem' : 'LoopWhile', 'args' : [], 'varg' : False }, -180 : { 'mnem' : 'LSet', 'args' : [], 'varg' : False }, -181 : { 'mnem' : 'Me', 'args' : [], 'varg' : False }, -182 : { 'mnem' : 'MeImplicit', 'args' : [], 'varg' : False }, -183 : { 'mnem' : 'MemRedim', 'args' : ['name', '0x', 'type_'], 'varg' : False }, -184 : { 'mnem' : 'MemRedimWith', 'args' : ['name', '0x', 'type_'], 'varg' : False }, -185 : { 'mnem' : 'MemRedimAs', 'args' : ['name', '0x', 'type_'], 'varg' : False }, -186 : { 'mnem' : 'MemRedimAsWith', 'args' : ['name', '0x', 'type_'], 'varg' : False }, -187 : { 'mnem' : 'Mid', 'args' : [], 'varg' : False }, -188 : { 'mnem' : 'MidB', 'args' : [], 'varg' : False }, -189 : { 'mnem' : 'Name', 'args' : [], 'varg' : False }, -190 : { 'mnem' : 'New', 'args' : ['imp_'], 'varg' : False }, -191 : { 'mnem' : 'Next', 'args' : [], 'varg' : False }, -192 : { 'mnem' : 'NextVar', 'args' : [], 'varg' : False }, -193 : { 'mnem' : 'OnError', 'args' : ['name'], 'varg' : False }, -194 : { 'mnem' 
: 'OnGosub', 'args' : [], 'varg' : True }, -195 : { 'mnem' : 'OnGoto', 'args' : [], 'varg' : True }, -196 : { 'mnem' : 'Open', 'args' : ['0x'], 'varg' : False }, -197 : { 'mnem' : 'Option', 'args' : [], 'varg' : False }, -198 : { 'mnem' : 'OptionBase', 'args' : [], 'varg' : False }, -199 : { 'mnem' : 'ParamByVal', 'args' : [], 'varg' : False }, -200 : { 'mnem' : 'ParamOmitted', 'args' : [], 'varg' : False }, -201 : { 'mnem' : 'ParamNamed', 'args' : ['name'], 'varg' : False }, -202 : { 'mnem' : 'PrintChan', 'args' : [], 'varg' : False }, -203 : { 'mnem' : 'PrintComma', 'args' : [], 'varg' : False }, -204 : { 'mnem' : 'PrintEos', 'args' : [], 'varg' : False }, -205 : { 'mnem' : 'PrintItemComma', 'args' : [], 'varg' : False }, -206 : { 'mnem' : 'PrintItemNL', 'args' : [], 'varg' : False }, -207 : { 'mnem' : 'PrintItemSemi', 'args' : [], 'varg' : False }, -208 : { 'mnem' : 'PrintNL', 'args' : [], 'varg' : False }, -209 : { 'mnem' : 'PrintObj', 'args' : [], 'varg' : False }, -210 : { 'mnem' : 'PrintSemi', 'args' : [], 'varg' : False }, -211 : { 'mnem' : 'PrintSpc', 'args' : [], 'varg' : False }, -212 : { 'mnem' : 'PrintTab', 'args' : [], 'varg' : False }, -213 : { 'mnem' : 'PrintTabComma', 'args' : [], 'varg' : False }, -214 : { 'mnem' : 'PSet', 'args' : ['0x'], 'varg' : False }, -215 : { 'mnem' : 'PutRec', 'args' : [], 'varg' : False }, -216 : { 'mnem' : 'QuoteRem', 'args' : ['0x'], 'varg' : True }, -217 : { 'mnem' : 'Redim', 'args' : ['name', '0x', 'type_'], 'varg' : False }, -218 : { 'mnem' : 'RedimAs', 'args' : ['name', '0x', 'type_'], 'varg' : False },\ -219 : { 'mnem' : 'Reparse', 'args' : [], 'varg' : True }, -220 : { 'mnem' : 'Rem', 'args' : [], 'varg' : True }, -221 : { 'mnem' : 'Resume', 'args' : ['name'], 'varg' : False }, -222 : { 'mnem' : 'Return', 'args' : [], 'varg' : False }, -223 : { 'mnem' : 'RSet', 'args' : [], 'varg' : False }, -224 : { 'mnem' : 'Scale', 'args' : ['0x'], 'varg' : False }, -225 : { 'mnem' : 'Seek', 'args' : [], 'varg' : False }, -226 
: { 'mnem' : 'SelectCase', 'args' : [], 'varg' : False }, -227 : { 'mnem' : 'SelectIs', 'args' : [], 'varg' : False }, -228 : { 'mnem' : 'SelectType', 'args' : [], 'varg' : False }, -229 : { 'mnem' : 'SetStmt', 'args' : [], 'varg' : False }, -230 : { 'mnem' : 'Stack', 'args' : [], 'varg' : False }, -231 : { 'mnem' : 'Stop', 'args' : [], 'varg' : False }, -232 : { 'mnem' : 'Type', 'args' : ['rec_'], 'varg' : False }, -233 : { 'mnem' : 'Unlock', 'args' : [], 'varg' : False }, -234 : { 'mnem' : 'VarDefn', 'args' : ['var_'], 'varg' : False }, -235 : { 'mnem' : 'Wend', 'args' : [], 'varg' : False }, -236 : { 'mnem' : 'While', 'args' : [], 'varg' : False }, -237 : { 'mnem' : 'With', 'args' : [], 'varg' : False }, -238 : { 'mnem' : 'WriteChan', 'args' : [], 'varg' : False }, -239 : { 'mnem' : 'ConstFuncExpr', 'args' : [], 'varg' : False }, -240 : { 'mnem' : 'LbConst', 'args' : ['name'], 'varg' : False }, -241 : { 'mnem' : 'LbIf', 'args' : [], 'varg' : False }, -242 : { 'mnem' : 'LbElse', 'args' : [], 'varg' : False }, -243 : { 'mnem' : 'LbElseIf', 'args' : [], 'varg' : False }, -244 : { 'mnem' : 'LbEndIf', 'args' : [], 'varg' : False }, -245 : { 'mnem' : 'LbMark', 'args' : [], 'varg' : False }, -246 : { 'mnem' : 'EndForVariable', 'args' : [], 'varg' : False }, -247 : { 'mnem' : 'StartForVariable', 'args' : [], 'varg' : False }, -248 : { 'mnem' : 'NewRedim', 'args' : [], 'varg' : False }, -249 : { 'mnem' : 'StartWithExpr', 'args' : [], 'varg' : False }, -250 : { 'mnem' : 'SetOrSt', 'args' : ['name'], 'varg' : False }, -251 : { 'mnem' : 'EndEnum', 'args' : [], 'varg' : False }, -252 : { 'mnem' : 'Illegal', 'args' : [], 'varg' : False } -} - -opcodes6 = { +# VBA7 opcodes; VBA3, VBA5 and VBA6 will be upconverted to these. 
+opcodes = { 0 : { 'mnem' : 'Imp', 'args' : [], 'varg' : False }, 1 : { 'mnem' : 'Eqv', 'args' : [], 'varg' : False }, 2 : { 'mnem' : 'Xor', 'args' : [], 'varg' : False }, @@ -742,158 +463,209 @@ def getTheCodeModuleNames(projectData): 171 : { 'mnem' : 'LitDefault', 'args' : [], 'varg' : False }, 172 : { 'mnem' : 'LitDI2', 'args' : ['0x'], 'varg' : False }, 173 : { 'mnem' : 'LitDI4', 'args' : ['0x', '0x'], 'varg' : False }, -174 : { 'mnem' : 'LitHI2', 'args' : ['0x'], 'varg' : False }, -175 : { 'mnem' : 'LitHI4', 'args' : ['0x', '0x'], 'varg' : False }, -176 : { 'mnem' : 'LitNothing', 'args' : [], 'varg' : False }, -177 : { 'mnem' : 'LitOI2', 'args' : ['0x'], 'varg' : False }, -178 : { 'mnem' : 'LitOI4', 'args' : ['0x', '0x'], 'varg' : False }, -179 : { 'mnem' : 'LitR4', 'args' : ['0x', '0x'], 'varg' : False }, -180 : { 'mnem' : 'LitR8', 'args' : ['0x', '0x', '0x', '0x'], 'varg' : False }, -181 : { 'mnem' : 'LitSmallI2', 'args' : [], 'varg' : False }, -182 : { 'mnem' : 'LitStr', 'args' : [], 'varg' : True }, -183 : { 'mnem' : 'LitVarSpecial', 'args' : [], 'varg' : False }, -184 : { 'mnem' : 'Lock', 'args' : [], 'varg' : False }, -185 : { 'mnem' : 'Loop', 'args' : [], 'varg' : False }, -186 : { 'mnem' : 'LoopUntil', 'args' : [], 'varg' : False }, -187 : { 'mnem' : 'LoopWhile', 'args' : [], 'varg' : False }, -188 : { 'mnem' : 'LSet', 'args' : [], 'varg' : False }, -189 : { 'mnem' : 'Me', 'args' : [], 'varg' : False }, -190 : { 'mnem' : 'MeImplicit', 'args' : [], 'varg' : False }, -191 : { 'mnem' : 'MemRedim', 'args' : ['name', '0x', 'type_'], 'varg' : False }, -192 : { 'mnem' : 'MemRedimWith', 'args' : ['name', '0x', 'type_'], 'varg' : False }, -193 : { 'mnem' : 'MemRedimAs', 'args' : ['name', '0x', 'type_'], 'varg' : False }, -194 : { 'mnem' : 'MemRedimAsWith', 'args' : ['name', '0x', 'type_'], 'varg' : False }, -195 : { 'mnem' : 'Mid', 'args' : [], 'varg' : False }, -196 : { 'mnem' : 'MidB', 'args' : [], 'varg' : False }, -197 : { 'mnem' : 'Name', 'args' : [], 
'varg' : False }, -198 : { 'mnem' : 'New', 'args' : ['imp_'], 'varg' : False }, -199 : { 'mnem' : 'Next', 'args' : [], 'varg' : False }, -200 : { 'mnem' : 'NextVar', 'args' : [], 'varg' : False }, -201 : { 'mnem' : 'OnError', 'args' : ['name'], 'varg' : False }, -202 : { 'mnem' : 'OnGosub', 'args' : [], 'varg' : True }, -203 : { 'mnem' : 'OnGoto', 'args' : [], 'varg' : True }, -204 : { 'mnem' : 'Open', 'args' : ['0x'], 'varg' : False }, -205 : { 'mnem' : 'Option', 'args' : [], 'varg' : False }, -206 : { 'mnem' : 'OptionBase', 'args' : [], 'varg' : False }, -207 : { 'mnem' : 'ParamByVal', 'args' : [], 'varg' : False }, -208 : { 'mnem' : 'ParamOmitted', 'args' : [], 'varg' : False }, -209 : { 'mnem' : 'ParamNamed', 'args' : ['name'], 'varg' : False }, -210 : { 'mnem' : 'PrintChan', 'args' : [], 'varg' : False }, -211 : { 'mnem' : 'PrintComma', 'args' : [], 'varg' : False }, -212 : { 'mnem' : 'PrintEos', 'args' : [], 'varg' : False }, -213 : { 'mnem' : 'PrintItemComma', 'args' : [], 'varg' : False }, -214 : { 'mnem' : 'PrintItemNL', 'args' : [], 'varg' : False }, -215 : { 'mnem' : 'PrintItemSemi', 'args' : [], 'varg' : False }, -216 : { 'mnem' : 'PrintNL', 'args' : [], 'varg' : False }, -217 : { 'mnem' : 'PrintObj', 'args' : [], 'varg' : False }, -218 : { 'mnem' : 'PrintSemi', 'args' : [], 'varg' : False }, -219 : { 'mnem' : 'PrintSpc', 'args' : [], 'varg' : False }, -220 : { 'mnem' : 'PrintTab', 'args' : [], 'varg' : False }, -221 : { 'mnem' : 'PrintTabComma', 'args' : [], 'varg' : False }, -222 : { 'mnem' : 'PSet', 'args' : ['0x'], 'varg' : False }, -223 : { 'mnem' : 'PutRec', 'args' : [], 'varg' : False }, -224 : { 'mnem' : 'QuoteRem', 'args' : ['0x'], 'varg' : True }, -225 : { 'mnem' : 'Redim', 'args' : ['name', '0x', 'type_'], 'varg' : False }, -226 : { 'mnem' : 'RedimAs', 'args' : ['name', '0x', 'type_'], 'varg' : False }, -227 : { 'mnem' : 'Reparse', 'args' : [], 'varg' : True }, -228 : { 'mnem' : 'Rem', 'args' : [], 'varg' : True }, -229 : { 'mnem' : 'Resume', 
'args' : ['name'], 'varg' : False }, -230 : { 'mnem' : 'Return', 'args' : [], 'varg' : False }, -231 : { 'mnem' : 'RSet', 'args' : [], 'varg' : False }, -232 : { 'mnem' : 'Scale', 'args' : ['0x'], 'varg' : False }, -233 : { 'mnem' : 'Seek', 'args' : [], 'varg' : False }, -234 : { 'mnem' : 'SelectCase', 'args' : [], 'varg' : False }, -235 : { 'mnem' : 'SelectIs', 'args' : [], 'varg' : False }, -236 : { 'mnem' : 'SelectType', 'args' : [], 'varg' : False }, -237 : { 'mnem' : 'SetStmt', 'args' : [], 'varg' : False }, -238 : { 'mnem' : 'Stack', 'args' : [], 'varg' : False }, -239 : { 'mnem' : 'Stop', 'args' : [], 'varg' : False }, -240 : { 'mnem' : 'Type', 'args' : ['rec_'], 'varg' : False }, -241 : { 'mnem' : 'Unlock', 'args' : [], 'varg' : False }, -242 : { 'mnem' : 'VarDefn', 'args' : ['var_'], 'varg' : False }, -243 : { 'mnem' : 'Wend', 'args' : [], 'varg' : False }, -244 : { 'mnem' : 'While', 'args' : [], 'varg' : False }, -245 : { 'mnem' : 'With', 'args' : [], 'varg' : False }, -246 : { 'mnem' : 'WriteChan', 'args' : [], 'varg' : False }, -247 : { 'mnem' : 'ConstFuncExpr', 'args' : [], 'varg' : False }, -248 : { 'mnem' : 'LbConst', 'args' : ['name'], 'varg' : False }, -249 : { 'mnem' : 'LbIf', 'args' : [], 'varg' : False }, -250 : { 'mnem' : 'LbElse', 'args' : [], 'varg' : False }, -251 : { 'mnem' : 'LbElseIf', 'args' : [], 'varg' : False }, -252 : { 'mnem' : 'LbEndIf', 'args' : [], 'varg' : False }, -253 : { 'mnem' : 'LbMark', 'args' : [], 'varg' : False }, -254 : { 'mnem' : 'EndForVariable', 'args' : [], 'varg' : False }, -255 : { 'mnem' : 'StartForVariable', 'args' : [], 'varg' : False }, -256 : { 'mnem' : 'NewRedim', 'args' : [], 'varg' : False }, -257 : { 'mnem' : 'StartWithExpr', 'args' : [], 'varg' : False }, -258 : { 'mnem' : 'SetOrSt', 'args' : ['name'], 'varg' : False }, -259 : { 'mnem' : 'EndEnum', 'args' : [], 'varg' : False }, -260 : { 'mnem' : 'Illegal', 'args' : [], 'varg' : False } +174 : { 'mnem' : 'LitDI8', 'args' : ['0x', '0x', '0x', '0x'], 
'varg' : False }, +175 : { 'mnem' : 'LitHI2', 'args' : ['0x'], 'varg' : False }, +176 : { 'mnem' : 'LitHI4', 'args' : ['0x', '0x'], 'varg' : False }, +177 : { 'mnem' : 'LitHI8', 'args' : ['0x', '0x', '0x', '0x'], 'varg' : False }, +178 : { 'mnem' : 'LitNothing', 'args' : [], 'varg' : False }, +179 : { 'mnem' : 'LitOI2', 'args' : ['0x'], 'varg' : False }, +180 : { 'mnem' : 'LitOI4', 'args' : ['0x', '0x'], 'varg' : False }, +181 : { 'mnem' : 'LitOI8', 'args' : ['0x', '0x', '0x', '0x'], 'varg' : False }, +182 : { 'mnem' : 'LitR4', 'args' : ['0x', '0x'], 'varg' : False }, +183 : { 'mnem' : 'LitR8', 'args' : ['0x', '0x', '0x', '0x'], 'varg' : False }, +184 : { 'mnem' : 'LitSmallI2', 'args' : [], 'varg' : False }, +185 : { 'mnem' : 'LitStr', 'args' : [], 'varg' : True }, +186 : { 'mnem' : 'LitVarSpecial', 'args' : [], 'varg' : False }, +187 : { 'mnem' : 'Lock', 'args' : [], 'varg' : False }, +188 : { 'mnem' : 'Loop', 'args' : [], 'varg' : False }, +189 : { 'mnem' : 'LoopUntil', 'args' : [], 'varg' : False }, +190 : { 'mnem' : 'LoopWhile', 'args' : [], 'varg' : False }, +191 : { 'mnem' : 'LSet', 'args' : [], 'varg' : False }, +192 : { 'mnem' : 'Me', 'args' : [], 'varg' : False }, +193 : { 'mnem' : 'MeImplicit', 'args' : [], 'varg' : False }, +194 : { 'mnem' : 'MemRedim', 'args' : ['name', '0x', 'type_'], 'varg' : False }, +195 : { 'mnem' : 'MemRedimWith', 'args' : ['name', '0x', 'type_'], 'varg' : False }, +196 : { 'mnem' : 'MemRedimAs', 'args' : ['name', '0x', 'type_'], 'varg' : False }, +197 : { 'mnem' : 'MemRedimAsWith', 'args' : ['name', '0x', 'type_'], 'varg' : False }, +198 : { 'mnem' : 'Mid', 'args' : [], 'varg' : False }, +199 : { 'mnem' : 'MidB', 'args' : [], 'varg' : False }, +200 : { 'mnem' : 'Name', 'args' : [], 'varg' : False }, +201 : { 'mnem' : 'New', 'args' : ['imp_'], 'varg' : False }, +202 : { 'mnem' : 'Next', 'args' : [], 'varg' : False }, +203 : { 'mnem' : 'NextVar', 'args' : [], 'varg' : False }, +204 : { 'mnem' : 'OnError', 'args' : ['name'], 'varg' 
: False }, +205 : { 'mnem' : 'OnGosub', 'args' : [], 'varg' : True }, +206 : { 'mnem' : 'OnGoto', 'args' : [], 'varg' : True }, +207 : { 'mnem' : 'Open', 'args' : ['0x'], 'varg' : False }, +208 : { 'mnem' : 'Option', 'args' : [], 'varg' : False }, +209 : { 'mnem' : 'OptionBase', 'args' : [], 'varg' : False }, +210 : { 'mnem' : 'ParamByVal', 'args' : [], 'varg' : False }, +211 : { 'mnem' : 'ParamOmitted', 'args' : [], 'varg' : False }, +212 : { 'mnem' : 'ParamNamed', 'args' : ['name'], 'varg' : False }, +213 : { 'mnem' : 'PrintChan', 'args' : [], 'varg' : False }, +214 : { 'mnem' : 'PrintComma', 'args' : [], 'varg' : False }, +215 : { 'mnem' : 'PrintEos', 'args' : [], 'varg' : False }, +216 : { 'mnem' : 'PrintItemComma', 'args' : [], 'varg' : False }, +217 : { 'mnem' : 'PrintItemNL', 'args' : [], 'varg' : False }, +218 : { 'mnem' : 'PrintItemSemi', 'args' : [], 'varg' : False }, +219 : { 'mnem' : 'PrintNL', 'args' : [], 'varg' : False }, +220 : { 'mnem' : 'PrintObj', 'args' : [], 'varg' : False }, +221 : { 'mnem' : 'PrintSemi', 'args' : [], 'varg' : False }, +222 : { 'mnem' : 'PrintSpc', 'args' : [], 'varg' : False }, +223 : { 'mnem' : 'PrintTab', 'args' : [], 'varg' : False }, +224 : { 'mnem' : 'PrintTabComma', 'args' : [], 'varg' : False }, +225 : { 'mnem' : 'PSet', 'args' : ['0x'], 'varg' : False }, +226 : { 'mnem' : 'PutRec', 'args' : [], 'varg' : False }, +227 : { 'mnem' : 'QuoteRem', 'args' : ['0x'], 'varg' : True }, +228 : { 'mnem' : 'Redim', 'args' : ['name', '0x', 'type_'], 'varg' : False }, +229 : { 'mnem' : 'RedimAs', 'args' : ['name', '0x', 'type_'], 'varg' : False }, +230 : { 'mnem' : 'Reparse', 'args' : [], 'varg' : True }, +231 : { 'mnem' : 'Rem', 'args' : [], 'varg' : True }, +232 : { 'mnem' : 'Resume', 'args' : ['name'], 'varg' : False }, +233 : { 'mnem' : 'Return', 'args' : [], 'varg' : False }, +234 : { 'mnem' : 'RSet', 'args' : [], 'varg' : False }, +235 : { 'mnem' : 'Scale', 'args' : ['0x'], 'varg' : False }, +236 : { 'mnem' : 'Seek', 'args' : 
[], 'varg' : False }, +237 : { 'mnem' : 'SelectCase', 'args' : [], 'varg' : False }, +238 : { 'mnem' : 'SelectIs', 'args' : [], 'varg' : False }, +239 : { 'mnem' : 'SelectType', 'args' : [], 'varg' : False }, +240 : { 'mnem' : 'SetStmt', 'args' : [], 'varg' : False }, +241 : { 'mnem' : 'Stack', 'args' : [], 'varg' : False }, +242 : { 'mnem' : 'Stop', 'args' : [], 'varg' : False }, +243 : { 'mnem' : 'Type', 'args' : ['rec_'], 'varg' : False }, +244 : { 'mnem' : 'Unlock', 'args' : [], 'varg' : False }, +245 : { 'mnem' : 'VarDefn', 'args' : ['var_'], 'varg' : False }, +246 : { 'mnem' : 'Wend', 'args' : [], 'varg' : False }, +247 : { 'mnem' : 'While', 'args' : [], 'varg' : False }, +248 : { 'mnem' : 'With', 'args' : [], 'varg' : False }, +249 : { 'mnem' : 'WriteChan', 'args' : [], 'varg' : False }, +250 : { 'mnem' : 'ConstFuncExpr', 'args' : [], 'varg' : False }, +251 : { 'mnem' : 'LbConst', 'args' : ['name'], 'varg' : False }, +252 : { 'mnem' : 'LbIf', 'args' : [], 'varg' : False }, +253 : { 'mnem' : 'LbElse', 'args' : [], 'varg' : False }, +254 : { 'mnem' : 'LbElseIf', 'args' : [], 'varg' : False }, +255 : { 'mnem' : 'LbEndIf', 'args' : [], 'varg' : False }, +256 : { 'mnem' : 'LbMark', 'args' : [], 'varg' : False }, +257 : { 'mnem' : 'EndForVariable', 'args' : [], 'varg' : False }, +258 : { 'mnem' : 'StartForVariable', 'args' : [], 'varg' : False }, +259 : { 'mnem' : 'NewRedim', 'args' : [], 'varg' : False }, +260 : { 'mnem' : 'StartWithExpr', 'args' : [], 'varg' : False }, +261 : { 'mnem' : 'SetOrSt', 'args' : ['name'], 'varg' : False }, +262 : { 'mnem' : 'EndEnum', 'args' : [], 'varg' : False }, +263 : { 'mnem' : 'Illegal', 'args' : [], 'varg' : False } } -internalNames = [ -'', '0', 'Abs', 'Access', 'AddressOf', 'Alias', 'And', 'Any', -'Append', 'Array', 'As', 'Assert', 'B', 'Base', 'BF', 'Binary', -'Boolean', 'ByRef', 'Byte', 'ByVal', 'Call', 'Case', 'CBool', 'CByte', -'CCur', 'CDate', 'CDec', 'CDbl', 'CDecl', 'ChDir', 'CInt', 'Circle', -'CLng', 'Close', 
'Compare', 'Const', 'CSng', 'CStr', 'CurDir', 'CurDir$', -'CVar', 'CVDate', 'CVErr', 'Currency', 'Database', 'Date', 'Date$', 'Debug', -'Decimal', 'Declare', 'DefBool', 'DefByte', 'DefCur', 'DefDate', 'DefDec', 'DefDbl', -'DefInt', 'DefLng', 'DefObj', 'DefSng', 'DefStr', 'DefVar', 'Dim', 'Dir', -'Dir$', 'Do', 'DoEvents', 'Double', 'Each', 'Else', 'ElseIf', 'Empty', -'End', 'EndIf', 'Enum', 'Eqv', 'Erase', 'Error', 'Error$', 'Event', -'WithEvents', 'Exit', 'Explicit', 'F', 'False', 'Fix', 'For', 'Format', -'Format$', 'FreeFile', 'Friend', 'Function', 'Get', 'Global', 'Go', 'GoSub', -'Goto', 'If', 'Imp', 'Implements', 'In', 'Input', 'Input$', 'InputB', -'InputB', 'InStr', 'InputB$', 'Int', 'InStrB', 'Is', 'Integer', 'Left', -'LBound', 'LenB', 'Len', 'Lib', 'Let', 'Line', 'Like', 'Load', -'Local', 'Lock', 'Long', 'Loop', 'LSet', 'Me', 'Mid', 'Mid$', -'MidB', 'MidB$', 'Mod', 'Module', 'Name', 'New', 'Next', 'Not', -'Nothing', 'Null', 'Object', 'On', 'Open', 'Option', 'Optional', 'Or', -'Output', 'ParamArray', 'Preserve', 'Print', 'Private', 'Property', 'PSet', 'Public', -'Put', 'RaiseEvent', 'Random', 'Randomize', 'Read', 'ReDim', 'Rem', 'Resume', -'Return', 'RGB', 'RSet', 'Scale', 'Seek', 'Select', 'Set', 'Sgn', -'Shared', 'Single', 'Spc', 'Static', 'Step', 'Stop', 'StrComp', 'String', -'String$', 'Sub', 'Tab', 'Text', 'Then', 'To', 'True', 'Type', -'TypeOf', 'UBound', 'Unload', 'Unlock', 'Unknown', 'Until', 'Variant', 'WEnd', -'While', 'Width', 'With', 'Write', 'Xor', '#Const', '#Else', '#ElseIf', -'#End', '#If', 'Attribute', 'VB_Base', 'VB_Control', 'VB_Creatable', 'VB_Customizable', 'VB_Description', -'VB_Exposed', 'VB_Ext_Key', 'VB_HelpID', 'VB_Invoke_Func', 'VB_Invoke_Property', 'VB_Invoke_PropertyPut', 'VB_Invoke_PropertyPutRef', 'VB_MemberFlags', -'VB_Name', 'VB_PredecraredID', 'VB_ProcData', 'VB_TemplateDerived', 'VB_VarDescription', 'VB_VarHelpID', 'VB_VarMemberFlags', 'VB_VarProcData', -'VB_UserMemID', 'VB_VarUserMemID', 'VB_GlobalNameSpace', ',', '.', '"', 
'_', '!', -'#', '&', "'", '(', ')', '*', '+', '-', -' /', ':', ';', '<', '<=', '<>', '=', '=<', -'=>', '>', '><', '>=', '?', '\\', '^', ':=' -] +def translateOpcode(opcode, vbaVer): + if (vbaVer == 3): + if ( 0 <= opcode <= 67): + return opcode + elif ( 68 <= opcode <= 70): + return opcode + 2 + elif ( 71 <= opcode <= 111): + return opcode + 4 + elif (112 <= opcode <= 150): + return opcode + 8 + elif (151 <= opcode <= 164): + return opcode + 9 + elif (165 <= opcode <= 166): + return opcode + 10 + elif (167 <= opcode <= 169): + return opcode + 11 + elif (170 <= opcode <= 238): + return opcode + 12 + else: # opcode == 239 + return opcode + 24 + elif (vbaVer == 5): + if ( 0 <= opcode <= 68): + return opcode + elif ( 69 <= opcode <= 71): + return opcode + 1 + elif ( 72 <= opcode <= 112): + return opcode + 3 + elif (113 <= opcode <= 151): + return opcode + 7 + elif (152 <= opcode <= 165): + return opcode + 8 + elif (166 <= opcode <= 167): + return opcode + 9 + elif (168 <= opcode <= 170): + return opcode + 10 + else: # 171 <= opcode <= 252 + return opcode + 11 + elif (vbaVer == 6): + if ( 0 <= opcode <= 173): + return opcode + elif (174 <= opcode <= 175): + return opcode + 1 + elif (176 <= opcode <= 178): + return opcode + 2 + else: # 179 <= opcode <= 260 + return opcode + 3 + else: + return opcode + +def getID(idCode, identifiers): + internalNames = [ + '', '0', 'Abs', 'Access', 'AddressOf', 'Alias', 'And', 'Any', + 'Append', 'Array', 'As', 'Assert', 'B', 'Base', 'BF', 'Binary', + 'Boolean', 'ByRef', 'Byte', 'ByVal', 'Call', 'Case', 'CBool', 'CByte', + 'CCur', 'CDate', 'CDec', 'CDbl', 'CDecl', 'ChDir', 'CInt', 'Circle', + 'CLng', 'Close', 'Compare', 'Const', 'CSng', 'CStr', 'CurDir', 'CurDir$', + 'CVar', 'CVDate', 'CVErr', 'Currency', 'Database', 'Date', 'Date$', 'Debug', + 'Decimal', 'Declare', 'DefBool', 'DefByte', 'DefCur', 'DefDate', 'DefDec', 'DefDbl', + 'DefInt', 'DefLng', 'DefObj', 'DefSng', 'DefStr', 'DefVar', 'Dim', 'Dir', + 'Dir$', 'Do', 'DoEvents', 'Double', 
'Each', 'Else', 'ElseIf', 'Empty', + 'End', 'EndIf', 'Enum', 'Eqv', 'Erase', 'Error', 'Error$', 'Event', + 'WithEvents', 'Exit', 'Explicit', 'F', 'False', 'Fix', 'For', 'Format', + 'Format$', 'FreeFile', 'Friend', 'Function', 'Get', 'Global', 'Go', 'GoSub', + 'Goto', 'If', 'Imp', 'Implements', 'In', 'Input', 'Input$', 'InputB', + 'InputB', 'InStr', 'InputB$', 'Int', 'InStrB', 'Is', 'Integer', 'Left', + 'LBound', 'LenB', 'Len', 'Lib', 'Let', 'Line', 'Like', 'Load', + 'Local', 'Lock', 'Long', 'Loop', 'LSet', 'Me', 'Mid', 'Mid$', + 'MidB', 'MidB$', 'Mod', 'Module', 'Name', 'New', 'Next', 'Not', + 'Nothing', 'Null', 'Object', 'On', 'Open', 'Option', 'Optional', 'Or', + 'Output', 'ParamArray', 'Preserve', 'Print', 'Private', 'Property', 'PSet', 'Public', + 'Put', 'RaiseEvent', 'Random', 'Randomize', 'Read', 'ReDim', 'Rem', 'Resume', + 'Return', 'RGB', 'RSet', 'Scale', 'Seek', 'Select', 'Set', 'Sgn', + 'Shared', 'Single', 'Spc', 'Static', 'Step', 'Stop', 'StrComp', 'String', + 'String$', 'Sub', 'Tab', 'Text', 'Then', 'To', 'True', 'Type', + 'TypeOf', 'UBound', 'Unload', 'Unlock', 'Unknown', 'Until', 'Variant', 'WEnd', + 'While', 'Width', 'With', 'Write', 'Xor', '#Const', '#Else', '#ElseIf', + '#End', '#If', 'Attribute', 'VB_Base', 'VB_Control', 'VB_Creatable', 'VB_Customizable', 'VB_Description', + 'VB_Exposed', 'VB_Ext_Key', 'VB_HelpID', 'VB_Invoke_Func', 'VB_Invoke_Property', 'VB_Invoke_PropertyPut', 'VB_Invoke_PropertyPutRef', 'VB_MemberFlags', + 'VB_Name', 'VB_PredecraredID', 'VB_ProcData', 'VB_TemplateDerived', 'VB_VarDescription', 'VB_VarHelpID', 'VB_VarMemberFlags', 'VB_VarProcData', + 'VB_UserMemID', 'VB_VarUserMemID', 'VB_GlobalNameSpace', ',', '.', '"', '_', '!', + '#', '&', "'", '(', ')', '*', '+', '-', + ' /', ':', ';', '<', '<=', '<>', '=', '=<', + '=>', '>', '><', '>=', '?', '\\', '^', ':=' + ] -varTypes = ['', '?', '%', '&', '!', '#', '@', '?', '$', '?', '?', '?', '?', '?'] -varTypesLong = ['Var', '?', 'Int', 'Lng', 'Sng', 'Dbl', 'Cur', 'Date', 'Str', 
'Obj', 'Err', 'Bool', 'Var'] -specials = ['False', 'True', 'Null', 'Empty'] -options = ['Base 0', 'Base 1', 'Compare Text', 'Compare Binary', 'Explicit', 'Private Module'] + idCode >>= 1 + if (idCode >= 0x100): + return identifiers[idCode - 0x100] + else: + return internalNames[idCode] def dumpLine(moduleData, lineStart, lineLength, endian, vbaVer, identifiers, verbose, line): + varTypes = ['', '?', '%', '&', '!', '#', '@', '?', '$', '?', '?', '?', '?', '?'] + varTypesLong = ['Var', '?', 'Int', 'Lng', 'Sng', 'Dbl', 'Cur', 'Date', 'Str', 'Obj', 'Err', 'Bool', 'Var'] + specials = ['False', 'True', 'Null', 'Empty'] + options = ['Base 0', 'Base 1', 'Compare Text', 'Compare Binary', 'Explicit', 'Private Module'] + print('Line #%d:' % line) if (verbose): print(hexdump3(moduleData[lineStart:lineStart + lineLength], length=16)) offset = lineStart endOfLine = lineStart + lineLength - if (vbaVer == 3): - opcodes = opcodes3 - elif (vbaVer == 5): - opcodes = opcodes5 - elif (vbaVer == 6): - opcodes = opcodes6 - else: - print('Unsupported VBA version: %d.' % vbaVer) - return while (offset < endOfLine): offset, opcode = getVar(moduleData, offset, endian, False) opType = (opcode & ~0x03FF) >> 10 opcode &= 0x03FF - if (not opcode in opcodes): + translatedOpcode = translateOpcode(opcode, vbaVer) + if (not translatedOpcode in opcodes): print('Unrecognized opcode 0x%04X at offset 0x%08X.' 
% (opcode, offset)) return - instruction = opcodes[opcode] + instruction = opcodes[translatedOpcode] mnemonic = instruction['mnem'] print('\t', end='') if (verbose): @@ -935,11 +707,7 @@ def dumpLine(moduleData, lineStart, lineLength, endian, vbaVer, identifiers, ver for arg in instruction['args']: if (arg == 'name'): offset, word = getVar(moduleData, offset, endian, False) - word >>= 1 - if (word >= 0x100): - varName = identifiers[word - 0x100] - else: - varName = internalNames[word] + varName = getID(word, identifiers) if (opType < len(varTypes)): strType = varTypes[opType] else: @@ -1008,12 +776,7 @@ def dumpLine(moduleData, lineStart, lineLength, endian, vbaVer, identifiers, ver vars = [] for i in range(wLength / 2): offset1, word = getVar(moduleData, offset1, endian, False) - word >>= 1 - if (word >= 0x100): - varName = identifiers[word - 0x100] - else: - varName = internalNames[word] - vars.append(varName) + vars.append(getID(word, identifiers)) print('%s ' % (', '.join(v for v in vars)), end='') else: hexdump = ' '.join('{:02X}'.format(ord(c)) for c in substring) @@ -1035,7 +798,10 @@ def pcodeDump(moduleData, vbaProjectData, dirData, identifiers, verbose, disasmo # - Handle VBA3 modules vbaVer = 3 try: - if (getWord(vbaProjectData, 2, endian) >= 0x6B): + version = getWord(vbaProjectData, 2, endian) + # TODO: + # - Handle VBA7 + if (version >= 0x6B): # VBA6 vbaVer = 6 offset = 0x0019 @@ -1081,10 +847,8 @@ def processFile(fileName, verbose, disasmonly): for vbaRoot, projectPath, dirPath in vbaProjects: print('=' * 79) if (not disasmonly): - print('PROJECT: %s' % projectPath) print('dir stream: %s' % dirPath) - projectData = processPROJECT(vbaParser, projectPath, disasmonly) - dirData = processDir(vbaParser, dirPath, verbose, disasmonly) + dirData, codeModules = processDir(vbaParser, dirPath, verbose, disasmonly) vbaProjectPath = vbaRoot + 'VBA/_VBA_PROJECT' vbaProjectData = process_VBA_PROJECT(vbaParser, vbaProjectPath, verbose, disasmonly) identifiers = 
getTheIdentifiers(vbaProjectData) @@ -1095,7 +859,6 @@ def processFile(fileName, verbose, disasmonly): print('%s' % identifier) print('') print('_VBA_PROJECT parsing done.') - codeModules = getTheCodeModuleNames(projectData) if (not disasmonly): print('-' * 79) print('Module streams:') From d61463405e47f4aca0824ff8069b10ef4f378dc0 Mon Sep 17 00:00:00 2001 From: Vesselin Bontchev Date: Fri, 23 Sep 2016 14:40:55 +0300 Subject: [PATCH 02/11] Update README.md --- README.md | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index 61f6018..9cb0bd7 100644 --- a/README.md +++ b/README.md @@ -20,9 +20,9 @@ The script should work both in Python 2.6+ and 3.x, although I've been using it ## Usage -The script takes as a command-line argument a list of one or more names of files or directories. If the name is an OLE2 document, it will be inspected for VBA code and the p-code of each code module will be disassembled. If the name is a directory, all the files in this directory and its subdirectories will be similarly processed. In addition to the disassembled p-code, by default the script also displays the contents of the `PROJECT` stream (which is ASCII text), the parsed records of the `dir` stream, as well as the identifiers (variable and function names) used in the VBA modules and stored in the `_VBA_PROJECT` stream. +The script takes as a command-line argument a list of one or more names of files or directories. If the name is an OLE2 document, it will be inspected for VBA code and the p-code of each code module will be disassembled. If the name is a directory, all the files in this directory and its subdirectories will be similarly processed. In addition to the disassembled p-code, by default the script also displays the parsed records of the `dir` stream, as well as the identifiers (variable and function names) used in the VBA modules and stored in the `_VBA_PROJECT` stream. 
-The script supports VBA5 (Office 97, MacOffice 98) and VBA6 (Office 2000 and higher). +The script supports VBA5 (Office 97, MacOffice 98), VBA6 (Office 2000 to Office 2007) and VBA7 (Office 2010 and higher). The script also accepts the following command-line options: @@ -73,10 +73,10 @@ For reference, it is the result of compiling the following VBA code: - While the script should support documents created by MacOffice, this has not been tested (and you know how well untested code usually works). This should be tested and any bugs related to it should be fixed. -- The 64-bit versions of Office use yet another VBA version - VBA7. It uses different p-code opcodes and the current version of the script will not be able to disassemble them correctly. I know how to do it but I need documents with macros created by such a version of Office for testing. - -- I am not an experienced Python programmer and the code is ugly. Especially the humongous opcode tables make me want to barf every time I look at them. Somebody more familiar with Python than me should probably rewrite the script and make it look better. +- I am not an experienced Python programmer and the code is ugly. Somebody more familiar with Python than me should probably rewrite the script and make it look better. ## Change log Version 1.00: Initial version. + +Version 1.01: Storing the opcodes in a more efficient manner. Implemented VBA7 support. 
From d6935b99f3201e1adc919948e6557c39ddd4e684 Mon Sep 17 00:00:00 2001 From: Vesselin Bontchev Date: Fri, 23 Sep 2016 14:49:52 +0300 Subject: [PATCH 03/11] Minor fixes --- pcodedmp.py | 82 ++++++++++++++++++++++++++++++----------------------- 1 file changed, 46 insertions(+), 36 deletions(-) diff --git a/pcodedmp.py b/pcodedmp.py index fa0aa96..51a3a6f 100644 --- a/pcodedmp.py +++ b/pcodedmp.py @@ -44,15 +44,15 @@ def getTypeAndLength(buffer, offset, endian): else: return ord(buffer[offset + 1]), ord(buffer[offset]) -def processPROJECT(vbaParser, projectPath, disasmonly): +def processPROJECT(vbaParser, projectPath, disasmOnly): projectData = vbaParser.ole_file.openstream(projectPath).read() - if (not disasmonly): + if (not disasmOnly): print('-' * 79) print('PROJECT dump:') print(projectData) return projectData -def processDir(vbaParser, dirPath, verbose, disasmonly): +def processDir(vbaParser, dirPath, verbose, disasmOnly): tags = { 1 : 'PROJ_SYSKIND', # 0 - Win16, 1 - Win32, 2 - Mac, 3 - Win64 2 : 'PROJ_LCID', @@ -114,14 +114,14 @@ def processDir(vbaParser, dirPath, verbose, disasmonly): 72 : 'MOD_UNICODE_DOCSTRING', 73 : 'MOD_UNICODE_HELPFILE' } - if (not disasmonly): + if (not disasmOnly): print('-' * 79) print('dir stream after decompression:') dirDataCompressed = vbaParser.ole_file.openstream(dirPath).read() dirData = decompress_stream(dirDataCompressed) streamSize = len(dirData) codeModules = [] - if (not disasmonly): + if (not disasmOnly): print('%d bytes' % streamSize) if (verbose): print(hexdump3(dirData, length=16)) @@ -143,25 +143,25 @@ def processDir(vbaParser, dirPath, verbose, disasmonly): tagName = 'UNKNOWN' else: tagName = tags[tag] - if (not disasmonly): + if (not disasmOnly): print('%08X: %s' % (offset, tagName), end='') offset += 6 if (wLength): - if (not disasmonly): + if (not disasmOnly): print(':') print(hexdump3(dirData[offset:offset + wLength], length=16)) if (tagName == 'MOD_STREAM'): codeModules.append(dirData[offset:offset + wLength]) 
offset += wLength - elif (not disasmonly): + elif (not disasmOnly): print('') except: break return dirData, codeModules -def process_VBA_PROJECT(vbaParser, vbaProjectPath, verbose, disasmonly): +def process_VBA_PROJECT(vbaParser, vbaProjectPath, verbose, disasmOnly): vbaProjectData = vbaParser.ole_file.openstream(vbaProjectPath).read() - if (disasmonly): + if (disasmOnly): return vbaProjectData print('-' * 79) print('_VBA_PROJECT stream:') @@ -359,9 +359,9 @@ def getTheIdentifiers(vbaProjectData): 67 : { 'mnem' : 'ArgsMemCallWith', 'args' : ['name', '0x'], 'varg' : False }, 68 : { 'mnem' : 'ArgsArray', 'args' : ['name', '0x'], 'varg' : False }, 69 : { 'mnem' : 'Assert', 'args' : [], 'varg' : False }, - 70 : { 'mnem' : 'Bos', 'args' : ['0x'], 'varg' : False }, - 71 : { 'mnem' : 'BosImplicit', 'args' : [], 'varg' : False }, - 72 : { 'mnem' : 'Bol', 'args' : [], 'varg' : False }, + 70 : { 'mnem' : 'BoS', 'args' : ['0x'], 'varg' : False }, + 71 : { 'mnem' : 'BoSImplicit', 'args' : [], 'varg' : False }, + 72 : { 'mnem' : 'BoL', 'args' : [], 'varg' : False }, 73 : { 'mnem' : 'LdAddressOf', 'args' : [], 'varg' : False }, 74 : { 'mnem' : 'MemAddressOf', 'args' : [], 'varg' : False }, 75 : { 'mnem' : 'Case', 'args' : [], 'varg' : False }, @@ -504,7 +504,7 @@ def getTheIdentifiers(vbaProjectData): 212 : { 'mnem' : 'ParamNamed', 'args' : ['name'], 'varg' : False }, 213 : { 'mnem' : 'PrintChan', 'args' : [], 'varg' : False }, 214 : { 'mnem' : 'PrintComma', 'args' : [], 'varg' : False }, -215 : { 'mnem' : 'PrintEos', 'args' : [], 'varg' : False }, +215 : { 'mnem' : 'PrintEoS', 'args' : [], 'varg' : False }, 216 : { 'mnem' : 'PrintItemComma', 'args' : [], 'varg' : False }, 217 : { 'mnem' : 'PrintItemNL', 'args' : [], 'varg' : False }, 218 : { 'mnem' : 'PrintItemSemi', 'args' : [], 'varg' : False }, @@ -592,7 +592,8 @@ def translateOpcode(opcode, vbaVer): return opcode + 10 else: # 171 <= opcode <= 252 return opcode + 11 - elif (vbaVer == 6): + #elif (vbaVer == 6): + elif (vbaVer 
in [6, 7]): if ( 0 <= opcode <= 173): return opcode elif (174 <= opcode <= 175): @@ -604,7 +605,7 @@ def translateOpcode(opcode, vbaVer): else: return opcode -def getID(idCode, identifiers): +def getID(idCode, identifiers, vbaVer): internalNames = [ '', '0', 'Abs', 'Access', 'AddressOf', 'Alias', 'And', 'Any', 'Append', 'Array', 'As', 'Assert', 'B', 'Base', 'BF', 'Binary', @@ -642,7 +643,10 @@ def getID(idCode, identifiers): idCode >>= 1 if (idCode >= 0x100): - return identifiers[idCode - 0x100] + if (vbaVer < 7): + return identifiers[idCode - 0x100] + else: + return identifiers[idCode - 0x104] else: return internalNames[idCode] @@ -674,8 +678,10 @@ def dumpLine(moduleData, lineStart, lineLength, endian, vbaVer, identifiers, ver if (mnemonic in ['Coerce', 'CoerceVar', 'DefType']): if (opType < len(varTypesLong)): print('(%s) ' % varTypesLong[opType], end='') + elif (opType == 17): + print('(Byte) ', end='') else: - print('(%d)' % opType, end='') + print('(%d) ' % opType, end='') elif (mnemonic in ['Dim', 'DimImplicit', 'Type']): if (opType == 8): print('(Public) ', end='') @@ -701,13 +707,13 @@ def dumpLine(moduleData, lineStart, lineLength, endian, vbaVer, identifiers, ver opType -= 16 elif (mnemonic == 'Option'): print(' (%s)' % options[opType], end='') - elif (mnemonic in ['ReDim', 'RedimAs']): - if (opType): + elif (mnemonic in ['Redim', 'RedimAs']): + if (opType & 16): print('(Preserve) ', end='') for arg in instruction['args']: if (arg == 'name'): offset, word = getVar(moduleData, offset, endian, False) - varName = getID(word, identifiers) + varName = getID(word, identifiers, vbaVer) if (opType < len(varTypes)): strType = varTypes[opType] else: @@ -769,14 +775,14 @@ def dumpLine(moduleData, lineStart, lineLength, endian, vbaVer, identifiers, ver offset, wLength = getVar(moduleData, offset, endian, False) substring = moduleData[offset:offset + wLength] print('0x%04X ' % wLength, end='') - if (mnemonic in ['LitStr', 'QuoteRem', 'Rem']): + if (mnemonic in 
['LitStr', 'QuoteRem', 'Rem', 'Reparse']): print('"%s"' % substring, end='') elif (mnemonic in ['OnGosub', 'OnGoto']): offset1 = offset vars = [] for i in range(wLength / 2): offset1, word = getVar(moduleData, offset1, endian, False) - vars.append(getID(word, identifiers)) + vars.append(getID(word, identifiers, vbaVer)) print('%s ' % (', '.join(v for v in vars)), end='') else: hexdump = ' '.join('{:02X}'.format(ord(c)) for c in substring) @@ -786,8 +792,8 @@ def dumpLine(moduleData, lineStart, lineLength, endian, vbaVer, identifiers, ver offset += 1 print('') -def pcodeDump(moduleData, vbaProjectData, dirData, identifiers, verbose, disasmonly): - if (verbose and not disasmonly): +def pcodeDump(moduleData, vbaProjectData, dirData, identifiers, verbose, disasmOnly): + if (verbose and not disasmOnly): print(hexdump3(moduleData, length=16)) # Determine endinanness: PC (little-endian) or Mac (big-endian) if (getWord(moduleData, 2, '<') > 0xFF): @@ -799,9 +805,13 @@ def pcodeDump(moduleData, vbaProjectData, dirData, identifiers, verbose, disasmo vbaVer = 3 try: version = getWord(vbaProjectData, 2, endian) - # TODO: - # - Handle VBA7 - if (version >= 0x6B): + if (verbose): + print('Office version: 0x%04X.' % version) + if (version >= 0xA0): # TODO - Office 2013 is 0x00A3; check Office 2010 + # VBA7 + vbaVer = 7 + offset = 0x0019 + elif (version >= 0x6B): # VBA6 vbaVer = 6 offset = 0x0019 @@ -834,7 +844,7 @@ def pcodeDump(moduleData, vbaProjectData, dirData, identifiers, verbose, disasmo print('Error: %s.' 
% e, file=sys.stderr) return -def processFile(fileName, verbose, disasmonly): +def processFile(fileName, verbose, disasmOnly): # TODO: # - Handle VBA3 documents print('Processing file: %s' % fileName) @@ -846,27 +856,27 @@ def processFile(fileName, verbose, disasmonly): return for vbaRoot, projectPath, dirPath in vbaProjects: print('=' * 79) - if (not disasmonly): + if (not disasmOnly): print('dir stream: %s' % dirPath) - dirData, codeModules = processDir(vbaParser, dirPath, verbose, disasmonly) + dirData, codeModules = processDir(vbaParser, dirPath, verbose, disasmOnly) vbaProjectPath = vbaRoot + 'VBA/_VBA_PROJECT' - vbaProjectData = process_VBA_PROJECT(vbaParser, vbaProjectPath, verbose, disasmonly) + vbaProjectData = process_VBA_PROJECT(vbaParser, vbaProjectPath, verbose, disasmOnly) identifiers = getTheIdentifiers(vbaProjectData) - if (not disasmonly): + if (not disasmOnly): print('Identifiers:') print('') for identifier in identifiers: print('%s' % identifier) print('') print('_VBA_PROJECT parsing done.') - if (not disasmonly): + if (not disasmOnly): print('-' * 79) print('Module streams:') for module in codeModules: modulePath = vbaRoot + 'VBA/' + module moduleData = vbaParser.ole_file.openstream(modulePath).read() print ('%s - %d bytes' % (modulePath, len(moduleData))) - pcodeDump(moduleData, vbaProjectData, dirData, identifiers, verbose, disasmonly) + pcodeDump(moduleData, vbaProjectData, dirData, identifiers, verbose, disasmOnly) except Exception as e: print('Error: %s.' 
% e, file=sys.stderr) vbaParser.close() @@ -888,7 +898,7 @@ def processFile(fileName, verbose, disasmonly): for name, subdirList, fileList in os.walk(name): for fname in fileList: fullName = os.path.join(name, fname) - processFile(fullName, args.verbose, args.disasmonly) + processFile(fullName, args.verbose, args.disasmOnly) if args.norecurse: while len(subdirList) > 0: del(subdirList[0]) From a7cc81f400ff7155951c7c6d6c1fe11758133b38 Mon Sep 17 00:00:00 2001 From: Vesselin Bontchev Date: Fri, 23 Sep 2016 21:51:17 +0300 Subject: [PATCH 04/11] Updated README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 9cb0bd7..e103806 100644 --- a/README.md +++ b/README.md @@ -32,7 +32,7 @@ The script also accepts the following command-line options: `-n`, `--norecurse` If a name specified on the command line is a directory, process only the files in this directory; do not process the files in its subdirectories. -`-d`, `--disasmonly` Only the p-code will be disassembled, without the parsed contents of the `dir` stream, the contents of the `PROJECT` stream, or the identifiers in the `_VBA_PROJECT` stream. +`-d`, `--disasmonly` Only the p-code will be disassembled, without the parsed contents of the `dir` stream or the identifiers in the `_VBA_PROJECT` stream. `--verbose` The contents of the `dir` and `_VBA_PROJECT` streams are dumped in hex and ASCII form. In addition, the raw bytes of each VBA line compiled into p-code are also dumped in hex and ASCII. 
From fafa57a363170393dcbdcf3bd40364a9effa137e Mon Sep 17 00:00:00 2001 From: Vesselin Bontchev Date: Fri, 30 Sep 2016 21:54:14 +0300 Subject: [PATCH 05/11] Fixed a problem with Office 2010 --- pcodedmp.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/pcodedmp.py b/pcodedmp.py index 51a3a6f..7ce895c 100644 --- a/pcodedmp.py +++ b/pcodedmp.py @@ -807,7 +807,8 @@ def pcodeDump(moduleData, vbaProjectData, dirData, identifiers, verbose, disasmO version = getWord(vbaProjectData, 2, endian) if (verbose): print('Office version: 0x%04X.' % version) - if (version >= 0xA0): # TODO - Office 2013 is 0x00A3; check Office 2010 + # TODO - Office 2010 is 0x0097; Office 2013 is 0x00A3; check Office 2016 + if (version >= 0x97): # VBA7 vbaVer = 7 offset = 0x0019 @@ -848,6 +849,7 @@ def processFile(fileName, verbose, disasmOnly): # TODO: # - Handle VBA3 documents print('Processing file: %s' % fileName) + vbaParser = None try: vbaParser = VBA_Parser(fileName) vbaProjects = vbaParser.find_vba_projects() @@ -879,7 +881,8 @@ def processFile(fileName, verbose, disasmOnly): pcodeDump(moduleData, vbaProjectData, dirData, identifiers, verbose, disasmOnly) except Exception as e: print('Error: %s.' % e, file=sys.stderr) - vbaParser.close() + if (vbaParser): + vbaParser.close() if __name__ == '__main__': parser = argparse.ArgumentParser(version='%(prog)s version ' + __VERSION__, From 1bed76d36e6675c5d2e7c0150e1c7c07ee33cd18 Mon Sep 17 00:00:00 2001 From: Vesselin Bontchev Date: Tue, 4 Oct 2016 15:07:06 +0300 Subject: [PATCH 06/11] Fixed a typo --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index e103806..e6caee6 100644 --- a/README.md +++ b/README.md @@ -6,7 +6,7 @@ It is not widely known, but macros written in VBA (Visual Basic for Applications - _Source code_. The original source code of the macro module is compressed and stored at the end of the module stream. 
This makes it relatively easy to locate and extract and most free DFIR tools for macro analysis like [oledump](https://blog.didierstevens.com/programs/oledump-py/) or [olevba](http://www.decalage.info/python/olevba) or even many professional anti-virus tools look only at this form. However, most of the time the source code is completely ignored by Office. In fact, it is possible to remove the source code (and therefore make all these tools think that there are no macros present), yet the macros will still execute without any problems. I have created a [proof of concept](http://bontchev.my.contact.bg/poc2b.doc) document illustrating this. Most tools will not see any macros in it but if opened with Word version 2000 or higher, it will display a message and will launch `calc.exe`. It is surprising that malware authors are not using this trick more widely. -- _P-code_. As each VBA line is entered into the VBA editor, it is immediately compiled into p-code (a pseudo code for a stack machine) and stored in a different place in the module stream. The p-code is precisely what is executed most of the time. In fact, even when you open the source of a macro module in the VBA editor, what is displayed is not the decompressed source code but the p-code decompiled into source. Only if the document is opened under a version of Office that uses a different VBA version from the one that has been used to create the document, the stored compressed source code is re-compiled into p-code and then that p-code is executed. This makes it possible to open a VBA-containing document on any version of Office that suppots VBA and have the macros inside remain executable, despite the fact that the different versions of VBA use different (incompatible) p-code instructions. +- _P-code_. As each VBA line is entered into the VBA editor, it is immediately compiled into p-code (a pseudo code for a stack machine) and stored in a different place in the module stream. 
The p-code is precisely what is executed most of the time. In fact, even when you open the source of a macro module in the VBA editor, what is displayed is not the decompressed source code but the p-code decompiled into source. Only if the document is opened under a version of Office that uses a different VBA version from the one that has been used to create the document, the stored compressed source code is re-compiled into p-code and then that p-code is executed. This makes it possible to open a VBA-containing document on any version of Office that supports VBA and have the macros inside remain executable, despite the fact that the different versions of VBA use different (incompatible) p-code instructions. - _Execodes_. When the p-code has been executed at least once, a further tokenized form of it is stored elsewhere in the document (in streams, the names of which begin with `__SRP_`, followed by a number). From there it can be executed much faster. However, the format of the execodes is extremely complex and is specific for the particular Office version (not VBA version) in which they have been created. This makes them extremely non-portable. In addition, their presence is not necessary - they can be removed and the macros will run just fine (from the p-code). 
From 01fab8af3b0be461d4c2eb99c2c1e9677262b867 Mon Sep 17 00:00:00 2001 From: Vesselin Bontchev Date: Thu, 6 Oct 2016 13:45:10 +0300 Subject: [PATCH 07/11] Accounted for some weirdness in the identifier fetching --- pcodedmp.py | 37 +++++++++++++++++++++++++++---------- 1 file changed, 27 insertions(+), 10 deletions(-) diff --git a/pcodedmp.py b/pcodedmp.py index 7ce895c..931d206 100644 --- a/pcodedmp.py +++ b/pcodedmp.py @@ -8,7 +8,7 @@ import sys import os -__author__ = 'Vesselin Bontchev ' +__author__ = 'Vesselin Bontchev ' __license__ = 'GPL' __VERSION__ = '1.01' @@ -266,6 +266,7 @@ def getTheIdentifiers(vbaProjectData): # Now offset points to the start of the variable names area for id in range(numIDs): isKwd = False + ident = '' idType, idLength = getTypeAndLength(vbaProjectData, offset, endian) offset += 2 if ((idLength == 0) and (idType == 0)): @@ -276,7 +277,8 @@ def getTheIdentifiers(vbaProjectData): if (idType & 0x80): offset += 6 if (idLength): - identifiers.append(vbaProjectData[offset:offset + idLength]) + ident = vbaProjectData[offset:offset + idLength] + identifiers.append(ident) offset += idLength if (not isKwd): offset += 4 @@ -617,7 +619,7 @@ def getID(idCode, identifiers, vbaVer): 'DefInt', 'DefLng', 'DefObj', 'DefSng', 'DefStr', 'DefVar', 'Dim', 'Dir', 'Dir$', 'Do', 'DoEvents', 'Double', 'Each', 'Else', 'ElseIf', 'Empty', 'End', 'EndIf', 'Enum', 'Eqv', 'Erase', 'Error', 'Error$', 'Event', - 'WithEvents', 'Exit', 'Explicit', 'F', 'False', 'Fix', 'For', 'Format', + 'WithEvents', 'Explicit', 'F', 'False', 'Fix', 'For', 'Format', 'Format$', 'FreeFile', 'Friend', 'Function', 'Get', 'Global', 'Go', 'GoSub', 'Goto', 'If', 'Imp', 'Implements', 'In', 'Input', 'Input$', 'InputB', 'InputB', 'InStr', 'InputB$', 'Int', 'InStrB', 'Is', 'Integer', 'Left', @@ -641,14 +643,23 @@ def getID(idCode, identifiers, vbaVer): '=>', '>', '><', '>=', '?', '\\', '^', ':=' ] + origCode = idCode idCode >>= 1 - if (idCode >= 0x100): - if (vbaVer < 7): - return 
identifiers[idCode - 0x100] + try: + if (idCode >= 0x100): + idCode -= 0x100 + if (vbaVer >= 7): + idCode -= 4 + if (idCode > 0xBE): + idCode -= 1 + return identifiers[idCode] else: - return identifiers[idCode - 0x104] - else: - return internalNames[idCode] + if (vbaVer >= 7): + if (idCode >= 0xC3): + idCode -= 1 + return internalNames[idCode] + except: + return 'id_{0:04X}'.format(origCode) def dumpLine(moduleData, lineStart, lineLength, endian, vbaVer, identifiers, verbose, line): varTypes = ['', '?', '%', '&', '!', '#', '@', '?', '$', '?', '?', '?', '?', '?'] @@ -656,7 +667,11 @@ def dumpLine(moduleData, lineStart, lineLength, endian, vbaVer, identifiers, ver specials = ['False', 'True', 'Null', 'Empty'] options = ['Base 0', 'Base 1', 'Compare Text', 'Compare Binary', 'Explicit', 'Private Module'] + if (verbose and (lineLength > 0)): + print('%04X: ' % lineStart, end='') print('Line #%d:' % line) + if (lineLength <= 0): + return if (verbose): print(hexdump3(moduleData[lineStart:lineStart + lineLength], length=16)) offset = lineStart @@ -867,8 +882,10 @@ def processFile(fileName, verbose, disasmOnly): if (not disasmOnly): print('Identifiers:') print('') + i = 0 for identifier in identifiers: - print('%s' % identifier) + print('%04X: %s' % (i, identifier)) + i += 1 print('') print('_VBA_PROJECT parsing done.') if (not disasmOnly): From b25cc88597d39145bfe456894e2e1c3077e80617 Mon Sep 17 00:00:00 2001 From: Vesselin Bontchev Date: Thu, 6 Oct 2016 22:36:03 +0300 Subject: [PATCH 08/11] Added support for documents created by the 64-bit Office --- pcodedmp.py | 93 ++++++++++++++++++++++++++++------------------------- 1 file changed, 50 insertions(+), 43 deletions(-) diff --git a/pcodedmp.py b/pcodedmp.py index 931d206..4e8998f 100644 --- a/pcodedmp.py +++ b/pcodedmp.py @@ -10,7 +10,7 @@ __author__ = 'Vesselin Bontchev ' __license__ = 'GPL' -__VERSION__ = '1.01' +__VERSION__ = '2.00' def getWord(buffer, offset, endian): return unpack_from(endian + 'H', buffer, 
offset)[0] @@ -117,12 +117,13 @@ def processDir(vbaParser, dirPath, verbose, disasmOnly): if (not disasmOnly): print('-' * 79) print('dir stream after decompression:') + is64bit = False dirDataCompressed = vbaParser.ole_file.openstream(dirPath).read() dirData = decompress_stream(dirDataCompressed) streamSize = len(dirData) codeModules = [] if (not disasmOnly): - print('%d bytes' % streamSize) + print('{0:d} bytes'.format(streamSize)) if (verbose): print(hexdump3(dirData, length=16)) print('dir stream parsed:') @@ -144,20 +145,23 @@ def processDir(vbaParser, dirPath, verbose, disasmOnly): else: tagName = tags[tag] if (not disasmOnly): - print('%08X: %s' % (offset, tagName), end='') + print('{0:08X}: {1}'.format(offset, tagName), end='') offset += 6 if (wLength): if (not disasmOnly): print(':') print(hexdump3(dirData[offset:offset + wLength], length=16)) - if (tagName == 'MOD_STREAM'): + if (tagName == 'MOD_STREAM'): codeModules.append(dirData[offset:offset + wLength]) + elif (tagName == 'PROJ_SYSKIND'): + sysKind = getDWord(dirData, offset, '<') + is64bit = sysKind == 3 offset += wLength elif (not disasmOnly): print('') except: break - return dirData, codeModules + return dirData, codeModules, is64bit def process_VBA_PROJECT(vbaParser, vbaProjectPath, verbose, disasmOnly): vbaProjectData = vbaParser.ole_file.openstream(vbaProjectPath).read() @@ -165,7 +169,7 @@ def process_VBA_PROJECT(vbaParser, vbaProjectPath, verbose, disasmOnly): return vbaProjectData print('-' * 79) print('_VBA_PROJECT stream:') - print('%d bytes' % len(vbaProjectData)) + print('{0:d} bytes'.format(len(vbaProjectData))) if (verbose): print(hexdump3(vbaProjectData, length=16)) return vbaProjectData @@ -283,7 +287,7 @@ def getTheIdentifiers(vbaProjectData): if (not isKwd): offset += 4 except Exception as e: - print('Error: %s.' 
% e, file=sys.stderr) + print('Error: {0}.'.format(e), file=sys.stderr) return identifiers #'name', '0x', 'imp_', 'func_', 'var_', 'rec_', 'type_', 'context_' @@ -557,7 +561,7 @@ def getTheIdentifiers(vbaProjectData): 263 : { 'mnem' : 'Illegal', 'args' : [], 'varg' : False } } -def translateOpcode(opcode, vbaVer): +def translateOpcode(opcode, vbaVer, is64bit): if (vbaVer == 3): if ( 0 <= opcode <= 67): return opcode @@ -595,7 +599,8 @@ def translateOpcode(opcode, vbaVer): else: # 171 <= opcode <= 252 return opcode + 11 #elif (vbaVer == 6): - elif (vbaVer in [6, 7]): + #elif (vbaVer in [6, 7]): + elif (not is64bit): if ( 0 <= opcode <= 173): return opcode elif (174 <= opcode <= 175): @@ -607,7 +612,7 @@ def translateOpcode(opcode, vbaVer): else: return opcode -def getID(idCode, identifiers, vbaVer): +def getID(idCode, identifiers, vbaVer, is64bit): internalNames = [ '', '0', 'Abs', 'Access', 'AddressOf', 'Alias', 'And', 'Any', 'Append', 'Array', 'As', 'Assert', 'B', 'Base', 'BF', 'Binary', @@ -650,6 +655,8 @@ def getID(idCode, identifiers, vbaVer): idCode -= 0x100 if (vbaVer >= 7): idCode -= 4 + if (is64bit): + idCode -= 3 if (idCode > 0xBE): idCode -= 1 return identifiers[idCode] @@ -661,15 +668,15 @@ def getID(idCode, identifiers, vbaVer): except: return 'id_{0:04X}'.format(origCode) -def dumpLine(moduleData, lineStart, lineLength, endian, vbaVer, identifiers, verbose, line): +def dumpLine(moduleData, lineStart, lineLength, endian, vbaVer, is64bit, identifiers, verbose, line): varTypes = ['', '?', '%', '&', '!', '#', '@', '?', '$', '?', '?', '?', '?', '?'] varTypesLong = ['Var', '?', 'Int', 'Lng', 'Sng', 'Dbl', 'Cur', 'Date', 'Str', 'Obj', 'Err', 'Bool', 'Var'] specials = ['False', 'True', 'Null', 'Empty'] options = ['Base 0', 'Base 1', 'Compare Text', 'Compare Binary', 'Explicit', 'Private Module'] if (verbose and (lineLength > 0)): - print('%04X: ' % lineStart, end='') - print('Line #%d:' % line) + print('{0:04X}: '.format(lineStart), end='') + print('Line 
#{0:d}:'.format(line)) if (lineLength <= 0): return if (verbose): @@ -680,23 +687,23 @@ def dumpLine(moduleData, lineStart, lineLength, endian, vbaVer, identifiers, ver offset, opcode = getVar(moduleData, offset, endian, False) opType = (opcode & ~0x03FF) >> 10 opcode &= 0x03FF - translatedOpcode = translateOpcode(opcode, vbaVer) + translatedOpcode = translateOpcode(opcode, vbaVer, is64bit) if (not translatedOpcode in opcodes): - print('Unrecognized opcode 0x%04X at offset 0x%08X.' % (opcode, offset)) + print('Unrecognized opcode 0x{0:04X} at offset 0x{1:08X}.'.format(opcode, offset)) return instruction = opcodes[translatedOpcode] mnemonic = instruction['mnem'] print('\t', end='') if (verbose): - print('%04X ' % opcode, end='') - print('%s ' % mnemonic, end='') + print('{0:04X} '.format(opcode), end='') + print('{0} '.format(mnemonic), end='') if (mnemonic in ['Coerce', 'CoerceVar', 'DefType']): if (opType < len(varTypesLong)): - print('(%s) ' % varTypesLong[opType], end='') + print('({0}) '.format(varTypesLong[opType]), end='') elif (opType == 17): print('(Byte) ', end='') else: - print('(%d) ' % opType, end='') + print('({0:d}) '.format(opType), end='') elif (mnemonic in ['Dim', 'DimImplicit', 'Type']): if (opType == 8): print('(Public) ', end='') @@ -705,7 +712,7 @@ def dumpLine(moduleData, lineStart, lineLength, endian, vbaVer, identifiers, ver elif (opType == 32): print('(Static) ', end='') elif (mnemonic == 'LitVarSpecial'): - print('(%s)' % specials[opType], end='') + print('({0})'.format(specials[opType]), end='') elif (mnemonic == 'FuncDefn'): if (opType == 1): print('(Sub / Property Set) ', end='') @@ -721,14 +728,14 @@ def dumpLine(moduleData, lineStart, lineLength, endian, vbaVer, identifiers, ver else: opType -= 16 elif (mnemonic == 'Option'): - print(' (%s)' % options[opType], end='') + print(' ({0})'.format(options[opType]), end='') elif (mnemonic in ['Redim', 'RedimAs']): if (opType & 16): print('(Preserve) ', end='') for arg in instruction['args']: 
if (arg == 'name'): offset, word = getVar(moduleData, offset, endian, False) - varName = getID(word, identifiers, vbaVer) + varName = getID(word, identifiers, vbaVer, is64bit) if (opType < len(varTypes)): strType = varTypes[opType] else: @@ -747,11 +754,11 @@ def dumpLine(moduleData, lineStart, lineLength, endian, vbaVer, identifiers, ver varName = '(Next)' elif (opType != 0): varName = '' - print('%s%s ' % (varName, strType), end='') + print('{0}{1} '.format(varName, strType), end='') elif (arg in ['0x', 'imp_']): offset, word = getVar(moduleData, offset, endian, False) if (mnemonic != 'Open'): - print('%s%04X ' % (arg, word), end='') + print('{0}{1:04X} '.format(arg, word), end='') else: # This is a rather messy way of processing what is probably # just a bit field but I couldn't figure out a smarter way @@ -785,29 +792,29 @@ def dumpLine(moduleData, lineStart, lineLength, endian, vbaVer, identifiers, ver print(')', end='') elif (arg in ['func_', 'var_', 'rec_', 'type_', 'context_']): offset, dword = getVar(moduleData, offset, endian, True) - print('%s%08X ' % (arg, dword), end='') + print('{0}{1:08X} '.format(arg, dword), end='') if (instruction['varg']): offset, wLength = getVar(moduleData, offset, endian, False) substring = moduleData[offset:offset + wLength] - print('0x%04X ' % wLength, end='') + print('0x{0:04X} '.format(wLength), end='') if (mnemonic in ['LitStr', 'QuoteRem', 'Rem', 'Reparse']): - print('"%s"' % substring, end='') + print('"{0}"'.format(substring), end='') elif (mnemonic in ['OnGosub', 'OnGoto']): offset1 = offset vars = [] for i in range(wLength / 2): offset1, word = getVar(moduleData, offset1, endian, False) - vars.append(getID(word, identifiers, vbaVer)) - print('%s ' % (', '.join(v for v in vars)), end='') + vars.append(getID(word, identifiers, vbaVer, is64bit)) + print('{0} '.format(', '.join(v for v in vars)), end='') else: hexdump = ' '.join('{:02X}'.format(ord(c)) for c in substring) - print('%s' % hexdump, end='') + 
print('{0}'.format(hexdump), end='') offset += wLength if (wLength & 1): offset += 1 print('') -def pcodeDump(moduleData, vbaProjectData, dirData, identifiers, verbose, disasmOnly): +def pcodeDump(moduleData, vbaProjectData, dirData, identifiers, is64bit, verbose, disasmOnly): if (verbose and not disasmOnly): print(hexdump3(moduleData, length=16)) # Determine endinanness: PC (little-endian) or Mac (big-endian) @@ -821,7 +828,7 @@ def pcodeDump(moduleData, vbaProjectData, dirData, identifiers, verbose, disasmO try: version = getWord(vbaProjectData, 2, endian) if (verbose): - print('Office version: 0x%04X.' % version) + print('Office version: 0x{0:04X}.'.format(version)) # TODO - Office 2010 is 0x0097; Office 2013 is 0x00A3; check Office 2016 if (version >= 0x97): # VBA7 @@ -855,15 +862,15 @@ def pcodeDump(moduleData, vbaProjectData, dirData, identifiers, verbose, disasmO offset, lineLength = getVar(moduleData, offset, endian, False) offset += 2 offset, lineOffset = getVar(moduleData, offset, endian, True) - dumpLine(moduleData, pcodeStart + lineOffset, lineLength, endian, vbaVer, identifiers, verbose, line) + dumpLine(moduleData, pcodeStart + lineOffset, lineLength, endian, vbaVer, is64bit, identifiers, verbose, line) except Exception as e: - print('Error: %s.' 
% e, file=sys.stderr) + print('Error: {0}.'.format(e), file=sys.stderr) return def processFile(fileName, verbose, disasmOnly): # TODO: # - Handle VBA3 documents - print('Processing file: %s' % fileName) + print('Processing file: {0}'.format(fileName)) vbaParser = None try: vbaParser = VBA_Parser(fileName) @@ -874,8 +881,8 @@ def processFile(fileName, verbose, disasmOnly): for vbaRoot, projectPath, dirPath in vbaProjects: print('=' * 79) if (not disasmOnly): - print('dir stream: %s' % dirPath) - dirData, codeModules = processDir(vbaParser, dirPath, verbose, disasmOnly) + print('dir stream: {0}'.format(dirPath)) + dirData, codeModules, is64bit = processDir(vbaParser, dirPath, verbose, disasmOnly) vbaProjectPath = vbaRoot + 'VBA/_VBA_PROJECT' vbaProjectData = process_VBA_PROJECT(vbaParser, vbaProjectPath, verbose, disasmOnly) identifiers = getTheIdentifiers(vbaProjectData) @@ -884,7 +891,7 @@ def processFile(fileName, verbose, disasmOnly): print('') i = 0 for identifier in identifiers: - print('%04X: %s' % (i, identifier)) + print('{0:04X}: {1}'.format(i, identifier)) i += 1 print('') print('_VBA_PROJECT parsing done.') @@ -894,10 +901,10 @@ def processFile(fileName, verbose, disasmOnly): for module in codeModules: modulePath = vbaRoot + 'VBA/' + module moduleData = vbaParser.ole_file.openstream(modulePath).read() - print ('%s - %d bytes' % (modulePath, len(moduleData))) - pcodeDump(moduleData, vbaProjectData, dirData, identifiers, verbose, disasmOnly) + print ('{0} - {1:d} bytes'.format(modulePath, len(moduleData))) + pcodeDump(moduleData, vbaProjectData, dirData, identifiers, is64bit, verbose, disasmOnly) except Exception as e: - print('Error: %s.' 
% e, file=sys.stderr) + print('Error: {0}.'.format(e), file=sys.stderr) if (vbaParser): vbaParser.close() @@ -925,8 +932,8 @@ def processFile(fileName, verbose, disasmOnly): elif os.path.isfile(name): processFile(name, args.verbose, args.disasmonly) else: - print(name + ' does not exist.', file=sys.stderr) + print('{0} does not exist.'.format(name), file=sys.stderr) except Exception as e: - print('Error: %s.' % e, file=sys.stderr) + print('Error: {0}.'.format(e), file=sys.stderr) sys.exit(-1) sys.exit(0) From 7d3310b2ba90d078c0a2f17527caaef7c1d94560 Mon Sep 17 00:00:00 2001 From: Vesselin Bontchev Date: Sat, 8 Oct 2016 01:50:58 +0300 Subject: [PATCH 09/11] Changed the version number --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index e6caee6..9eaa0cb 100644 --- a/README.md +++ b/README.md @@ -79,4 +79,4 @@ For reference, it is the result of compiling the following VBA code: Version 1.00: Initial version. -Version 1.01: Storing the opcodes in a more efficient manner. Implemented VBA7 support. +Version 2.00: Storing the opcodes in a more efficient manner. Implemented VBA7 support. Implemented support for documents created by the 64-bit version of Office. 
From d1b9ed3eabbf30812d9943f3d05e02698860f593 Mon Sep 17 00:00:00 2001 From: Vesselin Bontchev Date: Sat, 8 Oct 2016 01:56:05 +0300 Subject: [PATCH 10/11] Fixed the arguments of some instructions --- pcodedmp.py | 31 +++++++++++++++++-------------- 1 file changed, 17 insertions(+), 14 deletions(-) diff --git a/pcodedmp.py b/pcodedmp.py index 4e8998f..e977628 100644 --- a/pcodedmp.py +++ b/pcodedmp.py @@ -326,7 +326,7 @@ def getTheIdentifiers(vbaProjectData): 28 : { 'mnem' : 'FnLenB', 'args' : [], 'varg' : False }, 29 : { 'mnem' : 'Paren', 'args' : [], 'varg' : False }, 30 : { 'mnem' : 'Sharp', 'args' : [], 'varg' : False }, - 31 : { 'mnem' : 'LdLHS', 'args' : [], 'varg' : False }, + 31 : { 'mnem' : 'LdLHS', 'args' : ['name'], 'varg' : False }, 32 : { 'mnem' : 'Ld', 'args' : ['name'], 'varg' : False }, 33 : { 'mnem' : 'MemLd', 'args' : ['name'], 'varg' : False }, 34 : { 'mnem' : 'DictLd', 'args' : ['name'], 'varg' : False }, @@ -337,14 +337,14 @@ def getTheIdentifiers(vbaProjectData): 39 : { 'mnem' : 'St', 'args' : ['name'], 'varg' : False }, 40 : { 'mnem' : 'MemSt', 'args' : ['name'], 'varg' : False }, 41 : { 'mnem' : 'DictSt', 'args' : ['name'], 'varg' : False }, - 42 : { 'mnem' : 'IndexSt', 'args' : ['name'], 'varg' : False }, + 42 : { 'mnem' : 'IndexSt', 'args' : ['0x'], 'varg' : False }, 43 : { 'mnem' : 'ArgsSt', 'args' : ['name', '0x'], 'varg' : False }, 44 : { 'mnem' : 'ArgsMemSt', 'args' : ['name', '0x'], 'varg' : False }, 45 : { 'mnem' : 'ArgsDictSt', 'args' : ['name', '0x'], 'varg' : False }, 46 : { 'mnem' : 'set', 'args' : ['name'], 'varg' : False }, 47 : { 'mnem' : 'Memset', 'args' : ['name'], 'varg' : False }, 48 : { 'mnem' : 'Dictset', 'args' : ['name'], 'varg' : False }, - 49 : { 'mnem' : 'Indexset', 'args' : ['name'], 'varg' : False }, + 49 : { 'mnem' : 'Indexset', 'args' : ['0x'], 'varg' : False }, 50 : { 'mnem' : 'ArgsSet', 'args' : ['name', '0x'], 'varg' : False }, 51 : { 'mnem' : 'ArgsMemSet', 'args' : ['name', '0x'], 'varg' : False }, 52 : { 
'mnem' : 'ArgsDictSet', 'args' : ['name', '0x'], 'varg' : False }, @@ -368,8 +368,8 @@ def getTheIdentifiers(vbaProjectData): 70 : { 'mnem' : 'BoS', 'args' : ['0x'], 'varg' : False }, 71 : { 'mnem' : 'BoSImplicit', 'args' : [], 'varg' : False }, 72 : { 'mnem' : 'BoL', 'args' : [], 'varg' : False }, - 73 : { 'mnem' : 'LdAddressOf', 'args' : [], 'varg' : False }, - 74 : { 'mnem' : 'MemAddressOf', 'args' : [], 'varg' : False }, + 73 : { 'mnem' : 'LdAddressOf', 'args' : ['name'], 'varg' : False }, + 74 : { 'mnem' : 'MemAddressOf', 'args' : ['name'], 'varg' : False }, 75 : { 'mnem' : 'Case', 'args' : [], 'varg' : False }, 76 : { 'mnem' : 'CaseTo', 'args' : [], 'varg' : False }, 77 : { 'mnem' : 'CaseGt', 'args' : [], 'varg' : False }, @@ -397,7 +397,7 @@ def getTheIdentifiers(vbaProjectData): 99 : { 'mnem' : 'Else', 'args' : [], 'varg' : False }, 100 : { 'mnem' : 'ElseBlock', 'args' : [], 'varg' : False }, 101 : { 'mnem' : 'ElseIfBlock', 'args' : [], 'varg' : False }, -102 : { 'mnem' : 'ElseIfTypeBlock', 'args' : [], 'varg' : False }, +102 : { 'mnem' : 'ElseIfTypeBlock', 'args' : ['imp_'], 'varg' : False }, 103 : { 'mnem' : 'End', 'args' : [], 'varg' : False }, 104 : { 'mnem' : 'EndContext', 'args' : [], 'varg' : False }, 105 : { 'mnem' : 'EndFunc', 'args' : [], 'varg' : False }, @@ -411,10 +411,10 @@ def getTheIdentifiers(vbaProjectData): 113 : { 'mnem' : 'EndWith', 'args' : [], 'varg' : False }, 114 : { 'mnem' : 'Erase', 'args' : ['0x'], 'varg' : False }, 115 : { 'mnem' : 'Error', 'args' : [], 'varg' : False }, -116 : { 'mnem' : 'EventDecl', 'args' : ['0x', '0x', '0x', '0x'], 'varg' : False }, -117 : { 'mnem' : 'RaiseEvent', 'args' : ['0x', '0x', '0x', '0x'], 'varg' : False }, -118 : { 'mnem' : 'ArgsMemRaiseEvent', 'args' : ['0x', '0x', '0x', '0x'], 'varg' : False }, -119 : { 'mnem' : 'ArgsMemRaiseEventWith', 'args' : ['0x', '0x', '0x', '0x'], 'varg' : False }, +116 : { 'mnem' : 'EventDecl', 'args' : ['func_'], 'varg' : False }, +117 : { 'mnem' : 'RaiseEvent', 'args' : 
['name', '0x'], 'varg' : False }, +118 : { 'mnem' : 'ArgsMemRaiseEvent', 'args' : ['name', '0x'], 'varg' : False }, +119 : { 'mnem' : 'ArgsMemRaiseEventWith', 'args' : ['name', '0x'], 'varg' : False }, 120 : { 'mnem' : 'ExitDo', 'args' : [], 'varg' : False }, 121 : { 'mnem' : 'ExitFor', 'args' : [], 'varg' : False }, 122 : { 'mnem' : 'ExitFunc', 'args' : [], 'varg' : False }, @@ -443,7 +443,7 @@ def getTheIdentifiers(vbaProjectData): 145 : { 'mnem' : 'FnUBound', 'args' : ['0x'], 'varg' : False }, 146 : { 'mnem' : 'For', 'args' : [], 'varg' : False }, 147 : { 'mnem' : 'ForEach', 'args' : [], 'varg' : False }, -148 : { 'mnem' : 'ForEachAs', 'args' : [], 'varg' : False }, +148 : { 'mnem' : 'ForEachAs', 'args' : ['imp_'], 'varg' : False }, 149 : { 'mnem' : 'ForStep', 'args' : [], 'varg' : False }, 150 : { 'mnem' : 'FuncDefn', 'args' : ['func_'], 'varg' : False }, 151 : { 'mnem' : 'FuncDefnSave', 'args' : ['func_'], 'varg' : False }, @@ -453,7 +453,7 @@ def getTheIdentifiers(vbaProjectData): 155 : { 'mnem' : 'If', 'args' : [], 'varg' : False }, 156 : { 'mnem' : 'IfBlock', 'args' : [], 'varg' : False }, 157 : { 'mnem' : 'TypeOf', 'args' : ['imp_'], 'varg' : False }, -158 : { 'mnem' : 'IfTypeBlock', 'args' : [], 'varg' : False }, +158 : { 'mnem' : 'IfTypeBlock', 'args' : ['imp_'], 'varg' : False }, 159 : { 'mnem' : 'Implements', 'args' : ['0x', '0x', '0x', '0x'], 'varg' : False }, 160 : { 'mnem' : 'Input', 'args' : [], 'varg' : False }, 161 : { 'mnem' : 'InputDone', 'args' : [], 'varg' : False }, @@ -533,10 +533,10 @@ def getTheIdentifiers(vbaProjectData): 235 : { 'mnem' : 'Scale', 'args' : ['0x'], 'varg' : False }, 236 : { 'mnem' : 'Seek', 'args' : [], 'varg' : False }, 237 : { 'mnem' : 'SelectCase', 'args' : [], 'varg' : False }, -238 : { 'mnem' : 'SelectIs', 'args' : [], 'varg' : False }, +238 : { 'mnem' : 'SelectIs', 'args' : ['imp_'], 'varg' : False }, 239 : { 'mnem' : 'SelectType', 'args' : [], 'varg' : False }, 240 : { 'mnem' : 'SetStmt', 'args' : [], 'varg' : 
False }, -241 : { 'mnem' : 'Stack', 'args' : [], 'varg' : False }, +241 : { 'mnem' : 'Stack', 'args' : ['0x', '0x'], 'varg' : False }, 242 : { 'mnem' : 'Stop', 'args' : [], 'varg' : False }, 243 : { 'mnem' : 'Type', 'args' : ['rec_'], 'varg' : False }, 244 : { 'mnem' : 'Unlock', 'args' : [], 'varg' : False }, @@ -793,6 +793,9 @@ def dumpLine(moduleData, lineStart, lineLength, endian, vbaVer, is64bit, identif elif (arg in ['func_', 'var_', 'rec_', 'type_', 'context_']): offset, dword = getVar(moduleData, offset, endian, True) print('{0}{1:08X} '.format(arg, dword), end='') + if (is64bit and (arg == 'context_')): + offset, dword = getVar(moduleData, offset, endian, True) + print('{0:08X} '.format(dword), end='') if (instruction['varg']): offset, wLength = getVar(moduleData, offset, endian, False) substring = moduleData[offset:offset + wLength] From adb65ab79c14bc82f0ce1d4e4a98fb859f1ecc9a Mon Sep 17 00:00:00 2001 From: Vesselin Bontchev Date: Sun, 9 Oct 2016 16:57:22 +0300 Subject: [PATCH 11/11] Changed the version number Made the version number compatible with the Major.Minor.Patch specification. --- README.md | 4 ++-- pcodedmp.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 9eaa0cb..7a80434 100644 --- a/README.md +++ b/README.md @@ -77,6 +77,6 @@ For reference, it is the result of compiling the following VBA code: ## Change log -Version 1.00: Initial version. +Version 1.0.0: Initial version. -Version 2.00: Storing the opcodes in a more efficient manner. Implemented VBA7 support. Implemented support for documents created by the 64-bit version of Office. +Version 1.1.0: Storing the opcodes in a more efficient manner. Implemented VBA7 support. Implemented support for documents created by the 64-bit version of Office. 
diff --git a/pcodedmp.py b/pcodedmp.py index e977628..ada03ee 100644 --- a/pcodedmp.py +++ b/pcodedmp.py @@ -10,7 +10,7 @@ __author__ = 'Vesselin Bontchev ' __license__ = 'GPL' -__VERSION__ = '2.00' +__VERSION__ = '1.1.0' def getWord(buffer, offset, endian): return unpack_from(endian + 'H', buffer, offset)[0]