From f9602ace66d1f7d32479a653fb55c2f7412fb528 Mon Sep 17 00:00:00 2001 From: Ninjananas Date: Fri, 28 Jun 2019 14:05:37 +0200 Subject: [PATCH 1/6] Fixed flen abstract method definition --- miasm/core/cpu.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/miasm/core/cpu.py b/miasm/core/cpu.py index 425f3aff2..45d3a2575 100644 --- a/miasm/core/cpu.py +++ b/miasm/core/cpu.py @@ -514,7 +514,7 @@ def check_fbits(self, v): return v & self.fmask == self.fbits @classmethod - def flen(cls, v): + def flen(cls, mode, v): raise NotImplementedError('not fully functional') From f2573f50a2eecc7256956cc59d4114b92437e741 Mon Sep 17 00:00:00 2001 From: Ninjananas Date: Wed, 3 Jul 2019 09:06:13 +0200 Subject: [PATCH 2/6] added bs argument to flen methods --- miasm/arch/msp430/arch.py | 4 ++-- miasm/arch/x86/arch.py | 20 ++++++++++---------- miasm/core/cpu.py | 6 +++--- 3 files changed, 15 insertions(+), 15 deletions(-) diff --git a/miasm/arch/msp430/arch.py b/miasm/arch/msp430/arch.py index 65dd435e8..a1b5474da 100644 --- a/miasm/arch/msp430/arch.py +++ b/miasm/arch/msp430/arch.py @@ -464,7 +464,7 @@ def encode(self): class bs_cond_off_s(bs_cond): @classmethod - def flen(cls, mode, v): + def flen(cls, mode, v, bs): if v['a_s'] == 0b00: return None elif v['a_s'] == 0b01: @@ -501,7 +501,7 @@ def decode(self, v): class bs_cond_off_d(bs_cond_off_s): @classmethod - def flen(cls, mode, v): + def flen(cls, mode, v, bs): if v['a_d'] == 0: return None elif v['a_d'] == 1: diff --git a/miasm/arch/x86/arch.py b/miasm/arch/x86/arch.py index 3053301ad..1e47ce380 100644 --- a/miasm/arch/x86/arch.py +++ b/miasm/arch/x86/arch.py @@ -2671,7 +2671,7 @@ class bs_cond_scale(bs_cond): ll = 2 @classmethod - def flen(cls, mode, v): + def flen(cls, mode, v, bs): return sib_cond(cls, mode, v) def encode(self): @@ -2690,7 +2690,7 @@ class bs_cond_index(bs_cond_scale): ll = 3 @classmethod - def flen(cls, mode, v): + def flen(cls, mode, v, bs): return sib_cond(cls, mode, v) @@ -2698,7 +2698,7 
@@ class bs_cond_disp(bs_cond): # cond must return field len @classmethod - def flen(cls, mode, v): + def flen(cls, mode, v, bs): if admode_prefix((mode, v['opmode'], v['admode'])) == 16: if v['mod'] == 0b00: if v['rm'] == 0b110: @@ -2780,7 +2780,7 @@ def fromstring(self, text, loc_db, parser_result=None): return start, stop @classmethod - def flen(cls, mode, v): + def flen(cls, mode, v, bs): if 'w8' not in v or v['w8'] == 1: if 'se' in v and v['se'] == 1: return 8 @@ -2868,7 +2868,7 @@ def getmaxlen(self): return 64 @classmethod - def flen(cls, mode, v): + def flen(cls, mode, v, bs): if 'w8' not in v or v['w8'] == 1: if 'se' in v and v['se'] == 1: return 8 @@ -2899,7 +2899,7 @@ def fromstring(self, text, loc_db, parser_result=None): return start, stop @classmethod - def flen(cls, mode, v): + def flen(cls, mode, v, bs): osize = v_opmode_info(mode, v['opmode'], v['rex_w'], 0) if osize == 16: return 16 @@ -2941,7 +2941,7 @@ class bs_s08(bs_rel_off): parser = base_expr @classmethod - def flen(cls, mode, v): + def flen(cls, mode, v, bs): return 8 def encode(self): @@ -2974,14 +2974,14 @@ def decode(self, v): class bs_rel_off08(bs_rel_off): @classmethod - def flen(cls, mode, v): + def flen(cls, mode, v, bs): return 8 class bs_moff(bsi): @classmethod - def flen(cls, mode, v): + def flen(cls, mode, v, bs): osize = v_opmode_info(mode, v['opmode'], v['rex_w'], 0) if osize == 16: return 16 @@ -3046,7 +3046,7 @@ def fromstring(self, text, loc_db, parser_result=None): return start, stop @classmethod - def flen(cls, mode, v): + def flen(cls, mode, v, bs): if mode == 64: if v['admode']: return 32 diff --git a/miasm/core/cpu.py b/miasm/core/cpu.py index 45d3a2575..d4ea870be 100644 --- a/miasm/core/cpu.py +++ b/miasm/core/cpu.py @@ -514,7 +514,7 @@ def check_fbits(self, v): return v & self.fmask == self.fbits @classmethod - def flen(cls, mode, v): + def flen(cls, mode, v, bs): raise NotImplementedError('not fully functional') @@ -1085,7 +1085,7 @@ def guess_mnemo(cls, bs, attrib, 
pre_dis_info, offset): (l, fmask, fbits, fname, flen), vals = branch if flen is not None: - l = flen(attrib, fname_values) + l = flen(attrib, fname_values, bs) if l is not None: try: v = cls.getbits(bs, attrib, offset_b, l) @@ -1214,7 +1214,7 @@ def dis(cls, bs_o, mode_o = None, offset=0): total_l = 0 for i, f in enumerate(c.fields_order): if f.flen is not None: - l = f.flen(mode, fname_values) + l = f.flen(mode, fname_values, bs) else: l = f.l if l is not None: From f9e2857a0b019711364f763f166dc788c8df4271 Mon Sep 17 00:00:00 2001 From: Ninjananas Date: Thu, 4 Jul 2019 09:27:06 +0200 Subject: [PATCH 3/6] flen is called in try/except statements --- miasm/core/cpu.py | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/miasm/core/cpu.py b/miasm/core/cpu.py index d4ea870be..310098d6a 100644 --- a/miasm/core/cpu.py +++ b/miasm/core/cpu.py @@ -460,8 +460,10 @@ def __init__(self, strbits=None, l=None, cls=None, # gen conditional field if cls: for b in cls: - if 'flen' in b.__dict__: + try: flen = getattr(b, 'flen') + except AttributeError: + pass self.strbits = strbits self.l = l @@ -1085,7 +1087,10 @@ def guess_mnemo(cls, bs, attrib, pre_dis_info, offset): (l, fmask, fbits, fname, flen), vals = branch if flen is not None: - l = flen(attrib, fname_values, bs) + try: + l = flen(attrib, fname_values, bs) + except NotImplementedError: + pass if l is not None: try: v = cls.getbits(bs, attrib, offset_b, l) @@ -1214,7 +1219,10 @@ def dis(cls, bs_o, mode_o = None, offset=0): total_l = 0 for i, f in enumerate(c.fields_order): if f.flen is not None: - l = f.flen(mode, fname_values, bs) + try: + l = f.flen(mode, fname_values, bs) + except NotImplementedError: + pass else: l = f.l if l is not None: From 131cb9af31622f2dd42db9cc15808dd48406a4d3 Mon Sep 17 00:00:00 2001 From: Ninjananas Date: Thu, 4 Jul 2019 09:38:25 +0200 Subject: [PATCH 4/6] added 'offset_b' argument to flen signature --- miasm/arch/msp430/arch.py | 4 ++-- miasm/arch/x86/arch.py | 20 
++++++++++---------- miasm/core/cpu.py | 6 +++--- 3 files changed, 15 insertions(+), 15 deletions(-) diff --git a/miasm/arch/msp430/arch.py b/miasm/arch/msp430/arch.py index a1b5474da..0c50820d2 100644 --- a/miasm/arch/msp430/arch.py +++ b/miasm/arch/msp430/arch.py @@ -464,7 +464,7 @@ def encode(self): class bs_cond_off_s(bs_cond): @classmethod - def flen(cls, mode, v, bs): + def flen(cls, mode, v, bs, offset_b): if v['a_s'] == 0b00: return None elif v['a_s'] == 0b01: @@ -501,7 +501,7 @@ def decode(self, v): class bs_cond_off_d(bs_cond_off_s): @classmethod - def flen(cls, mode, v, bs): + def flen(cls, mode, v, bs, offset_b): if v['a_d'] == 0: return None elif v['a_d'] == 1: diff --git a/miasm/arch/x86/arch.py b/miasm/arch/x86/arch.py index 1e47ce380..86ed4b981 100644 --- a/miasm/arch/x86/arch.py +++ b/miasm/arch/x86/arch.py @@ -2671,7 +2671,7 @@ class bs_cond_scale(bs_cond): ll = 2 @classmethod - def flen(cls, mode, v, bs): + def flen(cls, mode, v, bs, offset_b): return sib_cond(cls, mode, v) def encode(self): @@ -2690,7 +2690,7 @@ class bs_cond_index(bs_cond_scale): ll = 3 @classmethod - def flen(cls, mode, v, bs): + def flen(cls, mode, v, bs, offset_b): return sib_cond(cls, mode, v) @@ -2698,7 +2698,7 @@ class bs_cond_disp(bs_cond): # cond must return field len @classmethod - def flen(cls, mode, v, bs): + def flen(cls, mode, v, bs, offset_b): if admode_prefix((mode, v['opmode'], v['admode'])) == 16: if v['mod'] == 0b00: if v['rm'] == 0b110: @@ -2780,7 +2780,7 @@ def fromstring(self, text, loc_db, parser_result=None): return start, stop @classmethod - def flen(cls, mode, v, bs): + def flen(cls, mode, v, bs, offset_b): if 'w8' not in v or v['w8'] == 1: if 'se' in v and v['se'] == 1: return 8 @@ -2868,7 +2868,7 @@ def getmaxlen(self): return 64 @classmethod - def flen(cls, mode, v, bs): + def flen(cls, mode, v, bs, offset_b): if 'w8' not in v or v['w8'] == 1: if 'se' in v and v['se'] == 1: return 8 @@ -2899,7 +2899,7 @@ def fromstring(self, text, loc_db, 
parser_result=None): return start, stop @classmethod - def flen(cls, mode, v, bs): + def flen(cls, mode, v, bs, offset_b): osize = v_opmode_info(mode, v['opmode'], v['rex_w'], 0) if osize == 16: return 16 @@ -2941,7 +2941,7 @@ class bs_s08(bs_rel_off): parser = base_expr @classmethod - def flen(cls, mode, v, bs): + def flen(cls, mode, v, bs, offset_b): return 8 def encode(self): @@ -2974,14 +2974,14 @@ def decode(self, v): class bs_rel_off08(bs_rel_off): @classmethod - def flen(cls, mode, v, bs): + def flen(cls, mode, v, bs, offset_b): return 8 class bs_moff(bsi): @classmethod - def flen(cls, mode, v, bs): + def flen(cls, mode, v, bs, offset_b): osize = v_opmode_info(mode, v['opmode'], v['rex_w'], 0) if osize == 16: return 16 @@ -3046,7 +3046,7 @@ def fromstring(self, text, loc_db, parser_result=None): return start, stop @classmethod - def flen(cls, mode, v, bs): + def flen(cls, mode, v, bs, offset_b): if mode == 64: if v['admode']: return 32 diff --git a/miasm/core/cpu.py b/miasm/core/cpu.py index 310098d6a..ea1a61702 100644 --- a/miasm/core/cpu.py +++ b/miasm/core/cpu.py @@ -516,7 +516,7 @@ def check_fbits(self, v): return v & self.fmask == self.fbits @classmethod - def flen(cls, mode, v, bs): + def flen(cls, mode, v, bs, offset_b): raise NotImplementedError('not fully functional') @@ -1088,7 +1088,7 @@ def guess_mnemo(cls, bs, attrib, pre_dis_info, offset): if flen is not None: try: - l = flen(attrib, fname_values, bs) + l = flen(attrib, fname_values, bs, offset_b) except NotImplementedError: pass if l is not None: @@ -1220,7 +1220,7 @@ def dis(cls, bs_o, mode_o = None, offset=0): for i, f in enumerate(c.fields_order): if f.flen is not None: try: - l = f.flen(mode, fname_values, bs) + l = f.flen(mode, fname_values, bs, offset_b) except NotImplementedError: pass else: From 11030288a439bb8d68d6c158c8c729c054b39e8f Mon Sep 17 00:00:00 2001 From: Ninjananas Date: Mon, 8 Jul 2019 08:46:51 +0200 Subject: [PATCH 5/6] handle IOErrors when calling flen --- 
miasm/core/cpu.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/miasm/core/cpu.py b/miasm/core/cpu.py index ea1a61702..3201bc177 100644 --- a/miasm/core/cpu.py +++ b/miasm/core/cpu.py @@ -1091,6 +1091,8 @@ def guess_mnemo(cls, bs, attrib, pre_dis_info, offset): l = flen(attrib, fname_values, bs, offset_b) except NotImplementedError: pass + except IOError: + continue if l is not None: try: v = cls.getbits(bs, attrib, offset_b, l) @@ -1223,6 +1225,9 @@ def dis(cls, bs_o, mode_o = None, offset=0): l = f.flen(mode, fname_values, bs, offset_b) except NotImplementedError: pass + except IOError: + bs_o.leave_atomic_mode() + raise else: l = f.l if l is not None: From 48b67c68358125adb1bcea6117bd324535027b09 Mon Sep 17 00:00:00 2001 From: Ninjananas Date: Thu, 28 Mar 2019 12:59:26 +0100 Subject: [PATCH 6/6] WebAssembly implemetation in Miasm --- .gitignore | 4 +- miasm/analysis/binary.py | 48 + miasm/analysis/machine.py | 13 +- miasm/arch/wasm/__init__.py | 0 miasm/arch/wasm/arch.py | 760 +++++++++ miasm/arch/wasm/disasm.py | 385 +++++ miasm/arch/wasm/regs.py | 24 + miasm/arch/wasm/sem.py | 455 ++++++ miasm/core/bin_stream.py | 3 + miasm/expression/simplifications_common.py | 8 + miasm/loader/wasm.py | 94 ++ miasm/loader/wasm_init.py | 1682 ++++++++++++++++++++ miasm/loader/wasm_utils.py | 57 + setup.py | 1 + test/arch/wasm/arch.py | 76 + test/arch/wasm/sem.py | 140 ++ 16 files changed, 3748 insertions(+), 2 deletions(-) create mode 100644 miasm/arch/wasm/__init__.py create mode 100644 miasm/arch/wasm/arch.py create mode 100644 miasm/arch/wasm/disasm.py create mode 100644 miasm/arch/wasm/regs.py create mode 100644 miasm/arch/wasm/sem.py create mode 100644 miasm/loader/wasm.py create mode 100644 miasm/loader/wasm_init.py create mode 100644 miasm/loader/wasm_utils.py create mode 100644 test/arch/wasm/arch.py create mode 100755 test/arch/wasm/sem.py diff --git a/.gitignore b/.gitignore index f3c186eb6..f294a0823 100644 --- a/.gitignore +++ b/.gitignore @@ -3,4 +3,6 @@ 
class ContainerWasm(Container):
    "Container abstraction for WebAssembly (wasm) modules"

    def parse(self, data, vm=None, addr=0, apply_reloc=False, **kwargs):
        """Load a wasm module from @data
        @data: bytes containing the wasm module
        @vm, @addr, @apply_reloc: accepted for interface compatibility with
        the other containers; they are not used here

        Raises ContainerSignatureException if @data does not start with the
        wasm magic, and ContainerParsingException if the module cannot be
        parsed.
        """
        from miasm.loader import wasm_init

        # Parse signature: reject anything that is not a wasm module so that
        # the container dispatch can try the next registered container
        if not data.startswith(b'\x00asm'):
            raise ContainerSignatureException()

        # Build executable instance
        try:
            self._executable = wasm_init.Wasm(data)
        except Exception as error:
            raise ContainerParsingException('Cannot read Wasm: %s' % error)

        # Set the architecture
        self._arch = 'wasm'

        # Set the entry point. This is deliberately best effort: a module
        # without a usable entry is still a valid container.
        try:
            self._entry_point = self._executable.content.entry
        except Exception:
            self._entry_point = None
Container.register_container(ContainerPE) Container.register_container(ContainerELF) +Container.register_container(ContainerWasm) Container.register_fallback(ContainerUnknown) diff --git a/miasm/analysis/machine.py b/miasm/analysis/machine.py index ba076d8c4..201ce6cac 100644 --- a/miasm/analysis/machine.py +++ b/miasm/analysis/machine.py @@ -12,7 +12,7 @@ class Machine(object): __available = ["arml", "armb", "armtl", "armtb", "sh4", "x86_16", "x86_32", "x86_64", "msp430", "mips32b", "mips32l", - "aarch64l", "aarch64b", "ppc32b", "mepl", "mepb"] + "aarch64l", "aarch64b", "ppc32b", "mepl", "mepb", "wasm"] def __init__(self, machine_name): @@ -200,6 +200,17 @@ def __init__(self, machine_name): mn = arch.mn_mep from miasm.arch.mep.ira import ir_a_mepl as ira from miasm.arch.mep.sem import ir_mepl as ir + elif machine_name == "wasm": + from miasm.arch.wasm.disasm import dis_wasm as dis_engine + from miasm.arch.wasm import arch + # try: + # from miasm.arch.arm import jit + # jitter = jit.jitter_arml + # except ImportError: + # pass + mn = arch.mn_wasm + #from miasm.arch.wasm.ira import ir_a_wasm as ira + from miasm.arch.wasm.sem import ir_wasm as ir else: raise ValueError('Unknown machine: %s' % machine_name) diff --git a/miasm/arch/wasm/__init__.py b/miasm/arch/wasm/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/miasm/arch/wasm/arch.py b/miasm/arch/wasm/arch.py new file mode 100644 index 000000000..7227c8c53 --- /dev/null +++ b/miasm/arch/wasm/arch.py @@ -0,0 +1,760 @@ +#-*- coding:utf-8 -*- + +from builtins import range + +import logging +from pyparsing import * +from collections import defaultdict +from builtins import range +import struct +from math import ceil + +from miasm.expression.expression import * +from miasm.core.cpu import * +from miasm.core.bin_stream import bin_stream +import miasm.arch.wasm.regs as regs_module +from miasm.arch.wasm.regs import * +from miasm.core.asm_ast import AstInt, AstId, AstMem, AstOp +from 
class instruction_wasm(instruction):
    """Wasm instruction: textual rendering and control-flow classification.

    Several hooks of the generic instruction interface are not implemented
    yet for wasm; they raise NotImplementedError explicitly.
    """
    __slots__ = []
    delayslot = 0

    # Mnemonics whose immediates are an (offset, align) memarg pair
    _MEMARG_NAMES = frozenset([
        'i32.load',
        'i64.load',
        'f32.load',
        'f64.load',
        'i32.load8_s',
        'i32.load8_u',
        'i32.load16_s',
        'i32.load16_u',
        'i64.load8_s',
        'i64.load8_u',
        'i64.load16_s',
        'i64.load16_u',
        'i64.load32_s',
        'i64.load32_u',
        'i32.store',
        'i64.store',
        'f32.store',
        'f64.store',
        'i32.store8',
        'i32.store16',
        'i64.store8',
        'i64.store16',
        'i64.store32',
    ])

    @property
    def has_memarg(self):
        """True if the instruction carries an (offset, align) memarg.

        Uses the first opcode byte when the raw bytes are available and
        falls back on the mnemonic otherwise.
        """
        try:
            first = self.b[0]
        except TypeError:
            # No raw bytes attached to the instruction
            return self.name in self._MEMARG_NAMES
        if not isinstance(first, int):
            # Python 2: indexing a byte string yields a 1-char string
            first = struct.unpack('B', first)[0]
        return 0x27 < first < 0x3F

    def to_string(self, loc_db=None):
        """Return the textual form of the instruction"""
        o = "%-10s " % self.name
        args = []
        for i, arg in enumerate(self.args):
            if isinstance(arg, int):
                return o
            if not isinstance(arg, m2_expr.Expr):
                raise ValueError('zarb arg type')
            x = self.arg2str(arg, i, loc_db)
            args.append(x)
        if self.has_memarg:
            o += self.gen_memarg(args)
        else:
            o += self.gen_args(args)
        return o

    def gen_args(self, args):
        """Join already stringified arguments with spaces"""
        return ' '.join([str(x) for x in args])

    def gen_memarg(self, args):
        """Render the two memarg arguments as 'offset=.. align=..'"""
        assert len(args) == 2
        return 'offset={} align={}'.format(str(args[0]), str(args[1]))

    @staticmethod
    def arg2str(expr, index=None, loc_db=None):
        """Return the textual form of the argument @expr"""
        if isinstance(expr, ExprInt):  # Only valid for standard integers
            o = str(expr)
        elif isinstance(expr, ExprId):
            # valtype in structure's return
            if expr.name in ['i32', 'i64', 'f32', 'f64']:
                o = "(result {})".format(expr.name)
            elif expr.name.startswith('$'):  # structure label
                o = expr.name
            else:
                raise NotImplementedError(
                    'unsupported identifier argument: %r' % expr
                )
        elif isinstance(expr, ExprLoc):
            o, = loc_db.get_location_names(expr.loc_key)
        else:
            raise NotImplementedError('unsupported argument: %r' % expr)
        return o

    @property
    def is_structure(self):
        """True for the structured control (pseudo-)instructions"""
        return self.name in ['loop', 'block', 'end', 'if', 'else']

    def dstflow(self):
        return self.name in ['br', 'br_if', 'br_table', 'return']

    def dstflow2label(self, loc_db):
        # The statements that used to follow the placeholder here were
        # unreachable; no label resolution is implemented in this class.
        raise NotImplementedError('dstflow2label is not implemented for wasm')

    def breakflow(self):
        return self.name in ['br', 'br_if', 'br_table', 'if', 'else', 'call', 'return']  # call_indirect ?

    def splitflow(self):
        return self.name in ['br_if', 'if', 'call']  # call_indirect ?

    def setdstflow(self, a):
        raise NotImplementedError('setdstflow is not implemented for wasm')

    def is_subcall(self):
        return self.name in ['call']  # call_indirect ?

    def getdstflow(self, loc_db):
        """Return the raw destination argument(s) of a branch or call"""
        if self.name in ['br', 'br_if']:
            return self.args[0]  # br idx
        if self.name in ['br_table']:
            return self.args  # all br indexes
        if self.name in ['call']:  # call_indirect ?
            return self.args[0]  # func idx
        raise NotImplementedError(
            'no destination flow for instruction %r' % self.name
        )

    def get_symbol_size(self, symbol, loc_db):
        raise NotImplementedError('get_symbol_size is not implemented for wasm')

    def fixDstOffset(self):
        e = self.args[1]
        if not isinstance(e, ExprInt):
            log.debug('dyn dst %r', e)
            return
        off = int(e)
        if off % 2:
            raise ValueError('strange offset! %r' % off)
        self.args[1] = ExprInt(off, 16)

    def get_info(self, c):
        pass

    def __str__(self):
        o = super(instruction_wasm, self).__str__()
        return o

    def get_args_expr(self):
        """Return a new list holding the instruction arguments"""
        return list(self.args)
def mask_all(x):
    '''
    Returns a mask with the @x low bits set
    '''
    return (1 << x) - 1


def mask_msb(x):
    '''
    Returns a mask selecting the most significant bit of an @x-bit integer
    '''
    return 1 << (x - 1)


def sxt(i, cur_l, dst_l):
    '''
    Sign extends the integer @i (encoded on @cur_l bits)
    to an int of @dst_l bits
    '''
    if cur_l < dst_l and i & mask_msb(cur_l) != 0:
        i |= mask_all(dst_l) ^ mask_all(cur_l)
    return i


def sct(i, cur_l):
    '''
    "Sign contracts" the @cur_l-bits integer @i as much as possible:
    returns the shortest list of 7-bit integers (least significant group
    first) forming the signed LEB128 encoding of @i. The continuation bit
    (0x80) is NOT set in the returned groups.
    '''
    # Interpret @i as a two's complement @cur_l-bit value
    value = i & mask_all(cur_l)
    if value & mask_msb(cur_l):
        value -= 1 << cur_l

    groups = []
    while True:
        group = value & 0x7f
        value >>= 7  # arithmetic shift: keeps the sign of @value
        groups.append(group)
        # Stop once the remaining bits are pure sign extension AND the sign
        # bit (0x40) of the last emitted group already carries that sign
        if value == 0 and not group & 0x40:
            return groups
        if value == -1 and group & 0x40:
            return groups


def vtobl(v, n_bytes):
    '''
    "v to byte_list": convert the v arg of decode method
    to a list of @n_bytes bytes, most significant byte first
    '''
    return [(v >> (8 * shift)) & 0xff for shift in range(n_bytes - 1, -1, -1)]


def decode_LEB128(bl):
    '''
    bl is the result returned by vtobl
    Returns a tuple (value, number of bytes consumed); the value is the raw
    concatenation of the 7-bit groups (no sign extension is applied).
    Raises ValueError if @bl ends before the last group is reached.
    '''
    res = 0
    for i, byt in enumerate(bl):
        # get value of the 7-bit sub-integer
        # and add it correctly to the result
        res += (byt & 0x7f) << (7 * i)
        # test if it was the last one
        if byt & 0x80 == 0:
            return res, i + 1
    raise ValueError("Malformed integer")
class imm_arg_LEB128(imm_noarg, wasm_arg):
    '''
    This argument is a LEB128-encoded integer
    Make classes inherit from this one and add
    a '_int_size' attribute with the size of the
    integer (in bits)
    '''
    parser = base_expr

    @classmethod
    def flen(cls, mode, v, bs, offset_b):
        '''
        Returns the length in bits of the LEB128 integer found at bit
        offset @offset_b of @bs, or None if it is longer than allowed for
        a '_int_size'-bit integer. The read pointer of @bs is always
        restored, even if reading fails.
        '''
        ofs = bs.offset
        # do not parse some bytes, start at the right spot
        assert(offset_b % 8 == 0)
        bs.setoffset(offset_b // 8)
        try:
            max_l = ceil(cls._int_size / 7.)
            return get_LEB128_len(bs, max_l)
        finally:
            bs.setoffset(ofs)

    def encode(self):
        if not isinstance(self.expr, ExprInt):
            return False

        # Value to encode in LEB_128
        LEB128_bytes = sct(int(self.expr), self._int_size)

        # Every group but the last one carries the continuation bit
        value = 0
        for b in LEB128_bytes[:-1]:
            value = (value | 0x80 | b) << 8
        self.value = value | LEB128_bytes[-1]
        self.l = len(LEB128_bytes) * 8
        return True

    def decode(self, v):
        n_bytes = self.l // 8
        bl = vtobl(v, n_bytes)
        val, n = decode_LEB128(bl)
        if n != n_bytes:
            # The integer ends before the end of the field
            return False
        val = sxt(val, n * 7, self._int_size) & mask_all(self._int_size)
        self.expr = ExprInt(val, self._int_size)
        return True


class imm_arg_i32(imm_arg_LEB128):
    _int_size = 32


class imm_arg_i64(imm_arg_LEB128):
    _int_size = 64


class imm_arg_offset(imm_arg_i32):
    parser = offset_parser


class imm_arg_align_1(imm_arg_i32):
    parser = align_parser(1)


class imm_arg_align_2(imm_arg_i32):
    parser = align_parser(2)


class imm_arg_align_4(imm_arg_i32):
    parser = align_parser(4)


class imm_arg_align_8(imm_arg_i32):
    parser = align_parser(8)


class arg_br_table(wasm_arg):
    '''
    Argument of 'br_table': a LEB128 vector length followed by that many
    LEB128 entries plus one extra (default) entry
    '''

    @classmethod
    def flen(cls, mode, v, bs, offset_b):
        '''
        Returns the total length in bits of the vector found at bit offset
        @offset_b of @bs, or None if one of its integers is longer than 5
        bytes. The read pointer of @bs is always restored.
        '''
        ofs = bs.offset

        assert(offset_b % 8 == 0)
        bs.setoffset(offset_b // 8)
        try:
            # Find the length of the head integer and decode it
            len_head = None
            n_dest = 0
            for i in range(5):
                byte = ord(bs.readbs())
                n_dest += (byte & 0x7f) << (7 * i)
                if byte & 0x80 == 0:
                    len_head = (i + 1) * 8
                    break

            if len_head is None:
                return None

            total_length = len_head
            # n_dest entries + the default one
            for _ in range(n_dest + 1):
                entry_length = get_LEB128_len(bs, 5)
                if entry_length is None:
                    return None
                total_length += entry_length
            return total_length
        finally:
            bs.setoffset(ofs)

    def decode(self, v):
        n_bytes = self.l // 8
        bl = vtobl(v, n_bytes)
        args = []
        n_parsed = 0
        while n_parsed < n_bytes:
            val, n = decode_LEB128(bl[n_parsed:])
            # NOTE(review): entries are sign extended like the other i32
            # immediates of this file - confirm this is intended for indexes
            val = sxt(val, n * 7, 32) & mask_all(32)
            arg = imm_arg_i32()
            arg.expr = ExprInt(val, 32)
            args.append(arg)
            n_parsed += n
        # remove vec length
        self.parent.args = args[1:]
        return True

    def encode(self):
        if not self.parent.args:
            # At least the default entry is required
            return False

        self.value = 0
        self.l = 0

        # Encode number of args (minus default) + args
        for i in [len(self.parent.args) - 1] + [int(arg.expr) for arg in self.parent.args]:
            # make room for the upcoming arg
            self.value <<= 8
            LEB128_bytes = sct(i, 32)
            for b in LEB128_bytes[:-1]:
                self.value += 0x80
                self.value += b
                self.value <<= 8
            self.value += LEB128_bytes[-1]
            self.l += len(LEB128_bytes) * 8
        return True


# (binary value, name) of the value types
VALTYPES = [
    (0x7F, 'i32'),
    (0x7E, 'i64'),
    (0x7D, 'f32'),
    (0x7C, 'f64'),
]


class imm_f32(wasm_arg):
    '''
    Placeholder for f32 immediates: neither decoding nor encoding is
    implemented, both report a failure
    '''
    parser = base_expr

    def decode(self, v):
        return False

    def encode(self):
        return False


class block_result_no_empty(imm_noarg):
    '''
    Result type of a structured instruction that returns a value
    '''
    parser = blocktype_val

    def decode(self, v):
        for val, name in VALTYPES:
            if val == v:
                self.expr = ExprId(name, 8)
                return True
        return False

    def encode(self):
        if not self.expr.is_id():
            return False
        for val, name in VALTYPES:
            if name == self.expr.name:
                self.value = val
                return True
        # Not a known value type
        return False
+ 'i64.eq' : 0x51, + 'i64.ne' : 0x52, + 'i64.lt_s' : 0x53, + 'i64.lt_u' : 0x54, + 'i64.gt_s' : 0x55, + 'i64.gt_u' : 0x56, + 'i64.le_s' : 0x57, + 'i64.le_u' : 0x58, + 'i64.ge_s' : 0x59, + 'i64.ge_u' : 0x5A, + 'f32.eq' : 0x5B, + 'f32.ne' : 0x5C, + 'f32.lt' : 0x5D, + 'f32.gt' : 0x5E, + 'f32.le' : 0x5F, + 'f32.ge' : 0x60, + 'f64.eq' : 0x61, + 'f64.ne' : 0x62, + 'f64.lt' : 0x63, + 'f64.gt' : 0x64, + 'f64.le' : 0x65, + 'f64.ge' : 0x66, + 'i32.clz' : 0x67, + 'i32.ctz' : 0x68, + 'i32.popcnt' : 0x69, + 'i32.add' : 0x6A, + 'i32.sub' : 0x6B, + 'i32.mul' : 0x6C, + 'i32.div_s' : 0x6D, + 'i32.div_u' : 0x6E, + 'i32.rem_s' : 0x6F, + 'i32.rem_u' : 0x70, + 'i32.and' : 0x71, + 'i32.or' : 0x72, + 'i32.xor' : 0x73, + 'i32.shl' : 0x74, + 'i32.shr_s' : 0x75, + 'i32.shr_u' : 0x76, + 'i32.rotl' : 0x77, + 'i32.rotr' : 0x78, + 'i64.clz' : 0x79, + 'i64.ctz' : 0x7A, + 'i64.popcnt' : 0x7B, + 'i64.add' : 0x7C, + 'i64.sub' : 0x7D, + 'i64.mul' : 0x7E, + 'i64.div_s' : 0x7F, + 'i64.div_u' : 0x80, + 'i64.rem_s' : 0x81, + 'i64.rem_u' : 0x82, + 'i64.and' : 0x83, + 'i64.or' : 0x84, + 'i64.xor' : 0x85, + 'i64.shl' : 0x86, + 'i64.shr_s' : 0x87, + 'i64.shr_u' : 0x88, + 'i64.rotl' : 0x89, + 'i64.rotr' : 0x8A, + 'f32.abs' : 0x8B, + 'f32.neg' : 0x8C, + 'f32.ceil' : 0x8D, + 'f32.floor' : 0x8E, + 'f32.trunc' : 0x8F, + 'f32.nearest' : 0x90, + 'f32.sqrt' : 0x91, + 'f32.add' : 0x92, + 'f32.sub' : 0x93, + 'f32.mul' : 0x94, + 'f32.div' : 0x95, + 'f32.min' : 0x96, + 'f32.max' : 0x97, + 'f32.copysign' : 0x98, + 'f64.abs' : 0x99, + 'f64.neg' : 0x9A, + 'f64.ceil' : 0x9B, + 'f64.floor' : 0x9C, + 'f64.trunc' : 0x9D, + 'f64.nearest' : 0x9E, + 'f64.sqrt' : 0x9F, + 'f64.add' : 0xA0, + 'f64.sub' : 0xA1, + 'f64.mul' : 0xA2, + 'f64.div' : 0xA3, + 'f64.min' : 0xA4, + 'f64.max' : 0xA5, + 'f64.copysign' : 0xA6, + 'i32.wrap_i64' : 0xA7, + 'i32.trunc_f32_s' : 0xA8, + 'i32.trunc_f32_u' : 0xA9, + 'i32.trunc_f64_s' : 0xAA, + 'i32.trunc_f64_u' : 0xAB, + 'i64.extend_i32_s' : 0xAC, + 'i64.extend_i32_u' : 0xAD, + 'i64.trunc_f32_s' : 0xAE, 
+ 'i64.trunc_f32_u' : 0xAF, + 'i64.trunc_f64_s' : 0xB0, + 'i64.trunc_f64_u' : 0xB1, + 'f32.convert_i32_s' : 0xB2, + 'f32.convert_i32_u' : 0xB3, + 'f32.convert_i64_s' : 0xB4, + 'f32.convert_i64_u' : 0xB5, + 'f32.demote_f64' : 0xB6, + 'f64.convert_i32_s' : 0xB7, + 'f64.convert_i32_u' : 0xB8, + 'f64.convert_i64_s' : 0xB9, + 'f64.convert_i64_u' : 0xBA, + 'f64.promote_f32' : 0xBB, + 'i32.reinterpret_f32' : 0xBC, + 'i64.reinterpret_f64' : 0xBD, + 'f32.reinterpret_i32' : 0xBE, + 'f64.reinterpret_i64' : 0xBF, +}) + +addop('single_byte', [single_byte_name]) + +memarg_1 = [bs(l=8888, cls=(imm_arg_offset,)), bs(l=8888, cls=(imm_arg_align_1,))] +memarg_2 = [bs(l=8888, cls=(imm_arg_offset,)), bs(l=8888, cls=(imm_arg_align_2,))] +memarg_4 = [bs(l=8888, cls=(imm_arg_offset,)), bs(l=8888, cls=(imm_arg_align_4,))] +memarg_8 = [bs(l=8888, cls=(imm_arg_offset,)), bs(l=8888, cls=(imm_arg_align_8,))] + +i32_bs = [bs(l=1, cls=(imm_arg_i32,))] +addop('i32.const', [bs('01000001')] + i32_bs) + + +i64_bs = [bs(l=1, cls=(imm_arg_i64,))] +addop('i64.const',[bs('01000010')] + i64_bs) + +# Floating numbers +#TODO# +#addop('f32.const', []) + +block_ret = bs(l=8, cls=(block_result_no_empty, wasm_arg)) + +# Structured instructions +#no return +addop('block', [bs('00000010'), bs('01000000')]) +addop('loop', [bs('00000011'), bs('01000000')]) +addop('if', [bs('00000100'), bs('01000000')]) +#return +addop('block', [bs('00000010'), block_ret]) +addop('loop', [bs('00000011'), block_ret]) +addop('if', [bs('00000100'), block_ret]) + +# Branches +addop('br', [bs('00001100')] + i32_bs) +addop('br_if', [bs('00001101')] + i32_bs) +addop('br_table', [bs('00001110'), bs(l=1, cls=(arg_br_table,))]) + +# Calls +addop('call', [bs('00010000')] + i32_bs) +addop('call_indirect', [bs('00010001')] + i32_bs + [bs('00000000')]) + +# Variable instructions +var_instr_names = bs_name(l=8, name={ + 'local.get' : 0x20, + 'local.set' : 0x21, + 'local.tee' : 0x22, + 'global.get': 0x23, + 'global.set': 0x24, +}) 
# Memory instructions
# The 'align' field in most memory instructions has a default value
# This value depends on the instruction
mem_instr_default_1 = bs_name(l=8, name={
    'i32.load8_s': 0x2C,
    'i32.load8_u': 0x2D,
    'i64.load8_s': 0x30,
    'i64.load8_u': 0x31,
    'i32.store8' : 0x3A,
    'i64.store8' : 0x3C,
})
addop('mem_instr_default_1', [mem_instr_default_1] + memarg_1)

mem_instr_default_2 = bs_name(l=8, name={
    'i32.load16_s': 0x2E,
    'i32.load16_u': 0x2F,
    'i64.load16_s': 0x32,
    'i64.load16_u': 0x33,
    'i32.store16' : 0x3B,
    'i64.store16' : 0x3D,
})
addop('mem_instr_default_2', [mem_instr_default_2] + memarg_2)

mem_instr_default_4 = bs_name(l=8, name={
    'i32.load'    : 0x28,
    'f32.load'    : 0x2A,
    'i64.load32_s': 0x34,
    'i64.load32_u': 0x35,
    'i32.store'   : 0x36,
    'f32.store'   : 0x38,
    'i64.store32' : 0x3E,
})
addop('mem_instr_default_4', [mem_instr_default_4] + memarg_4)

mem_instr_default_8 = bs_name(l=8, name={
    'i64.load' : 0x29,
    'f64.load' : 0x2B,
    'i64.store': 0x37,
    'f64.store': 0x39,
})
# 8-byte accesses use the memarg whose default alignment is 8
addop('mem_instr_default_8', [mem_instr_default_8] + memarg_8)

addop('memory.size', [bs('0011111100000000')])
addop('memory.grow', [bs('0100000000000000')])
class WasmStruct(object):
    '''
    Defines a Wasm structure (its start and its stop)
    The possible kinds of structures are:
    'func', 'loop', 'block', 'if'

    Attributes:
    - kind: one of the kinds listed above
    - start_key: key given at construction
    - end_key: key given to set_end_key (None until then)
    - after_else_key: key given to set_after_else_key; for an 'if'
      without 'else' it is set to end_key by set_end_key
    - label: value returned by get_new_label(kind), None for 'func'

    The @loc_db parameters are accepted but not used by this class.
    '''
    __slots__ = ['kind', 'start_key', 'end_key',
                 'after_else_key', 'label']

    def __init__(self, loc_db, kind, start_key):
        self.kind = kind
        self.start_key = start_key
        self.end_key = None
        self.after_else_key = None
        if kind == 'func':
            # Functions are not branch targets that need a label
            self.label = None
        else:
            self.label = get_new_label(kind)

    def set_end_key(self, loc_db, key):
        '''
        Records the key of the end of the structure.
        Raises an Exception if the end was already set.
        '''
        if self.end_key is not None:
            raise Exception('Malformed code')
        self.end_key = key
        # An 'if' that never met an 'else' falls through to its end
        if self.kind == 'if' and self.after_else_key is None:
            self.after_else_key = self.end_key

    def set_after_else_key(self, loc_db, key):
        '''
        Records the key following the 'else' of an 'if' structure.
        Raises an Exception if the structure is not an 'if' or if
        the key was already set.
        '''
        if self.kind != 'if' or self.after_else_key is not None:
            raise Exception('Malformed code')
        self.after_else_key = key

    @property
    def branch_key(self):
        '''Key a branch to this structure goes to: start for a 'loop', end otherwise'''
        if self.kind == 'loop':
            return self.start_key
        return self.end_key

    @property
    def else_key(self):
        '''Key reached when the condition of an 'if' is false; raises for other kinds'''
        if self.kind != 'if':
            raise Exception("No else key in {} structure".format(self.kind))
        return self.after_else_key
You MUST place such + instructions at the end of a block and declare them here + - all these declaration must be made in order in the body of functions + + It updates basic blocks that end with: + - branches ('br', 'br_if', 'br_table') + - 'if' pseudo instruction + - 'else' pseudo instruction + by finding their true dstflow and adding the corresponding + location to the block's bto. + During disassembly, please declare structured (pseudo-)instructions + BEFORE adding a basic block if both occur at the same time + ''' + __slots__ = ['_if_todo', '_br_todo', 'done', 'loc_db', + '_structs', '_todo_structs'] + + def __init__(self, loc_db): + self.loc_db = loc_db + self._br_todo = [] + self._if_todo = [] + self.done = set() + self._structs = [] + self._todo_structs = [] + + def _add_done(self, block): + self.done.add(block) + block.fix_constraints() + + def structure_instr_at(self, instr, key_or_block): + ''' + Declare a structure pseudo-instruction at a specific block or key. + Please note that for the 'else' instruction, you have to give + the loc key at the next instruction (else is at end of block) + ''' + key = None + if isinstance(key_or_block, LocKey): + key = key_or_block + elif isinstance(key_or_block, AsmBlock): + key = key_or_block.loc_key + + try: + kind = instr.name + except AttributeError: + kind = instr + + # If end is found, this variable is set + pop_struct = None + + if kind in ['func', 'loop', 'block', 'if']: + self._structs.append(WasmStruct(self.loc_db, kind, key)) + self._br_todo.append([]) + self._if_todo.append([]) + + elif kind == 'else': + self._structs[-1].set_after_else_key(self.loc_db, key) + + elif kind == 'end': + pop_struct = self._structs.pop() + if pop_struct.kind == 'func': + instr.args = instr.args + ['func_idx'] + pop_struct.set_end_key(self.loc_db, key) + br_todo = self._br_todo.pop() + if_todo = self._if_todo.pop() + + br_key = pop_struct.branch_key + for block in br_todo: + block.bto.add(AsmConstraint(br_key, AsmConstraint.c_to)) + 
def add_block(self, block):
    '''
    Registers the basic block @block according to its last instruction.

    - 'if': kept in the innermost pending 'if' list
    - 'else': kept in the innermost pending branch list (the innermost
      structure must be an 'if')
    - 'br', 'br_if', 'br_table': kept in the pending branch list of every
      targeted structure; when that structure has a label, the matching
      instruction argument is replaced by an ExprId on the label
    - 'return': kept in the pending branch list of the outermost structure
    - anything else: the block is handed to _add_done
    '''
    last = block.lines[-1]
    name = last.name

    if name == 'if':
        self._if_todo[-1].append(block)
        return

    if name == 'else':
        if self._structs[-1].kind != 'if':
            raise Exception('Unexpected \'else\'')
        # 'else' is treated as 'br 0'
        self._br_todo[-1].append(block)
        return

    if name in ['br', 'br_if']:
        depth = int(last.getdstflow(self.loc_db))
        if depth >= len(self._br_todo):
            raise Exception('Bad br')
        self._br_todo[-1-depth].append(block)
        target_label = self._structs[-1-depth].label
        if target_label is not None:
            block.lines[-1].args = [ExprId(target_label, 0)]
        return

    if name == 'br_table':
        # Convert every target first, then register them in order
        depths = [int(d) for d in last.getdstflow(self.loc_db)]
        for pos, depth in enumerate(depths):
            if depth >= len(self._br_todo):
                raise Exception('Bad br')
            self._br_todo[-1-depth].append(block)
            target_label = self._structs[-1-depth].label
            if target_label is not None:
                block.lines[-1].args[pos] = ExprId(target_label, 0)
        return

    if name == 'return':
        self._br_todo[0].append(block)
        return

    self._add_done(block)
def get_func_name(self, idx):
    '''
    Returns the name of the function number @idx

    The name always contains the index, so two functions never share a
    name; the name recorded in the container is appended after a space
    when there is one.
    '''
    func_name = self.cont._executable.functions[idx].name
    if func_name is None:
        func_name = ""
    else:
        func_name = ' ' + func_name
    # The format string used to be empty, so both arguments were dropped
    # and every function was named ''.
    # NOTE(review): the exact format is a reconstruction - confirm it.
    return "_function_{}{}".format(idx, func_name)
+ Works sorta like the vanilla dis_multiblock except that it: + - takes a function index @func_idx as a parameter + - disassembles every instruction in function body + - ignores self.dont_dis + ''' + #log_asmblock.info("dis block all") + func = self.cont._executable.functions[func_idx] + + # Get func name or create it + func_name = self.get_func_name(func_idx) + + if func.is_imported: + res = AsmCFG(self.loc_db) + res.add_block(AsmBlock(self.loc_db.get_or_create_name_location(func_name))) + return res + + # Get func body + bs = func.code.body + cur_offset = 0 + cur_block = AsmBlock(self.loc_db.get_or_create_name_location(func_name)) + + pending_blocks = PendingBasicBlocks(self.loc_db) + pending_blocks.structure_instr_at('func', cur_block) + + block_cpt = 0 + after_else = False + prebuilt_key = None + + ## Block loop ## + while not pending_blocks.is_done: + # Start new block + block_cpt += 1 + lines_cpt = 0 + if block_cpt != 1: # Not first block of the function + if prebuilt_key is not None: + cur_block = AsmBlock(prebuilt_key) + prebuilt_key = None + else: + cur_block = AsmBlock(self.loc_db.add_location()) + + # Check block watchdog + if self.blocs_wd is not None and block_cpt > self.blocs_wd: + log_asmblock.debug("blocks watchdog reached at %X in function #%X", int(cur_offset), func_idx) + break + + ## Instruction loop ## + while not pending_blocks.is_done: + + lines_cpt += 1 + # Check line watchdog + if self.lines_wd is not None and lines_cpt > self.lines_wd: + log_asmblock.debug("lines watchdog reached at %X", int(cur_offset)) + break + + # Try to disassemble instruction + instr, error = self.dis_instr(bs, cur_offset) + + if instr is None: + log_asmblock.warning("cannot disasm at %X", int(cur_offset)) + raise Exception("Disasm error: {}".format(error)) + + log_asmblock.debug("dis at %X in function #%X", int(cur_offset), func_idx) + log_asmblock.debug(instr) + log_asmblock.debug(instr.args) + + # Stop block in case of 'end' or 'loop' + # -> forces the creation of 
a location (maybe useless for some 'end's) + if instr.name in ['end', 'loop'] and lines_cpt > 1: + prebuilt_key = self.loc_db.add_location() + cur_block.add_cst(prebuilt_key, AsmConstraint.c_next) + break + + + # Add instr to block + cur_block.addline(instr) + + # Hide function index in 'return' instructions + if instr.name == 'return': + instr.args.append(func_idx) + + # Declare structure pseudo-instructions + if instr.is_structure: + if instr.name == 'else': + prebuilt_key = self.loc_db.add_location() + pending_blocks.structure_instr_at(instr, prebuilt_key) + else: + pending_blocks.structure_instr_at(instr, cur_block) + + if instr.is_subcall(): + call_key = self.loc_db.get_or_create_name_location( + self.get_func_name(int(instr.getdstflow(None)))) + cur_block.bto.add(AsmConstraint(call_key, AsmConstraint.c_to)) + + # Increment offset + cur_offset += instr.l + + if not instr.breakflow(): + continue + + if instr.splitflow() and not (instr.is_subcall() and self.dontdis_retcall): + if prebuilt_key is None: + prebuilt_key = self.loc_db.add_location() + # 'if' branches alter execution flow when condition is not true + if instr.name == 'if': + cur_block.bto.add(AsmConstraint(prebuilt_key, AsmConstraint.c_to)) + else: + cur_block.add_cst(prebuilt_key, AsmConstraint.c_next) + + break + + # Register current block + pending_blocks.add_block(cur_block) + + pending_blocks.update_loc_names() + + blocks = AsmCFG(self.loc_db) + for block in pending_blocks.done: + blocks.add_block(block) + + for block in blocks.blocks: + for instr in block.lines: + for i in range(len(instr.args)): + if isinstance(instr.args[i], str): + if instr.args[i] == 'func_idx': + instr.args[i] = func_idx + else: + lk = self.loc_db.get_name_location(instr.args[i]) + if lk is None: + raise Exception("Not supposed to happen") + instr.args[i] = ExprLoc(lk, 32) + + return blocks diff --git a/miasm/arch/wasm/regs.py b/miasm/arch/wasm/regs.py new file mode 100644 index 000000000..d54fb1ae6 --- /dev/null +++ 
def i2expr(i, size):
    '''
    Turns a Python int into an expression of @size bits.
    A non-negative int becomes ExprInt(i, size); a negative one becomes
    the unary '-' operation applied to ExprInt(-i, size).
    Anything that is not an int is returned untouched.
    '''
    if not isinstance(i, int):
        return i
    if i < 0:
        return ExprOp('-', ExprInt(-i, size))
    return ExprInt(i, size)
# Sizes (in bits) of the value types
VT_SIZE = {
    'i32': 32,
    'i64': 64,
    'f32': 32,
    'f64': 64,
}

# Representation of value types on stack (one byte stored with each value).
# The low bit is set for the 64-bit types, which is what
# get_last_value_size tests to tell 9-byte items from 5-byte ones.
VT_REPR = {
    'i32': 0,
    'i64': 1,
    'f32': 2,
    'f64': 3,
}
def call(ir, instr, *args):
    '''
    Semantics of 'call' (work in progress, not usable yet).

    Looks up the callee entry in ir.func_info with the first argument and
    builds the assignment cp = cp + (-locsize). The rest of the semantics
    is not written yet, so the function raises NotImplementedError instead
    of failing on undefined names.
    '''
    # NOTE(review): func_info is a list, so args[0] presumably has to be a
    # plain int index here - confirm what ends up in instr.args
    info = ir.func_info[args[0]]
    # 'locsize' is a key of the func_info entries (it used to be referenced
    # as an undefined variable)
    adjust_cp_size = ExprAssign(
        ir.cp,
        ExprOp('+',
               ir.cp,
               ExprOp('-',
                      ExprInt(info['locsize'],
                              ir.addrsize))))
    #TODO# emit adjust_cp_size along with the remaining call semantics
    raise NotImplementedError("'call' semantics are not implemented yet")
def ibinop(ir, instr):
    '''
    Binary operation on integer:
    Consumes 2 operands on stack
    Produces 1 operand of same type

    The instruction name is '<value type>.<operation>'; the operation is
    looked up in IBINOPS.
    Returns the pair (list of ExprAssign, []).
    '''
    vt, op = instr.name.split('.')
    # get operands and make operation
    # ofs_vals holds the operand offsets from the current SP, the one the
    # furthest from the SP first
    _, ofs_vals = pop(ir, vt, 2)
    res = IBINOPS[op]([get_at(ir, ofs, vt) for ofs in ofs_vals])
    # The SP is moved up by one item below, so the slot that remains on the
    # stack is the furthest one (ofs_vals[0]); ofs_vals[1] is the slot
    # being released. Offsets are relative to the SP before the move, as in
    # push(), whose assignments only make sense when applied in parallel.
    # NOTE(review): the type byte of that slot is left as is, presumably
    # already the one of @vt - confirm
    aff_res = overwrite_at(ir, ofs_vals[0], res)

    # Move the stack
    mv_sp = add_sp(ir, size_on_stack(vt))
    return [mv_sp, aff_res], []
class ir_wasm(IntermediateRepresentation):
    '''
    Intermediate representation builder for wasm.

    Holds the registers used by the semantic functions (pc, sp, cp, IRDst)
    and func_info, a list with one dict per function of the container
    (keys: 'params', 'results', 'locals', 'locsize').
    A container object is mandatory.
    '''

    def __init__(self, loc_db=None, cont=None):
        '''
        @loc_db: forwarded to IntermediateRepresentation.__init__
        @cont: container whose functions are described in func_info
        Raises an Exception when @cont is None
        '''
        IntermediateRepresentation.__init__(self, mn_wasm, None, loc_db)

        if cont is None:
            raise Exception("Container object is needed")

        # Init registers and basic information
        self.pc = PC # Does it make sense ?
        self.sp = SP
        self.cp = CP
        self.IRDst = ExprId('IRDst', WASM_ADDR_SIZE)
        self.addrsize = WASM_ADDR_SIZE

        # Init function information
        self.func_info = []
        # NOTE(review): the disassembler reads 'cont._executable.functions'
        # while this reads 'cont.executable.functions' - confirm that both
        # attributes exist on the container
        for f in cont.executable.functions:
            # Imported functions have no code, hence no locals
            locs = []
            if not f.is_imported:
                locs = f.code.locs
            # locsize is the sum of VT_SIZE over parameters and locals
            locsize = 0
            # NOTE(review): VT_SIZE is keyed by value type ('i32', ...);
            # 'vt.name' on a loader Local looks like the optional variable
            # name rather than its value type - confirm
            for vt in f.signature.params + locs:
                locsize += VT_SIZE[vt.name]
            self.func_info.append({
                'params' : f.signature.params,
                'results': f.signature.results,
                'locals' : locs,
                'locsize': locsize,
            })
        #TODO# init globals, memories, tables (with elements)

    def get_ir(self, instr):
        '''
        Returns the pair (instr_ir, extra_ir) produced by the semantic
        function registered in mnemo_func under @instr's name, called with
        the instruction arguments.
        '''
        args = instr.args
        instr_ir, extra_ir = mnemo_func[instr.name](self, instr, *args)
        return instr_ir, extra_ir

    def get_loc_key_for_instr(self, instr):
        '''
        Only called when instruction is not at beginning of block,
        so @instr has no loc_key already
        Returns a fresh location created in the loc_db
        '''
        return self.loc_db.add_location()

    def get_next_loc_key(self, instr):
        '''Not available for wasm: always raises NotImplementedError'''
        raise NotImplementedError("Cannot be used")

    def add_instr_to_ircfg(self, instr, ircfg, loc_key=None, gen_pc_updt=False):
        '''Not available for wasm: always raises NotImplementedError'''
        raise NotImplementedError("Cannot be used")
class TwoWayTable(object):
    '''
    Table that makes different byte <-> string equivalences
    Note that in Python2, 'bytes' and 'str' are the same type
    To prevent collisions, 'bytes' are converted to 'int' in the object

    Built from a list of (byte, string) pairs where the byte is either an
    int or something byte_to_int accepts. A byte or a string appearing
    twice raises an Exception.
    '''
    # The docstring has to come first in the class body: placed after
    # __slots__ it was a plain expression and __doc__ stayed None
    __slots__ = ['_b_dict','_s_dict']

    def __init__(self, byte_str_pair_list):
        self._s_dict = {}
        self._b_dict = {}
        for b, s in byte_str_pair_list:
            if not isinstance(b, int):
                b = byte_to_int(b)
            if b in self._b_dict or s in self._s_dict:
                raise Exception("Cannot build TwoWayTable: duplicate")
            self._s_dict[s] = b
            self._b_dict[b] = s[:]

    def str_version(self, val):
        '''
        Returns the 'str' version of @val in the table
        Raises an Exception if not possible
        '''
        return self._b_dict[self.int_version(val)]

    def int_version(self, val):
        '''
        Returns the 'int' version of @val in the table
        @val may be a known string, a single byte or a known int
        (subclasses of these types are accepted)
        Raises an Exception if not possible
        '''
        # The string test comes first: in Python2 a name is also 'bytes'
        if isinstance(val, str) and val in self._s_dict:
            return self._s_dict[val]
        if isinstance(val, bytes) and len(val) == 1:
            val = byte_to_int(val)
        if isinstance(val, int) and val in self._b_dict:
            return val
        raise Exception("Not found")

    def byte_version(self, val):
        '''
        Returns the 'bytes' version of @val in the table
        Raises an Exception if not possible
        '''
        return int_to_byte(self.int_version(val))
class parser(object):
    '''
    Collection of functions (class methods) used to parse different values.
    Each one takes a byte string @bs, an offset @ofs, and possibly other inputs.
    It returns a tuple (res, n_bytes) containing:
    - res: result
    - n: number of bytes read.
    The result type is different depending on the value type parsed
    '''

    @classmethod
    def u32(cls, bs, ofs):
        '''In the specification, u32 is a LEB128-encoded unsigned int with a value in [0, 2**32 - 1]
        res type: int'''
        # A u32 is encoded on at most 5 bytes
        return decode_LEB128(bs[ofs : ofs + 5])

    @classmethod
    def skip_iN(cls, bs, ofs, N):
        '''
        Partially decodes a iN LEB128-encoded integer:
        does not return its value, but returns the number of bytes it was encoded on
        At most N//7 + 1 bytes are looked at
        '''
        n = 0
        # bytearray yields ints on both Python 2 and Python 3
        for b in bytearray(bs[ofs : ofs + N//7 + 1]):
            n += 1
            # The last byte of the encoding has its high bit cleared
            if b & 0x80 == 0:
                break
        return None, n

    @classmethod
    def skip_i32(cls, bs, ofs):
        '''Same as skip_iN for a 32-bit integer'''
        return cls.skip_iN(bs, ofs, 32)

    @classmethod
    def skip_i64(cls, bs, ofs):
        '''Same as skip_iN for a 64-bit integer'''
        return cls.skip_iN(bs, ofs, 64)

    @classmethod
    def const_instr(cls, bs, ofs):
        '''Parses a constant instruction, returns True if the instruction is 'end', False otherwise (along with the number of bytes read)'''
        instr = CONSTINSTRS.str_version(bs[ofs])
        if instr == 'end':
            return True, 1
        elif instr == 'i32.const':
            _, n = cls.skip_i32(bs, ofs+1)
        elif instr == 'i64.const':
            _, n = cls.skip_i64(bs, ofs+1)
        elif instr == 'f32.const':
            n = 4
        elif instr == 'f64.const':
            n = 8
        elif instr == 'global.get':
            # Listed in CONSTINSTRS but formerly rejected below; its
            # immediate is a u32 index
            _, n = cls.u32(bs, ofs+1)
        else:
            log.error("Non constant or unknown instruction found in constant expression")
            raise ContainerParsingException("Unknown/non constant instruction found in constant expression")
        # +1 for the opcode byte
        return False, n+1

    @classmethod
    def const_expr(cls, bs, ofs):
        '''
        Reads constant instructions from @ofs up to and including 'end'
        Returns the raw bytes of the expression and their number
        '''
        n = 0
        tst = False
        while not tst:
            tst, m = cls.const_instr(bs, ofs+n)
            n += m
        return bs[ofs:ofs+n], n
class HomogeneousList(list):
    '''
    Python list that enforces the type of its elements
    May raise Exceptions if you try to put elements that are
    not convertible into the list's elements' type

    Elements that are not instances of the item type are converted by
    calling the item type on them.
    '''
    __slots__ = ['_item_type']

    def __init__(self, value, item_type):
        self._item_type = item_type
        super(HomogeneousList, self).__init__(self._coerce_all(value))

    def _coerce(self, item):
        '''Returns @item as an instance of the item type'''
        if isinstance(item, self._item_type):
            return item
        return self._item_type(item)

    def _coerce_all(self, items):
        '''
        Returns a list holding every element of @items as an instance of
        the item type. Nothing is returned if one conversion fails, so
        callers never end up half-updated.
        '''
        converted = []
        for item in items:
            try:
                converted.append(self._coerce(item))
            except Exception as e:
                e.args = ("Cannot convert {} to {}".format(type(item), self._item_type),)
                raise
        return converted

    def __setitem__(self, key, value):
        if isinstance(key, slice):
            value = self._coerce_all(value)
        else:
            value = self._coerce(value)
        return list.__setitem__(self, key, value)

    def append(self, value):
        return list.append(self, self._coerce(value))

    def insert(self, index, value):
        # list.insert needs the index: without it every call raised TypeError
        return list.insert(self, index, self._coerce(value))

    def extend(self, value):
        return list.extend(self, self._coerce_all(value))

    def __add__(self, other):
        # list.__add__ only accepts a list, hence the conversion to a list
        return self.__class__(list.__add__(self, self._coerce_all(other)),
                              self._item_type)

    def __radd__(self, other):
        # list has no __radd__: build 'other + self' explicitly
        return self.__class__(self._coerce_all(other) + list(self),
                              self._item_type)

    def __iadd__(self, other):
        # In-place addition keeps the same object
        list.__iadd__(self, self._coerce_all(other))
        return self

    @property
    def item_type(self):
        return self._item_type
def simple_twt_item(table):
    '''
    Decorator that creates a basic WasmItem corresponding to a TwoWayTable

    The decorated class stores the 'int' version of its value in '_value'
    and gets: __init__, parse, build, __repr__, __eq__, __ne__, __hash__.
    Instances compare equal to anything @table maps to the same int;
    comparing with a value unknown to @table gives False.
    '''
    def wrapper(cls):
        cls.__slots__ = ['_value']

        def init(self, value):
            if isinstance(value, cls):
                self._value = value._value
            else:
                self._value = table.int_version(value)
            super(cls, self).__init__()
        cls.__init__ = init

        @classmethod
        def parse(c, bs, ofs):
            # Items are encoded on a single byte
            return c(bs[ofs]), 1
        cls.parse = parse

        cls.build = lambda self: table.byte_version(self._value)

        cls.__repr__ = lambda self: table.str_version(self._value)

        def eq(self, other):
            if not isinstance(other, cls):
                try:
                    other = cls(other)
                except Exception:
                    # Deliberately broad: the int_version seen in this file
                    # reports unknown values with a plain Exception. A value
                    # the table does not know cannot be equal
                    return False
            return self._value == other._value
        cls.__eq__ = eq
        # Python 2 does not derive '!=' from __eq__
        cls.__ne__ = lambda self, other: not eq(self, other)
        # Keep hashing consistent with the equality defined above
        cls.__hash__ = lambda self: hash(self._value)
        return cls
    return wrapper
def prop(name, prop_type, optional=False):
    '''
    Class decorator adding a property @name stored in the slot '_' + @name.

    Setting the property keeps instances of @prop_type as they are and
    converts anything else with @prop_type(value). When @optional is True,
    None is stored as is.
    Raises an Exception at decoration time if the slot is not declared in
    the class' __slots__, and at assignment time if the conversion fails.
    '''
    def wrapper(cls):
        slot = '_' + name
        if slot not in cls.__slots__:
            raise Exception("Please add '{}' in {}.__slots__"
                            .format(slot, cls.__name__))

        def getter(self):
            return getattr(self, slot)

        def setter(self, value):
            if optional and value is None:
                setattr(self, slot, None)
                return
            if not isinstance(value, prop_type):
                # Only conversion failures are reported; a bare 'except:'
                # would also swallow KeyboardInterrupt and SystemExit
                try:
                    value = prop_type(value)
                except Exception:
                    raise Exception("Cannot convert {} to {}"
                                    .format(value, prop_type))
            setattr(self, slot, value)

        setattr(cls, name, property(getter, setter))
        return cls
    return wrapper
WasmItem): + __slots__=[] + + def __init__(self, string): + WasmItem.__init__(self) + super(Name, self).__init__() + + @staticmethod + def parse(bs, ofs): + size, n = parser.u32(bs, ofs) + res = bs[ofs+n:ofs+n+size].decode('UTF-8', 'strict') + if PY2: + res = bs[ofs+n:ofs+n+size] + return Name(res), size+n + + def build(self): + s = str(self) + if PY3: + s = bytes(s, 'utf-8') + return serializer.u32(len(s)) + s + + + +@prop('name', Name) +@prop('mod', Name) +class ImportInfo(WasmItem): + __slots__ = ['_name', '_mod'] + + def __init__(self, mod, name): + self.mod = mod + self.name = name + super(ImportInfo, self).__init__() + + @staticmethod + def parse(bs, ofs): + mod, n = Name.parse(bs, ofs) + name, m = Name.parse(bs, ofs+n) + return ImportInfo(mod, name), n+m + + def build(self): + return self.mod.build() + self.name.build() + + def __repr__(self): + return "<'{}' in module '{}'>".format(self.name, self.mod) + + +class ImportDesc(WasmItem): + '''Not used by end users''' + __slots__ = ['_importtype', '_content'] + + def __init__(self, importtype, content): + self._importtype = importtype + self._content = content + + @staticmethod + def parse(bs, ofs): + t = ImportType(bs[ofs]) + if t == 'func': + content, n = parser.u32(bs, ofs+1) + elif t == 'table': + content, n = TableType.parse(bs, ofs+1) + elif t == 'mem': + content, n = Limits.parse(bs, ofs+1) + elif t == 'global': + content, n = GlobalType.parse(bs, ofs+1) + else: + raise ContainerParsingException("Error parsing import description") + return ImportDesc(t, content), n+1 + + def build(self): + if self._importtype == 'func': + return self._importtype.build() + serializer.u32(self._content) + return self._importtype.build() + self._content.build() + +@prop('info', ImportInfo) +@prop('desc', ImportDesc) +class Import(WasmItem): + __slots__ = ['_info', '_desc'] + + def __init__(self, info, desc): + self.info = info + self.desc = desc + super(Import, self).__init__() + + @staticmethod + def parse(bs, ofs): + 
info, n = ImportInfo.parse(bs, ofs) + desc, m = ImportDesc.parse(bs, ofs+n) + return Import(info, desc), n+m + + def build(self): + return self.info.build() + self.desc.build() + + +class ExportDesc(WasmItem): + __slots__ = ['_exporttype', '_idx'] + + def __init__(self, et, idx): + self._exporttype = et + self._idx = idx + super(ExportDesc, self).__init__() + + @staticmethod + def parse(bs, ofs): + t = ImportType(bs[ofs]) + idx, n = parser.u32(bs, ofs+1) + return ExportDesc(t, idx), n+1 + + def build(self): + return self._exporttype.build() + serializer.u32(self._idx) + +@prop('name', Name) +@prop('desc', ExportDesc) +class Export(WasmItem): + __slots__ = ['_name', '_desc'] + def __init__(self, name, desc): + self.name = name + self.desc = desc + super(Export, self).__init__() + + @staticmethod + def parse(bs, ofs): + name, n = Name.parse(bs, ofs) + desc, m = ExportDesc.parse(bs, ofs+n) + return Export(name, desc), n+m + + def build(self): + return self.name.build() + self.desc.build() + + +@prop('valtype', ValType) +class Locals(WasmItem): + __slots__ = ['n', '_valtype'] + + def __init__(self, n, valtype): + self.n = n + self.valtype = valtype + super(Locals, self).__init__() + + @staticmethod + def parse(bs, ofs): + nmb, n = parser.u32(bs, ofs) + valtype, m = ValType.parse(bs, ofs + n) + return Locals(nmb, valtype), n+m + + def build(self): + return serializer.u32(self.n) + self.valtype.build() + + +@prop('name', Name, optional=True) +class Local(ValType): + __slots__ = ['_name'] + + def __init__(self, valtype, name=None): + self.name = name + super(Local, self).__init__(valtype) + + def __repr__(self): + res = super(Local, self).__repr__() + if isinstance(self.name, Name): + res += ' {}'.format(self.name) + return res + +@prop_list('locs', Local) +class FunctionCode(WasmItem): + __slots__ = ['body', '_locs', '_loc_names'] + + def __init__(self, locs=[], body=b"", loc_names=None): + self.body = body + self.locs = [] + for l in locs: + if isinstance(l, Local): + 
self.locs.append(l) + elif isinstance(l, Locals): + for i in range(l.n): + self.locs.append(Local(l.valtype)) + else: + raise Exception("{} is not valid candidate for function local(s)" + .format(l)) + super(FunctionCode, self).__init__() + + @staticmethod + def parse(bs, ofs=0): + size, n = parser.u32(bs, ofs) + locs, m = WasmItemVec.parse(bs, ofs+n, Locals) + body = bs[ofs+n+m : ofs+n+size] + return FunctionCode(locs, body), n+size + + def build(self): + locs_todo = WasmItemVec([], Locals) + i = 0 + lim = len(self.locs) + while i < lim: + if not isinstance(self.locs[i], Local): + raise Exception("{} is not a valid local" + .format(self.locs[i])) + n = 1 + t = self.locs[i] + i += 1 + while i < lim and self.locs[i] == t: + n += 1 + i += 1 + locs_todo.append(Locals(n = n, valtype = t)) + + res = locs_todo.build() + self.body + return serializer.u32(len(res)) + res + + +@prop_list('params', Local) # Parameters are locals +@prop_list('results', ValType) +class Signature(WasmItem): + __slots__ = ['_params', '_results'] + + def __init__(self, params=[], results=[]): + self.params = params + self.results = results + super(Signature, self).__init__() + + @staticmethod + def parse(bs, ofs=0): + val = byte_to_int(bs[ofs]) + if val != 0x60: + log.error("Function type malformed") + raise Exception("Function type malformed") + params, n = WasmItemVec.parse(bs, ofs+1, ValType) + results, m = WasmItemVec.parse(bs, ofs+n+1, ValType) + return Signature(params, results), 1+n+m + + def build(self): + return b'\x60' + self.params.build() + self.results.build() + + def __repr__(self): + return "({0}) -> ({1})".format(', '.join([repr(i) for i in self._params]), + ', '.join([repr(i) for i in self._results])) + + def __eq__(self, other): + return (isinstance(other, Signature) and + self.build() == other.build()) + + def __deepcopy__(self): + return Signature([i.build() for i in self._params], + [i.build() for i in self._results]) + + +class LocalIndexer(object): + __slots__ = ['_parent'] 
+ + @property + def params(self): + return self._parent.signature.params + + @property + def locs(self): + if isinstance(self._parent, LocalFunction): + return self._parent.code.locs + return None + + def __init__(self, parent_function): + self._parent = parent_function + super(LocalIndexer, self).__init__() + + def __getitem__(self, key): + l = len(self.params) + if key < l: + return self.params[key] + if self.locs is not None: + return self.locs[key-l] + return None + + def __setitem__(self, key, val): #TODO# remove ? can be ambiguous + l = len(self.params) + if key < l: + return self.params.__setitem__(key, val) + if self.locs is not None: + return self.locs.__setitem__(key-l, val) + return None + + def __repr__(self): + if self.locs is None: + return repr(self.params) + return repr(list(self.params) + list(self.locs)) + + def __len__(self): + if self.locs is None: + return len(self.params) + return len(self.params) + len(self.locs) + +@can_be_exported +@can_be_imported +@prop('signature', Signature) +@prop('name', Name, optional = True) +class Function(object): + __slots__ = ['_signature', '_export_name', '_name', '_locals'] + + @property + def locals(self): + return self._locals + + def __init__(self, signature, name = None): + self.signature = signature + self.name = name + self._locals = LocalIndexer(self) + super(Function, self).__init__() + + def __repr__(self): + res = "fn " + if self.name is None: + res += "_?_" + else: + res += self.name + res += repr(self.signature) + if self.is_exported: + res += "\n\tExported as '{}'".format(self.export_name) + return res + + +@prop('import_info', ImportInfo) +class ImportedFunction(Function): + __slots__ = ['_import_info'] + + def __init__(self, import_info, *args, **kwargs): + self.import_info = import_info + super(ImportedFunction, self).__init__(*args, **kwargs) + + def __repr__(self): + res = super(ImportedFunction, self).__repr__() + '\n' + res += '\tImported from:' + repr(self.import_info) + return res + 
+@prop('code', FunctionCode) +class LocalFunction(Function): + __slots__ = ['_code'] + + def __init__(self, code, *args, **kwargs): + self.code = code + super(LocalFunction, self).__init__(*args, **kwargs) + + def __repr__(self): + res = super(LocalFunction, self).__repr__() + '\n' + return res #TODO# + +class Limits(WasmItem): + __slots__ = ['min', 'max'] + def __init__(self, mini, maxi=None): + self.min = mini + self.max = maxi + super(Limits, self).__init__() + + @staticmethod + def parse(bs, ofs): + mini, n = parser.u32(bs, ofs+1) + if byte_to_int(bs[ofs]) == 1: + maxi, m = parser.u32(bs, ofs+1+n) + return Limits(mini, maxi), 1+n+m + return Limits(mini), 1+n + + def build(self): + if self.max is None: + return b'\x00' + serializer.u32(self.min) + return b'\x01' + serializer.u32(self.min) + serializer.u32(self.max) + +@prop('elemtype', ElemType) +@prop('limits', Limits) +class TableType(WasmItem): + __slots__ = ['_elemtype', '_limits'] + def __init__(self, elemtype, limits): + self.elemtype = elemtype + self.limits = limits + super(TableType, self).__init__() + + @staticmethod + def parse(bs, ofs): + elemtype, n = ElemType.parse(bs, ofs) + limits, m = Limits.parse(bs, ofs+n) + return TableType(elemtype, limits), n+m + + def build(self): + return self.elemtype.build() + self.limits.build() + +@can_be_imported +@can_be_exported +@prop('tabletype', TableType) +class Table(WasmItem): + __slots__ = ['_tabletype', '_export_name'] + + def __init__(self, tabletype): + self.tabletype = tabletype + super(Table, self).__init__() + + @staticmethod + def parse(bs, ofs): + tt, n = TableType.parse(bs, ofs) + return Table(tt), n + + def build(self): + return self.tabletype.build() + + +@prop('import_info', ImportInfo) +class ImportedTable(object): + __slots__ = ['_import_info'] + + def __init__(self, import_info, *args, **kwargs): + self.import_info = import_info + super(ImportedTable, self).__init(*args, **kwargs) + +@can_be_imported +@can_be_exported +@prop('limits', Limits) 
+class Memory(WasmItem): + __slots__ = ['_limits', '_export_name'] + + def __init__(self, limits): + self.limits = limits + super(Memory, self).__init__() + + @staticmethod + def parse(bs, ofs): + lims, n = Limits.parse(bs, ofs) + return Memory(lims), n + + def build(self): + return self.limits.build() + +@prop('import_info', ImportInfo) +class ImportedMemory(Memory): + __slots__ = ['_import_info'] + + def __init__(self, import_info, *args, **kwargs): + self.import_info = import_info + super(ImportedMemory, self).__init(*args, **kwargs) + +class Element(WasmItem): + __slots__ = ['table', 'offset', 'init'] + + def __init__(self, table, offset, init): + self.table = table + self.offset = offset + self.init = init + super(Element, self).__init__() + + @staticmethod + def parse(bs, ofs): + tidx, n = parser.u32(bs, ofs) + offset, m = parser.const_expr(bs, ofs+n) + length, p = parser.u32(bs, ofs+n+m) + N = n+m+p + init = [] + for i in range(length): + fidx, n = parser.u32(bs, ofs+N) + N += n + init.append(fidx) + return Element(tidx, offset, init), N + + def build(self): + res = serializer.u32(self.table) + self.offset + res += serializer.u32(len(self.init)) + for i in self.init: + res += serializer.u32(i) + return res + +class Data(WasmItem): + __slots__ = ['mem', 'offset', 'init'] + + def __init__(self, mem, offset, init): + self.mem = mem + self.offset = offset + self.init = init + super(Data, self).__init__() + + @staticmethod + def parse(bs, ofs): + midx, N = parser.u32(bs, ofs) + offset, n = parser.const_expr(bs, ofs+N) + N += n + l, n = parser.u32(bs, ofs+N) + N += n + init = bs[ofs+N : ofs+N+l] + return Data(midx, offset, init), N+l + + def build(self): + return (serializer.u32(self.mem) + self.offset + + serializer.u32(len(self.init)) + self.init) + +@prop('valtype', ValType) +@prop('mutable', MutType) +class GlobalType(WasmItem): + __slots__ = ['_valtype', '_mutable'] + + def __init__(self, valtype, mutable): + self.valtype = valtype + self.mutable = mutable + 
super(GlobalType, self).__init__() + + @staticmethod + def parse(bs, ofs): + typ, n = ValType.parse(bs, ofs) + mut, m = MutType.parse(bs, ofs+n) + return GlobalType(typ, mut), n+m + + def build(self): + return self.valtype.build() + self.mutable.build() + +@can_be_imported +@can_be_exported +@prop('globaltype', GlobalType) +class Global(WasmItem): + __slots__ = ['_globaltype', '_export_name'] + + def __init__(self, globtype): + self.globaltype = globtype + super(Global, self).__init__() + + +@prop('import_info', ImportInfo) +class ImportedGlobal(Global): + __slots__ = ['_import_info'] + + def __init__(self, import_info, *args, **kwargs): + self.import_info = import_info + super(ImportedGlobal, self).__init(*args, **kwargs) + +class LocalGlobal(Global): + __slots__ = ['init'] + + def __init__(self, global_type, init): + self.init = init + super(LocalGlobal, self).__init__(global_type) + + @staticmethod + def parse(bs, ofs): + gt, n = GlobalType.parse(bs, ofs) + init, m = parser.const_expr(bs, ofs+n) + return LocalGlobal(gt, init), n+m + + def build(self): + return self.globaltype.build() + self.init + + +@prop('name', Name) +class NameAssoc(WasmItem): + __slots__ = ['idx', '_name'] + + def __init__(self, idx, name): + self.idx = idx + self.name = name + super(NameAssoc, self).__init__() + + @staticmethod + def parse(bs, ofs): + idx, n = parser.u32(bs, ofs) + name, m = Name.parse(bs, ofs+n) + return NameAssoc(idx, name), n+m + + def build(self): + return serializer.u32(self.idx) + self.name.build() + +@prop_list('assocs', NameAssoc) +class NameMap(WasmItem): + __slots__ = ['_assocs'] + + def __init__(self, assocs): + self.assocs = assocs + super(NameMap, self).__init__() + + @staticmethod + def parse(bs, ofs): + asc, n = WasmItemVec.parse(bs, ofs, NameAssoc) + return NameMap(asc), n + + def build(self): + return self.assocs.build() + +@prop('nmap', NameMap) +class IndirectNameAssoc(WasmItem): + __slots__ = ['idx', '_nmap'] + + def __init__(self, idx, nmap): + 
self.idx = idx + self.nmap = nmap + super(IndirectNameAssoc, self).__init__() + + @staticmethod + def parse(bs, ofs): + idx, n = parser.u32(bs, ofs) + nmap, m = NameMap.parse(bs, ofs+n) + return IndirectNameAssoc(idx, nmap), n+m + + def build(self): + return serializer.u32(self.idx) + self.nmap.build() + +@prop_list('iassocs', IndirectNameAssoc) +class IndirectNameMap(WasmItem): + __slots__ = ['_iassocs'] + + def __init__(self, iassocs): + self.iassocs = iassocs + super(IndirectNameMap, self).__init__() + + @staticmethod + def parse(bs, ofs): + asc, n = WasmItemVec.parse(bs, ofs, IndirectNameAssoc) + return IndirectNameMap(asc), n + + def build(self): + return self.iassocs.build() + +@prop('typ', NameType) +class NameSubSec(WasmItem): + __slots__ = ['_typ', 'content'] + + def __init__(self, typ, content): + self.typ = typ + self.content = content + super(NameSubSec, self).__init__() + + @staticmethod + def parse(bs, ofs): + t, n = NameType.parse(bs, ofs) + sz, nn = parser.u32(bs, ofs+n) + n += nn + if t == 0: + cnt, m = Name.parse(bs, ofs+n) + elif t == 1: + cnt, m = NameMap.parse(bs, ofs+n) + elif t == 2: + cnt, m = IndirectNameMap.parse(bs, ofs+n) + else: + log.warn("Name section is broken") + if sz != m: + log.warn("Name section inconsistent") + return NameSubSec(t, cnt), n+m + + def build(self): + cnt = self.content.build() + return self.typ.build + serializer.u32(len(cnt)) + cnt + + +class Section(object): + + @staticmethod + def new(wasmstr, offset): + '''Parses the header of the section starting at @wasmstr[@offset:] and returns an instance of the correct section type''' + # Get section type + t = byte_to_int(wasmstr[offset]) + + # Instentiate the correct Section object + if t >= len(SECTIONS) or t < 0: + return UnknownSection(wasmstr, offset) + return SECTIONS[t](wasmstr, offset) + + + def __init__(self, wasmstr, offset): + # Get data and its size + self.payload_size, n = parser.u32(wasmstr, offset+1) + self.size = 1 + n + self.payload_size + self.wasmstr = 
wasmstr + self.offset = offset+n+1 + self.current = self.offset + + # Parse data + self.parse_content() + + # Verify the quantity of data parsed + if self.payload_size + self.offset != self.current: + log.warn("Section of type {0} is inconsistent: header announces {1} bytes of data but {2} bytes were parsed" + .format(self.stype, self.payload_size, self.current - self.offset)) + + + def parse_content(self): + '''To be implemented by each section type''' + pass + + + def get_bytes(self, n): + '''Returns @n next bytes of content, but does not move the cursor''' + return self.wasmstr[self.current : self.current + n] + + + def pop_bytes(self, n): + '''Returns @n next bytes of content, and moves the cursor @n bytes forward''' + self.current += n + return self.wasmstr[self.current - n : self.current] + + + def pop_parse(self, parse_func, *args, **kwargs): + '''Parses content using parse_func, returns the result, and moves the cursor accordingly''' + res, n = parse_func(self.wasmstr, self.current, *args, **kwargs) + self.pop_bytes(n) + return res + + +class UnknownSection(Section): + stype = -1 + + +class CustomSection(Section): + stype = SHT_CUSTOM + + def parse_content(self): + # Parse section's name + self.name = self.pop_parse(Name.parse) + self.unknown = False + + # Search for symbols if Name section + if self.name == "name": + l = 0 + self.content = [] + lim = self.payload_size + self.offset + while self.current < lim: + self.content.append(self.pop_parse(NameSubSec.parse)) + + else: + log.warn("Unknown custom section '{}' has been ignored".format(self.name)) + self.unknown = True + self.content = self.wasmstr[self.offset:self.offset+self.payload_size] + self.current = self.offset + self.payload_size + + +class TypeSection(Section): + stype = SHT_TYPE + + def parse_content(self): + self.content = self.pop_parse(WasmItemVec.parse, Signature) + + +class ImportSection(Section): + stype = SHT_IMPORT + + def parse_content(self): + self.content = 
self.pop_parse(WasmItemVec.parse, Import) + + +class FunctionSection(Section): + stype = SHT_FUNCTION + + def parse_content(self): + self.content = [] + n = self.pop_parse(parser.u32) + for i in range(n): + self.content.append(self.pop_parse(parser.u32)) + + +class TableSection(Section): + stype = SHT_TABLE + + def parse_content(self): + self.content = self.pop_parse(WasmItemVec.parse, Table) + + +class MemorySection(Section): + stype = SHT_MEMORY + + def parse_content(self): + self.content = self.pop_parse(WasmItemVec.parse, Memory) + +class GlobalSection(Section): + stype = SHT_GLOBAL + + def parse_content(self): + self.content = self.pop_parse(WasmItemVec.parse, LocalGlobal) + + +class ExportSection(Section): + stype = SHT_EXPORT + + def parse_content(self): + self.content = self.pop_parse(WasmItemVec.parse, Export) + + +class StartSection(Section): + stype = SHT_START + + def parse_content(self): + self.content = self.pop_parse(parser.u32) + + +class ElementSection(Section): + stype = SHT_ELEMENT + + def parse_content(self): + self.content = self.pop_parse(WasmItemVec.parse, Element) + +class CodeSection(Section): + stype = SHT_CODE + + def parse_content(self): + self.content = self.pop_parse(WasmItemVec.parse, FunctionCode) + +class DataSection(Section): + stype = SHT_DATA + + def parse_content(self): + self.content = self.pop_parse(WasmItemVec.parse, Data) + +SECTIONS = [ + CustomSection, + TypeSection, + ImportSection, + FunctionSection, + TableSection, + MemorySection, + GlobalSection, + ExportSection, + StartSection, + ElementSection, + CodeSection, + DataSection, +] + +def add_section_header(section_type): + def builder_decorator(func): + def func_wrapper(*args, **kwargs): + res = func(*args, **kwargs) + l = len(res) + if l == 0: + return res + return int_to_byte(section_type) + serializer.u32(l) + res + return func_wrapper + return builder_decorator + +def find_section_offset(wasmstr, first_section_offset, s_desc, end=False): + '''Returns the offset in 
@wasmstr at which the section described by @s_desc starts (or ends if @end==True)''' + c = first_section_offset + l = len(wasmstr) + while True: + t = byte_to_int(wasmstr[c]) + size, n = parser.u32(wasmstr, c+1) + if t == s_desc['type'] and (s_desc['name'] == None or s_desc['name'] == Name.parse(wasmstr, c+1+n)[0]): + if end: + return c+1+n+size + return c + c += 1+n+size + if c >= l: + break + return None + +def _sec_desc(s, name=None): + '''A short description of a section: its type (+ its name if custom)''' + if isinstance(s, Section): + if s.stype == SHT_CUSTOM: + return {'type': s.stype, 'name': s.name} + return {'type': s.stype, 'name': None} + return {'type': s, 'name': name} + +def filter_local(lst): + '''Returns the elements of lst that are not imported''' + res = [] + flag = False + for i in lst: + if i.is_imported: + if flag: + raise Exception("Imported and non-imported are mixed up in {}" + .format(lst)) + continue + flag = True + res.append(i) + return res + +def find_imports(lst): + '''Finds the imported elements in lst and returns a list of Import objects''' + if len(lst) == 0: + return [] + + if isinstance(lst[0], Function): + t = ImportType('func') + cb = lambda i, lst: i + elif isinstance(lst[0], Table): + t = ImportType('table') + cb = lambda i, lst: lst[i].tabletype + elif isinstance(lst[0], Memory): + t = ImportType('mem') + cb = lambda i, lst: lst[i].limits + elif isinstance(lst[0], Global): + t = ImportType('global') + cb = lambda i, lst: lst[i].globtype + else: + raise Exception("Error finding import type") + + res = [] + flag = False + for i in range(len(lst)): + if not lst[i].is_imported: + flag = True + continue + if flag: + raise Exception("Imported and non-imported {} are mixed up..." 
+ .format(repr(t))) + res.append(Import(lst[i].import_info, ImportDesc(t, cb(i, lst)))) + return res + +def find_exports(lst): + '''Finds the exported elements in lst and returns a list of Export objects''' + if len(lst) == 0: + return [] + + if isinstance(lst[0], Function): + t = ImportType('func') + elif isinstance(lst[0], Table): + t = ImportType('table') + elif isinstance(lst[0], Memory): + t = ImportType('mem') + elif isinstance(lst[0], Global): + t = ImportType('global') + else: + raise Exception("Error finding export type") + + res = [] + for i in range(len(lst)): + if lst[i].is_exported: + res.append(Export(lst[i].export_name, ExportDesc(t, i))) + return res + +def find_section_offset(wasmstr, first_section_offset, s_desc, end=False): + '''Returns the offset in @wasmstr at which the section described by @s_desc starts (or ends if @end==True)''' + c = first_section_offset + l = len(wasmstr) + while True: + t = byte_to_int(wasmstr[c]) + size, n = parser.u32(wasmstr, c+1) + if t == s_desc['type'] and (s_desc['name'] == None or s_desc['name'] == Name.parse(wasmstr, c+1+n)[0]): + if end: + return c+1+n+size + return c + c += 1+n+size + if c >= l: + break + return None + +@prop('name', Name, optional=True) +class Wasm(object): + __slots__ = ['_slist', '_builders', '_tmp_signatures', + 'functions', 'mems', 'tables', 'globs', + '_name', 'entry', 'elements', 'data', 'header'] + + @classmethod + def from_path(cls, path): + return cls(open(path, 'rb').read()) + + def __init__(self, wasmstr=None): + super(Wasm, self).__init__() + if wasmstr == None: + wasmstr = b"\x00\x61\x73\x6d\x01\x00\x00\x00" # Empty wasm file version 01 + self.header = wasmstr[:8] + magic = struct.unpack('= s.stype: + log.error("Invalid wasm file: section {} is either duplicate or misplaced".format(s.stype)) + raise ContainerParsingException("Duplicate or misplaced section") + else: + last_section = s.stype + else: + # Check that 'name' section follows 'Data' section + # (other custom sections are 
allowed between 'Data' and 'name' + if s.name == "name" and last_section != SHT_DATA: + log.warn("Section 'name' misplacement: should follow Data Section.") + + log.info("Sections placement validated") + + + def build_content(self): + ''' + Re-builds wasm sections (without the wasm header) and returns them in a StrPatchwork + ''' + res = StrPatchwork() + for builder in self._builders: + res += builder(self) + self._inject_unknown_custom(res) + return res + + @add_section_header(SHT_TYPE) + def _build_type(self): + signs = WasmItemOptionVec([], Signature) + for f in self.functions: + if f.signature not in signs: + signs.append(f.signature) + self._tmp_signatures = signs + return signs.build() + + @add_section_header(SHT_IMPORT) + def _build_import(self): + imprts = WasmItemOptionVec([], Import) + for i in [self.functions, self.tables, self.mems, self.globs]: + imprts.extend(find_imports(i)) + return imprts.build() + + @add_section_header(SHT_FUNCTION) + def _build_function(self): + idxs = [] + for f in filter_local(self.functions): + for i in range(len(self._tmp_signatures)): + if self._tmp_signatures[i] == f.signature: + idxs.append(i) + break + return serializer.u32(len(idxs)) + b''.join([serializer.u32(i) for i in idxs]) + + @add_section_header(SHT_TABLE) + def _build_table(self): + return WasmItemOptionVec([t.tabletype for t in filter_local(self.tables)], + TableType).build() + + @add_section_header(SHT_MEMORY) + def _build_memory(self): + return WasmItemOptionVec([m.limits for m in filter_local(self.mems)], + Limits).build() + + @add_section_header(SHT_GLOBAL) + def _build_global(self): + return WasmItemOptionVec(filter_local(self.globs), + Global).build() + + @add_section_header(SHT_EXPORT) + def _build_export(self): + exprts = WasmItemOptionVec([], Export) + for i in [self.functions, self.tables, self.mems, self.globs]: + exprts.extend(find_exports(i)) + return exprts.build() + + @add_section_header(SHT_START) + def _build_start(self): + if self.entry is not 
None: + return serializer.u32(self.entry) + return b'' + + @add_section_header(SHT_ELEMENT) + def _build_element(self): + return self.elements.build() + + @add_section_header(SHT_CODE) + def _build_code(self): + return WasmItemOptionVec([f.code for f in filter_local(self.functions)], + FunctionCode).build() + + @add_section_header(SHT_DATA) + def _build_data(self): + return self.data.build() + + @add_section_header(SHT_CUSTOM) + def _build_name(self): + res = b'' + # Add module name, if any + if self.name is not None: + res += b'\x00' + serializer.u32(len(self.name)) + self.name + + # Look for function or local names + fnames = WasmItemVec([], NameAssoc) + lnames = WasmItemVec([], IndirectNameAssoc) + for i in range(len(self.functions)): + f = self.functions[i] + if hasattr(f, 'name') and f.name is not None: + fnames.append(NameAssoc(i, f.name)) + assocs = [] + for j in range(len(f.locals)): + loc = f.locals[j] + if hasattr(loc, 'name') and loc.name is not None: + assocs.append(NameAssoc(j, loc.name)) + if len(assocs) > 0: + lnames.append(IndirectNameAssoc(i, NameMap(assocs))) + + if len(fnames) > 0: + tmp = fnames.build() + res += b'\x01' + serializer.u32(len(tmp)) + tmp + if len(lnames) > 0: + tmp = lnames.build() + res += b'\x02' + serializer.u32(len(tmp)) + tmp + if len(res) != 0: + res = Name('name').build() + res + return res + + + @add_section_header(SHT_CUSTOM) + def _build_unknown_custom(self, s): + return s.content + + + def _inject_unknown_custom(self, out): + ''' + Try to re-inject custom sections that were not parsed. + To do so, the type (and name if custom) of the sections directly before and after a block of unknown sections\ + when the file was parsed must be the same as in the output build. 
+ If this is not possible, the block of custom sections are placed at the end of the output + ''' + todo = [] + i = 0 + l = len(self._slist) + while i != l: + s = self._slist[i] + if s.stype == SHT_CUSTOM and s.unknown: + block = {'content': b'', 'prev': None, 'next': None} + if i > 0: + block['prev'] = _sec_desc(self._slist[i-1]) + while i!=l: + s = self._slist[i] + if not (s.stype == SHT_CUSTOM and s.unknown): + break + block['content'] += self._build_unknown_custom(s) + i += 1 + if i!=l: + block['next'] = _sec_desc(self._slist[i]) + todo.append(block) + i += 1 + for t in todo: + if t['prev'] is not None: + ofs = find_section_offset(out, 0, t['prev'], end=True) + if t['next'] is not None: + ofs2 = find_section_offset(out, 0, t['next'], end=False) + if ofs != ofs2: + ofs = None + elif t['next'] is not None: + ofs = find_section_offset(out, 0, t['next'], end=False) + else: + ofs = 0 + if ofs is not None: + out[ofs:ofs] = t['content'] + else: + log.warn("Some unknown custom sections were added at the end of the build because I couldn't gess where to put them...") + out += t['content'] diff --git a/miasm/loader/wasm_utils.py b/miasm/loader/wasm_utils.py new file mode 100644 index 000000000..0b28b5ff4 --- /dev/null +++ b/miasm/loader/wasm_utils.py @@ -0,0 +1,57 @@ +import struct +import collections +from future.utils import PY2 + +def byte_to_int(b): + if type(b) == int: + return b + return struct.unpack('B', b)[0] + +def int_to_byte(i): + return struct.pack('B', i) + +def encode_LEB128(uint): + ''' + Encode a LEB128 unsigned integer from the (positive) integer uint + Returns bytes + @uint: integer to encode + ''' + if uint == 0: + return b'\x00' + bts = [] + while uint != 0: + byte = uint &0x7f + uint >>= 7 + if uint != 0: + byte |= 0x80 + bts.append(struct.pack('B', byte)) + return b''.join(bts) + +def decode_LEB128(bs): + ''' + Decode a LEB128-encoded unsigned integer at the beginning of the byte string bs + Returns a tuple (res, n_bytes) + @bs: byte string + -res: 
the decoded integer + -n_bytes: the number of bytes it was encoded on + ''' + res = 0 + n = 0 + for b in bs: + if PY2: + b = struct.unpack('B', b)[0] + res |= (b&0x7f) << n*7 + n += 1 + if b&0x80 == 0: + break + bs = bs[n:] + return res, n + +def list_eq(l, m): + ''' + Test if @l and @m contain the same elements + Elements have to be hashable + ''' + return collections.Counter(l) == collections.Counter(m) + + diff --git a/setup.py b/setup.py index e8ea7b3a6..1930f9bcb 100644 --- a/setup.py +++ b/setup.py @@ -31,6 +31,7 @@ def buil_all(): "miasm/arch/sh4", "miasm/arch/mips32", "miasm/arch/ppc", + "miasm/arch/wasm", "miasm/core", "miasm/expression", "miasm/ir", diff --git a/test/arch/wasm/arch.py b/test/arch/wasm/arch.py new file mode 100644 index 000000000..725430967 --- /dev/null +++ b/test/arch/wasm/arch.py @@ -0,0 +1,76 @@ +from __future__ import print_function + +import time +from pdb import pm +from miasm.core.utils import decode_hex, encode_hex +from miasm.arch.wasm.arch import * +from miasm.core.locationdb import LocationDB + +loc_db = LocationDB() + +def h2i(s): + return decode_hex(s.replace(' ', '')) + + +def u16swap(i): + return struct.unpack('H', i))[0] + +reg_tests_wasm = [ + ("xxxx unreachable ", + "00"), + ("xxxx nop ", + "01"), + ("xxxx i32.or ", + "72"), + ("xxxx i32.const 0xF5", + "41f501"), + ("xxxx i32.const 0x0", + "4100"), + ("xxxx i32.const 0xFFFFFFFF", + "417f"), + ("xxxx i64.const 0xFFFFFFFF", + "42ffffffff0f"), + ("xxxx i32.const 0x7FFFFFFF", + "41ffffffff07"), + ("xxxx i64.const 0x7FFFFFFF", + "42ffffffff07"), + ("xxxx loop (result i32)", + "037f"), + ("xxxx block ", + "0240"), + ("xxxx br 0x0", + "0c00"), + ("xxxx call 0x40", + "10C000"), + ("xxxx local.set 0x99", + "219901"), + ("xxxx i32.load8_s offset=0x3 align=0x2", + "2C0302"), + ("xxxx i32.load8_s offset=0xFF align=0x2", + "2CFF0102"), + ("xxxx br_table 0x1 0x2 0x3 0x4 0x5 0x6 0x0", + "0E0601020304050600"), + ("xxxx br_table 0x0", + "0E0000"), + ("xxxx br_table 0xFF 0x1 0x2 0x3", + 
"0E03FF01010203"), +] + +ts = time.time() + +for s, l in reg_tests_wasm: + print("-" * 80) + s = s[8:] + b = h2i((l)) + print(repr(b)) + mn = mn_wasm.dis(b, None) + print([str(x) for x in mn.args]) + print("'{}'".format(s)) + print("'{}'".format(mn)) + assert(str(mn) == s) + l = mn_wasm.fromstring(s, loc_db, None) + assert(str(l) == s) + a = mn_wasm.asm(l) + print([x for x in a]) + print(repr(b)) + assert(b in a) diff --git a/test/arch/wasm/sem.py b/test/arch/wasm/sem.py new file mode 100755 index 000000000..cf73444f9 --- /dev/null +++ b/test/arch/wasm/sem.py @@ -0,0 +1,140 @@ +#! /usr/bin/env python2 +#-*- coding:utf-8 -*- + +from __future__ import print_function +import unittest +import logging + +from future.utils import viewitems + +from miasm.ir.symbexec import SymbolicExecutionEngine +from miasm.arch.wasm.arch import mn_wasm as mn +from miasm.arch.wasm.arch import * +from miasm.arch.wasm.sem import ir_wasm as ir_arch +from miasm.arch.wasm.regs import * +from miasm.expression.expression import * +from miasm.core.locationdb import LocationDB + +logging.getLogger('cpuhelper').setLevel(logging.ERROR) +#EXCLUDE_REGS = set([res, ir_arch().IRDst]) +EXCLUDE_REGS = set([ir_arch().IRDst]) + +mode = None + +def M(addr): + return ExprMem(ExprInt(addr, 16), 16) + + +def compute(asm, inputstate={}, debug=False): + loc_db = LocationDB() + sympool = dict(regs_init) + sympool.update({k: ExprInt(v, k.size) for k, v in viewitems(inputstate)}) + ir_tmp = ir_arch(loc_db) + ircfg = ir_tmp.new_ircfg() + symexec = SymbolicExecutionEngine(ir_tmp, sympool) + instr = mn.fromstring(asm, loc_db, mode) + code = mn.asm(instr)[0] + instr = mn.dis(code, mode) + instr.offset = inputstate.get(PC, 0) + loc_key = ir_tmp.add_instr_to_ircfg(instr, ircfg) + symexec.run_at(ircfg, loc_key) + if debug: + for k, v in viewitems(symexec.symbols): + if regs_init.get(k, None) != v: + print(k, v) + print(symexec.symbols) + #fds + return None + return { + k: v.arg.arg for k, v in viewitems(symexec.symbols) + 
if k not in EXCLUDE_REGS and regs_init.get(k, None) != v + } + +def computemany(asm_l, inputstate={}, debug=False): + loc_db = LocationDB() + sympool = dict(regs_init) + sympool.update({k: ExprInt(v, k.size) for k, v in viewitems(inputstate)}) + ir_tmp = ir_arch(loc_db) + ircfg = ir_tmp.new_ircfg() + symexec = SymbolicExecutionEngine(ir_tmp, sympool) + i = 0 + print('----------\n{}\n----------'.format('START')) + print(symexec.symbols) + for asm in asm_l: + instr = mn.fromstring(asm, loc_db, mode) + code = mn.asm(instr)[0] + instr = mn.dis(code, mode) + instr.offset = inputstate.get(PC, i*8) + i += 1 + loc_key = ir_tmp.add_instr_to_ircfg(instr, ircfg) + symexec.run_at(ircfg, loc_key) + + print('\n\n----------\n{}\n----------'.format(str(instr))) + print(symexec.symbols) + + if debug: + for k, v in viewitems(symexec.symbols): + if regs_init.get(k, None) != v: + print(k, v) + #fds + return None + return { + k: v.arg.arg for k, v in viewitems(symexec.symbols) + if k not in EXCLUDE_REGS and regs_init.get(k, None) != v + } + +class TestWasmSemantic(unittest.TestCase): + def test_const(self): + self.assertEqual(compute('i64.const 0x34'), + {}) + +if __name__ == '__main__': + testsuite = unittest.TestLoader().loadTestsFromTestCase(TestWasmSemantic) + #report = unittest.TextTestRunner(verbosity=2).run(testsuite) + #exit(len(report.errors + report.failures)) + print(computemany([ + 'i64.const 0x12', + 'i32.const 0x34', + 'drop', + 'i64.const 0x10', + 'i64.const 0x56', + 'i64.add', + 'i64.xor', + 'i64.eqz', + 'drop', # No value on stack here + 'i32.const 0x1', + 'drop', + 'i32.const 0x2', + 'i32.eqz', + 'drop', + 'i64.const 0x1', + 'drop', + 'i32.const 0x2', + 'i32.eqz', + 'i32.const 0x0', + 'i32.eqz', + 'i32.const 0x4', + 'i32.const 0x3', + 'i32.le_u', + 'i32.const 0x4', + 'i32.const 0x5', + 'i32.le_u', + 'i32.const 0x4', + 'i32.const 0x4', + 'i32.lt_u', + 'drop', + 'drop', + 'drop', + 'drop', + 'drop', # No value on stack here + 'i64.const -0x2', + 'i64.popcnt', + 'drop', 
+ 'i64.const -0x1', + 'i32.wrap_i64', + 'i64.extend_i32_s', + 'i32.wrap_i64', + 'i64.extend_i32_u', + 'drop', # No value on stack here + ])) +