diff --git a/example/jitter/run_with_linuxenv.py b/example/jitter/run_with_linuxenv.py index 9290e6a83..6826661b9 100644 --- a/example/jitter/run_with_linuxenv.py +++ b/example/jitter/run_with_linuxenv.py @@ -58,6 +58,7 @@ ld_path = linux_env.filesystem.resolve_path(ld_path) cont_ld = Container.from_stream( open(ld_path, "rb"), + loc_db=loc_db, vm=jitter.vm, addr=0x80000000, apply_reloc=True diff --git a/example/jitter/unpack_upx.py b/example/jitter/unpack_upx.py index a75e9a797..16067e29b 100644 --- a/example/jitter/unpack_upx.py +++ b/example/jitter/unpack_upx.py @@ -25,7 +25,9 @@ def kernel32_GetProcAddress(jitter): # Get the generated address of the library, and store it in memory to # dst_ad - ad = sb.loader.lib_get_add_func(args.libbase, fname, dst_ad) + #ad = sb.loader.lib_get_add_func(args.libbase, fname, dst_ad) + name = sb.loader.module_base_address_to_name[args.libbase] + ad = sb.loader.resolve_function(name, fname, dst_ad=dst_ad) # Add a breakpoint in case of a call on the resolved function # NOTE: never happens in UPX, just for skeleton jitter.handle_function(ad) diff --git a/miasm/analysis/dse.py b/miasm/analysis/dse.py index 3a7ea3c18..1d922aeff 100644 --- a/miasm/analysis/dse.py +++ b/miasm/analysis/dse.py @@ -257,14 +257,14 @@ def add_lib_handler(self, loader, namespace): # lambda cannot contain statement def default_func(dse): - fname = loader.fad2cname[dse.jitter.pc] + fname = loader.function_address_to_canonical_name[dse.jitter.pc] if isinstance(fname, tuple): fname = b"%s_%d_symb" % (force_bytes(fname[0]), fname[1]) else: fname = b"%s_symb" % force_bytes(fname) raise RuntimeError("Symbolic stub '%s' not found" % fname) - for addr, fname in viewitems(loader.fad2cname): + for addr, fname in viewitems(loader.function_address_to_canonical_name): if isinstance(fname, tuple): fname = b"%s_%d_symb" % (force_bytes(fname[0]), fname[1]) else: diff --git a/miasm/analysis/sandbox.py b/miasm/analysis/sandbox.py index e94bd0b3f..4bb2bf8b9 100644 --- 
a/miasm/analysis/sandbox.py +++ b/miasm/analysis/sandbox.py @@ -232,7 +232,7 @@ def init_loader(self, options): loader_start_address = int(options.loader_start_address, 0) else: loader_start_address = None - loader = LoaderWindows(loader_start_address=loader_start_address) + loader = LoaderWindows(self.jitter.vm, loader_start_address=loader_start_address) self.loader = loader winobjs.loader = loader @@ -242,7 +242,7 @@ def use_windows_structs(self): win_api_x86_32_seh.main_pe_name = self.fname_basename win_api_x86_32_seh.main_pe = self.pe win_api_x86_32.winobjs.hcurmodule = self.pe.NThdr.ImageBase - win_api_x86_32_seh.name2module = self.name2module + win_api_x86_32_seh.name2module = self.loader.module_name_to_module win_api_x86_32_seh.set_win_fs_0(self.jitter) win_api_x86_32_seh.init_seh(self.jitter) @@ -250,52 +250,11 @@ def load_main_pe(self, options): from miasm.jitter.loader.pe import vm_load_pe from miasm.os_dep.win_api_x86_32 import winobjs - self.name2module = {} - - # Load main pe - with open(options.filename, "rb") as fstream: - self.pe = vm_load_pe( - self.jitter.vm, - fstream.read(), - load_hdr=options.load_hdr, - name=options.filename, - winobjs=winobjs, - ) - self.name2module[self.fname_basename] = self.pe + module_image_base = self.loader.load_module(options.filename) + name = self.loader.module_base_address_to_name[module_image_base] + self.pe = self.loader.module_name_to_module[name] winobjs.current_pe = self.pe - def load_base_dll(self): - from miasm.os_dep.win_api_x86_32 import winobjs - from miasm.jitter.loader.pe import vm_load_pe_libs, fix_pe_imports - - # Load libs in memory - self.name2module.update( - vm_load_pe_libs( - self.jitter.vm, - self.LOADED_DLLS, - self.loader, - self.PATH_DLLS, - winobjs=winobjs, - ) - ) - - # Patch libs imports - for name, pe in self.name2module.items(): - fix_pe_imports(self.jitter.vm, pe, self.loader, pe_name=name) - - def load_dependencies(self): - from miasm.os_dep.win_api_x86_32 import winobjs - from 
miasm.jitter.loader.pe import vm_load_pe_and_dependencies - - vm_load_pe_and_dependencies( - self.jitter.vm, - self.fname_basename, - self.name2module, - self.loader, - self.PATH_DLLS, - winobjs=winobjs, - ) - def set_call_handler(self, custom_methods): # Library calls handler from miasm.os_dep import win_api_x86_32 @@ -306,14 +265,6 @@ def set_call_handler(self, custom_methods): methods.update(custom_methods) self.jitter.add_lib_handler(self.loader, methods) - def fix_pe_imports(self): - # Fix pe imports - from miasm.jitter.loader.pe import fix_pe_imports - - fix_pe_imports( - self.jitter.vm, self.pe, self.loader, pe_name=self.fname_basename - ) - def __init__(self, jitter, options, custom_methods=None): self.fname_basename = os.path.basename(options.filename).lower() self.jitter = jitter @@ -321,12 +272,7 @@ def __init__(self, jitter, options, custom_methods=None): self.init_stack() self.init_loader(options) self.load_main_pe(options) - if options.loadbasedll: - self.load_base_dll() - if options.dependencies: - self.load_dependencies() - self.fix_pe_imports() self.set_call_handler(custom_methods) # Manage SEH @@ -419,7 +365,7 @@ def init_loader(self, options): loader_start_address = int(options.loader_start_address, 0) else: loader_start_address = None - loader = LoaderWindows(apiset=apiset, loader_start_address=loader_start_address) + loader = LoaderWindows(self.jitter.vm, apiset=apiset, loader_start_address=loader_start_address) self.loader = loader winobjs.loader = loader @@ -503,7 +449,7 @@ def __init__(self, jitter, options, custom_methods=None): self.jitter.init_stack() # Import manager - self.loader = LoaderUnix() + self.loader = LoaderUnix(self.jitter.vm) with open(options.filename, "rb") as fstream: self.elf = vm_load_elf( @@ -562,7 +508,7 @@ def __init__(self, jitter, options, custom_methods=None): self.jitter.init_stack() # Import manager - self.loader = LoaderUnix() + self.loader = LoaderUnix(self.jitter.vm) data = open(options.filename, "rb").read() 
options.load_base_addr = int(options.load_base_addr, 0) diff --git a/miasm/jitter/jitload.py b/miasm/jitter/jitload.py index 20d795e65..edb6e441f 100644 --- a/miasm/jitter/jitload.py +++ b/miasm/jitter/jitload.py @@ -491,7 +491,7 @@ def handle_lib(jitter): """Resolve the name of the function which cause the handler call. Then call the corresponding handler from users callback. """ - fname = jitter.libs.fad2cname[jitter.pc] + fname = jitter.loader.function_address_to_canonical_name[jitter.pc] if fname in jitter.user_globals: func = jitter.user_globals[fname] else: @@ -510,21 +510,21 @@ def handle_function(self, f_addr): """Add a breakpoint which will trigger the function handler""" self.add_breakpoint(f_addr, self.handle_lib) - def add_lib_handler(self, libs, user_globals=None): - """Add a function to handle libs call with breakpoints - @libs: libimp instance + def add_lib_handler(self, loader, user_globals=None): + """Add a function to handle loader call with breakpoints + @loader: Loader instance @user_globals: dictionary for defined user function """ if user_globals is None: user_globals = {} - self.libs = libs + self.loader = loader out = {} for name, func in viewitems(user_globals): out[name] = func self.user_globals = out - for f_addr in libs.fad2cname: + for f_addr in loader.function_address_to_canonical_name: self.handle_function(f_addr) def eval_expr(self, expr): diff --git a/miasm/jitter/loader/elf.py b/miasm/jitter/loader/elf.py index 9aa11143a..97100a36e 100644 --- a/miasm/jitter/loader/elf.py +++ b/miasm/jitter/loader/elf.py @@ -43,7 +43,7 @@ def preload_elf(vm, e, loader, patch_vm_imp=True, loc_db=None): continue for ad in ads: ad_base_lib = loader.lib_get_add_base(libname) - ad_libfunc = loader.lib_get_add_func(ad_base_lib, libfunc, ad) + ad_libfunc = loader.resolve_function(vm, ad_base_lib, libfunc, ad) libname_s = canon_libname_libfunc(libname, libfunc) dyn_funcs[libname_s] = ad_libfunc @@ -317,7 +317,17 @@ def vm_load_elf(vm, fdata, name="", 
base_addr=0, loc_db=None, apply_reloc=False, class LoaderUnix(Loader): - pass + + def lib_get_add_base(self, name): + name = name.lower().strip(' ') + if name in self.module_name_to_base_address: + ad = self.module_name_to_base_address[name] + else: + ad = self.fake_library_entry(name) + return ad + + def resolve_function(self, vm, libad, imp_ord_or_name, dst_ad=None): + return self.fake_resolve_function(libad, imp_ord_or_name, dst_ad=dst_ad) class libimp_elf(LoaderUnix): diff --git a/miasm/jitter/loader/pe.py b/miasm/jitter/loader/pe.py index 3b871886c..f2b9b1a57 100644 --- a/miasm/jitter/loader/pe.py +++ b/miasm/jitter/loader/pe.py @@ -26,41 +26,6 @@ match_hyphen_digit = re.compile(".*-[\d]+-[\d]+$") -def get_pe_dependencies(pe_obj): - """Collect the shared libraries upon which this PE depends. - @pe_obj: pe object - Returns a set of strings of DLL names. - Example: - container = miasm.analysis.binary.Container.from_string(buf) - deps = miasm.jitter.loader.pe.get_pe_dependencies(container.executable) - assert sorted(deps)[0] == 'api-ms-win-core-appcompat-l1-1-0.dll' - """ - - if pe_obj.DirImport.impdesc is None: - return set() - out = set() - for dependency in pe_obj.DirImport.impdesc: - libname = dependency.dlldescname.name.lower() - # transform bytes to str - libname = force_str(libname) - out.add(libname) - - # If binary has redirected export, add dependencies - if pe_obj.DirExport.expdesc != None: - addrs = get_export_name_addr_list(pe_obj) - for imp_ord_or_name, ad in addrs: - # if export is a redirection, search redirected dll - # and get function real addr - ret = is_redirected_export(pe_obj, ad) - if ret is False: - continue - dllname, func_info = ret - dllname = dllname + '.dll' - out.add(dllname) - - return out - - def get_import_address_pe(e): """Compute the addresses of imported symbols. 
@e: pe object @@ -92,29 +57,6 @@ def get_import_address_pe(e): return import2addr -def fix_pe_imports(vm, e, loader, patch_vm_imp=True, pe_name=None): - import_information = get_import_address_pe(e) - dyn_funcs = {} - # log.debug('imported funcs: %s' % import_information) - for (libname, funcname), ads in viewitems(import_information): - for ad in ads: - libname = force_str(libname) - if loader.apiset: - libname = loader.apiset.get_redirection(libname, pe_name) - if libname.startswith("api-ms"): - fds - ad_base_lib = loader.lib_get_add_base(libname) - ad_funcname = loader.lib_get_add_func(ad_base_lib, funcname, ad) - - libname_s = canon_libname_libfunc(libname, funcname) - dyn_funcs[libname_s] = ad_funcname - if patch_vm_imp: - vm.set_mem( - ad, struct.pack(cstruct.size2type[e._wsize], ad_funcname) - ) - return dyn_funcs - - def is_redirected_export(pe_obj, addr): """Test if the @addr is a forwarded export address. If so, return dllname/function name couple. If not, return False. @@ -143,7 +85,7 @@ def is_redirected_export(pe_obj, addr): return dllname, func_info -def get_export_name_addr_list(e): +def get_export_name_addr_list(e, parent=None): """Collect names/ordinals and addresses of symbols exported by the given PE. 
@e: PE instance Returns a list of tuples: @@ -172,6 +114,8 @@ def get_export_name_addr_list(e): if not s.rva: continue out.append((i + e.DirExport.expdesc.base, e.rva2virt(s.rva))) + #if parent and parent.startswith("shlwapi") and i + e.DirExport.expdesc.base == 270: + # fds return out @@ -192,7 +136,7 @@ def vm_load_pe(vm, fdata, align_s=True, load_hdr=True, name="", winobjs=None, ba # Parse and build a PE instance pe = pe_init.PE(fdata, **kargs) - # Optionaly rebase PE + # Optionally rebase PE if base_addr is not None: pe.reloc_to(base_addr) @@ -303,43 +247,6 @@ def vm_load_pe(vm, fdata, align_s=True, load_hdr=True, name="", winobjs=None, ba return pe -def vm_load_pe_lib(vm, fname_in, loader, lib_path_base, **kargs): - """Call vm_load_pe on @fname_in and update @loader accordingly - @vm: VmMngr instance - @fname_in: library name - @loader: LoaderWindows instance - @lib_path_base: DLLs relative path - Return the corresponding PE instance - Extra arguments are passed to vm_load_pe - """ - - log.info('Loading module %r', fname_in) - - fname = os.path.join(lib_path_base, fname_in) - with open(fname, "rb") as fstream: - pe = loader.vm_load_pe(vm, fstream.read(), name=fname_in, **kargs) - loader.add_export_lib(pe, fname_in) - return pe - - -def vm_load_pe_libs(vm, libs_name, loader, lib_path_base, **kargs): - """Call vm_load_pe_lib on each @libs_name filename - @vm: VmMngr instance - @libs_name: list of str - @loader: LoaderWindows instance - @lib_path_base: (optional) DLLs relative path - Return a dictionary Filename -> PE instances - Extra arguments are passed to vm_load_pe_lib - """ - out = {} - for fname in libs_name: - assert isinstance(fname, str) - out[fname] = vm_load_pe_lib(vm, fname, loader, lib_path_base, **kargs) - - return out - - - def vm2pe(myjit, fname, loader=None, e_orig=None, min_addr=None, max_addr=None, min_section_offset=0x1000, img_base=None, @@ -389,11 +296,6 @@ def vm2pe(myjit, fname, loader=None, e_orig=None, data=all_mem[ad]['data']) first = 
False if loader: - if added_funcs is not None: - for addr, funcaddr in added_funcs: - libbase, dllname = loader.fad2info[funcaddr] - loader.lib_get_add_func(libbase, dllname, addr) - filter_import = kwargs.get( 'filter_import', lambda _, ad: mye.virt.is_addr_in(ad)) new_dll = loader.gen_new_lib(mye, filter_import) @@ -430,108 +332,28 @@ def vm2pe(myjit, fname, loader=None, e_orig=None, class LoaderWindows(Loader): - def __init__(self, *args, apiset=None, loader_start_address=None, **kwargs): - super(LoaderWindows, self).__init__(*args, **kwargs) + def __init__(self, vm, apiset=None, loader_start_address=None, *args, **kwargs): + super(LoaderWindows, self).__init__(vm, *args, **kwargs) + self.library_path = ["win_dll", "./"] # dependency -> redirector self.created_redirected_imports = {} + self.module_name_to_module = {} self.apiset = apiset self.loader_start_address = loader_start_address + def lib_get_add_base(self, name): + name = name.lower().strip(' ') + if not "." in name: + log.warning('warning adding .dll to modulename') + name += '.dll' + log.warning(name) - def add_function(self, dllname, imp_ord_or_name, addr): - assert isinstance(dllname, str) - assert isinstance(imp_ord_or_name, (int, str)) - libad = self.name2off[dllname] - c_name = canon_libname_libfunc( - dllname, imp_ord_or_name - ) - update_entry = True - if addr in self.fad2info: - known_libad, known_imp_ord_or_name = self.fad2info[addr] - if isinstance(imp_ord_or_name, int): - update_entry = False - self.cname2addr[c_name] = addr - log.debug("Add func %s %s", hex(addr), c_name) - if update_entry: - log.debug("Real Add func %s %s", hex(addr), c_name) - self.fad2cname[addr] = c_name - self.fad2info[addr] = libad, imp_ord_or_name - - - def add_export_lib(self, e, name): - if name in self.created_redirected_imports: - log.error("%r has previously been created due to redirect\ - imports due to %r. 
Change the loading order.", - name, self.created_redirected_imports[name]) - raise RuntimeError('Bad import: loading previously created import') - - self.all_exported_lib.append(e) - # will add real lib addresses to database - if name in self.name2off: - ad = self.name2off[name] - if e is not None and name in self.fake_libs: - log.error( - "You are trying to load %r but it has been faked previously. Try loading this module earlier.", name) - raise RuntimeError("Bad import") + if name in self.module_name_to_base_address: + ad = self.module_name_to_base_address[name] else: - log.debug('new lib %s', name) - ad = e.NThdr.ImageBase - libad = ad - self.name2off[name] = ad - self.libbase2lastad[ad] = ad + 0x1 - self.lib_imp2ad[ad] = {} - self.lib_imp2dstad[ad] = {} - self.libbase_ad += 0x1000 - - ads = get_export_name_addr_list(e) - todo = list(ads) - # done = [] - while todo: - # for imp_ord_or_name, ad in ads: - imp_ord_or_name, ad = todo.pop() - - # if export is a redirection, search redirected dll - # and get function real addr - ret = is_redirected_export(e, ad) - if ret: - exp_dname, exp_fname = ret - exp_dname = exp_dname + '.dll' - exp_dname = exp_dname.lower() - # if dll auto refes in redirection - if exp_dname == name: - libad_tmp = self.name2off[exp_dname] - if isinstance(exp_fname, str): - exp_fname = bytes(ord(c) for c in exp_fname) - found = None - for tmp_func, tmp_addr in ads: - if tmp_func == exp_fname: - found = tmp_addr - assert found is not None - ad = found - else: - # import redirected lib from non loaded dll - if not exp_dname in self.name2off: - self.created_redirected_imports.setdefault( - exp_dname, set()).add(name) - - # Ensure import entry is created - new_lib_base = self.lib_get_add_base(exp_dname) - # Ensure function entry is created - _ = self.lib_get_add_func(new_lib_base, exp_fname) - - libad_tmp = self.name2off[exp_dname] - ad = self.lib_imp2ad[libad_tmp][exp_fname] - - self.lib_imp2ad[libad][imp_ord_or_name] = ad - name_inv = dict( - 
(value, key) for key, value in viewitems(self.name2off) - ) - c_name = canon_libname_libfunc( - name_inv[libad], imp_ord_or_name) - self.fad2cname[ad] = c_name - self.cname2addr[c_name] = ad - log.debug("Add func %s %s", hex(ad), c_name) - self.fad2info[ad] = libad, imp_ord_or_name + ad = self.fake_library_entry(name) + return ad + def gen_new_lib(self, target_pe, filter_import=lambda peobj, ad: True, **kwargs): """Gen a new DirImport description @@ -540,12 +362,24 @@ def gen_new_lib(self, target_pe, filter_import=lambda peobj, ad: True, **kwargs) """ new_lib = [] - for lib_name, ad in viewitems(self.name2off): + module_to_dsts = {} + for canonical_name, dsts in self.canonical_name_to_dst_addr.items(): + address = self.function_canonical_name_to_address[canonical_name] + module_name, imp_ord_or_name = self.function_address_to_info[address] + if module_name not in module_to_dsts: + module_to_dsts[module_name] = {} + module_to_dsts[module_name].setdefault(imp_ord_or_name, set()).update(dsts) + #for lib_name, ad in viewitems(self.module_name_to_base_address): + for module_name, info_dsts in module_to_dsts.items(): # Build an IMAGE_IMPORT_DESCRIPTOR # Get fixed addresses out_ads = dict() # addr -> func_name - for func_name, dst_addresses in viewitems(self.lib_imp2dstad[ad]): + """ + if ad not in self.lib_imp2dstad: + continue + """ + for func_name, dst_addresses in info_dsts.items(): out_ads.update({addr: func_name for addr in dst_addresses}) # Filter available addresses according to @filter_import @@ -580,7 +414,7 @@ def gen_new_lib(self, target_pe, filter_import=lambda peobj, ad: True, **kwargs) except pe.InvalidOffset: pass else: - new_lib.append(({"name": lib_name, + new_lib.append(({"name": module_name, "firstthunk": rva}, funcs) ) @@ -590,9 +424,9 @@ def gen_new_lib(self, target_pe, filter_import=lambda peobj, ad: True, **kwargs) return new_lib - def vm_load_pe(self, vm, fdata, align_s=True, load_hdr=True, name="", winobjs=None, **kargs): + def vm_load_pe(self, 
fdata, align_s=True, load_hdr=True, name="", winobjs=None, **kargs): pe = vm_load_pe( - vm, fdata, + self.vm, fdata, align_s=align_s, load_hdr=load_hdr, name=name, winobjs=winobjs, @@ -603,119 +437,135 @@ def vm_load_pe(self, vm, fdata, align_s=True, load_hdr=True, name="", winobjs=No self.loader_start_address += pe.NThdr.sizeofimage + 0x1000 return pe + def find_module_path(self, module_name): + """ + Find the real path of module_name + """ + module_name = module_name.lower() + for path in self.library_path: + fname = os.path.join(path, module_name) + if os.access(fname, os.R_OK): + return fname + if module_name in self.unresolved_modules_names: + return None + self.fake_library_entry(module_name) + return None + + + def resolve_function(self, module_name, imp_ord_or_name, parent=None, dst_ad=None): + """ + Resolve the function named @imp_ord_or_name of the module @module_name + Optionally use @parent for ApiSet resolution + Use @dst_ad to hint the destination address of the function + """ + if self.apiset: + # First, try to resolve ApiSet + module_name = self.apiset.get_redirection(module_name, parent) + + if module_name in self.unresolved_modules_names: + module_base_addr = self.module_name_to_base_address[module_name] + addr = self.fake_resolve_function(module_base_addr, imp_ord_or_name, dst_ad=dst_ad) + self.add_function(module_name, imp_ord_or_name, addr, dst_ad=dst_ad) + return addr + + if module_name not in self.module_name_to_module: + log.info("Module %r not found", module_name) + raise ValueError("Module %r not found" % module_name) + pe = self.module_name_to_module[module_name] + export = self.module_name_to_export[module_name] + addr = export.get(imp_ord_or_name, None) + if addr is None: + log.info("Function %r not found in %r", imp_ord_or_name, module_name) + raise ValueError("Function %r not found in %r" % (imp_ord_or_name, module_name)) + ret = is_redirected_export(pe, addr) + if ret is False: + self.add_function(module_name, imp_ord_or_name, addr, dst_ad=dst_ad) + return addr + + module_target, func_info = ret + log.debug( + "Function %r %r redirected to %r %r", + 
module_name, imp_ord_or_name, + module_target, func_info + ) -class limbimp_pe(LoaderWindows): - def __init__(self, *args, **kwargs): - warnings.warn("DEPRECATION WARNING: Use LoaderWindows instead of limimb_pe") - super(limbimp_pe, self).__init__(*args, **kwargs) + module_target += '.dll' -def vm_load_pe_and_dependencies(vm, fname, name2module, loader, - lib_path_base, **kwargs): - """Load a binary and all its dependencies. Returns a dictionary containing - the association between binaries names and it's pe object + # First, try to resolve ApiSet + if self.apiset: + module_target = self.apiset.get_redirection(module_target, module_name) - @vm: virtual memory manager instance - @fname: full path of the binary - @name2module: dict containing association between name and pe - object. Updated. - @loader: Loader instance - @lib_path_base: directory of the libraries containing dependencies + self.load_module(module_target) + addr = self.resolve_function(module_target, func_info, module_name, dst_ad=dst_ad) + self.add_function(module_target, imp_ord_or_name, addr, dst_ad=dst_ad) + return addr - """ + def load_module(self, name): + """ + Load module and its dependencies + Return image base address of the module + """ + name = name.lower() + fname = self.find_module_path(name) + + if name in self.unresolved_modules_names: + return self.module_name_to_base_address[name] + if fname is None: + raise ValueError("Module %r not found" % name) + + module_address = self.module_name_to_base_address.get(name, None) + if module_address is not None: + # Module is already loaded + return module_address + #log.info("load module %r %r", name, fname) + try: + with open(fname, "rb") as fstream: + log.info('Loading module name %r', fname) + pe = self.vm_load_pe( + fstream.read(), name=fname + ) + except IOError: + log.error('Cannot open %s' % fname) + # An unreadable module cannot be mapped: propagate the error + raise + + image_base = pe.NThdr.ImageBase + self.module_name_to_module[name] = pe + exports = get_export_name_addr_list(pe) + self.module_name_to_export[name] 
= dict(exports) + self.module_name_to_base_address[name] = pe.NThdr.ImageBase + self.module_base_address_to_name[pe.NThdr.ImageBase] = name + + # Resolve imports + if pe.DirImport.impdesc is None: + # No imports + return image_base + out = set() + for dependency in pe.DirImport.impdesc: + libname = dependency.dlldescname.name.lower() + libname = force_str(libname) + if self.apiset: + # Resolve ApiSet + libname = self.apiset.get_redirection(libname, name) + self.load_module(libname) + + # Fix imports + import_information = get_import_address_pe(pe) + dyn_funcs = {} + # log.debug('imported funcs: %s' % import_information) + for (libname, funcname), ads in import_information.items(): + addr_resolved = self.resolve_function(libname, funcname, name) + addr_bytes = struct.pack(cstruct.size2type[pe._wsize], addr_resolved) + for addr in ads: + self.vm.set_mem(addr, addr_bytes) + return image_base - todo = [(fname, fname, 0)] - weight2name = {} - done = set() - # Walk dependencies recursively - while todo: - name, fname, weight = todo.pop() - if name in done: - continue - done.add(name) - weight2name.setdefault(weight, set()).add(name) - if name in name2module: - pe_obj = name2module[name] - else: - try: - with open(fname, "rb") as fstream: - log.info('Loading module name %r', fname) - pe_obj = loader.vm_load_pe( - vm, fstream.read(), name=fname, **kwargs - ) - except IOError: - log.error('Cannot open %s' % fname) - name2module[name] = None - continue - name2module[name] = pe_obj - - new_dependencies = get_pe_dependencies(pe_obj) - for libname in new_dependencies: - if loader.apiset: - libname = loader.apiset.get_redirection(libname, name) - todo.append((libname, os.path.join(lib_path_base, libname), weight - 1)) - - known_export_addresses = {} - to_resolve = {} - for name, pe_obj in name2module.items(): - if pe_obj is None: - continue - if pe_obj.DirExport.expdesc == None: - continue - addrs = get_export_name_addr_list(pe_obj) - for imp_ord_or_name, ad in addrs: - # if 
export is a redirection, search redirected dll - # and get function real addr - ret = is_redirected_export(pe_obj, ad) - if ret is False: - known_export_addresses[(name, imp_ord_or_name)] = ad - else: - dllname, func_info = ret - - dllname = loader.apiset.get_redirection(dllname, name) - - dllname = dllname + '.dll' - to_resolve[(name, imp_ord_or_name)] = (dllname, func_info) - - modified = True - while modified: - modified = False - out = {} - for target, dependency in to_resolve.items(): - dllname, funcname = dependency - if dependency in known_export_addresses: - known_export_addresses[target] = known_export_addresses[dependency] - modified = True - else: - log.error("Cannot resolve redirection %r %r", dllname, dependency) - raise RuntimeError('Cannot resolve redirection') - to_resolve = out - - for dllname, pe_obj in name2module.items(): - if pe_obj is None: - continue - ad = pe_obj.NThdr.ImageBase - libad = ad - loader.name2off[dllname] = ad - loader.libbase2lastad[ad] = ad + 0x1 - loader.lib_imp2ad[ad] = {} - loader.lib_imp2dstad[ad] = {} - loader.libbase_ad += 0x1000 - - for (dllname, imp_ord_or_name), addr in known_export_addresses.items(): - loader.add_function(dllname, imp_ord_or_name, addr) - libad = loader.name2off[dllname] - loader.lib_imp2ad[libad][imp_ord_or_name] = addr - - assert not to_resolve - - for dllname, pe_obj in name2module.items(): - if pe_obj is None: - continue - fix_pe_imports(vm, pe_obj, loader, patch_vm_imp=True, pe_name=dllname) +class limbimp_pe(LoaderWindows): + def __init__(self, *args, **kwargs): + raise DeprecationWarning("DEPRECATION WARNING: Use LoaderWindows instead of limimb_pe") - return name2module # machine -> arch PE_machine = { @@ -746,6 +596,7 @@ def compute_hash(self, apiset_lib_name): return hashk def get_redirected_host(self, libname, entries, parent): + #log.info("\tlibname %r %r", parent, libname) if len(entries) == 1: assert "" in entries log.debug("ApiSet %s => %s" % (libname, entries[""])) @@ -755,6 +606,9 @@ 
def get_redirected_host(self, libname, entries, parent): libname = entries[parent] else: libname = entries[""] + if libname.startswith("api-ms") or libname.startswith("ext-ms"): + raise RuntimeError("Unresolved ApiSet redirection: %r" % libname) + return libname def get_redirection(self, libname, parent_name): @@ -769,7 +623,13 @@ def get_redirection(self, libname, parent_name): cname = name_nodll[:name_nodll.rfind('-')] else: cname = name_nodll - values = self.hash_entries.get(cname, None) + #log.info("\t cname %r", cname) + values = self.hash_entries.get( + cname, + self.hash_entries.get( + cname+"-1", None + ) + ) if not values: # No entry found return libname diff --git a/miasm/jitter/loader/utils.py b/miasm/jitter/loader/utils.py index 4cd08b13f..547f868fd 100644 --- a/miasm/jitter/loader/utils.py +++ b/miasm/jitter/loader/utils.py @@ -1,4 +1,5 @@ from builtins import int as int_types +import warnings import logging from future.utils import viewitems, viewvalues @@ -23,69 +24,90 @@ def canon_libname_libfunc(libname, libfunc): class Loader(object): - def __init__(self, lib_base_ad=0x71111000, **kargs): - self.name2off = {} - self.libbase2lastad = {} - self.libbase_ad = lib_base_ad - self.lib_imp2ad = {} - self.lib_imp2dstad = {} - self.fad2cname = {} - self.cname2addr = {} - self.fad2info = {} - self.all_exported_lib = [] - self.fake_libs = set() + def __init__(self, vm, lib_base_ad=0x71111000, **kargs): + self.vm = vm + + self.module_name_to_base_address = {} + self.module_base_address_to_name = {} + + self.function_address_to_canonical_name = {} + self.function_canonical_name_to_address = {} + + self.module_base_address_to_last_address = {} + self.last_module_address = lib_base_ad + self.module_name_to_export = {} + self.canonical_name_to_dst_addr = {} + self.function_address_to_info = {} + self.unresolved_modules_names = set() + + def get_name2off(self): + warnings.warn("Deprecated API: use .module_name_to_base_address instead of name2off") + return self.module_name_to_base_address + + def get_fad2cname(self): + 
warnings.warn("Deprecated API: use .function_address_to_canonical_name instead of fad2cname") + return self.function_address_to_canonical_name + + + name2off = property(get_name2off) + fad2cname = property(get_fad2cname) + + def fake_library_entry(self, module_name): + addr = self.last_module_address + log.warning("Create dummy entry for %r", module_name) + self.unresolved_modules_names.add(module_name) + self.module_name_to_base_address[module_name] = addr + self.module_base_address_to_name[addr] = module_name + self.module_base_address_to_last_address[addr] = addr + 0x4 + self.module_name_to_export[module_name] = {} + self.last_module_address += 0x1000 + return addr def lib_get_add_base(self, name): - assert isinstance(name, basestring) - name = name.lower().strip(' ') - if not "." in name: - log.warning('warning adding .dll to modulename') - name += '.dll' - log.warning(name) - - if name in self.name2off: - ad = self.name2off[name] - else: - ad = self.libbase_ad - log.warning("Create dummy entry for %r", name) - self.fake_libs.add(name) - self.name2off[name] = ad - self.libbase2lastad[ad] = ad + 0x4 - self.lib_imp2ad[ad] = {} - self.lib_imp2dstad[ad] = {} - self.libbase_ad += 0x1000 - return ad + raise NotImplementedError("Implement in sub class") def lib_get_add_func(self, libad, imp_ord_or_name, dst_ad=None): - if not libad in viewvalues(self.name2off): - raise ValueError('unknown lib base!', hex(libad)) + raise DeprecationWarning("Use resolve_function instead of lib_get_add_func") + + def load_module(self, name): + raise NotImplementedError("Implement in sub class") + + def add_function(self, module_name, imp_ord_or_name, addr, dst_ad=None): + canonical_name = canon_libname_libfunc( + module_name, imp_ord_or_name + ) + self.function_address_to_info[addr] = module_name, imp_ord_or_name + + if dst_ad is not None: + self.canonical_name_to_dst_addr.setdefault(canonical_name, set()).add(dst_ad) + + self.function_address_to_canonical_name[addr] = canonical_name + 
self.function_canonical_name_to_address[canonical_name] = addr + + return canonical_name + + def fake_resolve_function(self, module_address, imp_ord_or_name, dst_ad=None): + module_name = self.module_base_address_to_name.get(module_address, None) + if module_name is None: + raise ValueError('unknown lib base!', hex(module_address)) # test if not ordinatl # if imp_ord_or_name >0x10000: # imp_ord_or_name = vm_get_str(imp_ord_or_name, 0x100) # imp_ord_or_name = imp_ord_or_name[:imp_ord_or_name.find('\x00')] - #/!\ can have multiple dst ad - if not imp_ord_or_name in self.lib_imp2dstad[libad]: - self.lib_imp2dstad[libad][imp_ord_or_name] = set() - if dst_ad is not None: - self.lib_imp2dstad[libad][imp_ord_or_name].add(dst_ad) - - if imp_ord_or_name in self.lib_imp2ad[libad]: - return self.lib_imp2ad[libad][imp_ord_or_name] + if imp_ord_or_name in self.module_name_to_export[module_name]: + return self.module_name_to_export[module_name][imp_ord_or_name] log.debug('new imp %s %s' % (imp_ord_or_name, dst_ad)) - ad = self.libbase2lastad[libad] - self.libbase2lastad[libad] += 0x10 # arbitrary - self.lib_imp2ad[libad][imp_ord_or_name] = ad + addr = self.module_base_address_to_last_address[module_address] + canonical_name = self.add_function(module_name, imp_ord_or_name, addr, dst_ad=dst_ad) - name_inv = dict( - (value, key) for key, value in viewitems(self.name2off) - ) - c_name = canon_libname_libfunc(name_inv[libad], imp_ord_or_name) - self.fad2cname[ad] = c_name - self.cname2addr[c_name] = ad - self.fad2info[ad] = libad, imp_ord_or_name - return ad + + self.module_base_address_to_last_address[module_address] += 0x10 # arbitrary + self.module_name_to_export[module_name][imp_ord_or_name] = addr + self.function_canonical_name_to_address[canonical_name] = addr + self.function_address_to_info[addr] = module_name, imp_ord_or_name + return addr def check_dst_ad(self): for ad in self.lib_imp2dstad: diff --git a/miasm/os_dep/win_api_x86_32.py b/miasm/os_dep/win_api_x86_32.py index 
734666fb6..93d7270cb 100644 --- a/miasm/os_dep/win_api_x86_32.py +++ b/miasm/os_dep/win_api_x86_32.py @@ -913,11 +913,11 @@ def kernel32_GetModuleFileName(jitter, funcname, set_str): if args.hmodule in [0, winobjs.hcurmodule]: p = winobjs.module_path[:] elif (winobjs.loader and - args.hmodule in viewvalues(winobjs.loader.name2off)): + args.hmodule in viewvalues(winobjs.loader.module_name_to_base_address)): name_inv = dict( [ (x[1], x[0]) - for x in viewitems(winobjs.loader.name2off) + for x in viewitems(winobjs.loader.module_name_to_base_address) ] ) p = name_inv[args.hmodule] @@ -1029,9 +1029,9 @@ def kernel32_LoadLibrary(jitter, get_str): ret_ad, args = jitter.func_args_stdcall(["dllname"]) libname = get_str(args.dllname, 0x100) - ret = winobjs.loader.lib_get_add_base(libname) - log.info("Loading %r ret 0x%x", libname, ret) - jitter.func_ret_stdcall(ret_ad, ret) + module_image_base = winobjs.loader.load_module(libname) + log.info("Loading %r ret 0x%x", libname, module_image_base) + jitter.func_ret_stdcall(ret_ad, module_image_base) def kernel32_LoadLibraryA(jitter): @@ -1069,7 +1069,8 @@ def kernel32_GetProcAddress(jitter): if not fname: fname = None if fname is not None: - ad = winobjs.loader.lib_get_add_func(args.libbase, fname) + name = winobjs.loader.module_base_address_to_name[args.libbase] + ad = winobjs.loader.resolve_function(name, fname) else: ad = 0 log.info("GetProcAddress %r %r ret 0x%x", args.libbase, fname, ad) @@ -2010,7 +2011,8 @@ def ntdll_LdrGetProcedureAddress(jitter): l1, l2, p_src = struct.unpack('HHI', jitter.vm.get_mem(args.pfname, 0x8)) fname = get_win_str_a(jitter, p_src) - ad = winobjs.loader.lib_get_add_func(args.libbase, fname) + name = winobjs.loader.module_base_address_to_name[args.libbase] + ad = winobjs.loader.resolve_function(name, fname) jitter.add_breakpoint(ad, jitter.handle_lib) jitter.vm.set_u32(args.p_ad, ad)