Skip to content

Commit

Permalink
[variable-name-generation] Fix various small bugs in hungarian notati…
Browse files Browse the repository at this point in the history
…on (#236)

* Create draft PR for #235

* Fixes 1,2,3

* Change list to set (multiple vars) + change tmp renaming

* Custom/Tmp name fixes

* Newline

* Implemented changes

* Added comment to variable copies

---------

Co-authored-by: NeoQuix <[email protected]>
Co-authored-by: Spartak Ehrlich <[email protected]>
Co-authored-by: Spartak Ehrlich <[email protected]>
Co-authored-by: Niklas Bergmann <[email protected]>
  • Loading branch information
5 people authored Jun 20, 2023
1 parent fec499f commit 7b3264b
Show file tree
Hide file tree
Showing 3 changed files with 71 additions and 15 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ def _get_operands_of_complexity_greater_1(operation: Operation) -> Iterator[Expr

@dataclass
class ComplexityBounds:
"""Dataclass that stores information about the maximum allowed complexity of varíous instruction types."""
"""Dataclass that stores information about the maximum allowed complexity of various instruction types."""

assignment_instr: int = 0
call_operation: int = 0
Expand Down Expand Up @@ -112,7 +112,7 @@ def start_simplification(self, instruction_target: Target):
self.start_simplification(subtarget)

def simplify_target(self, instruction_target: Target) -> None:
"""Handle the actual simplification of the Target by choosing an adequate simplfifier for the instruction type."""
"""Handle the actual simplification of the Target by choosing an adequate simplifier for the instruction type."""
if not instruction_target.exceeds_complexity_bounds():
return
if isinstance(instruction_target.target, BinaryOperation):
Expand Down
43 changes: 32 additions & 11 deletions decompiler/pipeline/controlflowanalysis/variable_name_generation.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,22 @@
import re
from abc import ABC, abstractmethod
from enum import Enum
from typing import Dict, List, Optional
from typing import Dict, List, Optional, Set

from decompiler.pipeline.stage import PipelineStage
from decompiler.structures.ast.ast_nodes import CaseNode, CodeNode, ConditionNode, LoopNode, SwitchNode
from decompiler.structures.ast.syntaxtree import AbstractSyntaxTree
from decompiler.structures.ast.ast_nodes import ConditionNode, LoopNode
from decompiler.structures.logic.logic_condition import LogicCondition
from decompiler.structures.pseudo import Condition, CustomType, DataflowObject, Float, GlobalVariable, Integer, Pointer, Type, Variable
from decompiler.structures.visitors.ast_dataflowobjectvisitor import BaseAstDataflowObjectVisitor
from decompiler.task import DecompilerTask

"""
Short explanation of how variables work in the decompiler:
- sometimes the same object is shared between different structures (assignments, loops etc.)
- sometimes they are real copies of one another
==> Therefore, if we change an attribute of a variable (e.g. its name), we have no guarantee that all usages of the variable are updated
==> Therefore, we always collect EVERY variable used and check with a method (alread_renamed) whether it was already renamed to our new naming scheme
"""

def _get_var_counter(var_name: str) -> Optional[str]:
"""Return the counter of a given variable name, if any is present."""
Expand Down Expand Up @@ -70,15 +76,17 @@ def __init__(self, task: DecompilerTask) -> None:
collector = VariableCollector(task._ast.condition_map)
collector.visit_ast(task._ast)
self._params: List[Variable] = task._function_parameters
self._loop_vars : List[Variable] = collector.get_loop_variables()
self._variables: List[Variable] = list(filter(self._filter_variables, collector.get_variables()))
self._loop_vars : List[Variable] = collector.get_loop_variables()
self._variables: List[Variable] = list(filter(self._filter_variables, collector.get_variables()))


def _filter_variables(self, item: Variable) -> bool:
"""Return False if variable is a parameter, renamed loop variable or GlobalVariable, else True"""
if item in self._params or (item in self._loop_vars and item.name.find("var_") == -1) or isinstance(item, GlobalVariable):
return False
return True
"""Return False if variable is either a:
- parameter
- renamed loop variable
- GlobalVariable
"""
return not item in self._params and not (item in self._loop_vars and item.name.find("var_") == -1) and not isinstance(item, GlobalVariable)


@abstractmethod
Expand All @@ -95,6 +103,9 @@ class HungarianScheme(RenamingScheme):
Integer: {8: "ch", 16: "s", 32: "i", 64: "l", 128: "i128"},
}

custom_var_names = {
"tmp_": "Tmp"
}

def __init__(self, task: DecompilerTask) -> None:
super().__init__(task)
Expand All @@ -108,13 +119,15 @@ def __init__(self, task: DecompilerTask) -> None:
def renameVariableNames(self):
    """Apply the hungarian notation to every collected variable that was not renamed yet."""
    for variable in self._variables:
        # The same variable may be collected more than once; renaming it again would destroy the name.
        if not self.alread_renamed(variable._name):
            suffix = _get_var_counter(variable.name) or ""
            variable._name = self._hungarian_notation(variable, suffix)


def _hungarian_notation(self, var: Variable, counter: int) -> str:
"""Return hungarian notation to a given variable."""
return f"{self._hungarian_prefix(var.type)}{self._type_separator}{self._var_name}{self._counter_separator}{counter}"
return f"{self._hungarian_prefix(var.type)}{self._type_separator}{self.custom_var_names.get(var._name.rstrip(counter), self._var_name)}{self._counter_separator}{counter}"


def _hungarian_prefix(self, var_type: Type) -> str:
Expand All @@ -128,11 +141,19 @@ def _hungarian_prefix(self, var_type: Type) -> str:
return "b"
elif var_type.size == 0:
return "v"
else:
return ""
if isinstance(var_type, (Integer, Float)):
sign = "" if var_type.is_signed else "u"
prefix = self.type_prefix[type(var_type)][var_type.size]
prefix = self.type_prefix[type(var_type)].get(var_type.size, "unk")
return f"{sign}{prefix}"
return ""


def alread_renamed(self, name: str) -> bool:
    """Return True if the given name was already renamed to the new scheme, False otherwise.

    A name counts as renamed if it contains one of the replacement names from custom_var_names
    (its values), the fallback size prefix "unk" or the default variable name self._var_name.

    :param name: current name of the variable to check
    """
    keywords = (*self.custom_var_names.values(), "unk", self._var_name)
    # An empty keyword is a substring of every name and would mark all variables as renamed, so skip it.
    return any(keyword in name for keyword in keywords if keyword)

class DefaultScheme(RenamingScheme):
"""Class which renames variables into the default scheme."""
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
from typing import List

import pytest
from decompiler.backend.codegenerator import CodeGenerator
from decompiler.pipeline.controlflowanalysis import VariableNameGeneration
Expand Down Expand Up @@ -88,3 +86,40 @@ def test_hungarian_notation_separators(type_sep: str, counter_sep: str):
ast = AbstractSyntaxTree(CodeNode(Assignment(var := Variable("var_0", I32), Constant(0)), true_value), {})
_run_vng(ast, _generate_options(type_sep=type_sep, counter_sep=counter_sep))
assert var.name == f"i{type_sep}Var{counter_sep}0"


def test_custom_type():
    """A variable of a CustomType gets no type prefix: var_0 is expected to become Var0."""
    true_value = LogicCondition.initialize_true(LogicCondition.generate_new_context())
    var = Variable("var_0", CustomType("size_t", 64))
    root = CodeNode(Assignment(var, Constant(0)), true_value)
    _run_vng(AbstractSyntaxTree(root, {}), _generate_options())
    assert var._name == "Var0"


def test_bninja_invalid_type():
    """A variable of type Integer(104, True) is expected to get the fallback prefix: var_0 becomes unkVar0."""
    true_value = LogicCondition.initialize_true(LogicCondition.generate_new_context())
    var = Variable("var_0", Integer(104, True))
    root = CodeNode(Assignment(var, Constant(0)), true_value)
    _run_vng(AbstractSyntaxTree(root, {}), _generate_options())
    assert var._name == "unkVar0"


def test_tmp_variable():
    """A tmp_ variable keeps its custom name part: tmp_42 of type Float(64) is expected to become dTmp42."""
    true_value = LogicCondition.initialize_true(LogicCondition.generate_new_context())
    var = Variable("tmp_42", Float(64))
    root = CodeNode(Assignment(var, Constant(0)), true_value)
    _run_vng(AbstractSyntaxTree(root, {}), _generate_options())
    assert var._name == "dTmp42"


def test_same_variable():
    """A variable that occurs several times must be renamed only once (renaming it again would destroy the actual name)."""
    true_value = LogicCondition.initialize_true(LogicCondition.generate_new_context())
    var1 = Variable("tmp_42", Float(64))
    var2 = Variable("var_0", Integer(104, True))
    # Every variable is assigned twice, so it shows up more than once in the AST.
    instructions = [Assignment(target, Constant(0)) for target in (var1, var1, var2, var2)]
    ast = AbstractSyntaxTree(CodeNode(instructions, true_value), {})
    _run_vng(ast, _generate_options())
    assert var1._name == "dTmp42"
    assert var2._name == "unkVar0"

0 comments on commit 7b3264b

Please sign in to comment.