# Source code for mythril.disassembler.disassembly

"""This module contains the class used to represent disassembly code."""
from typing import Dict, List, Optional, Tuple

from fdg.output_data import print_dict
from mythril.ethereum import util
from mythril.disassembler import asm
from mythril.support.signatures import SignatureDB
import fdg.global_config

class Disassembly(object):
    """Disassembly class.

    Stores bytecode, and its disassembly.
    Additionally it will gather the following information on the existing
    functions in the disassembled code:

    - function hashes
    - function name to entry point mapping
    - function entry point to function name mapping
    """

    def __init__(self, code: str, enable_online_lookup: bool = False) -> None:
        """Disassemble ``code`` and index the functions found in it.

        :param code: contract bytecode; a ``str`` is run through
            ``util.safe_decode`` before the first disassembly pass, any other
            value is handed to ``asm.disassemble`` unchanged
        :param enable_online_lookup: forwarded to ``SignatureDB`` when function
            hashes are resolved to names
        """
        self.bytecode = code
        # NOTE(review): assign_bytecode() at the end of this constructor
        # disassembles ``code`` again and overwrites ``instruction_list``.
        # This first pass is kept as-is because it may surface decoding errors
        # early -- confirm whether it can be dropped.
        if isinstance(code, str):
            self.instruction_list = asm.disassemble(util.safe_decode(code))
        else:
            self.instruction_list = asm.disassemble(code)
        self.func_hashes = []  # type: List[str]
        self.function_name_to_address = {}  # type: Dict[str, int]
        self.address_to_function_name = {}  # type: Dict[int, str]
        self.enable_online_lookup = enable_online_lookup
        self.assign_bytecode(bytecode=code)

    def assign_bytecode(self, bytecode):
        """Store ``bytecode``, disassemble it and record its functions.

        Every ``PUSH1..PUSH4`` / ``EQ`` pair found in the instruction list is
        treated as a jump table entry. Its hash is appended to
        ``func_hashes``; when a name and an entry point were resolved, the two
        name/address mappings and ``fdg.global_config.method_identifiers`` are
        updated as well. Existing entries are not cleared, so repeated calls
        add to what is already recorded.

        :param bytecode: the bytecode to disassemble
        """
        self.bytecode = bytecode
        # open from default locations
        # control if you want to have online signature hash lookups
        signatures = SignatureDB(enable_online_lookup=self.enable_online_lookup)
        self.instruction_list = asm.disassemble(bytecode)
        # Need to take from PUSH1 to PUSH4 because solc seems to remove excess
        # 0s at the beginning for optimizing
        jump_table_indices = asm.find_op_code_sequence(
            [("PUSH1", "PUSH2", "PUSH3", "PUSH4"), ("EQ",)], self.instruction_list
        )

        for index in jump_table_indices:
            function_hash, jump_target, function_name = get_function_info(
                index, self.instruction_list, signatures
            )
            self.func_hashes.append(function_hash)

            # @wei get a mapping from function name to function hash.
            # Skipped when no name was resolved, so the global mapping never
            # receives a ``None`` key.
            if function_name is not None:
                fdg.global_config.method_identifiers[function_name] = function_hash

            if jump_target is not None and function_name is not None:
                self.function_name_to_address[function_name] = jump_target
                self.address_to_function_name[jump_target] = function_name
        # print_dict(fdg.global_config.method_identifiers,"method identifiers in disassembler.py")

    def get_easm(self):
        """Return the current instruction list converted by
        ``asm.instruction_list_to_easm``.

        :return: the result of ``asm.instruction_list_to_easm``
        """
        return asm.instruction_list_to_easm(self.instruction_list)
def get_function_info(
    index: int, instruction_list: list, signature_database: SignatureDB
) -> Tuple[str, Optional[int], Optional[str]]:
    """Finds the function information for a call table entry.

    Solidity uses the first 4 bytes of the calldata to indicate which function
    the message call should execute. The generated code that directs execution
    to the correct function looks like this:

    - PUSH function_hash
    - EQ
    - PUSH entry_point
    - JUMPI

    This function takes an index that points to the first instruction, and
    from that finds out the function hash, function entry and the function
    name.

    :param index: Start of the entry pattern
    :param instruction_list: Instruction list for the contract that is being
        analyzed
    :param signature_database: Database used to map function hashes to their
        respective function names
    :return: function hash, function entry point, function name; the entry
        point and the name are both ``None`` when there is no instruction at
        ``index + 2`` or that instruction has no ``"argument"``
    :raises ValueError: if the hash argument is a tuple whose items cannot be
        turned into bytes (symbolic function signature)
    """
    hash_argument = instruction_list[index]["argument"]
    if isinstance(hash_argument, tuple):
        try:
            hash_digits = bytes(hash_argument).hex()
        except (AttributeError, TypeError) as err:
            # bytes() raises TypeError for items that are not integers
            raise ValueError(
                "Mythril currently does not support symbolic function signatures"
            ) from err
    else:
        # string arguments carry a "0x" prefix that is stripped here
        hash_digits = hash_argument[2:]
    # Append with missing 0s at the beginning
    function_hash = "0x" + hash_digits.rjust(8, "0")

    function_names = signature_database.get(function_hash)
    if function_names:
        # sorted so that the joined name is the same on every run
        function_name = " or ".join(sorted(set(function_names)))
    else:
        function_name = "_function_" + function_hash

    try:
        offset = instruction_list[index + 2]["argument"]
    except (KeyError, IndexError):
        return function_hash, None, None

    if isinstance(offset, tuple):
        offset = bytes(offset).hex()
    entry_point = int(offset, 16)

    return function_hash, entry_point, function_name