In [445]:
%run ./GoogleClient.ipynb
%run ./Utils.ipynb
import panoramix
from panoramix.decompiler import decompile_bytecode
import os
import subprocess
import threading
import multiprocessing 
import time

In [None]:
class BytecodeDecompiler:
    """
    Decompile contract bytecode with the ``panoramix`` command-line tool and
    keep the results as text files in an output directory.

    Two lookup tables drive the process (both SQLite-backed proxies built in
    ``__init__``):

    * ``contracts_bytecodes_hash``: keyed by ``from_address``, value ``bytecode_hash``
    * ``distinct_bytecodes_hash``:  keyed by ``bytecode_hash``, value ``bytecode``

    Decompiled output is written to ``<decompiler_output>/<bytecode_hash>.txt``
    and indexed in ``self.decomiled_bytecodes``.
    NOTE(review): the attribute name ``decomiled_bytecodes`` is misspelled but
    is kept as-is because code outside this class may read it.
    """

    def __init__(self, google_client, init = False, decompiler_output=Utils.DECOMPILER_OUTPUT_DIR):
        """
        Initialize the BytecodeDecompiler with a GoogleClient and decompiler output directory.

        Args:
            google_client (GoogleClient): An instance of GoogleClient.
            init (bool): When True, ``initialize()`` is called first and both
                SQLite-backed lookup tables are built with ``force_reload=True``.
            decompiler_output (str): The directory where decompiled bytecodes will be stored.

        Raises:
            TypeError: If google_client is not an instance of GoogleClient.
            ValueError: If decompiler_output is not a non-empty string.
        """
        if not isinstance(google_client, GoogleClient):
            raise TypeError("google_client must be an instance of GoogleClient object!")
        self.google_client = google_client

        if not isinstance(decompiler_output, str) or not decompiler_output:
            raise ValueError("decompiler_output must be a non-empty string representing a directory path.")

        # Ensure the decompiler_output is an absolute path
        self.decompiler_output = os.path.abspath(decompiler_output)

        # Names handed to the GoogleClient calls in initialize(); the two
        # "*_hashes" names are also used below as sub-directories of the
        # client's decompressed storage path.
        self.df_all_contracts_bytecodes = "df-all-contracts-bytecodes"
        self.df_all_contracts_asof = "df-all-contracts-asof"
        self.df_all_distinct_bytecodes_hashes = "df-all-distinct-bytecodes-hashes"
        self.df_all_contracts_bytecodes_hashes = "df-all-contracts-bytecode-hashes"

        if init:
            self.initialize()

        Utils.create_directory(self.decompiler_output)

        # Index of already decompiled bytecodes: file stem -> absolute path.
        self.decomiled_bytecodes = self.load_decompiled_bytecodes()

        # Both lookup tables live in the same SQLite database file.
        db_path = os.path.join(Utils.DATA_DIR, "bytecodes_cache.db")

        # Path to the directory containing CSVs for contracts_bytecodes_hash
        contracts_hashes_csv_dir = os.path.join(
            self.google_client.storage_bucket_decompresssed_path,
            self.df_all_contracts_bytecodes_hashes,
        )
        self.contracts_bytecodes_hash = SQLiteProxyDict(
            db_path=db_path,
            table_name="contracts_bytecode_hashes", # Table name in SQLite
            key_column="from_address",
            value_column="bytecode_hash",
            csv_path=contracts_hashes_csv_dir, # Directory path containing CSV files
            force_reload=init,
        )

        # Path to the directory containing CSVs for distinct_bytecodes_hash
        distinct_hashes_csv_dir = os.path.join(
            self.google_client.storage_bucket_decompresssed_path,
            self.df_all_distinct_bytecodes_hashes,
        )
        self.distinct_bytecodes_hash = SQLiteProxyDict(
            db_path=db_path,
            table_name="distinct_bytecodes_hashes", # Table name in SQLite
            key_column="bytecode_hash",
            value_column="bytecode",
            csv_path=distinct_hashes_csv_dir, # Directory path containing CSV files
            force_reload=init,
        )

        # Accumulated wall-clock seconds spent inside run_panoramix via
        # decompile_contract (failed and timed-out runs are counted too).
        self.total_decompilation_time = 0

    def initialize(self):
        """
        Rebuild the remote datasets and downloads this class depends on.

        Every step is delegated to ``self.google_client``; the step comments
        below restate the intent given by the method names.
        """
        # create a dataset if it does not exist
        self.google_client.create_dataset(override=False)
        # retrieve all contracts bytecodes and hash their bytecodes
        self.google_client.hash_contracts_bytecodes(self.df_all_contracts_bytecodes, keep = 'latest', override=True)
        # collect all contracts deployed (the far-future date is presumably a
        # "no upper bound" cut-off; verify against GoogleClient)
        self.google_client.collect_contract_addresses_as_of(self.df_all_contracts_asof, "2040-01-01", override=True)
        # collect and download the distinct set of bytecode along with their hashes
        self.google_client.download_distinct_bytecode_hashes(self.df_all_contracts_bytecodes, self.df_all_contracts_asof, self.df_all_distinct_bytecodes_hashes, override = True)
        # collect all contracts bytecodes hash
        self.google_client.download_contracts_bytecode_hashes(self.df_all_contracts_bytecodes, self.df_all_contracts_asof, self.df_all_contracts_bytecodes_hashes, override = True)

    def load_decompiled_bytecodes(self):
        """
        Scan ``self.decompiler_output`` and index the regular files found there.

        The key of each entry is the file name up to its first ``.``; the
        value is the file's absolute path. Sub-directories are skipped. Any
        error while scanning is printed and the entries collected so far are
        returned.

        Returns:
        dict: A dictionary with file stems as keys and their absolute paths as values.
        """
        files_dict = {}

        try:
            for entry in os.listdir(self.decompiler_output):
                absolute_path = os.path.abspath(os.path.join(self.decompiler_output, entry))
                if os.path.isfile(absolute_path):
                    files_dict[entry.split('.')[0]] = absolute_path
        except Exception as e:
            print(f"An error occurred while reading the directory: {e}")

        return files_dict

    def reload_decompiled_bytecodes(self):
        """Re-scan the output directory and replace the in-memory index."""
        self.decomiled_bytecodes = self.load_decompiled_bytecodes()

    def run_panoramix(self, contract_bytecode, timeout):
        """
        Run the ``panoramix`` CLI on one bytecode string.

        Args:
            contract_bytecode (str): Bytecode passed as the single CLI argument.
            timeout (float | None): Maximum seconds to wait; None waits forever.

        Returns:
            str | bool: The captured standard output on completion, or False
            when the run timed out or could not be started.
        """
        try:
            # subprocess.run enforces the timeout itself and kills the child
            # when it expires, so no helper thread or shared global is needed
            # and no panoramix process is left running after a timeout.
            result = subprocess.run(
                ['panoramix', contract_bytecode],
                capture_output=True,
                text=True,
                timeout=timeout,
            )
        except subprocess.TimeoutExpired:
            return False
        except Exception as e:
            print(f"Error: {e}")
            return False

        return result.stdout

    def decompile_contract(self, address, timeout=900):
        """
        Decompile the bytecode of the contract at ``address``.

        An already decompiled bytecode (matched by its hash) is served from
        the on-disk index without running the decompiler again.

        Args:
            address: Key into ``self.contracts_bytecodes_hash``.
            timeout (float): Seconds allowed for one panoramix run.

        Returns:
            str: Path of the text file holding the decompiled output, or the
            literal ``'Failure: Decompiler error'`` when panoramix produced
            no output, timed out or failed.

        Raises:
            KeyError: If the address, or its bytecode hash, is unknown.
        """
        if address not in self.contracts_bytecodes_hash:
            # Raise error that contract address not found in contracts_bytecodes_hash.
            raise KeyError("Contract address not found in contracts_bytecodes_hash. Update to the latest snapshot of Ethereum by calling the initialize function.")

        # Look the hash up once: the mapping is SQLite-backed, so every
        # subscript access is a separate query.
        bytecode_hash = self.contracts_bytecodes_hash[address]
        cache_key = str(bytecode_hash)

        if cache_key in self.decomiled_bytecodes:
            return self.decomiled_bytecodes[cache_key]

        if bytecode_hash not in self.distinct_bytecodes_hash:
            # Raise error that contract's bytecode hash not found in distinct_bytecodes_hash.
            raise KeyError("Contract's bytecode hash not found in distinct_bytecodes_hash. Update to the latest snapshot of Ethereum by calling the initialize function.")

        start_time = time.time()  # Start the timer
        decompiled_bytecode = self.run_panoramix(self.distinct_bytecodes_hash[bytecode_hash], timeout)
        self.total_decompilation_time += time.time() - start_time

        if not decompiled_bytecode:
            return 'Failure: Decompiler error'

        bytecode_path = os.path.join(self.decompiler_output, "{}.txt".format(bytecode_hash))
        with open(bytecode_path, 'w') as writer:
            writer.write(Utils.escape_ansi(decompiled_bytecode))

        # update the index of decompiled bytecodes
        self.decomiled_bytecodes[cache_key] = bytecode_path
        return bytecode_path

In [449]:
def run_panoramix(contract_bytecode, timeout = Utils.DECOMPILER_TIMEOUT):
    """
    Invoke the ``panoramix`` CLI on a single bytecode string.

    Args:
        contract_bytecode: Bytecode passed as the only CLI argument.
        timeout: Seconds to wait before the run is abandoned.

    Returns:
        The captured standard output, or None when the run timed out or
        raised any other exception (a message is printed in both cases).
    """
    command = ['panoramix', contract_bytecode]
    try:
        completed = subprocess.run(command, capture_output=True, text=True, timeout=timeout)
    except subprocess.TimeoutExpired:
        print("Timeout: Decompilation")
        return None
    except Exception as err:
        print(f"Error: {err}")
        return None
    return completed.stdout

def decompile_contract(args):
    """
    Worker entry point: decompile one bytecode and store the result on disk.

    Args:
        args: A 4-tuple ``(bytecode_hash, bytecode, decompiler_output, timeout)``,
            packed into one argument so the function can be used with ``Pool.map``.

    Nothing is written when the decompiler yields no output.
    """
    bytecode_hash, bytecode, decompiler_output, timeout = args

    decompiled = run_panoramix(bytecode, timeout)
    if not decompiled:
        return

    target_path = os.path.join(decompiler_output, f"{bytecode_hash}.txt")
    with open(target_path, 'w') as out_file:
        out_file.write(Utils.escape_ansi(decompiled))

def load_decompiled_bytecodes(decompiler_output):
    """
    Index the regular files that sit directly inside ``decompiler_output``.

    Each key is the file name up to its first ``.``; each value is the
    file's absolute path. Sub-directories are ignored. If scanning fails,
    the error is printed and whatever was collected so far is returned.

    Returns:
    dict: A dictionary with file stems as keys and their absolute paths as values.
    """
    index = {}

    try:
        for name in os.listdir(decompiler_output):
            location = os.path.abspath(os.path.join(decompiler_output, name))
            if not os.path.isfile(location):
                continue
            stem = name.partition('.')[0]
            index[stem] = location
    except Exception as e:
        print(f"An error occurred while reading the directory: {e}")

    return index
        
def decompile_contracts_in_parallel(addresses, contracts_bytecodes_hash, distinct_bytecodes_hash, decompiler_output, timeout = Utils.DECOMPILER_TIMEOUT):
    """
    Decompile the bytecode of many contracts using a process pool.

    Bytecodes whose hash already has a file in ``decompiler_output`` are
    skipped, and addresses sharing a bytecode hash produce a single job.

    Args:
        addresses: Iterable of keys into ``contracts_bytecodes_hash``.
        contracts_bytecodes_hash: Mapping from address to bytecode hash.
        distinct_bytecodes_hash: Mapping from bytecode hash to bytecode.
        decompiler_output: Directory the result files are written to.
        timeout: Seconds allowed for each individual panoramix run.

    Returns:
        Elapsed wall-clock seconds spent in the pool, or 0 when there was
        nothing to decompile.

    Raises:
        Whatever ``contracts_bytecodes_hash[address]`` raises for an unknown
        address; only a missing *bytecode* is reported and skipped.
    """
    all_decompiled_files = load_decompiled_bytecodes(decompiler_output)

    # bytecode hash -> bytecode; keying by hash removes duplicate jobs.
    pending = {}
    for address in addresses:
        # Single lookup per address instead of three.
        bytecode_hash = contracts_bytecodes_hash[address]
        if str(bytecode_hash) in all_decompiled_files:
            continue
        try:
            pending[bytecode_hash] = distinct_bytecodes_hash[bytecode_hash]
        except Exception:
            # Best-effort skip, but no longer a bare ``except`` so that
            # KeyboardInterrupt / SystemExit still propagate.
            # NOTE(review): could be narrowed to KeyError once the lookup
            # type's failure mode is confirmed.
            print("contracts bytecode {} not found".format(address))

    args = [
        (bytecode_hash, bytecode, decompiler_output, timeout)
        for bytecode_hash, bytecode in pending.items()
    ]

    if not args:
        return 0

    start_time = time.time()  # Start the timer
    print('number of decompilation jobs after excluding duplicates:', len(args))
    with multiprocessing.Pool(processes=Utils.CORE_COUNT) as pool:
        pool.map(decompile_contract, args)

    return time.time() - start_time