In [81]:
import os
%run ./BaseProxyDetector.ipynb
%run ./GoogleClient.ipynb
%run ./Utils.ipynb


In [82]:
class DynamicProxyDetector(BaseProxyDetector, GoogleClient):

    """
    A class to detect proxy contracts dynamically, i.e. from the delegatecall traces of executed transactions. For more details about the study, please refer to:
        @article{Ebrahimi23,
        author = {Ebrahimi, Amir and Adams, Bram and Oliva, Gustavo and Hassan, Ahmed E.},
        year = {2023},
        month = {05},
        pages = {},
        title = {A Large-Scale Exploratory Study on the Proxy Pattern in Ethereum}
        }  
        
    Attributes:
        project_id (str): Google Cloud project ID where the dataset and tables reside.
        dataset_name (str): The name of the dataset to operate within.
        client: The Google BigQuery client
    """
    def __init__(self, json_credentials, project_id, dataset_name, storage_path = Utils.DATA_DIR, init=False):
        """
        Set up the detector and load the proxy -> logic mapping into memory.

        Args:
            json_credentials: Forwarded unchanged to the parent initializer.
            project_id (str): Google Cloud project ID where the dataset and tables reside.
            dataset_name (str): The name of the dataset to operate within.
            storage_path: Forwarded unchanged to the parent initializer
                (defaults to ``Utils.DATA_DIR``).
            init (bool): If True, run ``identify_proxies_as_of`` with
                ``Utils.STUDY_END_DATE`` before loading the mapping.
        """
        super().__init__(json_credentials, project_id, dataset_name, storage_path)

        # Input table: the contract addresses under analysis.
        self.contracts_table_name = "df-contracts"

        # Intermediate tables produced by the detection pipeline.
        self.contracts_delegate_trace_table_name = "df-contracts-delegate-traces-table"
        self.proxy_logic_pairs_table_name = "df-proxy-logic-pairs"

        # Result tables.
        self.contracts_proxy_status_table_name = "df-contracts-proxy-status"
        self.active_proxy_contracts_table_name = "df-active-proxy-contracts"

        # Optionally (re)build every table before reading the exported pairs.
        if init:
            self.identify_proxies_as_of(as_of_timestamp=Utils.STUDY_END_DATE)

        # Mapping consulted by is_proxy(); loaded whether or not init was requested.
        self.active_proxy_logic_pairs = self.load_active_proxy_logic_pairs()
    
    def aggregate_delegatecall_traces(self, contracts_table, table_name, as_of_timestamp = '2040-09-01', override=False):
        """
        Collect full traces for contracts that delegate to another contract.

        Args:
            contracts_table (str): Name of the table containing contract addresses.
            table_name (str): Name of the table to create for storing traces.
            override (bool): If True, the existing table with the same name will be overwritten.
            as_of_timestamp (str): The timestamp as of traces has to be collected

        Raises:
            ValueError: If any parameter is missing or incorrect.
            ValueError: If dataset not fund under the project
            ValueError: If contracts_table not fund under the project
        """

        # Validate inputs
        if not all(isinstance(param, str) and param for param in [contracts_table, self.project_id, self.dataset_name, as_of_timestamp, table_name]):
            raise ValueError("contracts_table, project_id, dataset_name, and table_name must be non-empty strings.")
        if not self.dataset_exists():
            raise ValueError(f"The '{self.dataset_name}' dataset does not exist in project '{self.project_id}'.")
        if not self.table_exists(contracts_table):
            raise ValueError(f"The '{contracts_table}' table does not exist under project '{self.project_id}.{self.dataset_name}'.")

        # If override is True, drop the existing table
        if override:
            full_table_path = f"{self.project_id}.{self.dataset_name}.{table_name}"
            drop_table_query = f"DROP TABLE IF EXISTS `{full_table_path}`"
            self.client.query(drop_table_query).result()  # Wait for the query to finish
            print(f"Existing table dropped: {full_table_path}")

        # Construct the SQL query dynamically
        sql_query = f"""
        CREATE TABLE `{self.project_id}.{self.dataset_name}.{table_name}` AS (
            SELECT *,
            REPLACE (REPLACE (REPLACE (REPLACE (REPLACE (REPLACE (REPLACE (REPLACE (REPLACE (REPLACE (REPLACE (REPLACE (REPLACE (REPLACE (REPLACE (REPLACE (REPLACE (REPLACE (REPLACE (REPLACE (REPLACE (REPLACE (REPLACE (REPLACE (REPLACE (REPLACE (REPLACE (REPLACE (REPLACE (REPLACE (REPLACE (REPLACE (REPLACE (REPLACE (REPLACE (REPLACE (REPLACE (REPLACE (REPLACE (REPLACE (REPLACE (REPLACE (REPLACE (REPLACE (REPLACE (REPLACE (REPLACE (REPLACE (REPLACE (REPLACE (REPLACE (REPLACE (REPLACE (REPLACE (REPLACE (REPLACE (REPLACE (REPLACE (REPLACE (REPLACE (REPLACE (REPLACE (REPLACE (REPLACE (REPLACE (REPLACE (REPLACE (REPLACE (REPLACE (REPLACE (REPLACE (REPLACE (REPLACE (REPLACE (REPLACE (REPLACE (REPLACE (REPLACE (REPLACE (REPLACE (REPLACE (REPLACE (REPLACE (REPLACE (REPLACE (REPLACE (REPLACE (REPLACE (REPLACE (REPLACE (REPLACE (REPLACE (REPLACE (REPLACE (REPLACE (REPLACE (REPLACE (REPLACE (REPLACE (REPLACE (REPLACE (REPLACE (REPLACE (REPLACE (REPLACE (REPLACE (REPLACE (REPLACE (REPLACE (REPLACE (REPLACE (REPLACE (REPLACE (REPLACE (REPLACE (REPLACE (REPLACE (REPLACE (REPLACE (REPLACE (REPLACE (REPLACE (REPLACE (REPLACE (REPLACE (REPLACE (REPLACE (REPLACE (REPLACE (REPLACE (REPLACE (REPLACE (REPLACE (REPLACE (REPLACE (REPLACE (REPLACE (REPLACE (REPLACE (REPLACE (REPLACE (REPLACE (REPLACE (REPLACE (REPLACE (REPLACE (REPLACE (REPLACE (REPLACE (REPLACE (REPLACE (REPLACE (REPLACE (REPLACE (REPLACE (REPLACE (REPLACE (REPLACE (REPLACE (REPLACE (REPLACE (REPLACE (REPLACE (REPLACE (REPLACE (REPLACE (REPLACE (REPLACE (REPLACE (REPLACE (REPLACE (REPLACE (REPLACE (REPLACE (REPLACE (REPLACE (REPLACE (REPLACE (REPLACE (REPLACE (REPLACE (REPLACE (REPLACE (REPLACE (REPLACE (REPLACE (REPLACE (REPLACE (REPLACE (REPLACE (REPLACE (REPLACE (REPLACE (REPLACE (REPLACE (REPLACE (REPLACE (REPLACE (trace_address, "197", "ÿ"), "196", "þ"), "195", "ý"), "194", "ü"), "193", "û"), "192", "ú"), "191", "ù"), "190", "ø"), "189", "÷"), "188", "ö"), "187", "õ"), "186", "ô"), "185", "ó"), "184", "ò"), "183", 
"ñ"), "182", "ð"), "181", "ï"), "180", "î"), "179", "í"), "178", "ì"), "177", "ë"), "176", "ê"), "175", "é"), "174", "è"), "173", "ç"), "172", "æ"), "171", "å"), "170", "ä"), "169", "ã"), "168", "â"), "167", "á"), "166", "à"), "165", "ß"), "164", "Þ"), "163", "Ý"), "162", "Ü"), "161", "Û"), "160", "Ú"), "159", "Ù"), "158", "Ø"), "157", "×"), "156", "Ö"), "155", "Õ"), "154", "Ô"), "153", "Ó"), "152", "Ò"), "151", "Ñ"), "150", "Ð"), "149", "Ï"), "148", "Î"), "147", "Í"), "146", "Ì"), "145", "Ë"), "144", "Ê"), "143", "É"), "142", "È"), "141", "Ç"), "140", "Æ"), "139", "Å"), "138", "Ä"), "137", "Ã"), "136", "Â"), "135", "Á"), "134", "À"), "133", "¿"), "132", "¾"), "131", "½"), "130", "¼"), "129", "»"), "128", "º"), "127", "¹"), "126", "¸"), "125", "·"), "124", "¶"), "123", "µ"), "122", "´"), "121", "³"), "120", "²"), "119", "±"), "118", "°"), "117", "¯"), "116", "®"), "115", "¬­"), "114", "«"), "113", "ª"), "112", "©"), "111", "¨"), "110", "§"), "109", "¦"), "108", "¥"), "107", "¤"), "106", "£"), "105", "¢"), "104", "¡"), "103", "Ÿ"), "102", "ž"), "101", "œ"), "100", "›"), "99", "š"), "98", "™"), "97", "˜"), "96", "—"), "95", "–"), "94", "•"), "93", "”"), "92", "“"), "91", "’"), "90", "‘"), "89", "Ž"), "88", "Œ"), "87", "‹"), "86", "Š"), "85", "‰"), "84", "ˆ"), "83", "‡"), "82", "†"), "81", "„"), "80", "ƒ"), "79", "€"), "78", "~"), "77", "}}"), "76", "|"), "75", "{{"), "74", "z"), "73", "y"), "72", "x"), "71", "w"), "70", "v"), "69", "u"), "68", "t"), "67", "s"), "66", "r"), "65", "q"), "64", "p"), "63", "o"), "62", "n"), "61", "m"), "60", "l"), "59", "k"), "58", "j"), "57", "i"), "56", "h"), "55", "g"), "54", "f"), "53", "e"), "52", "d"), "51", "c"), "50", "b"), "49", "a"), "48", "_"), "47", "^"), "46", "]"), "45", "["), "44", "Z"), "43", "Y"), "42", "X"), "41", "W"), "40", "V"), "39", "U"), "38", "T"), "37", "S"), "36", "R"), "35", "Q"), "34", "P"), "33", "O"), "32", "N"), "31", "M"), "30", "L"), "29", "K"), "28", "J"), "27", "I"), "26", "H"), "25", "G"), "24", "F"), 
"23", "E"), "22", "D"), "21", "C"), "20", "B"), "19", "A"), "18", "@"), "17", "?"), "16", ">"), "15", "="), "14", "<"), "13", ";"), "12", ":"), "11", "/"), "10", "."), "9", "-"), "8", "+"), "7", "*"), "6", ")"), "5", "("), "4", "&"), "3", "%"), "2", "$"), "1", "#"), "0", "!") AS trace_address2
            FROM `bigquery-public-data.crypto_ethereum.traces`
            WHERE transaction_hash IN (
                SELECT transaction_hash
                FROM `bigquery-public-data.crypto_ethereum.traces`
                WHERE from_address IN (
                    SELECT from_address
                    FROM `{self.project_id}.{self.dataset_name}.{contracts_table}`
                )
                AND block_timestamp < TIMESTAMP('{as_of_timestamp}')
                AND call_type = 'delegatecall'
                AND status != 0
            )
        )
        """

        # Run the query to create the new table
        query_job = self.client.query(sql_query)
        query_job.result()  # Wait for the query to finish
        print(f"Table created successfully: {self.project_id}.{self.dataset_name}.{table_name}")

    def identify_proxy_logic_pairs(self, contract_delegate_traces_table, contracts_table, table_name, override=False):
        """
        Detect all the proxy logic/implementation contract pairs and store the result in a new table.

        The created table has three columns: ``from_address`` (the ``to_address`` of the
        matched outer trace), ``to_address`` (the delegatecall target) and ``counts``
        (number of distinct transactions in which the pair was observed). Only rows
        whose ``from_address`` appears in ``contracts_table`` are kept. After the table
        is created it is exported to the GCS bucket ``<project_id>-<table_name>``,
        downloaded, and decompressed.

        Args:
            contract_delegate_traces_table (str): Name of the table containing contract delegate traces.
            contracts_table (str): Name of the table containing contract addresses.
            table_name (str): Name of the table to create for storing the result.
            override (bool): If True, the existing table with the same name is dropped first.

        Raises:
            ValueError: If any parameter is missing or is not a non-empty string.
            ValueError: If the dataset is not found under the project.
            ValueError: If contract_delegate_traces_table is not found under the dataset.
            ValueError: If contracts_table is not found under the dataset.
        """

        if not all(isinstance(param, str) and param for param in [contract_delegate_traces_table, contracts_table, self.project_id, self.dataset_name, table_name]):
            raise ValueError("contract_delegate_traces_table, contracts_table, project_id, dataset_name, and table_name must be non-empty strings.")
        if not self.dataset_exists():
            raise ValueError(f"The '{self.dataset_name}' dataset does not exist in project '{self.project_id}'.")
        if not self.table_exists(contract_delegate_traces_table):
            raise ValueError(f"The '{contract_delegate_traces_table}' table does not exist under project '{self.project_id}.{self.dataset_name}'.")
        if not self.table_exists(contracts_table):
            raise ValueError(f"The '{contracts_table}' table does not exist under project '{self.project_id}.{self.dataset_name}'.")

        # If override is True, drop the existing table
        if override:
            full_table_path = f"{self.project_id}.{self.dataset_name}.{table_name}"
            drop_table_query = f"DROP TABLE IF EXISTS `{full_table_path}`"
            self.client.query(drop_table_query).result()  # Wait for the query to finish
            print(f"Existing table dropped: {full_table_path}")

        # Construct the SQL query dynamically.
        #   L: delegatecall traces with status != 0.
        #   R: every trace of the transactions whose MAX(subtraces) is below 198.
        # L and R are joined on transaction_hash and a pair is kept when
        #   * R.trace_address2 sorts before L.trace_address2 (or R has no trace_address),
        #   * R.trace_address equals L.trace_address without its last two characters
        #     ((ABS(n-2)+n-2)/2 is max(n-2, 0)); "root" stands in for NULL/empty.
        #     NOTE(review): this looks like "R is the parent call of L" when the last
        #     index is a single digit - confirm,
        #   * the first 10 characters of both inputs are equal.
        sql_query = f"""
        CREATE TABLE `{self.project_id}.{self.dataset_name}.{table_name}` AS (
            SELECT *
            FROM (
                SELECT
                    R.to_address AS from_address,
                    L.to_address,
                    COUNT(DISTINCT L.transaction_hash) AS counts
                FROM (
                    SELECT
                        transaction_hash,
                        from_address,
                        to_address,
                        input,
                        trace_address,
                        trace_address2
                    FROM
                        `{self.project_id}.{self.dataset_name}.{contract_delegate_traces_table}`
                    WHERE
                        call_type='delegatecall'
                        AND status != 0 ) AS L
                LEFT JOIN (
                    SELECT
                        OUTER_TRACE.input,
                        OUTER_TRACE.from_address,
                        OUTER_TRACE.to_address,
                        OUTER_TRACE.transaction_hash,
                        OUTER_TRACE.trace_address,
                        OUTER_TRACE.trace_address2
                    FROM
                        `{self.project_id}.{self.dataset_name}.{contract_delegate_traces_table}` AS OUTER_TRACE
                    RIGHT JOIN (
                        SELECT
                            transaction_hash
                        FROM
                            `{self.project_id}.{self.dataset_name}.{contract_delegate_traces_table}`
                        GROUP BY
                            transaction_hash
                        HAVING
                            MAX(subtraces) < 198) AS INNER_TRACE
                    ON
                        OUTER_TRACE.transaction_hash = INNER_TRACE.transaction_hash ) AS R
                ON
                    L.transaction_hash = R.transaction_hash
                WHERE
                    (R.trace_address2 < L.trace_address2
                        OR R.trace_address IS NULL)
                    AND COALESCE(R.trace_address,"root") = COALESCE(NULLIF(SUBSTRING(L.trace_address,0,CAST((ABS(LENGTH(L.trace_address)-2)+LENGTH(L.trace_address)-2)/2 AS int) ),''),"root")
                    AND SUBSTRING(L.input, 1,10) = SUBSTRING(R.input, 1,10)
                GROUP BY
                    R.to_address,
                    L.to_address
                ORDER BY
                    R.to_address )
            WHERE
                from_address IN (
                    SELECT
                        from_address
                    FROM
                        `{self.project_id}.{self.dataset_name}.{contracts_table}`))
        """

        # Run the query to create the new table
        query_job = self.client.query(sql_query)
        query_job.result()  # Wait for the query to finish
        print(f"Table created successfully: {self.project_id}.{self.dataset_name}.{table_name}")

        # Export the result to GCS, pull it to local storage and unpack it so that
        # load_active_proxy_logic_pairs can read it.
        bucket_name = "{}-{}".format(self.project_id, table_name)
        self.export_table_to_gcs(bucket_name = bucket_name, target_table = table_name, shard_name = "df")
        self.download_bucket_from_gcs(bucket_name = bucket_name, output_dir_name = table_name)
        self.decompress_gz_files(target_directory = os.path.join(self.storage_bucket_compresssed_path, table_name), output_dir_name = table_name)


    def evaluate_active_proxy_status(self, contracts_table, proxy_logic_pairs_table, table_name, override = False):
        """
        Determine the proxy status of contracts and store the result in a new table.

        Every ``from_address`` of ``contracts_table`` is left-joined against the distinct
        ``from_address`` values of ``proxy_logic_pairs_table``. The ``is_active_proxy``
        column is True when a match exists; otherwise, False.

        Args:
            contracts_table (str): Name of the table containing contract addresses.
            proxy_logic_pairs_table (str): Name of the table containing proxy-logic pairs.
            table_name (str): Name of the table to create for storing the result.
            override (bool): If True, the existing table with the same name is dropped first.

        Raises:
            ValueError: If any parameter is missing or is not a non-empty string.
            ValueError: If the dataset is not found under the project.
            ValueError: If contracts_table is not found under the dataset.
            ValueError: If proxy_logic_pairs_table is not found under the dataset.
        """

        if not all(isinstance(param, str) and param for param in [self.dataset_name, self.project_id, contracts_table, proxy_logic_pairs_table, table_name]):
            raise ValueError("contracts_table, proxy_logic_pairs_table, project_id, dataset_name, and table_name must be non-empty strings.")
        if not self.dataset_exists():
            raise ValueError(f"The '{self.dataset_name}' dataset does not exist in project '{self.project_id}'.")
        if not self.table_exists(contracts_table):
            raise ValueError(f"The '{contracts_table}' table does not exist under project '{self.project_id}.{self.dataset_name}'.")
        if not self.table_exists(proxy_logic_pairs_table):
            raise ValueError(f"The '{proxy_logic_pairs_table}' table does not exist under project '{self.project_id}.{self.dataset_name}'.")

        # If override is True, drop the existing table
        if override:
            full_table_path = f"{self.project_id}.{self.dataset_name}.{table_name}"
            drop_table_query = f"DROP TABLE IF EXISTS `{full_table_path}`"
            self.client.query(drop_table_query).result()  # Wait for the query to finish
            print(f"Existing table dropped: {full_table_path}")

        # Construct the SQL query dynamically.
        # COALESCE turns the NULL produced by an unmatched LEFT JOIN row into False.
        sql_query = f"""
        CREATE TABLE `{self.project_id}.{self.dataset_name}.{table_name}` AS (
            SELECT
                contracts.from_address,
                COALESCE(proxy_logic_pairs.is_active, False) AS is_active_proxy
            FROM
                `{self.project_id}.{self.dataset_name}.{contracts_table}` AS contracts
            LEFT JOIN (
                SELECT
                    DISTINCT from_address,
                    TRUE AS is_active
                FROM
                    `{self.project_id}.{self.dataset_name}.{proxy_logic_pairs_table}` ) AS proxy_logic_pairs
            ON
                proxy_logic_pairs.from_address = contracts.from_address)
        """

        # Run the query to create the new table
        query_job = self.client.query(sql_query)
        query_job.result()  # Wait for the query to finish
        print(f"Table created successfully: {self.project_id}.{self.dataset_name}.{table_name}")

    def download_active_proxy_contracts(self, contracts_proxy_status_table, table_name, override = False):
        """
        Determine and download the list of active proxy contracts.

        Creates ``table_name`` from the rows of ``contracts_proxy_status_table`` whose
        ``is_active_proxy`` column is True, exports it to the GCS bucket
        ``<project_id>-<table_name>``, downloads the bucket and decompresses the files.

        Args:
            contracts_proxy_status_table (str): Name of the table containing contracts proxy status.
            table_name (str): Name of the table to create for storing the result.
            override (bool): If True, the existing table with the same name is dropped first.

        Raises:
            ValueError: If any parameter is missing or is not a non-empty string.
            ValueError: If the dataset is not found under the project.
            ValueError: If contracts_proxy_status_table is not found under the dataset.
        """

        if not all(isinstance(param, str) and param for param in [self.dataset_name, self.project_id, contracts_proxy_status_table, table_name]):
            raise ValueError("contracts_proxy_status_table, project_id, dataset_name, and table_name must be non-empty strings.")
        if not self.dataset_exists():
            raise ValueError(f"The '{self.dataset_name}' dataset does not exist in project '{self.project_id}'.")
        if not self.table_exists(contracts_proxy_status_table):
            raise ValueError(f"The '{contracts_proxy_status_table}' table does not exist under project '{self.project_id}.{self.dataset_name}'.")

        # If override is True, drop the existing table
        if override:
            full_table_path = f"{self.project_id}.{self.dataset_name}.{table_name}"
            drop_table_query = f"DROP TABLE IF EXISTS `{full_table_path}`"
            self.client.query(drop_table_query).result()  # Wait for the query to finish
            print(f"Existing table dropped: {full_table_path}")

        # Construct the SQL query dynamically
        sql_query = f"""
        CREATE TABLE `{self.project_id}.{self.dataset_name}.{table_name}` AS (
            SELECT
                *
            FROM
                `{self.project_id}.{self.dataset_name}.{contracts_proxy_status_table}`
            WHERE is_active_proxy = True)
        """

        # Run the query to create the new table
        query_job = self.client.query(sql_query)
        query_job.result()  # Wait for the query to finish
        print(f"Table created successfully: {self.project_id}.{self.dataset_name}.{table_name}")

        # Export the result to GCS, pull it to local storage and unpack it.
        bucket_name = "{}-{}".format(self.project_id, table_name)
        self.export_table_to_gcs(bucket_name = bucket_name, target_table = table_name, shard_name = "df")
        self.download_bucket_from_gcs(bucket_name = bucket_name, output_dir_name = table_name)
        self.decompress_gz_files(target_directory = os.path.join(self.storage_bucket_compresssed_path, table_name), output_dir_name = table_name)

    @overrides
    def identify_proxies(self, contracts_list):
        """
        Classify each contract in ``contracts_list`` as an active proxy or a non-proxy.

        Runs the whole pipeline against the instance's fixed table names, overwriting
        any table left over from a previous run. Inactive proxy contracts cannot be
        detected by this function. Traces are collected with the default
        ``as_of_timestamp`` of ``aggregate_delegatecall_traces``.

        Args:
            contracts_list (list): List of Ethereum contract addresses in hexadecimal
                format to be classified.

        Steps:
        1. Create the dataset if it does not exist yet.
        2. Build the contracts table from ``contracts_list``.
        3. Collect the full traces of transactions in which those contracts issued a delegatecall.
        4. Derive the proxy / logic (implementation) contract pairs from the traces.
        5. Flag every contract with its active-proxy status.
        6. Extract the active proxies into their own table and download it.
        """

        # Step 1: the working dataset must exist before any table is created in it.
        dataset_missing = not self.dataset_exists()
        if dataset_missing:
            self.create_dataset()

        # Step 2: (re)create the table holding the addresses under analysis.
        self.create_contracts_table(contracts_list, self.contracts_table_name, override=True)

        # Step 3: gather the delegatecall traces of those contracts.
        self.aggregate_delegatecall_traces(
            self.contracts_table_name,
            self.contracts_delegate_trace_table_name,
            override=True,
        )

        # Step 4: pair each active proxy with its logic/implementation contracts.
        self.identify_proxy_logic_pairs(
            self.contracts_delegate_trace_table_name,
            self.contracts_table_name,
            self.proxy_logic_pairs_table_name,
            override=True,
        )

        # Step 5: mark each contract as active proxy or not.
        self.evaluate_active_proxy_status(
            self.contracts_table_name,
            self.proxy_logic_pairs_table_name,
            self.contracts_proxy_status_table_name,
            override=True,
        )

        # Step 6: keep only the active proxies and fetch them locally.
        self.download_active_proxy_contracts(
            self.contracts_proxy_status_table_name,
            self.active_proxy_contracts_table_name,
            override = True,
        )

    @overrides
    def identify_proxies_as_of(self, as_of_timestamp=Utils.STUDY_END_DATE):
        """
        Identify all the active proxy contracts as of a given timestamp.

        Rebuilds every pipeline table (overwriting existing ones) so that the dataset
        reflects the proxy contracts known up to ``as_of_timestamp``.

        Parameters:
            as_of_timestamp (str, optional): Upper limit for data collection and
                processing, passed on to the address collection and the trace
                aggregation steps. Defaults to ``Utils.STUDY_END_DATE``.

        Workflow:
            1. Create the dataset if it does not exist yet.
            2. Collect all deployed contract addresses up to the timestamp into a table.
            3. Aggregate the delegatecall traces of these contracts up to the timestamp.
            4. Identify and record the proxy / logic (implementation) contract pairs.
            5. Record each contract's proxy status (active or not).
            6. Extract the active proxies into their own table and download it.
        """

        # 1. The working dataset must exist before any table is created in it.
        dataset_missing = not self.dataset_exists()
        if dataset_missing:
            self.create_dataset()

        # 2. Unique deployed contract addresses as of the given timestamp.
        self.collect_contract_addresses_as_of(self.contracts_table_name, as_of_timestamp, override=True)

        # 3. Full traces of the transactions in which a contract delegated at least once.
        self.aggregate_delegatecall_traces(
            self.contracts_table_name,
            self.contracts_delegate_trace_table_name,
            as_of_timestamp,
            override=True,
        )

        # 4. Proxy / logic pairs for proxies that actively communicated with their implementation.
        self.identify_proxy_logic_pairs(
            self.contracts_delegate_trace_table_name,
            self.contracts_table_name,
            self.proxy_logic_pairs_table_name,
            override=True,
        )

        # 5. Proxy status flag for every collected contract.
        self.evaluate_active_proxy_status(
            self.contracts_table_name,
            self.proxy_logic_pairs_table_name,
            self.contracts_proxy_status_table_name,
            override=True,
        )

        # 6. Keep only the active proxies and fetch them locally.
        self.download_active_proxy_contracts(
            self.contracts_proxy_status_table_name,
            self.active_proxy_contracts_table_name,
            override = True,
        )
        
        # self.export_table_to_gcs(bucket_name = "{}-{}".format(self.project_id, self.proxy_logic_pairs_table_name), target_table = self.proxy_logic_pairs_table_name, shard_name = "df")
        # self.download_bucket_from_gcs(bucket_name = "{}-{}".format(self.project_id, self.proxy_logic_pairs_table_name), output_dir_name = self.proxy_logic_pairs_table_name)
        # self.decompress_gz_files(target_directory = os.path.join(self.storage_bucket_compresssed_path, self.proxy_logic_pairs_table_name), output_dir_name = self.proxy_logic_pairs_table_name)        
    
    def load_active_proxy_logic_pairs(self, num_cores=40):
        """
        Load the exported proxy-logic pairs and index them by proxy address.

        Reads the CSV files found under
        ``<storage_bucket_decompresssed_path>/<proxy_logic_pairs_table_name>`` through
        ``Utils.multicore_read_csv``.

        Args:
            num_cores (int): Value forwarded as ``num_cores`` to ``Utils.multicore_read_csv``.
                Defaults to 40, the value that was previously hard-coded.

        Returns:
            dict: Maps each ``from_address`` to the list of its ``to_address`` values.

        Raises:
            RuntimeError: If no pair was loaded, i.e. the pairs have not been
                identified and downloaded yet.
        """
        pairs_directory = os.path.join(self.storage_bucket_decompresssed_path, self.proxy_logic_pairs_table_name)
        df_active_proxy_logic_pairs = Utils.multicore_read_csv(pairs_directory, num_cores=num_cores)

        # The original code did `raise ("...")`; raising a plain string is itself a
        # TypeError, so the intended message never reached the caller.
        if len(df_active_proxy_logic_pairs) == 0:
            raise RuntimeError("please first identify active proxy logic pairs")

        return df_active_proxy_logic_pairs.groupby('from_address')['to_address'].apply(list).to_dict()
    
    
    def is_proxy(self, address):
        """
        Tell whether ``address`` is a known active proxy.

        Args:
            address: Key to look up in ``self.active_proxy_logic_pairs``.

        Returns:
            tuple: ``(True, <value stored for address>)`` when the address is a key
            of the mapping, otherwise ``(False, [])``.
        """
        known_pairs = self.active_proxy_logic_pairs

        # Guard clause: unknown addresses are reported as non-proxies.
        if address not in known_pairs:
            return False, []

        return True, known_pairs[address]
            