# In [None]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
import openpyxl
from openpyxl.styles import Font
import time

# In [3]:
class ExplorerCrawler:
    """Fill in sender addresses (and memos) of transactions listed in a workbook.

    For every worksheet row whose network (column L) belongs to the requested
    chain, the transaction hash (column S) is opened on that chain's block
    explorer in headless Chrome, the sender address is scraped through an
    XPath and written to column N; chains in ``with_memo_map`` additionally
    get their memo written to column O.  Cells touched by the crawler are
    given ``self.color`` as font.

    The workbook is only modified in memory; call ``self.workbook.save(...)``
    to persist the result.  NOTE: the workbook is loaded with
    ``data_only=True``, so saving it over the original file replaces formulas
    with their cached values.
    """

    # Worksheet columns read and written by the crawler.
    _NETWORK_COLUMN = "L"
    _ADDRESS_COLUMN = "N"
    _MEMO_COLUMN = "O"
    _TX_HASH_COLUMN = "S"

    # Seconds Selenium keeps retrying an element lookup before giving up.
    _PAGE_WAIT_SECONDS = 10

    # XPath of the "From" field shared by the Etherscan-style explorers.
    _ETHERSCAN_FROM_XPATH = "//*[@id='ContentPlaceHolder1_maintable']/div[5]/div[2]"

    # Spec layout: [network aliases, tx URL prefix, address XPath, needs "0x" prefix]
    _ONLY_ADDRESS_MAP = {
        # NOTE(review): "ETC" is also listed as an ETH alias although ETC has
        # its own entry below; kept as in the original, please confirm.
        'ETH': [['ETH', "ETC"], "https://etherscan.io/tx/",
                _ETHERSCAN_FROM_XPATH, True],
        'BSC': [['BSC'], "https://bscscan.com/tx/",
                _ETHERSCAN_FROM_XPATH, True],
        'TRX': [["TRX", "TRON", "TRC20USDT"], "https://tronscan.org/#/transaction/",
                "//*[@id='root']/div[2]/main/div/div/div[3]/div[2]/div/div/div/table/tbody/tr[1]/td/span/div/span/div/div/span/div/a/div",
                False],
        "HECO": [["HECO", "HRC"], "https://hecoinfo.com/tx/",
                 _ETHERSCAN_FROM_XPATH, True],
        "MATIC": [["MATIC"], "https://polygonscan.com/tx/",
                  _ETHERSCAN_FROM_XPATH, True],
        "ETC": [["ETC"], "https://etcblockexplorer.com/tx/",
                "//*[@id='txSpecific']/span[19]", True],
        "KLAY": [["KLAY"], "https://scope.klaytn.com/tx/",
                 "//*[@id='root']/div/div[2]/div[2]/div/div/div/div/div/div[1]/div[3]/div[2]/div/span[1]/div/a",
                 True],
        # The alias list used to be ["KLAY"], so BTC rows were never matched.
        # NOTE(review): the XPath is identical to the KLAY one and is probably
        # a placeholder; confirm it against the blockstream.info markup.
        "BTC": [["BTC"], "https://blockstream.info/tx/",
                "//*[@id='root']/div/div[2]/div[2]/div/div/div/div/div/div[1]/div[3]/div[2]/div/span[1]/div/a",
                False],
    }

    # Spec layout: [network aliases, tx URL prefix, address XPath, memo XPath,
    #               needs "0x" prefix]
    _WITH_MEMO_MAP = {
        "ATOM": [["ATOM"], "https://atomscan.com/transactions/",
                 "//*[@id='app']/div[2]/div/div/div[2]/div/div[3]/div/article/div[2]/div[1]/div[2]/span/span/a/span",
                 "//*[@id='app']/div[2]/div/div/div[2]/div/div[2]/div/div/div/div[6]/div[2]",
                 False],
        "EOS": [["EOS"], "https://eosflare.io/tx/",
                "//*[@id='actions']/div[1]/div[3]/div/div[1]/a[1]",
                "//*[@id='actions']/div[1]/div[3]/div/div[2]",
                False],
        "TERRA": [["TERRA"], "https://finder.terra.money/classic/tx/",
                  "//*[@id='root']/section/section/div[3]/div[2]/section[3]/div/a",
                  "//*[@id='root']/section/section/div[2]/div[6]/div[2]",
                  False],
        "XRP": [["XRP"], "https://xrpscan.com/tx/",
                "//*[@id='root']/div/div/div/div/div[3]/div/div[1]/div[1]/div[2]/div[1]/div/table/tbody/tr[3]/td[2]/a",
                "//*[@id='root']/div/div/div/div/div[3]/div/div[1]/div[1]/div[2]/div[1]/div/table/tbody/tr[4]/td[2]/span/span/span",
                False],
        "XLM": [["XLM"], "https://steexp.com/tx/",
                "//*[@id='operation-table']/tbody/tr/td[1]/span/a",
                "//*[@id='main-content']/div/div[1]/div/div[2]/table/tbody/tr[4]/td[2]",
                False],
    }

    def __init__(self, workbook, sheet):
        """Load the workbook and prepare the headless Chrome options.

        Args:
            workbook: Path (or file object) handed to ``openpyxl.load_workbook``.
            sheet: Name of the worksheet holding the transaction rows.
        """
        self.color = Font(name="Arial", color="8f34eb")
        self.workbook = openpyxl.load_workbook(filename=workbook, data_only=True)
        # The sheet name was previously dropped, which made _get_sheet fail.
        self.sheet = sheet

        self.options = webdriver.ChromeOptions()
        self.options.add_argument("headless")
        self.options.add_argument("--window-size=1920,1080")
        self.options.add_argument('user-agent=Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.50 Safari/537.36')

        # Per-instance copies so a caller may tweak a spec without affecting
        # other crawlers.
        self.only_address_map = {
            name: list(spec) for name, spec in self._ONLY_ADDRESS_MAP.items()
        }
        self.with_memo_map = {
            name: list(spec) for name, spec in self._WITH_MEMO_MAP.items()
        }

    def _get_webdriver(self, options):
        """Start a Chrome driver (installing chromedriver if needed) with *options*."""
        return webdriver.Chrome(
            service=Service(ChromeDriverManager().install()), options=options
        )

    def _get_sheet(self):
        """Return the worksheet named ``self.sheet`` from the loaded workbook."""
        return self.workbook[self.sheet]

    # ------------------------------------------------------------------
    # Pure helpers
    # ------------------------------------------------------------------
    @staticmethod
    def _needs_lookup(tx_hash, existing_address):
        """Tell whether a row still has to be looked up on the explorer.

        A row is skipped when its hash cell is empty or not a string, when the
        hash is marked "Internal", or when the address cell is already filled.
        An address cell holding ``None`` or an empty string counts as unfilled.
        """
        if not isinstance(tx_hash, str) or not tx_hash.strip():
            return False
        if "Internal" in tx_hash:
            return False
        return existing_address is None or existing_address == ""

    @staticmethod
    def _normalize_tx_hash(tx_hash, needs_0x_prefix):
        """Return *tx_hash* stripped, with "0x" prepended when the explorer needs it."""
        tx_hash = tx_hash.strip()
        if needs_0x_prefix and not tx_hash.startswith("0x"):
            return "0x" + tx_hash
        return tx_hash

    @staticmethod
    def _find(driver, xpath):
        """Locate the single element matching *xpath* on the current page."""
        return driver.find_element(by=By.XPATH, value=xpath)

    @staticmethod
    def _href_tail(element):
        """Return the last path segment of the element's ``href`` attribute."""
        href = element.get_attribute("href")
        if not href:
            raise ValueError("element has no href attribute")
        return href.split("/")[-1]

    # ------------------------------------------------------------------
    # Extractors: read values from the page currently loaded in `driver`
    # ------------------------------------------------------------------
    def _text_address(self, driver, spec):
        """Return the visible text of the address element.

        NOTE(review): the per-chain methods using this were empty stubs; reading
        the element text is an assumed default, verify it for each explorer.
        """
        return self._find(driver, spec[2]).text

    def _href_address(self, driver, spec):
        """Return the address taken from the last segment of the link's href."""
        return self._href_tail(self._find(driver, spec[2]))

    def _text_address_memo(self, driver, spec):
        """Return ``(address, memo)`` from the visible text of both elements.

        NOTE(review): assumed default for the previously empty stubs.
        """
        return self._find(driver, spec[2]).text, self._find(driver, spec[3]).text

    def _xrp_address_memo(self, driver, spec):
        """Return ``("<href tail> (<link text>)", memo)`` for an XRP transaction.

        The memo is the part after the last ":" of the memo element's text.
        """
        address_element = self._find(driver, spec[2])
        address = self._href_tail(address_element) + " (" + address_element.text + ")"
        memo = self._find(driver, spec[3]).text.split(":")[-1].strip()
        return address, memo

    def _xlm_address_memo(self, driver, spec):
        """Return ``(href tail of the address link, memo text)`` for XLM."""
        address = self._href_tail(self._find(driver, spec[2]))
        return address, self._find(driver, spec[3]).text

    # ------------------------------------------------------------------
    # Crawl loop
    # ------------------------------------------------------------------
    def _crawl(self, start, target_network, spec_map, extract, columns):
        """Visit every matching row from *start* on and write the scraped values.

        Args:
            start: First worksheet row (1-based) to inspect.
            target_network: Key of *spec_map* selecting the explorer.
            spec_map: ``self.only_address_map`` or ``self.with_memo_map``.
            extract: Callable ``(driver, spec)`` returning one value per column.
            columns: Column letters receiving the extracted values, in order.
        """
        spec = spec_map[target_network]
        aliases, base_url = spec[0], spec[1]
        # The flag is the last item of a spec in both maps; the old fixed
        # index 4 was out of range for the four-item only-address specs.
        needs_0x_prefix = spec[-1]

        num_written = 0
        start_time = time.time()
        sheet = self._get_sheet()
        driver = self._get_webdriver(self.options)
        try:
            # The implicit wait is a session-wide setting, not a sleep, so it
            # only needs to be configured once.
            driver.implicitly_wait(self._PAGE_WAIT_SECONDS)
            for row in range(start, sheet.max_row + 1):
                if sheet[f"{self._NETWORK_COLUMN}{row}"].value not in aliases:
                    continue
                tx_hash = sheet[f"{self._TX_HASH_COLUMN}{row}"].value
                existing = sheet[f"{self._ADDRESS_COLUMN}{row}"].value
                if not self._needs_lookup(tx_hash, existing):
                    continue
                tx_hash = self._normalize_tx_hash(tx_hash, needs_0x_prefix)

                try:
                    driver.get(base_url + tx_hash)
                    values = tuple(extract(driver, spec))
                except Exception as exc:
                    # Best effort per row: a missing element usually means the
                    # hash belongs to another chain; keep going with the rest.
                    print("Wrong chain or invalid existing tx_hash",
                          f"(row {row}: {exc!r})")
                else:
                    for column, value in zip(columns, values):
                        sheet[f"{column}{row}"].value = value
                    num_written += 1
                    print("row:", row, "values:", values, "txhash:", tx_hash)

                # Mark every visited row, as the original loop did.
                for column in columns:
                    sheet[f"{column}{row}"].font = self.color
        finally:
            # Always release the browser, even when a row blows up.
            driver.quit()
        print(f"written {num_written}, took {time.time() - start_time} seconds")

    def only_address(self, func):
        """Wrap an address extractor into a crawl over ``only_address_map``.

        Args:
            func: Callable ``(driver, spec)`` returning the sender address of
                the transaction page currently loaded in ``driver``.

        Returns:
            ``wrapper(start, target_network)`` which crawls the sheet from row
            *start* and writes the address to column N.
        """
        def wrapper(start, target_network):
            self._crawl(
                start,
                target_network,
                self.only_address_map,
                lambda driver, spec: (func(driver, spec),),
                (self._ADDRESS_COLUMN,),
            )
        return wrapper

    def with_memo(self, func):
        """Wrap an address/memo extractor into a crawl over ``with_memo_map``.

        Args:
            func: Callable ``(driver, spec)`` returning ``(address, memo)`` for
                the transaction page currently loaded in ``driver``.

        Returns:
            ``wrapper(start, target_network)`` which crawls the sheet from row
            *start* and writes the address to column N and the memo to column O.
        """
        def wrapper(start, target_network):
            self._crawl(
                start,
                target_network,
                self.with_memo_map,
                func,
                (self._ADDRESS_COLUMN, self._MEMO_COLUMN),
            )
        return wrapper

    # ------------------------------------------------------------------
    # Public entry points, one per chain
    # ------------------------------------------------------------------
    def get_BTC_address(self, start, target_address="BTC"):
        """Fill sender addresses of BTC rows from row *start* on (href based)."""
        self.only_address(self._href_address)(start, target_address)

    def get_ETH_address(self, start, target_address="ETH"):
        """Fill sender addresses of ETH rows from row *start* on."""
        self.only_address(self._text_address)(start, target_address)

    def get_BSC_address(self, start, target_address="BSC"):
        """Fill sender addresses of BSC rows from row *start* on."""
        self.only_address(self._text_address)(start, target_address)

    def get_TRX_address(self, start, target_address="TRX"):
        """Fill sender addresses of TRX rows from row *start* on."""
        self.only_address(self._text_address)(start, target_address)

    def get_HECO_address(self, start, target_address="HECO"):
        """Fill sender addresses of HECO rows from row *start* on."""
        self.only_address(self._text_address)(start, target_address)

    def get_ETC_address(self, start, target_address="ETC"):
        """Fill sender addresses of ETC rows from row *start* on."""
        self.only_address(self._text_address)(start, target_address)

    def get_KLAY_address(self, start, target_address="KLAY"):
        """Fill sender addresses of KLAY rows from row *start* on."""
        self.only_address(self._text_address)(start, target_address)

    def get_ATOM_address_memo(self, start, target_address="ATOM"):
        """Fill sender addresses and memos of ATOM rows from row *start* on."""
        self.with_memo(self._text_address_memo)(start, target_address)

    def get_EOS_address_memo(self, start, target_address="EOS"):
        """Fill sender addresses and memos of EOS rows from row *start* on."""
        self.with_memo(self._text_address_memo)(start, target_address)

    def get_TERRA_address_memo(self, start, target_address="TERRA"):
        """Fill sender addresses and memos of TERRA rows from row *start* on."""
        self.with_memo(self._text_address_memo)(start, target_address)

    def get_XRP_address_memo(self, start, target_address="XRP"):
        """Fill sender addresses and memos of XRP rows from row *start* on."""
        self.with_memo(self._xrp_address_memo)(start, target_address)

    def get_XLM_address_memo(self, start, target_address="XLM"):
        """Fill sender addresses and memos of XLM rows from row *start* on."""
        self.with_memo(self._xlm_address_memo)(start, target_address)
