In [12]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
import openpyxl
from openpyxl.styles import Font
from time import time, localtime, strftime, sleep

class ExplorerCrawler:
    def __init__(self, workbook, sheet):
        self.color = Font(name="Arial", color="ff8085")
        self.workbook = openpyxl.load_workbook(filename=workbook, data_only=True)
        self.sheet = self.workbook[sheet]

        self.options = webdriver.ChromeOptions()
        self.options.add_argument("headless")
        self.options.add_argument("--window-size=1920,1080")
        self.options.add_argument(f'user-agent=Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.50 Safari/537.36')
        
        self.driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()), options=self.options)
        
        self.only_address_map = {
            'ETH': [ ['ETH', "ETC"], "https://etherscan.io/tx/", 
            "//*[@id='ContentPlaceHolder1_maintable']/div[5]/div[2]", True],
            'BSC': [ ['BSC'], "https://bscscan.com/tx/", 
            "//*[@id='ContentPlaceHolder1_maintable']/div[5]/div[2]", True],
            'TRX': [ ["TRX", "TRON", "TRC20USDT"], "https://tronscan.org/#/transaction/", 
            "//*[@id='root']/div[2]/main/div/div/div[3]/div[2]/div/div/div/table/tbody/tr[1]/td/span/div/span/div/div/span/div/a/div",
            False],
            "HECO": [ ["HECO", "HRC"], "https://hecoinfo.com/tx/", 
            "//*[@id='ContentPlaceHolder1_maintable']/div[5]/div[2]", True ],
            "MATIC": [ ["MATIC"], "https://polygonscan.com/tx/", 
            "//*[@id='ContentPlaceHolder1_maintable']/div[5]/div[2]", True ],
            "ETC": [ ["ETC"], "https://etcblockexplorer.com/tx/", 
            "//*[@id='wrap']/div/div[3]/div/div[2]/div[1]/div/table/tbody/tr/td/span/a", True ],
            "KLAY": [ ["KLAY"], "https://scope.klaytn.com/tx/", 
            "//*[@id='root']/div/div[2]/div[2]/div/div/div/div/div/div[1]/div[3]/div[2]/div/span[1]/div/a", True],
            "BTC": [ ["BTC"], "https://blockchair.com/bitcoin/transaction/", 
            "//*[@id='page-transaction-areas']/div[4]/div/div[3]/div/div/div/div/div[1]", False],
            "BCH": [ ["BCH"], "https://blockchair.com/bitcoin-cash/transaction/", 
            "//*[@id='page-transaction-areas']/div[3]/div/div[3]/div/div/div/div/div[1]", False],
            "LTC": [ ["LTC"], "https://blockchair.com/litecoin/transaction/", 
            "//*[@id='page-transaction-areas']/div[3]/div/div[3]/div/div/div/div/div[1]", False],
            "SOL": [ ["SOL"], "https://solscan.io/tx/", 
            "//*[@id='rc-tabs-1-panel-1']/section[1]/div/div[7]/div[2]/div[2]/div/div[1]/a", False]
        }
        self.with_memo_map = {
            "ATOM": [ ["ATOM"], "https://atomscan.com/transactions/", 
            "//*[@id='app']/div[2]/div/div/div[2]/div/div[3]/div/article/div[2]/div[1]/div[2]/span/span/a/span",
            "//*[@id='app']/div[2]/div/div/div[2]/div/div[2]/div/div/div/div[6]/div[2]",
            False ],
            "EOS": [ ["EOS"], "https://eosflare.io/tx/", 
            "//*[@id='actions']/div[1]/div[3]/div/div[1]/a[1]",
            "//*[@id='actions']/div[1]/div[3]/div/div[2]",
            False ],
            "TERRA": [ ["TERRA"], "https://finder.terra.money/classic/tx/", 
            "//*[@id='root']/section/section/div[3]/div[2]/section[3]/div/a",
            "//*[@id='root']/section/section/div[2]/div[6]/div[2]",
            False ],
            "XRP": [ ["XRP"], "https://xrpscan.com/tx/",
            "//*[@id='root']/div/div/div/div/div[3]/div/div[1]/div[1]/div[2]/div[1]/div/table/tbody/tr[3]/td[2]/a",
            "//*[@id='root']/div/div/div/div/div[3]/div/div[1]/div[1]/div[2]/div[1]/div/table/tbody/tr[4]/td[2]/span/span/span",
            False ],
            "XLM": [ ["XLM"], "https://steexp.com/tx/",
            "//*[@id='operation-table']/tbody/tr/td[1]/span/a",
            "//*[@id='main-content']/div/div[1]/div/div[2]/table/tbody/tr[4]/td[2]",
            False ]
        }
        
    # workbook도 property로 넣을 수 있을까?
    def __del__(self):
        self.driver.quit()

    def save_in(self, filename):
        self.workbook.save(f"files/{filename}.xlsx")

    def logger(func):
        def wrapper(*args, **kwargs):
            start = time()
            start_time = strftime("%Y-%m-%d %I:%M:%S", localtime(start))
            print(f"starts now! {start_time}")
            func(*args, **kwargs)

            end = time()
            end_time = strftime("%Y-%m-%d %I:%M:%S", localtime(end))
            print(f"completed at {end_time}, took {round(end - start, 2)} seconds")
        return wrapper
    
    # -------------------------------------------------------------------------------------------------------------------------

    @logger
    def get_ETH_address(self, target_network = "ETH"):
        for i in range(1, self.sheet.max_row + 1):
            network = self.sheet[f"L{str(i)}"].value
            if network in self.only_address_map[target_network][0]:
                tx_hash = self.sheet[f"S{str(i)}"].value
                if "Internal" in tx_hash or self.sheet[f"N{str(i)}"].value is not None: 
                    continue
                
                if self.only_address_map[target_network][3]:
                    tx_hash = tx_hash if tx_hash[:2] == "0x" else "0x" + tx_hash
                else:
                    pass

                try:
                    self.driver.get(  self.only_address_map[target_network][1] + tx_hash )
                    sleep(1)    
                    self.driver.implicitly_wait(10)
                except Exception as e:
                    print(e)
                    print(f"row {i}: wrong chain explorer url or tx_hash")

                try:
                    element = self.driver.find_element(by=By.XPATH, value= self.only_address_map[target_network][2])
                    self.driver.implicitly_wait(10)   
                    sleep(1)    
                    from_address = element.text
                except Exception as e:
                    print(e)
                    print(f"row {i}: wrong from_address XPATH")

                self.sheet[f"N{str(i)}"].value = from_address
                self.sheet[f"N{str(i)}"].font = self.color
                self.driver.implicitly_wait(10)
                print("row:", i, "from_address:", from_address, "txhash:", tx_hash)

    def get_BSC_address(self, target_network = "BSC"):
        self.get_ETH_address(target_network)

    def get_HECO_address(self, target_network = "HECO"):
        self.get_ETH_address(target_network)

    def get_MATIC_address(self, target_network = "ETC"):
        self.get_ETH_address(target_network)
    
    def get_ETC_address(self, target_network = "TRX"):
        self.get_ETH_address(target_network)

    def get_KLAY_address(self, target_network = "KLAY"):
        self.get_ETH_address(target_network)

    @logger
    def get_TRX_address(self, target_network = "TRX"):
        # owner일때 cdce7c5405bf53aa9f64f08796eca05642d16425dcc5aa641dfcb6a2a6eaf662
        # from일때 몰겠음
        for i in range(1, self.sheet.max_row + 1):
            network = self.sheet[f"L{str(i)}"].value
            if network in self.only_address_map[target_network][0]:
                tx_hash = self.sheet[f"S{str(i)}"].value
                if "Internal" in tx_hash or self.sheet[f"N{str(i)}"].value is not None: 
                    continue
                if self.only_address_map[target_network][3]:
                    tx_hash = tx_hash if tx_hash[:2] == "0x" else "0x" + tx_hash
                else:
                    pass
                try:
                    self.driver.get(  self.only_address_map[target_network][1] + tx_hash )
                    self.driver.implicitly_wait(10)
                    sleep(1)  
                except Exception as e:
                    print(e)
                    print(f"row {i}: wrong chain explorer url or tx_hash")

                try:
                    element = self.driver.find_element(by=By.XPATH, value= self.only_address_map[target_network][2])
                    self.driver.implicitly_wait(5)   
                    sleep(1)    
                    from_address_href = element.get_attribute("href").split("/")[-1]
                    from_address_description = element.get_attribute("commonlabel")
                    if from_address_href == from_address_description:
                        from_address = from_address_href
                    else:
                        from_address = from_address_href + " (" + from_address_description+ ")"
                    print(f"row {i}: TRX Adrress")   
                except:
                    element = self.driver.find_element(by=By.XPATH, value= "//*[@id='root']/div[2]/main/div/div/div[3]/div[2]/div/div/div/table/tbody/tr[1]/td/span/div/span/div/div/span/div/a")
                    self.driver.implicitly_wait(5)   
                    sleep(1)    
                    from_address = element.get_attribute("href").split("/")[-1]
                    print(f"row {i}: TRX FromAdrress")   

                self.sheet[f"N{str(i)}"].value = from_address
                self.sheet[f"N{str(i)}"].font = self.color
                self.driver.implicitly_wait(10)
                print("row:", i, "from_address:", from_address, "txhash:", tx_hash)

    @logger
    def get_SOL_address(self, target_network = "SOL"):
        for i in range(1, self.sheet.max_row + 1):
            network = self.sheet[f"L{str(i)}"].value
            if network in self.only_address_map[target_network][0]:
                tx_hash = self.sheet[f"S{str(i)}"].value
                if "Internal" in tx_hash or self.sheet[f"N{str(i)}"].value is not None: 
                    continue
                if self.only_address_map[target_network][3]:
                    tx_hash = tx_hash if tx_hash[:2] == "0x" else "0x" + tx_hash
                else:
                    pass
                try:
                    self.driver.get(  self.only_address_map[target_network][1] + tx_hash )
                    self.driver.implicitly_wait(10)
                    sleep(1)  
                except Exception as e:
                    print(e)
                    print(f"row {i}: wrong chain explorer url or tx_hash")

                try:
                    element = self.driver.find_element(by=By.XPATH, value= self.only_address_map[target_network][2])
                    self.driver.implicitly_wait(10)   
                    sleep(1)    
                    from_address = element.get_attribute("href").split("/")[-1]
                except Exception as e:
                    print(e)
                    print(f"row {i}: wrong from_address XPATH")   

                self.sheet[f"N{str(i)}"].value = from_address
                self.sheet[f"N{str(i)}"].font = self.color
                self.driver.implicitly_wait(10)
                print("row:", i, "from_address:", from_address, "txhash:", tx_hash)

    def _get_matched_from_address(self, STEP, QUANTITY, result, preprocessing_func = None):
        for i in range(len(result)):
            if i % STEP == 1:
                TOKEN_AMOUNT = result[i]
                AMOUNT, _ = TOKEN_AMOUNT.split()

                AMOUNT = preprocessing_func(AMOUNT)
                if round(QUANTITY, 3) == round(AMOUNT, 3):
                    return result[i-1]

    @logger
    def get_BTC_address(self, target_network = "BTC"):
        for i in range(1, self.sheet.max_row + 1):
            network = self.sheet[f"L{str(i)}"].value
            if network in self.only_address_map[target_network][0]:
                tx_hash = self.sheet[f"S{str(i)}"].value
                if "Internal" in tx_hash or self.sheet[f"N{str(i)}"].value is not None: 
                    continue
                
                if self.only_address_map[target_network][3]:
                    tx_hash = tx_hash if tx_hash[:2] == "0x" else "0x" + tx_hash
                else:
                    pass
                
                try:
                    self.driver.get(  self.only_address_map[target_network][1] + tx_hash )
                    sleep(1)
                    self.driver.implicitly_wait(10)
                except Exception as e:
                    print(e)
                    print(f"row {i}: wrong chain explorer url or tx_hash")

                try:
                    element = self.driver.find_element(by=By.XPATH, value= self.only_address_map[target_network][2])
                    sleep(1)    
                    self.driver.implicitly_wait(10) 

                    # QUANTITY = self.sheet[f"M{str(i)}"].value
                    result = element.text.split("\n")
                    if result[1] != "1":
                        continue
                    # from_address = self._get_matched_from_address(2, QUANTITY, result, preprocessing_func)
                    from_address = result[2]
                except Exception as e:
                    print(e)
                    print(f"row {i}: wrong from_address XPATH")    
                
                self.sheet[f"N{str(i)}"].value = from_address
                self.sheet[f"N{str(i)}"].font = self.color
                self.driver.implicitly_wait(10)
                print("row:", i, "from_address:", from_address, "txhash:", tx_hash)
    
    def get_BCH_address(self, target_network = "BCH"):
        self.get_BTC_address(target_network)

    def get_LTC_address(self, target_network = "LTC"):
        self.get_BTC_address(target_network)


    # ------------------------------------------------------------------------------------------------------------
    @logger
    def get_ATOM_address_memo(self, target_network = "ATOM"):
        for i in range(1, self.sheet.max_row + 1):
            network = self.sheet[f"L{str(i)}"].value
            if network in self.with_memo_map[target_network][0]:
                tx_hash = self.sheet[f"S{str(i)}"].value
                if "Internal" in tx_hash or self.sheet[f"N{str(i)}"].value is not None: #요게 잘못됨, 무조건 continue하게 되어있음
                    continue

                if self.with_memo_map[target_network][4]:
                    tx_hash = tx_hash if tx_hash[:2] == "0x" else "0x" + tx_hash
                else:
                    pass
                
                try:
                    self.driver.get(  self.with_memo_map[target_network][1] + tx_hash )
                    self.driver.implicitly_wait(10)
                    sleep(3)
                except Exception as e:
                    print(e)
                    print(f"row {i}: wrong chain explorer url or tx_hash")

                try:
                    from_address_element = self.driver.find_element(by=By.XPATH, value=self.with_memo_map[target_network][2])
                    self.driver.implicitly_wait(10)
                    sleep(3)
                    from_address = from_address_element.text
                except Exception as e:
                    print(e)
                    print(f"row {i}: wrong from_address XPATH")

                try:
                    memo_element = self.driver.find_element(by=By.XPATH, value=self.with_memo_map[target_network][3])
                    self.driver.implicitly_wait(10)
                    sleep(3)
                    memo = memo_element.text
                    memo = "" if memo == "<No Memo>" or memo == "-" else memo
                except Exception as e:
                    print(e)
                    print(f"row {i}: wrong memo XPATH")

                self.sheet[f"N{str(i)}"].value = from_address
                self.sheet[f"O{str(i)}"].value = memo
                self.sheet[f"N{str(i)}"].font = self.color
                self.sheet[f"O{str(i)}"].font = self.color
                print("row:", i, "from_address:", from_address, "memo:", memo, "txhash:", tx_hash)
                self.driver.implicitly_wait(10)

    def get_EOS_address_memo(self, target_network = "EOS"):
        self.get_ATOM_address_memo(self, target_network)

    def get_TERRA_address_memo(self, target_network = "TERRA"):
        self.get_ATOM_address_memo(self, target_network)
    
    @logger
    def get_XRP_address_memo(self, target_network = "XRP"):
        for i in range(1, self.sheet.max_row + 1):
            network = self.sheet[f"L{str(i)}"].value
            if network in self.with_memo_map[target_network][0]:
                tx_hash = self.sheet[f"S{str(i)}"].value
                if "Internal" in tx_hash or self.sheet[f"N{str(i)}"].value is not None: #요게 잘못됨, 무조건 continue하게 되어있음
                    continue

                if self.with_memo_map[target_network][4]:
                    tx_hash = tx_hash if tx_hash[:2] == "0x" else "0x" + tx_hash
                else:
                    pass
                
                try:
                    self.driver.get(  self.with_memo_map[target_network][1] + tx_hash )
                    self.driver.implicitly_wait(10)
                    sleep(3)
                except Exception as e:
                    print(e)
                    print(f"row {i}: wrong chain explorer url or tx_hash")

                try:
                    from_address_element = self.driver.find_element(by=By.XPATH, value=self.with_memo_map[target_network][2])
                    self.driver.implicitly_wait(10)
                    sleep(3)
                    from_address_href = from_address_element.get_attribute('href').split('/')[-1]
                    from_address_description = from_address_element.text
                    if from_address_href == from_address_description:
                        from_address = from_address_href
                    else:
                        from_address = from_address_href + " (" + from_address_description+ ")"
                except Exception as e:
                    print(e)
                    print(f"row {i}: wrong from_address XPATH")

                try:
                    memo_element = self.driver.find_element(by=By.XPATH, value=self.with_memo_map[target_network][3])
                    self.driver.implicitly_wait(10)
                    sleep(3)
                    memo = memo_element.text
                    memo = memo.split(":")[-1].strip()
                except Exception as e:
                    print(e)
                    print(f"row {i}: wrong memo XPATH")

                self.sheet[f"N{str(i)}"].value = from_address
                self.sheet[f"O{str(i)}"].value = memo
                self.sheet[f"N{str(i)}"].font = self.color
                self.sheet[f"O{str(i)}"].font = self.color
                print("row:", i, "from_address:", from_address, "memo:", memo, "txhash:", tx_hash)
                self.driver.implicitly_wait(10)

    @logger
    def get_XLM_address_memo(self, target_network = "XLM"):
        for i in range(1, self.sheet.max_row + 1):
            network = self.sheet[f"L{str(i)}"].value
            if network in self.with_memo_map[target_network][0]:
                tx_hash = self.sheet[f"S{str(i)}"].value
                if "Internal" in tx_hash or self.sheet[f"N{str(i)}"].value is not None: #요게 잘못됨, 무조건 continue하게 되어있음
                    continue

                if self.with_memo_map[target_network][4]:
                    tx_hash = tx_hash if tx_hash[:2] == "0x" else "0x" + tx_hash
                else:
                    pass
                
                try:
                    self.driver.get(  self.with_memo_map[target_network][1] + tx_hash )
                    self.driver.implicitly_wait(10)
                    sleep(3)
                except Exception as e:
                    print(e)
                    print(f"row {i}: wrong chain explorer url or tx_hash")

                try:
                    from_address_element = self.driver.find_element(by=By.XPATH, value=self.with_memo_map[target_network][2])
                    self.driver.implicitly_wait(10)
                    sleep(3)
                    from_address_href = from_address_element.get_attribute('href').split('/')[-1]
                    from_address_description = from_address_element.text
                    if from_address_href == from_address_description:
                        from_address = from_address_href
                    else:
                        from_address = from_address_href + " (" + from_address_description+ ")"
                except Exception as e:
                    print(e)
                    print(f"row {i}: wrong from_address XPATH")

                try:
                    memo_element = self.driver.find_element(by=By.XPATH, value=self.with_memo_map[target_network][3])
                    self.driver.implicitly_wait(10)
                    sleep(3)
                    memo = memo_element.text
                except Exception as e:
                    print(e)
                    print(f"row {i}: wrong memo XPATH")

                self.sheet[f"N{str(i)}"].value = from_address
                self.sheet[f"O{str(i)}"].value = memo
                self.sheet[f"N{str(i)}"].font = self.color
                self.sheet[f"O{str(i)}"].font = self.color
                print("row:", i, "from_address:", from_address, "memo:", memo, "txhash:", tx_hash)
                self.driver.implicitly_wait(10)



In [10]:
# Crawl the ATOM rows of the deposit sheet and save the result as a new file.
filename = "files/Bbrick_취합파일 정리_2200624 (거래소정리 이걸로).xlsx"
ec = ExplorerCrawler(filename, "입금")
try:
    ec.get_ATOM_address_memo()
    # ExplorerCrawler defines save_in(), not save(); it writes to
    # files/<name>.xlsx.
    ec.save_in("ATOM 제발")
finally:
    # Dropping the last reference runs ExplorerCrawler.__del__, which quits
    # the Chrome driver — do this even when the crawl raised.
    del ec





[WDM] - Current google-chrome version is 103.0.5060
[WDM] - Get LATEST chromedriver version for 103.0.5060 google-chrome
[WDM] - Driver [/Users/bbrick/.wdm/drivers/chromedriver/mac64/103.0.5060.53/chromedriver] found in cache


starts now! 2022-06-29 05:58:25
row: 58 from_address: cosmos1yp8q44e4t4evksnczmzlw0ngxj4hwuv5rjw3a8 memo:  txhash: 2B8C6B92924225EB364BF5062B78D32568044957079778698743D07157CCCDD3
row: 100 from_address: cosmos1yp8q44e4t4evksnczmzlw0ngxj4hwuv5rjw3a8 memo:  txhash: FCDB2B2BE663ABD1AD7773A24AD18B884D185B3DD028FD956195EFE4360D4618
row: 102 from_address: cosmos1yp8q44e4t4evksnczmzlw0ngxj4hwuv5rjw3a8 memo:  txhash: 7197A9EFE9A48F6171C16DEA6C64A551AD168AE87F0B8E47A887A20A7379D087
row: 125 from_address: cosmos1yp8q44e4t4evksnczmzlw0ngxj4hwuv5rjw3a8 memo:  txhash: 79B4D9805F7D66B0CBA7246F5C80E1D044665CC3A35E92E7628D9DC987EAC526
row: 611 from_address: cosmos1yp8q44e4t4evksnczmzlw0ngxj4hwuv5rjw3a8 memo:  txhash: C1B56E9CCBDC8B1936C9774132357184BC69AC72709CBFF53FF879F8E01AE609
row: 813 from_address: cosmos1xxkueklal9vejv9unqu80w9vptyepfa95pd53u memo:  txhash: 629D4CE834F0B3779968B613FBC76C6B83276131170B4E24558C26F5A6690BC5
row: 868 from_address: cosmos1t3vqgqjlk0zzcx32juhqqhpn5aar0gus63lnnp memo: 