In [3]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
import openpyxl
from openpyxl.styles import Font
from time import time, localtime, strftime

class ExplorerCrawler:
    def __init__(self, workbook, sheet):
        self.color = Font(name="Arial", color="8f34eb")
        self.workbook = openpyxl.load_workbook(filename=workbook, data_only=True)
        self.sheet = self.workbook[sheet]

        self.options = webdriver.ChromeOptions()
        self.options.add_argument("headless")
        self.options.add_argument("--window-size=1920,1080")
        self.options.add_argument(f'user-agent=Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.50 Safari/537.36')
        
        self.driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()), options=self.options)
        
        
        self.only_address_map = {
            'ETH': [ ['ETH', "ETC"], "https://etherscan.io/tx/", 
            "//*[@id='ContentPlaceHolder1_maintable']/div[5]/div[2]", True],
            'BSC': [ ['BSC'], "https://bscscan.com/tx/", 
            "//*[@id='ContentPlaceHolder1_maintable']/div[5]/div[2]", True],
            'TRX': [ ["TRX", "TRON", "TRC20USDT"], "https://tronscan.org/#/transaction/", 
            "//*[@id='root']/div[2]/main/div/div/div[3]/div[2]/div/div/div/table/tbody/tr[1]/td/span/div/span/div/div/span/div/a/div",
            False],
            "HECO": [ ["HECO", "HRC"], "https://hecoinfo.com/tx/", 
            "//*[@id='ContentPlaceHolder1_maintable']/div[5]/div[2]", True ],
            "MATIC": [ ["MATIC"], "https://polygonscan.com/tx/", 
            "//*[@id='ContentPlaceHolder1_maintable']/div[5]/div[2]", True ],
            "ETC": [ ["ETC"], "https://etcblockexplorer.com/tx/", 
            "//*[@id='wrap']/div/div[3]/div/div[2]/div[1]/div/table/tbody/tr/td/span/a", True ],
            "KLAY": [ ["KLAY"], "https://scope.klaytn.com/tx/", 
            "//*[@id='root']/div/div[2]/div[2]/div/div/div/div/div/div[1]/div[3]/div[2]/div/span[1]/div/a", True],
            "BTC": [ ["BTC"], "https://blockstream.info/tx/", 
            "//*[@id='transaction-box']/div[2]/div[3]", False],
            "BCH": [ ["BCH"], "https://blockchair.com/bitcoin-cash/transaction/", 
            "//*[@id='page-transaction-areas']/div[3]/div/div[3]/div/div/div/div/div[2]", False]
        }
        self.with_memo_map = {
            "ATOM": [ ["ATOM"], "https://atomscan.com/transactions/", 
            "//*[@id='app']/div[2]/div/div/div[2]/div/div[3]/div/article/div[2]/div[1]/div[2]/span/span/a/span",
            "//*[@id='app']/div[2]/div/div/div[2]/div/div[2]/div/div/div/div[6]/div[2]",
            False ],
            "EOS": [ ["EOS"], "https://eosflare.io/tx/", 
            "//*[@id='actions']/div[1]/div[3]/div/div[1]/a[1]",
            "//*[@id='actions']/div[1]/div[3]/div/div[2]",
            False ],
            "TERRA": [ ["TERRA"], "https://finder.terra.money/classic/tx/", 
            "//*[@id='root']/section/section/div[3]/div[2]/section[3]/div/a",
            "//*[@id='root']/section/section/div[2]/div[6]/div[2]",
            False ],
            "XRP": [ ["XRP"], "https://xrpscan.com/tx/",
            "//*[@id='root']/div/div/div/div/div[3]/div/div[1]/div[1]/div[2]/div[1]/div/table/tbody/tr[3]/td[2]/a",
            "//*[@id='root']/div/div/div/div/div[3]/div/div[1]/div[1]/div[2]/div[1]/div/table/tbody/tr[4]/td[2]/span/span/span",
            False ],
            "XLM": [ ["XLM"], "https://steexp.com/tx/",
            "//*[@id='operation-table']/tbody/tr/td[1]/span/a",
            "//*[@id='main-content']/div/div[1]/div/div[2]/table/tbody/tr[4]/td[2]",
            False ]
        }
        
    # workbook도 property로 넣을 수 있을까?

    def logger(func):
        def wrapper(*args, **kwargs):
            start = time()
            start_time = strftime("%Y-%m-%d %I:%M:%S", localtime(start))
            print(f"starts now! {start_time}")
            func(*args, **kwargs)

            end = time()
            end_time = strftime("%Y-%m-%d %I:%M:%S", localtime(end))
            print(f"completed at {end_time}, took {end - start } seconds")
        return wrapper
    
    # -------------------------------------------------------------------------------------------------------------------------

    @logger
    def get_ETH_address(self, start, target_network = "ETH"):
        for i in range(start, self.sheet.max_row + 1):
            network = self.sheet[f"L{str(i)}"].value
            if network in self.only_address_map[target_network][0]:
                tx_hash = self.sheet[f"S{str(i)}"].value
                if "Internal" in tx_hash or self.sheet[f"N{str(i)}"].value is not None: 
                    continue
                
                if self.only_address_map[target_network][3]:
                    tx_hash = tx_hash if tx_hash[:2] == "0x" else "0x" + tx_hash
                else:
                    pass
                self.driver.get(  self.only_address_map[target_network][1] + tx_hash )
                self.driver.implicitly_wait(10)
                try:
                    element = self.driver.find_element(by=By.XPATH, value= self.only_address_map[target_network][2])
                    self.driver.implicitly_wait(10)   
                    from_address = element.text
                    self.sheet[f"N{str(i)}"].value = from_address
                    print("row:", i, "from_address:", from_address, "txhash:", tx_hash)
                except:
                    print(f"row {i}: Wrong chain, wrong map or invalid existing tx_hash")
                self.sheet[f"N{str(i)}"].font = self.color
                self.driver.implicitly_wait(10)

        self.driver.quit()
        self.workbook.save(f"files/{target_network} 완료.xlsx")

    @logger
    def get_BSC_address(self, start, target_network = "BSC"):
        self.get_ETH_address(start, target_network)

    @logger
    def get_HECO_address(self, start, target_network = "HECO"):
        self.get_ETH_address(start, target_network)

    @logger
    def get_MATIC_address(self, start, target_network = "ETC"):
        self.get_ETH_address(start, target_network)
    
    @logger
    def get_ETC_address(self, start, target_network = "TRX"):
        self.get_ETH_address(start, target_network)


    @logger
    def get_TRX_address(self, start, target_network = "TRX"):
        # owner 인지, from인지에 따라 달라서 여러번에 걸쳐 했었음
        # 나중에 하자
        pass

    def _get_matched_from_address(self, STEP, QUANTITY, result, preprocessing_func = None):
        for i in range(len(result)):
            if i % STEP == 1:
                TOKEN_AMOUNT = result[i]
                AMOUNT, _ = TOKEN_AMOUNT.split()

                AMOUNT = preprocessing_func(AMOUNT)
                if round(QUANTITY, 3) == round(AMOUNT, 3):
                    return result[i-1]

    @logger
    def get_BTC_address(self, start, target_network = "BTC", preprocessing_func = float):
        for i in range(start, self.sheet.max_row + 1):
            network = self.sheet[f"L{str(i)}"].value
            if network in self.only_address_map[target_network][0]:
                tx_hash = self.sheet[f"S{str(i)}"].value
                if "Internal" in tx_hash or self.sheet[f"N{str(i)}"].value is not None: 
                    continue
                
                if self.only_address_map[target_network][3]:
                    tx_hash = tx_hash if tx_hash[:2] == "0x" else "0x" + tx_hash
                else:
                    pass
                self.driver.implicitly_wait(10)
                self.driver.get(  self.only_address_map[target_network][1] + tx_hash )
                self.driver.implicitly_wait(10)
                try:
                    element = self.driver.find_element(by=By.XPATH, value= self.only_address_map[target_network][2])
                    self.driver.implicitly_wait(10) 

                    QUANTITY = self.sheet[f"M{str(i)}"].value
                    result = element.text.split("\n")
                    from_address = self._get_matched_from_address(2, QUANTITY, result, preprocessing_func)

                    self.sheet[f"N{str(i)}"].value = from_address
                    print("row:", i, "from_address:", from_address, "txhash:", tx_hash)
                except:
                    print(f"row {i}: wrong chain, wrong map or invalid existing tx_hash")
                self.sheet[f"N{str(i)}"].font = self.color
                self.driver.implicitly_wait(10)

        self.driver.quit()
        self.workbook.save(f"files/{target_network} 2t 완료.xlsx")
    
    # 이거 두개를 한번에 하고 싶은데...
    @logger
    def get_BCH_address(self, start, target_network = "BCH", preprocessing_func = lambda AMOUNT : float(AMOUNT.replace(",","")) ):
        for i in range(start, self.sheet.max_row + 1):
            network = self.sheet[f"L{str(i)}"].value
            if network in self.only_address_map[target_network][0]:
                tx_hash = self.sheet[f"S{str(i)}"].value
                if "Internal" in tx_hash or self.sheet[f"N{str(i)}"].value is not None: 
                    continue
                if self.only_address_map[target_network][3]:
                    tx_hash = tx_hash if tx_hash[:2] == "0x" else "0x" + tx_hash
                else:
                    pass
                self.driver.implicitly_wait(10)
                self.driver.get( self.only_address_map[target_network][1] + tx_hash )
                self.driver.implicitly_wait(10)
                try:
                    element = self.driver.find_element(by=By.XPATH, value= self.only_address_map[target_network][2])
                    self.driver.implicitly_wait(10) 

                    QUANTITY = self.sheet[f"M{str(i)}"].value
                    result = element.text.split("\n")[2:]
                    from_address = self._get_matched_from_address(3, QUANTITY, result, preprocessing_func)

                    self.sheet[f"N{str(i)}"].value = from_address
                    print("row:", i, "from_address:", from_address, "txhash:", tx_hash)
                except:
                    print(f"row {i}: wrong chain, wrong map or invalid existing tx_hash")
                self.sheet[f"N{str(i)}"].font = self.color
                self.driver.implicitly_wait(10)

        self.driver.quit()
        self.workbook.save(f"files/{target_network} 2t 완료.xlsx")

    @logger
    def get_KLAY_address(self, start, target_network = "KLAY"):
        for i in range(start, self.sheet.max_row + 1):
            network = self.sheet[f"L{str(i)}"].value
            if network in self.only_address_map[target_network][0]:
                tx_hash = self.sheet[f"S{str(i)}"].value

                if "Internal" in tx_hash or self.sheet[f"N{str(i)}"].value is not None: 
                    continue
                
                if self.only_address_map[target_network][3]:
                    tx_hash = tx_hash if tx_hash[:2] == "0x" else "0x" + tx_hash
                else:
                    pass
                
                self.driver.get( self.only_address_map[target_network][1] + tx_hash )
                self.driver.implicitly_wait(10)
                try:
                    element = self.driver.find_element(by=By.XPATH, value= self.only_address_map[target_network][2])
                    self.driver.implicitly_wait(10)   
                    address = element.get_attribute('href').split("/")[-1]
                    exchange = element.text
                    from_address = address + " (" + exchange + ")"
                    self.sheet[f"N{str(i)}"].value = from_address 
                    print("row:", i, "from_address:", from_address, "txhash:", tx_hash)
                except:
                    print(f"row {i}: Wrong chain, wrong map or invalid existing tx_hash")
                self.sheet[f"N{str(i)}"].font = self.color
                self.driver.implicitly_wait(10)

        self.driver.quit()
        self.workbook.save(f"files/{target_network} 완료.xlsx")

    # ------------------------------------------------------------------------------------------------------------
    @logger
    def get_ATOM_address_memo(self, start, target_network = "ATOM"):
        for i in range(start, self.sheet.max_row + 1):
            network = self.sheet[f"L{str(i)}"].value
            if network in self.with_memo_map[target_network][0]:
                tx_hash = self.sheet[f"S{str(i)}"].value
                if "Internal" in tx_hash or self.sheet[f"N{str(i)}"].value is not None: #요게 잘못됨, 무조건 continue하게 되어있음
                    continue

                if self.with_memo_map[target_network][4]:
                    tx_hash = tx_hash if tx_hash[:2] == "0x" else "0x" + tx_hash
                else:
                    pass

                self.driver.get(  self.with_memo_map[target_network][1] + tx_hash )
                self.driver.implicitly_wait(10)
                try:
                    from_address_element = self.driver.find_element(by=By.XPATH, value=self.with_memo_map[target_network][2])
                    self.driver.implicitly_wait(10)
                    from_address = from_address_element.get_attribute("href").split("/")[-1]
                    self.sheet[f"N{str(i)}"].value = from_address
                    memo_element = self.driver.find_element(by=By.XPATH, value=self.with_memo_map[target_network][3])
                    self.driver.implicitly_wait(10)
                    memo = memo_element.text
                    print("row:", i, "from_address:", from_address, "memo:", memo, "txhash:", tx_hash)
                    self.sheet[f"O{str(i)}"].value = memo

                    num_written += 1    
                except:
                    print(f"row {i}: Wrong chain, wrong map or invalid existing tx_hash")

                self.sheet[f"N{str(i)}"].font = self.color
                self.sheet[f"O{str(i)}"].font = self.color
                self.driver.implicitly_wait(10)

        self.driver.quit()
        self.workbook.save(f"files/{target_network} 완료.xlsx")

    @logger
    def get_EOS_address_memo(self, start, target_network = "EOS"):
        pass

    @logger
    def get_TERRA_address_memo(self, start, target_network = "TERRA"):
        pass
    
    @logger
    def get_XRP_address_memo(self, start, target_network = "XRP"):
        from_address_href = from_address_element.get_attribute('href').split('/')[-1]
        from_address_description = from_address_element.text
        from_address = from_address_href + " (" + from_address_description+ ")"
        memo = memo_element.text
        memo = memo.split(":")[-1].strip()
        pass

    @logger
    def get_XLM_address_memo(self, start, target_network = "XLM"):

        from_address = former.get_attribute("href").split("/")[-1]
        memo = latter.text
        pass


In [4]:
filename = "files/BTC 완료.xlsx"
ec = ExplorerCrawler(filename, "입금")
ec.get_BCH_address(1)




[WDM] - Current google-chrome version is 103.0.5060
[WDM] - Get LATEST chromedriver version for 103.0.5060 google-chrome
[WDM] - Driver [/Users/bbrick/.wdm/drivers/chromedriver/mac64/103.0.5060.24/chromedriver] found in cache


TypeError: __init__() got an unexpected keyword argument 'service'