In [1]:
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import Select
from webdriver_manager.chrome import ChromeDriverManager
import time
import numpy as np
import pandas as pd

In [2]:

def get_data(driver: webdriver.Chrome, target_data: dict) -> str:
    """Fill in the query form on the page currently loaded in ``driver`` and submit it.

    Args:
        driver: Browser session whose current page contains elements with the
            ids ``isnew``, ``co_id`` and ``year`` plus the query submit button.
        target_data: Form values keyed by element id. ``"isnew"`` is the value
            of the option to pick in the ``isnew`` drop-down; ``"co_id"`` and
            ``"year"`` are typed into the text fields with those ids.

    Returns:
        ``driver.page_source`` as read after the form has been submitted and
        both text fields have been cleared again.

    Raises:
        KeyError: If ``target_data`` lacks one of the three keys above.
    """
    isnew_selector = Select(driver.find_element(By.ID, "isnew"))
    co_id_field = driver.find_element(By.ID, "co_id")
    year_field = driver.find_element(By.ID, "year")

    isnew_selector.select_by_value(str(target_data["isnew"]))

    # send_keys appends to whatever the field already holds, so empty each
    # field first. Otherwise text left behind by an earlier call that failed
    # before its own cleanup would be concatenated with the new query.
    for field, key in ((co_id_field, "co_id"), (year_field, "year")):
        field.clear()
        field.send_keys(str(target_data[key]))

    submit_button = driver.find_element(By.XPATH, "//input[@value=' 查詢 ']")
    submit_button.click()

    # Leave the form empty after submitting, as callers have always seen it.
    co_id_field.clear()
    year_field.clear()
    return driver.page_source


def web_interact(co_ids: list, year_start=110, year_end=112, wait_seconds=3):
    """Scrape one table per company and year from the MOPS ``t163sb15`` page.

    A headless Chrome session is opened, and for every company id and every
    year in ``year_start..year_end`` (both inclusive) the query form is
    submitted via ``get_data`` and the resulting table is read via
    ``get_table_data``.

    Args:
        co_ids: Company ids to query, in the order they should be processed.
        year_start: First year to query.
        year_end: Last year to query (inclusive).
        wait_seconds: Pause after each form submission before the table is
            read. Defaults to the 3 seconds that used to be hard-coded.

    Returns:
        dict mapping each entry of ``co_ids`` to a list holding one
        ``get_table_data`` result per queried year, in ascending year order.
    """
    url = "https://mops.twse.com.tw/mops/web/t163sb15"

    chrome_options = Options()
    chrome_options.add_argument("--headless")  # run Chrome in headless mode
    # Fixed window size so that some elements do not fail to load.
    chrome_options.add_argument("--window-size=1920,1080")

    # Initialise the WebDriver.
    driver = webdriver.Chrome(
        service=Service(ChromeDriverManager().install()),
        options=chrome_options
    )

    all_company_page_src = {}

    # Everything that uses the browser runs inside try/finally so the Chrome
    # process is shut down even when a query or table parse raises.
    try:
        # Load the site.
        driver.get(url)
        driver.implicitly_wait(10)

        for co_id in co_ids:
            page_sources = []

            for year in range(year_start, year_end + 1):
                target_data = {
                    "isnew": "false",
                    "co_id": co_id,
                    "year": year
                }
                get_data(driver, target_data)
                # Pause before reading the table; presumably this gives the
                # page time to render the query result — confirm.
                time.sleep(wait_seconds)
                print(year)
                page_sources.append(get_table_data(driver))

            all_company_page_src[co_id] = page_sources
    finally:
        driver.quit()

    return all_company_page_src


def get_table_data(driver):
    """Read the ``hasBorder`` element on the current page into a DataFrame.

    Every ``tr`` after the first one contributes one column: the text of the
    row's ``th`` becomes the column name and the row's ``td`` texts, converted
    with ``cell_data_type_change``, become the column values. A row that
    raises while being read is reported with ``print`` and left out.

    Args:
        driver: Browser session whose current page contains the table.

    Returns:
        ``pandas.DataFrame`` built from the collected columns.
    """
    border_table = driver.find_element(By.CLASS_NAME, "hasBorder")
    all_rows = border_table.find_elements(By.TAG_NAME, "tr")

    # Extract the table data; the first row is skipped.
    columns = {}
    for tr in all_rows[1:]:
        try:
            value_cells = tr.find_elements(By.TAG_NAME, "td")
            label = tr.find_element(By.TAG_NAME, "th").text
            columns[label] = list(
                map(cell_data_type_change, (td.text for td in value_cells))
            )
        except Exception as e:
            print(e)
    return pd.DataFrame(columns)


def cell_data_type_change(cell_text):
    """Convert the text of one table cell to a float.

    Surrounding whitespace is ignored and thousands separators (``,``) are
    removed before conversion. A cell that is empty or holds only ``-`` is
    treated as a missing value.

    Args:
        cell_text: Cell content; it is passed through ``str`` first.

    Returns:
        The parsed ``float``, or ``numpy.nan`` for an empty or ``-`` cell.

    Raises:
        ValueError: If the remaining text is not a valid float literal.
    """
    text = str(cell_text).strip()
    # An empty cell or a lone dash carries no number. Returning NaN here keeps
    # the caller from discarding the whole row over one blank cell.
    if text in ('', '-'):
        return np.nan
    return float(text.replace(',', ''))


In [None]:
# Inclusive range of years to scrape for company 2330 (presumably ROC-calendar
# years as used by the MOPS site — confirm). Later cells read `year_start`
# and `test_data`.
year_start, year_end = 102, 113
test_data = web_interact([2330], year_start=year_start, year_end=year_end)

102
103
104
105
106
107
108
109
110
111
112
113


In [27]:

# Write each scraped table for company 2330 to its own CSV file in the working
# directory, named after its year (list position + year_start).
for i, td in enumerate(test_data[2330]):
    td.to_csv(f'./{i + year_start}.csv', index=False)
