In [1]:
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import Select
from webdriver_manager.chrome import ChromeDriverManager
import time
import numpy as np
import pandas as pd

In [4]:

def get_data(driver: webdriver.Chrome, target_data: dict) -> str:
    """Fill in the query form on the currently loaded page and submit it.

    Args:
        driver: WebDriver whose current page contains the elements with ids
            ``isnew``, ``co_id`` and ``year`` plus the submit button.
        target_data: Mapping with the keys ``"isnew"`` (the option value to
            pick in the ``isnew`` select), ``"co_id"`` and ``"year"`` (typed
            into the text fields of the same name).

    Returns:
        ``driver.page_source`` as read right after the submit click. No
        explicit wait is performed here, so the result may not be rendered
        yet at that point; callers that need the result should wait first.
    """
    isnew_selector = Select(driver.find_element(By.ID, "isnew"))
    co_id_field = driver.find_element(By.ID, "co_id")
    year_field = driver.find_element(By.ID, "year")

    isnew_selector.select_by_value(target_data["isnew"])

    # Clear *before* typing instead of after submitting: send_keys appends to
    # whatever is already in the field, so text left behind by an earlier call
    # that failed part-way would otherwise be concatenated into this query.
    # It also avoids touching the fields again once the click may have
    # re-rendered the form.
    for field, key in ((co_id_field, "co_id"), (year_field, "year")):
        field.clear()
        # Values may be passed as ints (e.g. year=110); type them as text.
        field.send_keys(str(target_data[key]))

    submit_button = driver.find_element(By.XPATH, "//input[@value=' 查詢 ']")
    submit_button.click()

    return driver.page_source


def web_interact(co_ids: list, year_start=110, year_end=112):
    """Query the page once per company and year and collect the result tables.

    A headless Chrome session is started, the query page is opened, and for
    every ``co_id`` in ``co_ids`` and every year from ``year_start`` through
    ``year_end`` (inclusive) the form is submitted via ``get_data`` and the
    resulting table is read via ``get_table_data``.

    Args:
        co_ids: Company ids to query; each one is used as a key of the result.
        year_start: First year to query (inclusive).
        year_end: Last year to query (inclusive).

    Returns:
        dict mapping each ``co_id`` to a list holding one ``get_table_data``
        result per queried year, in ascending year order.

    The browser is always shut down, even when a query raises.
    """
    url = "https://mops.twse.com.tw/mops/web/t163sb15"

    chrome_options = Options()
    chrome_options.add_argument("--headless")  # run Chrome in headless mode
    # Original author's note: set a window size so that some elements do not
    # fail to load.
    chrome_options.add_argument("--window-size=1920,1080")

    # Initialise the WebDriver.
    driver = webdriver.Chrome(
        service=Service(ChromeDriverManager().install()),
        options=chrome_options
    )

    all_company_page_src = {}

    # try/finally guarantees driver.quit() runs; without it any exception in
    # the loop would leave the Chrome process running.
    try:
        # Load the site.
        driver.get(url)
        driver.implicitly_wait(10)

        for co_id in co_ids:
            page_sources = []

            for year in range(year_start, year_end + 1):
                target_data = {
                    "isnew": "false",
                    "co_id": co_id,
                    "year": year
                }
                get_data(driver, target_data)
                # Fixed pause after submitting, before the table is read.
                time.sleep(3)
                print(year)
                page_sources.append(get_table_data(driver))

            all_company_page_src[co_id] = page_sources
    finally:
        driver.quit()

    return all_company_page_src


def get_table_data(driver):
    """Read the first element with class ``hasBorder`` into a DataFrame.

    Every ``<tr>`` after the first one is expected to hold a ``<th>`` label
    and some ``<td>`` cells. The label text becomes a column name and the
    converted cell values become that column's data. A row that raises for
    any reason (for example because it has no ``<th>``) is skipped after its
    exception has been printed.

    Args:
        driver: WebDriver positioned on a page that shows the table.

    Returns:
        pandas.DataFrame with one column per successfully parsed row.
    """
    result_table = driver.find_element(By.CLASS_NAME, "hasBorder")
    all_rows = result_table.find_elements(By.TAG_NAME, "tr")

    # Extract the table data; the first <tr> is not parsed.
    columns = {}
    for tr in all_rows[1:]:
        try:
            value_cells = tr.find_elements(By.TAG_NAME, "td")
            label = tr.find_element(By.TAG_NAME, "th").text
            columns[label] = list(
                map(cell_data_type_change, (td.text for td in value_cells))
            )
        except Exception as err:
            print(err)
    return pd.DataFrame(columns)


def cell_data_type_change(cell_text):
    """Convert the text of one table cell into a float.

    Args:
        cell_text: Cell content, e.g. ``'1,234.5'``, ``'-3,295,773'`` or
            ``'-'``. Non-string input is converted with ``str`` first.

    Returns:
        ``numpy.nan`` when the cell is empty or holds only ``'-'`` (ignoring
        surrounding whitespace); otherwise the number as a float, with
        thousands separators removed.

    Raises:
        ValueError: If the remaining text is not a valid number.
    """
    text = str(cell_text).strip()
    # '-' is the placeholder for "no value"; an empty cell has no value
    # either, so both map to NaN rather than failing in float().
    if text in ('', '-'):
        return np.nan
    return float(text.replace(',', ''))


In [5]:
# Scrape company 2330 with the default year range (110 through 112).
# web_interact prints each year as it is processed.
test_data = web_interact([2330])

110
111
112


In [6]:
# Show the third table collected for company 2330, i.e. the one for year 112
# given the default 110-112 range.
test_data[2330][2]


Unnamed: 0,營業收入,營業成本,原始認列生物資產及農產品之利益（損失）,生物資產當期公允價值減出售成本之變動利益（損失）,營業毛利（毛損）,未實現銷貨（損）益,已實現銷貨（損）益,營業毛利（毛損）淨額,營業費用,其他收益及費損淨額,...,本期淨利（淨損）,其他綜合損益（淨額）,本期綜合損益總額,淨利（淨損）歸屬於母公司業主,淨利（淨損）歸屬於共同控制下前手權益,淨利（淨損）歸屬於非控制權益,綜合損益總額歸屬於母公司業主,綜合損益總額歸屬於共同控制下前手權益,綜合損益總額歸屬於非控制權益,基本每股盈餘（元）
0,508633000.0,222132581.0,,,286500400.0,,,286500400.0,55309344.0,47109.0,...,206949036.0,-3295773.0,203653263.0,206986561.0,,-37525.0,203834717.0,,-181454.0,7.98
1,989474200.0,442773988.0,,,546700200.0,,,546700200.0,113503904.0,-135.0,...,388666042.0,2363331.0,391029373.0,388785582.0,,-119540.0,392021727.0,,-992354.0,14.99
2,1536207000.0,692864282.0,,,843342700.0,,,843342700.0,182212695.0,131095.0,...,599461316.0,28412556.0,627873872.0,599785521.0,,-324205.0,629307238.0,,-1433366.0,23.13
3,2161736000.0,986625213.0,,,1175111000.0,,,1175111000.0,253833716.0,188694.0,...,837767517.0,-8813644.0,828953873.0,838497664.0,,-730147.0,830509542.0,,-1555669.0,32.34
