In [4]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from bs4 import BeautifulSoup
import pandas as pd

driver = webdriver.Chrome()

try:
    driver.get("https://www.ezmoney.com.tw/ETF/Fund/Info?fundCode=49YTW")
    
    wait = WebDriverWait(driver, 10)
    wait.until(EC.presence_of_element_located((By.XPATH, "//*[contains(text(), '台積電')]")))
    
    html = driver.page_source
    soup = BeautifulSoup(html, 'html.parser')
    
    # 解析表格
    table = soup.find('table', class_='table')
    rows = table.find_all('tr')[2:]
    
    data = []
    for row in rows:
        cols = row.find_all('td')
        if len(cols) == 4:
            data.append({
                '股票代號': cols[0].text.strip(),
                '股票名稱': cols[1].text.strip(),
                '股數': cols[2].text.strip(),
                '持股權重': cols[3].text.strip()
            })
    
    df = pd.DataFrame(data)
    
    # 數據清理
    df['股數_數值'] = df['股數'].str.replace(',', '').astype(int)
    df['持股權重_數值'] = df['持股權重'].str.rstrip('%').astype(float)
    
    print(df)
    df.to_csv('stock_holdings.csv', index=False, encoding='utf-8-sig')
    
finally:
    driver.quit()

KeyError: '股數'

In [8]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from bs4 import BeautifulSoup
import pandas as pd

driver = webdriver.Chrome()

try:
    driver.get("https://www.ezmoney.com.tw/ETF/Fund/Info?fundCode=49YTW")
    
    wait = WebDriverWait(driver, 10)
    wait.until(EC.presence_of_element_located((By.XPATH, "//*[contains(text(), '台積電')]")))
    
    html = driver.page_source
    soup = BeautifulSoup(html, 'html.parser')
    
    # 方法1: 找到包含"台積電"的表格
    table = None
    for t in soup.find_all('table'):
        if '台積電' in t.text:
            table = t
            break
    
    # 或者方法2: 找到包含"股票代號"標題的表格
    # table = soup.find('table', string=lambda text: text and '股票代號' in text)
    
    # 或者方法3: 找到有 th 包含"股票代號"的表格
    # for t in soup.find_all('table'):
    #     if t.find('th', string='股票代號'):
    #         table = t
    #         break
    
    if table is None:
        print("找不到持股明細表格")
    else:
        print("找到表格了！")
        
        # 找到所有包含 span 的 tr（這是持股資料）
        data = []
        for row in table.find_all('tr'):
            # 檢查是否有 4 個 td，且 td 內有 span
            tds = row.find_all('td')
            if len(tds) == 4:
                spans = [td.find('span') for td in tds]
                if all(spans):  # 確保每個 td 都有 span
                    data.append({
                        '股票代號': spans[0].text.strip(),
                        '股票名稱': spans[1].text.strip(),
                        '股數': spans[2].text.strip(),
                        '持股權重': spans[3].text.strip()
                    })
        
        df = pd.DataFrame(data)
        
        # 數據清理
        df['股數_數值'] = df['股數'].str.replace(',', '').astype(int)
        df['持股權重_數值'] = df['持股權重'].str.rstrip('%').astype(float)
        
        print(f"\n共找到 {len(df)} 筆持股資料")
        print(df.head(10))
        
        df.to_csv('stock_holdings.csv', index=False, encoding='utf-8-sig')
        print("\n資料已儲存")
    
finally:
    driver.quit()

找到表格了！

共找到 50 筆持股資料
   股票代號   股票名稱         股數   持股權重    股數_數值  持股權重_數值
0  2330    台積電  3,062,000  9.41%  3062000     9.41
1  6669     緯穎    683,000  6.13%   683000     6.13
2  2383    台光電  1,878,000  6.06%  1878000     6.06
3  2345     智邦  2,357,000  5.98%  2357000     5.98
4  3017     奇鋐  1,975,000  5.77%  1975000     5.77
5  2368    金像電  3,988,000  5.05%  3988000     5.05
6  3665  貿聯-KY  1,532,848  4.74%  1532848     4.74
7  2308    台達電  2,457,000  4.71%  2457000     4.71
8  6223     旺矽    902,000  4.28%   902000     4.28
9  3653     健策    647,000  3.69%   647000     3.69

資料已儲存


In [13]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from bs4 import BeautifulSoup
import pandas as pd
import time

driver = webdriver.Chrome()

try:
    # 訪問頁面
    url = "https://www.capitalfund.com.tw/etf/product/detail/399/portfolio"
    driver.get(url)
    
    # 等待表格內容載入
    wait = WebDriverWait(driver, 15)
    wait.until(EC.presence_of_element_located((By.XPATH, "//*[contains(text(), '台積電')]")))
    time.sleep(2)
    
    # 找到並點擊「展開全部」或「收合」按鈕
    try:
        # 找到按鈕（可能顯示「展開全部」或「收合」）
        toggle_button = wait.until(
            EC.element_to_be_clickable((By.CSS_SELECTOR, '.pct-stock-table-tbody-toggle-btn'))
        )
        
        # 檢查按鈕文字，如果是「展開全部」就點擊
        button_text = toggle_button.text
        print(f"按鈕文字: {button_text}")
        
        if '展開' in button_text:
            print("點擊展開全部按鈕...")
            toggle_button.click()
            time.sleep(2)  # 等待展開動畫完成
        else:
            print("資料已經是展開狀態")
            
    except Exception as e:
        print(f"處理展開按鈕時發生錯誤: {e}")
        print("繼續嘗試抓取資料...")
    
    # 取得完整的 HTML
    html = driver.page_source
    soup = BeautifulSoup(html, 'html.parser')
    
    # 找到持股明細的容器
    table_body = soup.find('div', class_='pct-stock-table-tbody')
    
    if table_body:
        # 找到所有的 tr（只要桌面版的）
        rows = table_body.find_all('div', class_='tr show-for-medium')
        
        data = []
        for row in rows:
            cells = row.find_all('div', class_=['th', 'td'])
            
            if len(cells) >= 4:
                stock_code = cells[0].text.strip()
                stock_name = cells[1].text.strip()
                weight = cells[2].text.strip()
                shares = cells[3].text.strip()
                
                data.append({
                    '股票代號': stock_code,
                    '股票名稱': stock_name,
                    '持股權重': weight,
                    '股數': shares
                })
        
        # 轉換成 DataFrame
        df = pd.DataFrame(data)
        
        # 數據清理
        df['股數_數值'] = df['股數'].str.replace(',', '').astype(int)
        df['持股權重_數值'] = df['持股權重'].str.rstrip('%').astype(float)
        
        print(f"\n共找到 {len(df)} 筆持股資料")
        print("\n前 10 筆資料：")
        print(df.head(10))
        print("\n後 10 筆資料：")
        print(df.tail(10))
        
        # 儲存為 CSV
        df.to_csv('capital_fund_holdings.csv', index=False, encoding='utf-8-sig')
        print(f"\n所有 {len(df)} 筆資料已儲存至 capital_fund_holdings.csv")
        
    else:
        print("找不到持股明細表格")
    
finally:
    driver.quit()

按鈕文字: 展開全部
點擊展開全部按鈕...

共找到 57 筆持股資料

前 10 筆資料：
   股票代號  股票名稱   持股權重         股數    股數_數值  持股權重_數值
0  2330   台積電   8.9%  1,707,000  1707000     8.90
1  3017    奇鋐  6.61%  1,332,000  1332000     6.61
2  6669    緯穎  6.59%    433,000   433000     6.59
3  5536   聖暉*  6.34%  2,278,000  2278000     6.34
4  2345    智邦  4.83%  1,123,000  1123000     4.83
5  2360    致茂  3.66%  1,378,000  1378000     3.66
6  2449  京元電子  3.11%  4,084,000  4084000     3.11
7  6139    亞翔  3.06%  1,437,000  1437000     3.06
8  6805   富世達  3.05%    577,000   577000     3.05
9  2383   台光電  2.62%    478,000   478000     2.62

後 10 筆資料：
    股票代號     股票名稱   持股權重       股數   股數_數值  持股權重_數值
47  8996       高力  0.14%   78,000   78000     0.14
48  7769       鴻勁  0.13%   11,000   11000     0.13
49  1319       東陽  0.13%  360,000  360000     0.13
50  3529       力旺  0.09%   15,000   15000     0.09
51  4441     振大環球  0.05%   60,000   60000     0.05
52  2548       華固  0.01%   29,600   29600     0.01
53  7722  LINEPAY     0%    1,000 

In [12]:
df

Unnamed: 0,股票代號,股票名稱,持股權重,股數,股數_數值,持股權重_數值
0,2330,台積電,8.9%,1707000,1707000,8.9
1,3017,奇鋐,6.61%,1332000,1332000,6.61
2,6669,緯穎,6.59%,433000,433000,6.59
3,5536,聖暉*,6.34%,2278000,2278000,6.34
4,2345,智邦,4.83%,1123000,1123000,4.83
5,2360,致茂,3.66%,1378000,1378000,3.66
6,2449,京元電子,3.11%,4084000,4084000,3.11
7,6139,亞翔,3.06%,1437000,1437000,3.06
8,6805,富世達,3.05%,577000,577000,3.05
9,2383,台光電,2.62%,478000,478000,2.62
