In [5]:
import pandas as pd
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from bs4 import BeautifulSoup
import time

# 設定 Selenium WebDriver
driver = webdriver.Chrome()  # 確保 chromedriver 在您的 PATH 中
base_url = "https://fubon-ebrokerdj.fbs.com.tw/z/zc/zco/zco_2330.djhtm"
driver.get(base_url)

# 取得當天的日期
end_date = time.strftime('%Y-%m-%d')
start_date = "2023-01-01"
ver = "V3"

# 取得網頁來源碼並使用 BeautifulSoup 解析
page_source = driver.page_source
soup = BeautifulSoup(page_source, 'html.parser')

# 找到買超券商資料所在的表格
table = soup.find('table', {'class': 't01'})
if table:
    rows = table.find_all('tr')[1:]  # 跳過表頭

    buy_superior = []
    for row in rows:
        cells = row.find_all('td')
        if len(cells) < 4:
            continue  # 確保有足夠的欄位資料
        try:
            broker = cells[0].text.strip()
            buy = int(cells[1].text.replace(',', '').strip())
            sell = int(cells[2].text.replace(',', '').strip())
            net_buy = int(cells[3].text.replace(',', '').strip())
            buy_superior.append((broker, buy, sell, net_buy))
        except ValueError:
            continue  # 忽略非數字欄位

    # 按買超張數排序並取前五名
    top_5_buy_superior = sorted(buy_superior, key=lambda x: x[3], reverse=True)[:5]

    # 儲存結果為 DataFrame
    brokers_df = pd.DataFrame(top_5_buy_superior, columns=['券商', '買進', '賣出', '買超'])

    # 找出每個券商的買超明細
    detail_data = []
    
    for broker in top_5_buy_superior:
        broker_name = broker[0]
        # 模擬點擊券商名字的動作
        try:
            print(f"Processing broker: {broker_name}")
            link = WebDriverWait(driver, 10).until(
                EC.presence_of_element_located((By.XPATH, f"//a[contains(text(), '{broker_name}')]"))
            )
            link.click()
            time.sleep(3)  # 等待頁面加載

            # 在當前 URL 後添加參數
            current_url = driver.current_url
            detail_url = f"{current_url}&C=1&D={start_date}&E={end_date}&ver={ver}"
            driver.get(detail_url)
            time.sleep(3)  # 等待頁面加載

            # 取得詳細資料頁面內容
            detail_page = driver.page_source
            detail_soup = BeautifulSoup(detail_page, 'html.parser')

            # 找到日期範圍的資料
            detail_table = detail_soup.find('table', {'class': 't01'})
            if detail_table:
                detail_rows = detail_table.find_all('tr')[1:]  # 跳過表頭

                for detail_row in detail_rows:
                    detail_cells = detail_row.find_all('td')
                    if len(detail_cells) < 4:
                        continue  # 確保有足夠的欄位資料
                    try:
                        date = detail_cells[0].text.strip()
                        buy_detail = int(detail_cells[1].text.replace(',', '').strip())
                        sell_detail = int(detail_cells[2].text.replace(',', '').strip())
                        net_buy_detail = int(detail_cells[3].text.replace(',', '').strip())
                        detail_data.append((broker_name, date, buy_detail, sell_detail, net_buy_detail))
                    except ValueError:
                        continue  # 忽略非數字欄位

            # 返回上一頁（點擊兩次確保返回到目標頁面）
            driver.back()
            time.sleep(3)  # 等待頁面加載
            driver.back()
            time.sleep(3)  # 等待頁面加載

        except Exception as e:
            print(f"Error processing broker {broker_name}: {e}")

    # 儲存詳細資料為 DataFrame
    details_df = pd.DataFrame(detail_data, columns=['券商', '日期', '買進', '賣出', '買超'])

    # 合併結果
    final_df = pd.merge(brokers_df, details_df, on='券商', how='left')
    print(final_df)
else:
    print("未能找到目標表格")

# 關閉瀏覽器
driver.quit()


Processing broker: 美林
Processing broker: 新加坡商瑞銀
Processing broker: 美商高盛
Processing broker: 港商麥格理
Processing broker: 台新證券
        券商  買進_x  賣出_x  買超_x          日期  買進_y  賣出_y  買超_y
0       美林  4054  2452  1602  2024/05/17  4054  2452  6506
1       美林  4054  2452  1602  2024/05/16  4182  2808  6990
2       美林  4054  2452  1602  2024/05/15  3957   984  4942
3       美林  4054  2452  1602  2024/05/14  1557  3348  4905
4       美林  4054  2452  1602  2024/05/13  4959  1117  6077
...    ...   ...   ...   ...         ...   ...   ...   ...
1630  台新證券   374    52   322  2023/01/09   112  1029  1141
1631  台新證券   374    52   322  2023/01/06   136    30   166
1632  台新證券   374    52   322  2023/01/05    56   139   195
1633  台新證券   374    52   322  2023/01/04   542    22   564
1634  台新證券   374    52   322  2023/01/03   113    57   169

[1635 rows x 8 columns]


In [6]:
final_df

Unnamed: 0,券商,買進_x,賣出_x,買超_x,日期,買進_y,賣出_y,買超_y
0,美林,4054,2452,1602,2024/05/17,4054,2452,6506
1,美林,4054,2452,1602,2024/05/16,4182,2808,6990
2,美林,4054,2452,1602,2024/05/15,3957,984,4942
3,美林,4054,2452,1602,2024/05/14,1557,3348,4905
4,美林,4054,2452,1602,2024/05/13,4959,1117,6077
...,...,...,...,...,...,...,...,...
1630,台新證券,374,52,322,2023/01/09,112,1029,1141
1631,台新證券,374,52,322,2023/01/06,136,30,166
1632,台新證券,374,52,322,2023/01/05,56,139,195
1633,台新證券,374,52,322,2023/01/04,542,22,564
