In [None]:
%pip install yfinance pandas requests beautifulSoup4
import yfinance as yf
import pandas as pd
import requests
from bs4 import BeautifulSoup

In [None]:
# Ticker symbols to collect, grouped by market.
taiwan_stocks = [
    "2330.TW",  # TSMC
    "2303.TW",  # UMC
]
us_stocks = [
    "AAPL",  # Apple
    "MSFT",  # Microsoft
]

# Accumulator for everything that gets downloaded. Each key maps to its own
# list; the lists are filled in parallel, so index i in every list refers to
# the same symbol.
data = {
    field: []
    for field in (
        "stock",
        "income_statement",
        "balance_sheet",
        "cash_flow",
        "stock_prices",
    )
}

In [None]:
# Fetch data for a US-listed stock through yfinance.
def fetch_us_stock_data(stock_symbol, period="10y"):
    """Download the financial statements and price history for one ticker.

    Args:
        stock_symbol: Ticker symbol handed to ``yf.Ticker`` (e.g. ``"AAPL"``).
        period: History window forwarded to ``Ticker.history``. Defaults to
            ``"10y"``, which is what this function has always requested (the
            previous inline comment said "1 year" and did not match the code).

    Returns:
        A 4-tuple ``(income_statement, balance_sheet, cash_flow, stock_prices)``
        built from the ticker's ``financials``, ``balance_sheet`` and
        ``cashflow`` attributes and the result of ``history(period=period)``.
    """
    print(f"正在獲取美股 {stock_symbol} 的數據...")
    ticker = yf.Ticker(stock_symbol)
    return (
        ticker.financials,      # income statement
        ticker.balance_sheet,   # balance sheet
        ticker.cashflow,        # cash-flow statement
        ticker.history(period=period),  # price history for the requested window
    )

In [None]:
from io import StringIO
# Fetch data for a Taiwan-listed stock: statements from the TWSE MOPS site,
# prices from Yahoo Finance.
def fetch_taiwan_stock_data(stock_symbol, year=2023, season=4, period="1y", timeout=30):
    """Download the financial statements and price history for one Taiwan ticker.

    Args:
        stock_symbol: Yahoo-style symbol such as ``"2330.TW"``. The part before
            the first dot is sent to MOPS as the company id; the full symbol is
            used for the Yahoo Finance price lookup.
        year: Value sent as the ``SYEAR`` query parameter (default 2023).
        season: Value sent as the ``SSEASON`` query parameter (default 4).
        period: History window forwarded to ``Ticker.history`` (default "1y").
        timeout: Seconds to wait for the MOPS request before ``requests``
            raises, instead of blocking indefinitely.

    Returns:
        A 4-tuple ``(income_statement, balance_sheet, cash_flow, stock_prices)``.
        When fewer than three tables can be parsed from the page, the three
        statements are separate empty DataFrames.
    """
    print(f"正在獲取台股 {stock_symbol} 的數據...")

    # Drop the Yahoo exchange suffix ("2330.TW" -> "2330") before querying MOPS.
    # NOTE(review): this assumes MOPS expects the bare company code in CO_ID — confirm.
    company_id = stock_symbol.split(".", 1)[0]
    response = requests.get(
        "https://mops.twse.com.tw/server-java/t164sb01",
        params={"step": 1, "CO_ID": company_id, "SYEAR": year, "SSEASON": season},
        timeout=timeout,
    )

    # NOTE(review): response.text relies on the encoding requests infers for
    # the page — confirm it matches what MOPS actually serves.
    try:
        tables = pd.read_html(StringIO(response.text))
    except ValueError:
        # pandas raises ValueError when the document contains no tables;
        # treat that the same as "too few tables" below.
        tables = []

    # Assumption carried over from the original code: the first three tables
    # are, in order, the income statement, the balance sheet and the cash-flow
    # statement. Adjust to the real page layout if that does not hold.
    if len(tables) >= 3:
        income_statement, balance_sheet, cash_flow = tables[:3]
    else:
        print(f"警告：無法從網頁取得 {stock_symbol} 的財務報表，將使用空白資料。")
        # Three independent frames, so editing one never affects the others.
        income_statement, balance_sheet, cash_flow = (pd.DataFrame() for _ in range(3))

    # Price history comes from Yahoo Finance, which needs the suffixed symbol.
    stock_prices = yf.Ticker(stock_symbol).history(period=period)
    return income_statement, balance_sheet, cash_flow, stock_prices

In [None]:
# Main routine: fetch data for every configured symbol, then write it to CSV.
import os

# The Taiwan-stock pass was disabled in the original notebook by wrapping it
# in a string literal. It stays off by default; set this to True to run it.
INCLUDE_TAIWAN_STOCKS = False

# Keys of `data` that hold per-symbol tables, in the order the fetch functions
# return them. The same names are used as CSV file-name suffixes below.
_TABLE_KEYS = ("income_statement", "balance_sheet", "cash_flow", "stock_prices")


def _store_result(symbol, tables):
    """Put the tables for `symbol` into `data`, replacing any earlier entry.

    Replacing instead of always appending keeps the parallel lists free of
    duplicates when this cell is executed more than once in the same kernel.
    """
    if symbol in data["stock"]:
        position = data["stock"].index(symbol)
        for key, table in zip(_TABLE_KEYS, tables):
            data[key][position] = table
    else:
        data["stock"].append(symbol)
        for key, table in zip(_TABLE_KEYS, tables):
            data[key].append(table)


# Pair each symbol list with the function that knows how to fetch it.
_sources = [(us_stocks, fetch_us_stock_data)]
if INCLUDE_TAIWAN_STOCKS:
    _sources.append((taiwan_stocks, fetch_taiwan_stock_data))

for _symbols, _fetch in _sources:
    for stock in _symbols:
        _store_result(stock, _fetch(stock))

# Save everything collected so far to local files, one directory per symbol.
for i, stock in enumerate(data["stock"]):
    print(f"保存 {stock} 的數據...")
    path = f"./{stock}"
    # exist_ok avoids the separate isdir check and tolerates re-runs.
    os.makedirs(path, exist_ok=True)
    for key in _TABLE_KEYS:
        data[key][i].to_csv(f"./{stock}/{stock}_{key}.csv")