In [1]:
import pandas as pd
import io
import requests
import os
import time

from datetime import datetime
from dateutil.relativedelta import relativedelta

In [2]:
def transform_date(date):
    """Convert a Republic of China (Minguo) calendar date string to Gregorian.

    The input is split on "/" into exactly three parts (year, month, day);
    1911 is added to the year and the month and day parts are passed through
    unchanged, e.g. "110/02/01" -> "2021/02/01".

    Raises:
        ValueError: if the string does not split into exactly three parts or
            the year part is not an integer.
    """
    roc_year, month, day = date.split("/")
    gregorian_year = int(roc_year) + 1911
    return "/".join([str(gregorian_year), month, day])

def process_data(data):
    """Return `data` with every "," and every "--" removed.

    Commas are removed first, then "--". Presumably the commas are thousands
    separators and "--" is the feed's no-value marker (confirm against the
    TWSE reports); a value consisting only of "--" becomes the empty string.
    """
    for token in (",", "--"):
        data = data.replace(token, "")
    return data

In [3]:
# Download the MI_5MINS_HIST (index open/high/low/close) and MFI94U
# (total-return index) CSV reports from TWSE into `hist` and `totalReturn`.
REQUEST_TIMEOUT = 30  # seconds; requests never times out unless told to

with requests.Session() as s:
    # relativedelta(day=1) sets the day-of-month to 1 (absolute field), so the
    # query date is the first day of the current month.
    end = datetime.now() + relativedelta(day=1)
    d = end.strftime("%Y%m%d")

    histURL = f"https://www.twse.com.tw/indicesReport/MI_5MINS_HIST?response=csv&date={d}"
    totalReturnURL = f"https://www.twse.com.tw/indicesReport/MFI94U?response=csv&date={d}"

    r = s.get(histURL, timeout=REQUEST_TIMEOUT)
    # Fail here on 4xx/5xx instead of handing an error page to read_csv.
    r.raise_for_status()
    c = r.content
    # header=1: column names are taken from the second line of the payload.
    # "Unnamed: 5" is the nameless sixth column pandas creates (presumably from
    # a trailing comma on each row -- TODO confirm).
    hist = pd.read_csv(io.StringIO(c.decode("big5")), header=1).drop("Unnamed: 5", axis=1)

    r = s.get(totalReturnURL, timeout=REQUEST_TIMEOUT)
    r.raise_for_status()
    c = r.content
    # Same layout as above, with the nameless column in third position.
    totalReturn = pd.read_csv(io.StringIO(c.decode("big5")), header=1).drop("Unnamed: 2", axis=1)

In [4]:
# Place the two reports side by side (index-aligned join) and derive typed,
# English-named columns from the raw string columns.
df = hist.join(totalReturn)
# Each report carries its own date column; the row-wise join is only valid if
# the two agree on every row.
assert (df["日期"] == df["日　期"]).all(), "hist and totalReturn dates do not line up row by row"

df["Date"] = pd.to_datetime(df["日期"].apply(transform_date), format="%Y/%m/%d")

# target column -> raw source column; every numeric column is cleaned the same way.
numeric_columns = {
    "Open": "開盤指數",
    "High": "最高指數",
    "Low": "最低指數",
    "Close": "收盤指數",
    "Adj Close": "發行量加權股價報酬指數",
}
for target, source in numeric_columns.items():
    # na_action="ignore" leaves missing cells alone instead of calling
    # process_data on a float NaN.
    cleaned = df[source].map(process_data, na_action="ignore")
    # process_data turns "--" into "", which cannot be cast to float; blank
    # cells become NaN so a placeholder no longer aborts the whole cell.
    df[target] = cleaned.mask(cleaned == "").astype(float)

df["Dividends"] = 0
df["Stock Splits"] = 0
df

Unnamed: 0,日期,開盤指數,最高指數,最低指數,收盤指數,日　期,發行量加權股價報酬指數,Date,Open,High,Low,Close,Adj Close,Dividends,Stock Splits
0,110/02/01,15176.56,15429.98,15089.96,15410.09,110/02/01,29749.49,2021-02-01,15176.56,15429.98,15089.96,15410.09,29749.49,0,0
1,110/02/02,15546.69,15838.15,15546.69,15760.05,110/02/02,30425.11,2021-02-02,15546.69,15838.15,15546.69,15760.05,30425.11,0,0


In [5]:
# Inspect dtypes: the raw report columns stay object (strings), while the
# derived columns are datetime64 / float64 / int64.
df.dtypes

日期                      object
開盤指數                    object
最高指數                    object
最低指數                    object
收盤指數                    object
日　期                     object
發行量加權股價報酬指數             object
Date            datetime64[ns]
Open                   float64
High                   float64
Low                    float64
Close                  float64
Adj Close              float64
Dividends                int64
Stock Splits             int64
dtype: object

In [19]:
# The date columns of the two joined reports must agree on every row; the
# selection of mismatching rows is therefore expected to be empty.
assert df[df["日期"] != df["日　期"]].empty, "date columns of the two reports disagree"