In [2]:
from pathlib import Path
import pandas as pd

# Folder holding the raw funding-rate CSV exports; cleaned copies are written
# to a "cleaned" subfolder next to them.
SRC = Path(r"C:\Users\Duncan Wan\Desktop\VSCODE\4hrs\funding")
OUT = SRC.joinpath("cleaned")
OUT.mkdir(exist_ok=True)

# Sorted so the files are processed in lexicographic filename order.
files = sorted(SRC.glob("BTCUSDT-fundingRate-*.csv"))

def to_dt_auto_ms_us(s: pd.Series) -> pd.Series:
    """Convert epoch timestamps of mixed ms/us resolution to UTC datetimes.

    Each value is coerced to a number; values that cannot be parsed become
    NaT. Numbers at or above 1e15 are read as microseconds since the epoch,
    everything else as milliseconds.

    Args:
        s: Series of epoch timestamps (numeric or numeric-looking strings).

    Returns:
        A ``datetime64[ns, UTC]`` Series sharing the index of ``s``.
    """
    numeric = pd.to_numeric(s, errors="coerce")

    # NaN compares False here, so unparseable entries fall into the
    # millisecond branch and come out as NaT.
    is_micro = numeric >= 1_000_000_000_000_000
    is_milli = ~is_micro

    result = pd.Series(pd.NaT, index=numeric.index, dtype="datetime64[ns, UTC]")
    for mask, unit in ((is_micro, "us"), (is_milli, "ms")):
        result[mask] = pd.to_datetime(numeric[mask], unit=unit, utc=True)
    return result

# Clean every per-file funding-rate CSV, write a cleaned copy of each, then
# merge them into one de-duplicated, time-sorted CSV.

# pd.concat raises a bare "No objects to concatenate" on an empty list; fail
# up front with a message that names the pattern and folder instead.
if not files:
    raise ValueError(f"No BTCUSDT-fundingRate-*.csv files found in {SRC}")

# Cleaned per-file frames, collected for the combined output below.
all_frames = []

for fp in files:
    df = pd.read_csv(fp)  # header present
    # Strip stray whitespace from header names before looking columns up.
    df.columns = [c.strip() for c in df.columns]

    # expected: calc_time, funding_interval_hours, last_funding_rate
    df["calc_time"] = to_dt_auto_ms_us(df["calc_time"])
    # Unparseable rates become NaN rather than raising.
    df["last_funding_rate"] = pd.to_numeric(df["last_funding_rate"], errors="coerce")

    # drop the constant 8h column if you don't need it
    # (errors="ignore" makes this a no-op when the column is absent)
    df = df.drop(columns=["funding_interval_hours"], errors="ignore")

    df.to_csv(OUT / f"{fp.stem}_clean.csv", index=False)
    all_frames.append(df)

# De-duplicate on calc_time before sorting, so the first occurrence in file
# order is the one that is kept.
combined = (
    pd.concat(all_frames, ignore_index=True)
      .drop_duplicates(subset=["calc_time"])
      .sort_values("calc_time")
      .reset_index(drop=True)
)
combined.to_csv(OUT / "BTCUSDT_funding_combined.csv", index=False)
combined.head()

Unnamed: 0,calc_time,last_funding_rate
0,2024-01-01 00:00:00+00:00,0.000374
1,2024-01-01 08:00:00+00:00,0.000272
2,2024-01-01 16:00:00+00:00,0.000336
3,2024-01-02 00:00:00+00:00,0.000658
4,2024-01-02 08:00:00+00:00,0.000352
