# In [2]:
import pandas as pd
from pathlib import Path


# Input/output locations -- edit SRC to point at your own data folder.
#   macOS example:  Path("/Users/duncanwan/Desktop/learning/Bitcoin/4hrs/spot")
SRC = Path(r"C:\Users\Duncan Wan\Desktop\VSCODE\4hrs\spot")  # Windows
# Cleaned CSVs are written to a "cleaned" sub-folder of SRC.
OUT = SRC.joinpath("cleaned")
OUT.mkdir(exist_ok=True)  # no-op when the folder already exists
# Raw input files in SRC itself (non-recursive), in sorted path order.
files = sorted(SRC.glob("BTCUSDT-4h-20*-*.csv"))


# Accumulates one DataFrame per input file so they can be concatenated once.
all_frames = []

# Names assigned, in order, to the twelve columns of each raw CSV.
col_names = [
    "open_time",
    "open",
    "high",
    "low",
    "close",
    "volume",
    "close_time",
    "quote_asset_volume",
    "number_of_trades",
    "taker_buy_base_asset_volume",
    "taker_buy_quote_asset_volume",
    "ignore",
]

# Every column except the two timestamp columns and the "ignore" column,
# kept in the same order as col_names.
numeric_cols = [
    name
    for name in col_names
    if name not in ("open_time", "close_time", "ignore")
]


def to_datetime_auto(series: pd.Series) -> pd.Series:
    """Convert epoch timestamps of mixed resolution to UTC datetimes.

    Each value is first coerced to a number. Values of at least 1e15
    (16 or more digits) are read as microseconds since the epoch; all
    other values are read as milliseconds. Anything that cannot be
    parsed as a number becomes ``NaT``.

    Args:
        series: Raw timestamp values (numbers or numeric strings).

    Returns:
        A ``datetime64[ns, UTC]`` Series carrying the same index as
        ``series``.
    """
    microsecond_threshold = 1_000_000_000_000_000  # 1e15
    numeric = pd.to_numeric(series, errors="coerce")
    # NaN compares False here, so unparseable values take the "ms" branch
    # and come out as NaT.
    is_micro = numeric.ge(microsecond_threshold)

    result = pd.Series(pd.NaT, index=numeric.index, dtype="datetime64[ns, UTC]")
    for unit, selector in (("us", is_micro), ("ms", ~is_micro)):
        result[selector] = pd.to_datetime(numeric[selector], unit=unit, utc=True)
    return result

# Clean every raw file, write a per-file cleaned CSV, then export one
# combined CSV sorted by open_time.
if not files:
    # Without this guard an empty match only surfaces later as pandas'
    # opaque "No objects to concatenate" ValueError from pd.concat.
    raise FileNotFoundError(f"No input CSV files matched in {SRC}")

for fp in files:
    # The column names are supplied here because the files are read
    # without a header row.
    df = pd.read_csv(fp, header=None, names=col_names)

    # Epoch values may be in milliseconds or microseconds; normalise both
    # timestamp columns to UTC datetimes.
    df["open_time"] = to_datetime_auto(df["open_time"])
    df["close_time"] = to_datetime_auto(df["close_time"])

    # Drop the "ignore" column; errors="ignore" keeps this a no-op if the
    # column is ever absent.
    df = df.drop(columns=["ignore"], errors="ignore")

    df.to_csv(OUT / f"{fp.stem}_clean.csv", index=False)
    all_frames.append(df)

combined = (
    pd.concat(all_frames, ignore_index=True)
      .sort_values("open_time")
      .reset_index(drop=True)
)
combined.to_csv(OUT / "BTCUSDT_4h_2024_cleaned_v1.csv", index=False)