In [35]:
import os
import pandas as pd
import numpy as np
import plotly.graph_objects as go
from autogluon.timeseries import TimeSeriesPredictor, TimeSeriesDataFrame

# ---------------------------
# Config
# ---------------------------
PREDICTOR_PATH = "models/usdinr_chronos_predictor"
EVAL_CSV_PATH  = "Data/USDINR_day_2025-01-2025-09-Infer-processed.csv"
ITEM_ID_VALUE  = "USDINR"
TARGET_COL     = "Close"
PREDICT_LAST_K = 4

# ---------------------------
# Utility: Add features
# ---------------------------
def add_indicators(df: pd.DataFrame) -> pd.DataFrame:
    """Recompute technical + calendar features safely."""
    df = df.copy()

    # Ensure timestamp is datetime
    if not np.issubdtype(df["timestamp"].dtype, np.datetime64):
        df["timestamp"] = pd.to_datetime(df["timestamp"], errors="coerce")

    # Drop any bad rows with NaT
    df = df.dropna(subset=["timestamp"])

    # Calendar features
    df["DayOfWeek"] = df["timestamp"].dt.dayofweek
    df["Month"]     = df["timestamp"].dt.month

    # Moving averages
    df["SMA_3"]  = df[TARGET_COL].rolling(3).mean()
    df["SMA_7"]  = df[TARGET_COL].rolling(7).mean()
    df["SMA_14"] = df[TARGET_COL].rolling(14).mean()

    # Lag features
    df["Lag1"] = df[TARGET_COL].shift(1)
    df["Lag2"] = df[TARGET_COL].shift(2)
    df["Lag3"] = df[TARGET_COL].shift(3)

    df = df.fillna(method="bfill").fillna(method="ffill")
    return df

# ---------------------------
# Load evaluation CSV
# ---------------------------
def load_eval_ts() -> TimeSeriesDataFrame:
    df = pd.read_csv(EVAL_CSV_PATH)

    # Normalize column names
    df.columns = [c.strip() for c in df.columns]

    # Ensure Close exists
    if TARGET_COL not in df.columns:
        raise ValueError(f"Expected column '{TARGET_COL}' in {EVAL_CSV_PATH}, found: {df.columns}")

    # Add timestamp + item_id if missing
    if "timestamp" not in df.columns:
        raise ValueError("CSV must contain 'timestamp' column")
    if "item_id" not in df.columns:
        df["item_id"] = ITEM_ID_VALUE

    # Parse timestamp
    df["timestamp"] = pd.to_datetime(df["timestamp"], errors="coerce")

    # Drop invalid
    df = df.dropna(subset=["timestamp", TARGET_COL])

    # Add features
    df = add_indicators(df)

    # Build TS dataframe
    keep_cols = ["item_id", "timestamp", TARGET_COL] + [
        c for c in df.columns if c not in ["item_id", "timestamp"]
    ]
    ts_df = TimeSeriesDataFrame.from_data_frame(df[keep_cols], id_column="item_id", timestamp_column="timestamp")
    return ts_df

# ---------------------------
# Slice last history
# ---------------------------
def slice_history(ts_df: TimeSeriesDataFrame) -> TimeSeriesDataFrame:
    end_time = ts_df.index.levels[1].max()
    start_time = end_time - pd.Timedelta(days=30)  # last 30 days history
    return ts_df.loc[(ITEM_ID_VALUE, slice(start_time, end_time))]

# ---------------------------
# Build future covariates
# ---------------------------
def build_future_known(hist_slice: TimeSeriesDataFrame) -> TimeSeriesDataFrame:
    last_time = hist_slice.index.levels[1].max()
    future_times = pd.date_range(start=last_time + pd.Timedelta(days=1), periods=PREDICT_LAST_K, freq="D")

    future_df = pd.DataFrame({
        "item_id": ITEM_ID_VALUE,
        "timestamp": future_times
    })

    # Calendar features
    future_df["DayOfWeek"] = future_df["timestamp"].dt.dayofweek
    future_df["Month"]     = future_df["timestamp"].dt.month

    return TimeSeriesDataFrame.from_data_frame(future_df, id_column="item_id", timestamp_column="timestamp")

# ---------------------------
# Predict last K days
# ---------------------------
def predict_last_k(predictor, hist_slice):
    future_known = build_future_known(hist_slice)
    return predictor.predict(data=hist_slice, known_covariates=future_known)

# ---------------------------
# Plot actual vs predicted
# ---------------------------
def plot_actual_vs_pred(ts_df, preds):
    actual = ts_df.loc[ITEM_ID_VALUE][TARGET_COL]
    pred   = preds.loc[ITEM_ID_VALUE]["mean"]

    fig = go.Figure()
    fig.add_trace(go.Scatter(x=actual.index, y=actual, mode="lines+markers", name="Actual"))
    fig.add_trace(go.Scatter(x=pred.index,   y=pred,   mode="lines+markers", name="Predicted"))
    fig.update_layout(title="USDINR Actual vs Predicted", xaxis_title="Date", yaxis_title=TARGET_COL)
    fig.show()

# ---------------------------
# Main
# ---------------------------
def main():
    ts_df     = load_eval_ts()
    hist_slice = slice_history(ts_df)

    predictor = TimeSeriesPredictor.load(PREDICTOR_PATH)
    preds     = predict_last_k(predictor, hist_slice)

    print("Predictions:\n", preds)
    plot_actual_vs_pred(ts_df, preds)

if __name__ == "__main__":
    main()


ValueError: CSV must contain 'timestamp' column