In [None]:
import os, math, numpy as np, pandas as pd, matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout, Bidirectional
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint

# --- CONFIG ---
# Fixed USD -> INR conversion factor applied to every price-derived column.
RATE_INR_PER_USD = 83.0
# Number of consecutive daily rows that make up one model input window.
LOOKBACK = 60
# Number of future daily steps produced by the recursive forecast.
FORECAST_STEPS = 30

# Seed NumPy and TensorFlow so repeated runs start from the same random state.
np.random.seed(42)
tf.random.set_seed(42)

# --- Load & Preprocess ---
# Path to the daily XRP/USD OHLC export. The original absolute path stays the
# default; set the XRP_CSV_PATH environment variable to point somewhere else
# without editing this file.
CSV_PATH = os.environ.get(
    "XRP_CSV_PATH",
    "/Users/vanshagarwal/Documents/GitHub/ML-Driven-Web-Platform-for-Cryptocurrency-Price-Forecasting_August_2025/Milestone1/crypto data/XRP_USD_historical_daily.csv",
)
df = pd.read_csv(CSV_PATH)
df["timestamp"] = pd.to_datetime(df["TIMESTAMP"], unit="s", utc=True)
df = df.sort_values("timestamp").set_index("timestamp").asfreq("D")

# --- Features from OHLC ---
# asfreq("D") inserts an all-NaN row for every missing calendar day. Gap-fill
# all four OHLC columns the same way (time interpolation, then ffill/bfill for
# the edges) so that no derived feature carries NaN into the training windows.
# The raw columns in `df` are left untouched.
_ohlc_filled = df[["OPEN", "HIGH", "LOW", "CLOSE"]].interpolate(method="time").ffill().bfill()

df["close_usd"] = _ohlc_filled["CLOSE"]
df["close_inr"] = df["close_usd"] * RATE_INR_PER_USD

# High-low spread and close-minus-open move of each daily row, in INR.
df["hl_range"] = (_ohlc_filled["HIGH"] - _ohlc_filled["LOW"]) * RATE_INR_PER_USD
df["oc_range"] = (_ohlc_filled["CLOSE"] - _ohlc_filled["OPEN"]) * RATE_INR_PER_USD

# 7- and 30-row rolling means of the INR close; the leading rows that do not
# yet have a full window are back-filled from the first complete value.
df["ma7"] = df["close_inr"].rolling(7).mean().bfill()
df["ma30"] = df["close_inr"].rolling(30).mean().bfill()

# Select features. Column 0 (close_inr) is the prediction target downstream.
features = ["close_inr", "hl_range", "oc_range", "ma7", "ma30"]
values = df[features].values.astype("float32")


# --- Scale, build windows, and split chronologically ---
# Each sample is LOOKBACK consecutive rows (all features) and its target is the
# close_inr value (column 0) of the row immediately after the window.
n = len(values) - LOOKBACK
if n <= 0:
    raise ValueError(
        f"Need more than LOOKBACK={LOOKBACK} rows to build sequences, got {len(values)}"
    )

# split sizes: the last 15% of windows are test, the 15% before that validation
n_test = int(0.15 * n)
n_val = int(0.15 * n)
n_train = n - n_val - n_test

# scale
# Fit the scaler only on rows that training windows can see (inputs and
# targets of windows 0 .. n_train-1 cover rows 0 .. n_train+LOOKBACK-1).
# Fitting on the full series would leak validation/test min/max into training.
# Values outside the training range simply map outside [0, 1].
scaler = MinMaxScaler()
scaler.fit(values[:n_train + LOOKBACK])
scaled = scaler.transform(values)

# make sequences
X = np.stack([scaled[start:start + LOOKBACK] for start in range(n)])
y = scaled[LOOKBACK:, 0]   # predict close_inr only

# split
X_train, y_train = X[:n_train], y[:n_train]
X_val, y_val = X[n_train:n_train + n_val], y[n_train:n_train + n_val]
X_test, y_test = X[n_train + n_val:], y[n_train + n_val:]

# Timestamps aligned with the targets (row LOOKBACK onward), used for plotting.
t = df.index.values[LOOKBACK:]
t_test = t[n_train + n_val:]

# --- Enhanced Model ---
# Stacked recurrent regressor: two bidirectional LSTM layers that return full
# sequences, one unidirectional LSTM that collapses the sequence, then a small
# dense head producing a single scaled close value. Every LSTM kernel carries
# an L2 penalty of 1e-4 and is followed by dropout.
#
# The input shape is declared with an explicit Input object. Passing
# `input_shape` to the LSTM wrapped inside Bidirectional does not reach the
# Sequential model's first layer (the wrapper), and newer Keras releases warn
# against the argument altogether.
model = Sequential([
    tf.keras.Input(shape=(LOOKBACK, X.shape[2])),
    Bidirectional(LSTM(128, return_sequences=True,
                       kernel_regularizer=tf.keras.regularizers.l2(1e-4))),
    Dropout(0.3),
    Bidirectional(LSTM(64, return_sequences=True,
                       kernel_regularizer=tf.keras.regularizers.l2(1e-4))),
    Dropout(0.3),
    LSTM(32, kernel_regularizer=tf.keras.regularizers.l2(1e-4)),
    Dropout(0.4),
    Dense(32, activation="relu"),
    Dense(1),
])
model.compile(optimizer=tf.keras.optimizers.Adam(1e-3), loss="mse")

# Checkpoint file name is derived from the coin symbol and the bar interval.
coin = "XRP"      # e.g. SOL, BTC, ETH
interval = "1d"
ckpt = f"{coin.lower()}_{'hourly' if interval=='1h' else 'daily'}_lstm.keras"

# All three callbacks watch validation loss.
early_stop = EarlyStopping(
    monitor="val_loss",
    patience=6,
    restore_best_weights=True,   # roll back to the best epoch when stopping
)
lr_on_plateau = ReduceLROnPlateau(
    monitor="val_loss",
    factor=0.5,                  # halve the learning rate after 3 stale epochs
    patience=3,
    min_lr=1e-5,
)
best_checkpoint = ModelCheckpoint(
    ckpt,
    monitor="val_loss",
    save_best_only=True,
)
callbacks = [early_stop, lr_on_plateau, best_checkpoint]

# --- Train ---
hist = model.fit(
    X_train,
    y_train,
    batch_size=32,
    epochs=50,
    validation_data=(X_val, y_val),
    callbacks=callbacks,
    verbose=1,
)

# --- Evaluate ---
def _scaled_close_to_inr(scaled_close):
    """Map scaled close values back to INR.

    The scaler was fitted on all feature columns, so the close column is
    padded with zero columns to the full feature width before inverting, and
    only column 0 of the result is kept.
    """
    close_col = np.asarray(scaled_close).reshape(-1, 1)
    padding = np.zeros((len(close_col), len(features) - 1))
    return scaler.inverse_transform(np.hstack([close_col, padding]))[:, 0]


pred_test_scaled = model.predict(X_test, verbose=0)
pred_test = _scaled_close_to_inr(pred_test_scaled)
y_test_actual = _scaled_close_to_inr(y_test)

# Error metrics on the INR scale.
mae_daily = mean_absolute_error(y_test_actual, pred_test)
rmse_daily = math.sqrt(mean_squared_error(y_test_actual, pred_test))
# The denominator is floored at 1e-8 to avoid division by zero.
relative_error = (y_test_actual - pred_test) / np.maximum(y_test_actual, 1e-8)
mape_daily = np.mean(np.abs(relative_error)) * 100
r2_daily = r2_score(y_test_actual, pred_test)

print(f"Daily Model → MAE ₹{mae_daily:.2f} | RMSE ₹{rmse_daily:.2f} | MAPE {mape_daily:.2f}% | R² {r2_daily:.4f}")

# --- Plots ---
# Figure 1: training vs validation loss per epoch.
_, loss_ax = plt.subplots(figsize=(8, 5))
for history_key, curve_label in (("loss", "Train"), ("val_loss", "Val")):
    loss_ax.plot(hist.history[history_key], label=curve_label)
loss_ax.set_title("Daily Model Training vs Validation Loss")
loss_ax.set_xlabel("Epoch")
loss_ax.set_ylabel("MSE Loss")
loss_ax.legend()
plt.show()

# Figure 2: actual vs predicted close over the test period, in INR.
_, test_ax = plt.subplots(figsize=(10, 5))
test_ax.plot(t_test, y_test_actual, label="Actual")
test_ax.plot(t_test, pred_test, label="Predicted")
test_ax.set_title("Daily Model: Actual vs Predicted (INR)")
test_ax.set_xlabel("Time")
test_ax.set_ylabel("Price (INR)")
test_ax.legend()
plt.show()

# --- Forecast 30 days ---
# Recursive multi-step forecast: predict one step, append it to the window,
# drop the oldest row, repeat. The model only outputs the scaled close, so the
# remaining feature columns of each appended row have to be synthesised.
# Leaving them at 0 (their scaled historical minimum) would feed the network
# inputs unlike anything it saw in training, so they are rebuilt here from the
# predicted closes instead.
n_features = X.shape[2]
last_window = scaled[-LOOKBACK:].copy().reshape(1, LOOKBACK, n_features)


def _scaled_close_to_price(scaled_value):
    """Invert the scaler for a single scaled close value (feature column 0)."""
    padded = np.zeros((1, n_features))
    padded[0, 0] = scaled_value
    return float(scaler.inverse_transform(padded)[0, 0])


# Trailing INR closes used to keep the 7- and 30-row means rolling forward.
# The window lengths mirror the ma7 / ma30 feature definitions.
close_history = df["close_inr"].iloc[-30:].tolist()

# Future high-low spreads are unknown; carry forward the mean of the last
# 7 observed spreads as a flat estimate (0.0 if none are available).
_recent_hl = df["hl_range"].dropna().iloc[-7:]
hl_estimate = float(_recent_hl.mean()) if len(_recent_hl) else 0.0

future_scaled = []
for _ in range(FORECAST_STEPS):
    nxt = float(model.predict(last_window, verbose=0)[0, 0])
    nxt_close = _scaled_close_to_price(nxt)
    prev_close = close_history[-1]
    close_history.append(nxt_close)

    # Raw (unscaled) feature values for the synthetic row, keyed by name so
    # the row follows whatever column order `features` defines.
    raw_row = {
        "close_inr": nxt_close,
        "hl_range": hl_estimate,
        # Approximates close - open by assuming each day opens at the
        # previous close. NOTE(review): confirm this holds for the data source.
        "oc_range": nxt_close - prev_close,
        "ma7": float(np.mean(close_history[-7:])),
        "ma30": float(np.mean(close_history[-30:])),
    }
    nxt_full = scaler.transform(
        np.array([[raw_row[name] for name in features]], dtype="float32")
    )
    # Keep the model's own scaled output in the close column so the
    # inverse/forward round trip cannot introduce drift.
    nxt_full[0, 0] = nxt

    future_scaled.append(nxt_full[0])
    last_window = np.concatenate(
        [last_window[:, 1:, :], nxt_full.reshape(1, 1, n_features)], axis=1
    )

future = scaler.inverse_transform(np.array(future_scaled))[:, 0]
future_times = pd.date_range(start=df.index[-1] + pd.Timedelta(days=1), periods=FORECAST_STEPS, freq="D")

# Plot the last 200 observed INR closes followed by the forecast horizon.
_, forecast_ax = plt.subplots(figsize=(10, 5))
recent_close = df["close_inr"].iloc[-200:]
forecast_ax.plot(df.index[-200:], recent_close, label="Recent Actual")
forecast_ax.plot(future_times, future, label="Forecast")
forecast_ax.set_title(f"Next {FORECAST_STEPS} Days Forecast (INR)")
forecast_ax.set_xlabel("Date")
forecast_ax.set_ylabel("Price (INR)")
forecast_ax.legend()
plt.show()
