# Model Training
,

cells
metadata
source
Train XGBoost and LSTM models, then plot the LSTM training curves.

In [None]:
import os
from pathlib import Path

import matplotlib.pyplot as plt
import pandas as pd
from sklearn.model_selection import train_test_split
from xgboost import XGBRegressor
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense, LSTM

# Columns of the processed feature table that are used as model inputs.
FEATURES = [
    "latitude",
    "longitude",
    "depth_km",
    "prev_magnitude",
    "quake_count_7d",
    "quake_count_30d",
    "avg_magnitude_30d",
    "max_magnitude_30d",
    "days_since_last_quake",
    "month",
    "seismic_zone",
]

# Pick the project root. Without COLAB_RELEASE_TAG in the environment it is
# the parent of the working directory; with it (presumably a Google Colab
# runtime) it is the "earthquake_model" folder inside a clone of the repo.
if "COLAB_RELEASE_TAG" not in os.environ:
    base_dir = Path("..").resolve()
else:
    repo_url = "https://github.com/vsiva763-git/Earthquake-alert-system.git"
    repo_dir = Path("/content/Earthquake-alert-system")
    if not repo_dir.exists():
        # Clone only when the checkout is missing; check=True raises if git fails.
        import subprocess
        subprocess.run(["git", "clone", repo_url, str(repo_dir)], check=True)
    base_dir = repo_dir / "earthquake_model"

data_path = base_dir.joinpath("data", "processed", "features.csv")

# Load the feature table, parsing the "time" column as datetimes, and preview it.
df = pd.read_csv(data_path, parse_dates=["time"])
df.head()

In [None]:
# Design matrix and target: missing feature values are replaced by 0.0 and
# the regression target is the "magnitude" column.
X = df.loc[:, FEATURES].fillna(0.0)
y = df["magnitude"].values

# Random 80/20 hold-out split with a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Hyper-parameters of the gradient-boosted regressor.
xgb_params = {
    "n_estimators": 300,
    "max_depth": 6,
    "learning_rate": 0.05,
    "subsample": 0.8,
    "colsample_bytree": 0.8,
    "objective": "reg:squarederror",
    "random_state": 42,
}
xgb = XGBRegressor(**xgb_params)
xgb.fit(X_train, y_train)

# Save the fitted model as JSON under <base_dir>/models.
models_dir = base_dir / "models"
models_dir.mkdir(exist_ok=True)
xgb.save_model(models_dir / "xgb_model.json")

In [None]:
import numpy as np
from tensorflow.keras.layers import Input

# Each training sample is a window of `sequence_len` consecutive events from
# one seismic zone; the target is the magnitude of the event that follows it.
sequence_len = 10
seq_features = ["latitude", "longitude", "depth_km", "magnitude", "days_since_last_quake", "seismic_zone"]

sequences = []
targets = []
for _, zone_df in df.groupby("seismic_zone"):
    zone_df = zone_df.sort_values("time")
    # Fill missing inputs with 0.0, matching the XGBoost cell; a single NaN
    # in a window would otherwise turn the MSE loss into NaN.
    data = zone_df[seq_features].fillna(0.0).values
    # Extract the target column once per zone instead of building a row
    # Series with .iloc on every iteration. Targets are left unfilled.
    zone_targets = zone_df["magnitude"].values
    for i in range(sequence_len, len(data)):
        # The window covers rows [i - sequence_len, i), so it never contains
        # the target row i itself.
        sequences.append(data[i - sequence_len : i])
        targets.append(zone_targets[i])

if not sequences:
    # Fail early with a clear message; train_test_split would otherwise
    # raise a less specific ValueError on the empty arrays.
    raise ValueError(
        f"No seismic zone has more than {sequence_len} events; "
        "cannot build LSTM training sequences."
    )

sequences = np.array(sequences)
targets = np.array(targets)

# NOTE(review): consecutive windows overlap, so a random split puts
# near-duplicate windows in both train and test — consider a chronological
# split if the test score is meant to measure forecasting ability.
X_train, X_test, y_train, y_test = train_test_split(
    sequences, targets, test_size=0.2, random_state=42
)

# Small recurrent regressor: one LSTM layer, one hidden dense layer, and a
# single linear output for the predicted magnitude.
lstm = Sequential([
    Input(shape=(sequence_len, len(seq_features))),
    LSTM(32),
    Dense(16, activation="relu"),
    Dense(1),
])
lstm.compile(optimizer="adam", loss="mse", metrics=["mae"])
# `history` keeps the per-epoch train/validation losses for plotting.
history = lstm.fit(X_train, y_train, validation_split=0.2, epochs=10, batch_size=32)

# Save the trained network under <base_dir>/models.
models_dir = base_dir / "models"
models_dir.mkdir(exist_ok=True)
lstm.save(models_dir / "lstm_model.keras")

In [None]:
# Plot the per-epoch training and validation loss recorded by the LSTM fit.
fig, ax = plt.subplots(figsize=(6, 4))
for history_key, curve_label in (("loss", "train"), ("val_loss", "val")):
    ax.plot(history.history[history_key], label=curve_label)
ax.set_title("LSTM Training Loss")
ax.set_xlabel("Epoch")
ax.set_ylabel("MSE")
ax.legend()
fig.tight_layout()