# Dual AI Training: Predictive Steering and Anomaly Detection

This notebook trains two models on the hybrid wired/satellite datasets:

- Predictive model (supervised): LSTM classifier that predicts `current_optimal_path` 60 s ahead (`HORIZON = 6` steps, assuming a 10 s sampling interval).
- Anomaly detector (unsupervised): LSTM Autoencoder trained on normal data to detect anomalies via reconstruction error.

It is optimized to run on Kaggle (GPU optional). Models and artifacts are saved under `/kaggle/working/`.



In [None]:
# Environment and imports
%pip -q install "numpy==1.26.4" "pandas==2.2.2" "scikit-learn==1.4.2" "tensorflow==2.15.0" "seaborn==0.13.2" "matplotlib==3.8.4" "pyarrow==14.0.2" "fastparquet==2024.2.0"

import os
import json
import math
from pathlib import Path
from typing import Tuple

# Reduce verbose TensorFlow/CUDA logs
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "2"

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

import tensorflow as tf
from tensorflow.keras import layers, models, callbacks, optimizers

# GPU availability & memory growth
# Memory growth must be configured before TensorFlow initialises the GPUs,
# so this runs ahead of any other TensorFlow call that could touch a device.
physical_gpus = tf.config.list_physical_devices('GPU')
if physical_gpus:
    try:
        for gpu in physical_gpus:
            tf.config.experimental.set_memory_growth(gpu, True)
        print(f"GPUs detected: {len(physical_gpus)} -> using {physical_gpus[0].name}")
    except Exception as e:
        # Best effort: training still works without memory growth, so report and continue.
        print("GPU memory growth setup failed:", e)
else:
    print("No GPU detected; running on CPU")

# Seed Python's `random`, NumPy and TensorFlow in one call so weight
# initialisation and dropout masks are repeatable from run to run.
SEED = 42
tf.keras.utils.set_random_seed(SEED)

print(tf.__version__)
ARTIFACTS_DIR = Path('/kaggle/working')
# parents=True lets the notebook also run outside Kaggle, where /kaggle may not exist yet.
ARTIFACTS_DIR.mkdir(parents=True, exist_ok=True)



In [None]:
# Kaggle input locations (edit INPUT_DIR if your dataset slug differs)
INPUT_DIR = Path('/kaggle/input/satellite-and-wired')
DATASET_A = INPUT_DIR.joinpath('hybrid_v1_dataset_a.csv')
DATASET_B = INPUT_DIR.joinpath('hybrid_v1_dataset_b.csv')

# Fail fast, reporting the offending path, if either CSV is absent
for dataset_path in (DATASET_A, DATASET_B):
    assert dataset_path.exists(), dataset_path

for tag, dataset_path in (('A', DATASET_A), ('B', DATASET_B)):
    print(f'Dataset {tag}:', dataset_path)



In [None]:
# Utility: windowing and splits
from sklearn.preprocessing import StandardScaler

def create_sequences(df: pd.DataFrame, feature_cols, label_col=None, window: int = 12, horizon: int = 6):
    """
    Create overlapping windowed sequences of length `window`.

    Window `i` covers rows `i .. i+window-1`. If `label_col` is provided, the
    target for window `i` is the label found `horizon` rows after the window's
    last row. Assumes rows are chronological.

    `horizon` reduces the number of windows even when `label_col` is None, so
    labelled and unlabelled calls with the same arguments yield the same count.

    Args:
        df: Source frame, one row per time step.
        feature_cols: Column(s) stacked into each window.
        label_col: Optional column to read targets from.
        window: Number of consecutive rows per sequence (must be >= 1).
        horizon: Rows between a window's last row and its target (must be >= 0).

    Returns:
        (X, y). X has shape (n_windows, window, n_features). y has shape
        (n_windows,), or is None when `label_col` is None. If `df` is too short
        to produce a single window, X is empty but keeps its trailing
        dimensions, so downstream shape lookups such as `X.shape[-1]` still work.

    Raises:
        ValueError: If `window` < 1 or `horizon` < 0. Those values would
            otherwise produce ragged windows or wrap-around label indices.
    """
    if window < 1:
        raise ValueError(f"window must be >= 1, got {window}")
    if horizon < 0:
        raise ValueError(f"horizon must be >= 0, got {horizon}")

    features = df[feature_cols].values
    # Clamp at zero: a frame shorter than window + horizon yields no windows.
    n_windows = max(len(df) - window - horizon + 1, 0)

    if n_windows == 0:
        X = np.empty((0, window) + features.shape[1:], dtype=features.dtype)
    else:
        X = np.stack([features[start:start + window] for start in range(n_windows)])

    if label_col is None:
        return X, None

    labels = df[label_col].values
    # The first target sits `horizon` rows past the end of the first window.
    first_target = window - 1 + horizon
    y = labels[first_target:first_target + n_windows].copy()
    return X, y


def chronological_split(X, y=None, train=0.7, val=0.15):
    """
    Cut X (and y, when given) into contiguous train / validation / test parts.

    The first `train` fraction of samples forms the training part, the next
    `val` fraction the validation part, and whatever remains is the test part.
    Order is preserved; nothing is shuffled.

    Returns (X_train, X_val, X_test) when y is None, otherwise
    (X_train, y_train, X_val, y_val, X_test, y_test).
    """
    total = len(X)
    train_end = int(total * train)
    val_end = train_end + int(total * val)
    bounds = ((0, train_end), (train_end, val_end), (val_end, total))

    if y is None:
        return tuple(X[lo:hi] for lo, hi in bounds)

    parts = []
    for lo, hi in bounds:
        parts.extend((X[lo:hi], y[lo:hi]))
    return tuple(parts)



In [None]:
# Load datasets
usecols_common = [
    'wired_latency_ms','satellite_latency_ms',
    'wired_jitter_ms','satellite_jitter_ms',
    'wired_packet_loss_pct','satellite_packet_loss_pct',
    'wired_bandwidth_mbps','satellite_bandwidth_mbps',
    'wired_quality_cost','satellite_quality_cost'
]

# A: includes label current_optimal_path
A = pd.read_csv(DATASET_A, usecols=lambda c: (c in usecols_common) or (c=='current_optimal_path') or (c=='timestamp'))
B = pd.read_csv(DATASET_B, usecols=lambda c: (c in usecols_common) or (c=='timestamp'))

# A callable `usecols` just skips columns that are absent from the file, so
# verify the required columns here instead of hitting a KeyError in a later cell.
for frame_name, frame, required in (
    ('A', A, usecols_common + ['current_optimal_path']),
    ('B', B, usecols_common),
):
    missing_cols = [c for c in required if c not in frame.columns]
    if missing_cols:
        raise KeyError(f'Dataset {frame_name} is missing required columns: {missing_cols}')

# Encode label
label_codes = {'wired': 0, 'satellite': 1}
a_label = A['current_optimal_path'].str.strip().str.lower().map(label_codes)
# Values outside `label_codes` map to NaN; report them by name rather than
# letting astype(int) fail with an opaque casting error.
unmapped = a_label.isna()
if unmapped.any():
    bad_values = A.loc[unmapped, 'current_optimal_path'].unique().tolist()
    raise ValueError(f'Unexpected current_optimal_path values: {bad_values}')
a_label = a_label.astype(int)
A = A.drop(columns=['current_optimal_path'])

print('Shapes -> A:', A.shape, 'B:', B.shape)
A.head(), B.head()



In [None]:
# Scale features (numeric only; exclude timestamp)
import joblib

numeric_cols = list(usecols_common)  # copy, so the two lists cannot be mutated through each other

# Fit the scaler on the chronological training portion of A only. Fitting on
# every row would let validation/test statistics leak into the normalisation
# and make the held-out metrics optimistic.
TRAIN_FRACTION = 0.7  # keep in sync with the `train` default of chronological_split
n_fit_rows = max(int(len(A) * TRAIN_FRACTION), 1)
scaler = StandardScaler()
scaler.fit(A[numeric_cols].iloc[:n_fit_rows])

# Both datasets share the one fitted scaler, so a single scaler.pkl covers both models.
A_scaled = pd.DataFrame(
    scaler.transform(A[numeric_cols]),
    columns=numeric_cols
)
B_scaled = pd.DataFrame(
    scaler.transform(B[numeric_cols]),
    columns=numeric_cols
)

# Save scaler
joblib.dump(scaler, ARTIFACTS_DIR / 'scaler.pkl')
print('Scaler saved')

In [None]:
# Windowing configuration
FEATURES = numeric_cols
WINDOW = 12  # time-steps per input sequence (adjust to your sampling rate)
HORIZON = 6  # steps ahead to predict: 60s when samples are 10s apart

# Dataset A: attach the encoded label so every window gets a target HORIZON steps ahead
labelled_A = pd.concat([A_scaled, a_label.rename('label')], axis=1)
XA, ya = create_sequences(
    labelled_A,
    feature_cols=FEATURES,
    label_col='label',
    window=WINDOW,
    horizon=HORIZON,
)

# Dataset B: unlabelled windows for the autoencoder
XB, _ = create_sequences(
    B_scaled,
    feature_cols=FEATURES,
    label_col=None,
    window=WINDOW,
    horizon=1,
)

print('XA', XA.shape, 'ya', ya.shape, 'XB', XB.shape)



In [None]:
# Chronological splits using chronological_split's default fractions
(XA_tr, ya_tr,
 XA_val, ya_val,
 XA_te, ya_te) = chronological_split(XA, ya)
XB_tr, XB_val, XB_te = chronological_split(XB)

# Window counts per partition: A train/val/test, then B train/val/test
tuple(len(part) for part in (XA_tr, XA_val, XA_te, XB_tr, XB_val, XB_te))


In [None]:
# Predictive model (LSTM classifier)
num_features = XA.shape[-1]

# Two stacked LSTM layers with dropout, followed by a small dense head
# that ends in a 2-way softmax over the encoded path labels.
clf_layers = [
    layers.Input(shape=(WINDOW, num_features)),
    layers.LSTM(units=64, return_sequences=True),
    layers.Dropout(rate=0.2),
    layers.LSTM(units=32),
    layers.Dropout(rate=0.2),
    layers.Dense(units=16, activation='relu'),
    layers.Dense(units=2, activation='softmax'),
]
clf = models.Sequential(clf_layers)

# Labels are integer class ids, hence the sparse variant of cross-entropy
clf.compile(
    optimizer=optimizers.Adam(learning_rate=1e-3),
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)
clf.summary()



In [None]:
# Train classifier
clf_model_path = ARTIFACTS_DIR / 'predictive_clf.keras'

# NOTE: the checkpoint keeps the epoch with the best val_accuracy, while early
# stopping watches val_loss, so the two can select different epochs.
ckpt_clf = callbacks.ModelCheckpoint(
    filepath=str(clf_model_path),
    monitor='val_accuracy',
    save_best_only=True,
)
early = callbacks.EarlyStopping(
    monitor='val_loss',
    patience=5,
    restore_best_weights=True,
)

hist_clf = clf.fit(
    x=XA_tr,
    y=ya_tr,
    validation_data=(XA_val, ya_val),
    epochs=20,
    batch_size=256,
    callbacks=[ckpt_clf, early],
    verbose=1,
)

# Evaluate the checkpointed model (not the in-memory one) on the held-out test windows
clf_best = models.load_model(clf_model_path)
loss, acc = clf_best.evaluate(XA_te, ya_te, verbose=0)
predictive_metrics = {'test_loss': float(loss), 'test_accuracy': float(acc)}
print(predictive_metrics)
with open(ARTIFACTS_DIR / 'predictive_metrics.json', 'w') as f:
    json.dump(predictive_metrics, f, indent=2)



In [None]:
# Anomaly detector (LSTM Autoencoder)
gpu_present = bool(tf.config.list_physical_devices('GPU'))
use_device = "/GPU:0" if gpu_present else "/CPU:0"
strategy = tf.distribute.OneDeviceStrategy(device=use_device)
print("Autoencoder strategy device:", use_device)

with strategy.scope():
    # Encoder: reduce each (WINDOW, num_features) window to one 32-unit vector
    window_input = layers.Input(shape=(WINDOW, num_features))
    encoded_seq = layers.LSTM(64, return_sequences=True)(window_input)
    latent_vec = layers.LSTM(32)(encoded_seq)

    # Decoder: repeat the latent vector once per time step and unroll it back
    repeated = layers.RepeatVector(WINDOW)(latent_vec)
    decoded = layers.LSTM(32, return_sequences=True)(repeated)
    decoded = layers.LSTM(64, return_sequences=True)(decoded)
    reconstruction = layers.TimeDistributed(layers.Dense(num_features))(decoded)

    autoenc = models.Model(inputs=window_input, outputs=reconstruction)
    autoenc.compile(optimizer=optimizers.Adam(learning_rate=1e-3), loss='mse')

autoenc.summary()



In [None]:
# Train autoencoder on the training windows of dataset B (XB_tr),
# which this notebook treats as normal-only data.
ae_model_path = str(ARTIFACTS_DIR / 'autoencoder.keras')
ckpt_ae = callbacks.ModelCheckpoint(
    filepath=ae_model_path,
    monitor='val_loss',
    save_best_only=True,
)
early_ae = callbacks.EarlyStopping(
    monitor='val_loss',
    patience=5,
    restore_best_weights=True,
)

# Input and target are the same tensor: the network learns to reproduce its input
hist_ae = autoenc.fit(
    x=XB_tr,
    y=XB_tr,
    validation_data=(XB_val, XB_val),
    epochs=20,
    batch_size=256,
    callbacks=[ckpt_ae, early_ae],
    verbose=1,
)



In [None]:
# Anomaly scoring: reconstruction error distribution
best_ae = models.load_model(ARTIFACTS_DIR / 'autoencoder.keras')


def _window_mse(model, windows):
    """Return one mean squared reconstruction error per window (averaged over time steps and features)."""
    residual = windows - model.predict(windows, verbose=0)
    return np.mean(np.square(residual), axis=(1, 2))


recon_tr = _window_mse(best_ae, XB_tr)
recon_te = _window_mse(best_ae, XB_te)

# Threshold: 99th percentile of the training-window errors
threshold = float(np.percentile(recon_tr, 99))
metrics = {
    'train_mean_error': float(recon_tr.mean()),
    'train_99pct_threshold': threshold,
    'test_mean_error': float(recon_te.mean()),
}
print(metrics)
with open(ARTIFACTS_DIR / 'anomaly_metrics.json', 'w') as f:
    json.dump(metrics, f, indent=2)

# Plot distributions (saved to disk; the figure is closed without inline display)
fig, ax = plt.subplots(figsize=(8, 4))
sns.kdeplot(recon_tr, label='train', ax=ax)
sns.kdeplot(recon_te, label='test', ax=ax)
ax.axvline(threshold, color='r', linestyle='--', label='threshold@p99')
ax.legend()
ax.set_title('Reconstruction Error Distributions')
fig.savefig(ARTIFACTS_DIR / 'reconstruction_error_distributions.png', dpi=200, bbox_inches='tight')
plt.close(fig)



## What to expect
- `predictive_clf.keras`, `predictive_metrics.json`: classifier and test metrics (loss and accuracy).
- `autoencoder.keras`, `anomaly_metrics.json`: autoencoder and reconstruction error stats.
- `reconstruction_error_distributions.png`: density plot showing train vs test error and a 99th percentile threshold.

You can download artifacts from the right-hand panel (/kaggle/working) in Kaggle.



In [None]:
# Package artifacts for download (no changes to previous cells)
import os, json, shutil, zipfile
from pathlib import Path

import sklearn  # imported only to record the version that pickled scaler.pkl

art_dir = ARTIFACTS_DIR
bundle_dir = art_dir / 'bundle'
# Start from an empty bundle so files left over from an earlier run
# (for example a model that was not re-trained this time) are not re-packaged.
shutil.rmtree(bundle_dir, ignore_errors=True)
bundle_dir.mkdir(parents=True, exist_ok=True)

# Copy produced artifacts if present
files_to_copy = [
    'predictive_clf.keras',
    'autoencoder.keras',
    'scaler.pkl',
    'predictive_metrics.json',
    'anomaly_metrics.json',
    'reconstruction_error_distributions.png',
]
missing_files = []
for name in files_to_copy:
    src = art_dir / name
    if src.exists():
        shutil.copy2(src, bundle_dir / name)
    else:
        missing_files.append(name)
# Packaging stays best-effort, but an incomplete bundle is reported rather than hidden.
if missing_files:
    print('Skipped missing artifacts:', missing_files)

# Save a manifest with environment + training config
manifest = {
    'tensorflow': tf.__version__,
    'pandas': pd.__version__,
    'numpy': np.__version__,
    # Pickled scikit-learn estimators are tied to the library version that wrote them.
    'scikit-learn': sklearn.__version__,
    'feature_columns': list(FEATURES),
    'window': int(WINDOW),
    'horizon': int(HORIZON),
}
(bundle_dir / 'manifest.json').write_text(json.dumps(manifest, indent=2))

# Create zip
zip_path = art_dir / 'leo_dual_ai_artifacts.zip'
if zip_path.exists():
    zip_path.unlink()
# make_archive appends '.zip' itself, so pass the path without its suffix.
shutil.make_archive(str(zip_path.with_suffix('')), 'zip', root_dir=bundle_dir)
print('Created archive:', zip_path)



In [None]:
# Inspect archive contents (optional)
import zipfile

archive_path = ARTIFACTS_DIR / 'leo_dual_ai_artifacts.zip'
with zipfile.ZipFile(archive_path, mode='r') as archive:
    member_names = archive.namelist()

# One bullet per archive member
print('Archive files:')
for member in member_names:
    print(' -', member)

