Visualization & Dashboard

In [8]:
# =============================================================================
# Stress Prediction Visualization & Dashboard - Final Colab Version (Stable)
# =============================================================================

# Mount Google Drive at /content/drive (BASE_PATH below lives under this mount)
from google.colab import drive
drive.mount('/content/drive')

# ----------------------------
# Results root on Drive
# ----------------------------
import os
BASE_PATH = "/content/drive/My Drive/stress-project/results"

# ----------------------------
# Ensure the output sub-directories exist
# ----------------------------
folders = [
    "figures/pca_tsne",
    "figures/roc_curves",
    "figures/time_series",
    "metrics"
]
# exist_ok=True keeps this step safe to re-run when the directories are already there.
for subdir in folders:
    os.makedirs(os.path.join(BASE_PATH, subdir), exist_ok=True)
print(" All folders created successfully")

# ----------------------------
# Import Libraries
# ----------------------------
import json
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.decomposition import PCA
from sklearn.manifold import TSNE
from sklearn.metrics import roc_curve, auc

# Global plot styling for every figure below (sns.set is an alias of set_theme).
sns.set_theme(context="notebook", style="whitegrid")

# ----------------------------
#  Load HRV Data
# ----------------------------
# Produces, for the sections below:
#   X_hrv            feature matrix (numeric feature columns only)
#   timestamps_hrv   one timestamp per record (synthetic if the file has none)
#   participants_hrv participant id per record, as strings
#   y_hrv            binary labels
#   hrv_stress_prob  anomaly scores min-max scaled to [0, 1]
hrv_file = os.path.join(BASE_PATH, "hrv_anomalies_week3.parquet")
hrv_parquet = pd.read_parquet(hrv_file)

print(" HRV Columns:", list(hrv_parquet.columns))

# Extract features: keep numeric columns only and drop label/score/metadata
# columns. 'timestamp' and 'participant' are handled separately below, so they
# must not leak into the matrix that PCA/t-SNE consume.
non_feature_cols = {'anomaly', 'recon_error', 'timestamp', 'participant'}
numeric_cols = set(hrv_parquet.select_dtypes(include='number').columns)
feature_cols = [
    c for c in hrv_parquet.columns
    if c in numeric_cols and c not in non_feature_cols
]
X_hrv = hrv_parquet[feature_cols].values

# Handle timestamps
if 'timestamp' in hrv_parquet.columns:
    timestamps_hrv = pd.to_datetime(hrv_parquet['timestamp'])
else:
    print(" No 'timestamp' column found — generating synthetic timestamps (1 per record)")
    timestamps_hrv = pd.date_range(start='2025-01-01', periods=len(hrv_parquet), freq='5min')

# Participant info
if 'participant' in hrv_parquet.columns:
    participants_hrv = hrv_parquet['participant'].astype(str).tolist()
else:
    participants_hrv = ['P_unknown'] * len(hrv_parquet)

# Anomaly predictions are optional; load them once and reuse them for both the
# label fallback and the stress probabilities. hrv_pred is None when absent.
hrv_pred_file = os.path.join(BASE_PATH, "hrv_pred_anomaly.npy")
hrv_pred = np.load(hrv_pred_file) if os.path.exists(hrv_pred_file) else None
if hrv_pred is not None and len(hrv_pred) != len(hrv_parquet):
    # Fail here with a clear message instead of an obscure shape error in a
    # later plotting step.
    raise ValueError(
        f"hrv_pred_anomaly.npy has {len(hrv_pred)} entries but the HRV table "
        f"has {len(hrv_parquet)} records"
    )

# Labels
if 'anomaly' in hrv_parquet.columns:
    y_hrv = hrv_parquet['anomaly'].values
    print(" Using 'anomaly' column as HRV labels")
elif hrv_pred is not None:
    # Top quartile of the prediction scores is treated as the positive class.
    y_hrv = (hrv_pred > np.percentile(hrv_pred, 75)).astype(int)
    print(" Using thresholded predictions as HRV labels")
else:
    y_hrv = np.zeros(len(hrv_parquet), dtype=int)
    print(" No labels found, using zeros")

# Stress probability
if hrv_pred is not None:
    pred_min, pred_max = hrv_pred.min(), hrv_pred.max()
    if pred_max != pred_min:
        hrv_stress_prob = (hrv_pred - pred_min) / (pred_max - pred_min)
    else:
        # Constant scores carry no information; avoid dividing by zero.
        hrv_stress_prob = np.zeros(len(hrv_pred))
    print(" HRV stress probabilities computed")
else:
    # Placeholder values only. Seeded so repeated runs export the same numbers.
    hrv_stress_prob = np.random.default_rng(42).random(len(hrv_parquet))
    print(" Using random stress probabilities (no prediction file found)")

print(f" HRV ready | Records: {len(hrv_parquet)} | Features: {len(feature_cols)}")

# ----------------------------
#  Load WESAD Results
# ----------------------------
wesad_file = os.path.join(BASE_PATH, "wesad_lstm_results.json")
with open(wesad_file, "r") as fh:
    wesad_results = json.load(fh)

print(" WESAD Keys:", wesad_results.keys())

# Missing metrics fall back to None (an empty dict for the per-class AUROC).
wesad_accuracy = wesad_results.get("Test Accuracy")
wesad_loss = wesad_results.get("Loss")
wesad_auroc = wesad_results.get("AUROC_per_class", {})

for summary_line in (
    "\n WESAD Summary:",
    f"  - Test Accuracy: {wesad_accuracy}",
    f"  - Loss: {wesad_loss}",
    f"  - AUROC (per class): {wesad_auroc}",
):
    print(summary_line)

# Single-row placeholders: only summary metrics are read from the JSON above,
# so there are no per-sample arrays to plot for WESAD.
y_wesad = np.zeros((1,))
X_wesad = np.zeros((1, 5))
wesad_pred_prob = np.zeros_like(y_wesad)
participants_wesad = ["P_unknown"]
timestamps_wesad = np.arange(y_wesad.shape[0])

print(" WESAD JSON lacks raw feature/prediction arrays — skipping direct visualizations.")

# ----------------------------
#  Load PhysioNet Data
# ----------------------------
physio_pred = np.load(os.path.join(BASE_PATH, "physio_pred_anomaly.npy"))
physio_recon = np.load(os.path.join(BASE_PATH, "physio_recon_err.npy"))

# Min-max scale the predictions to [0, 1]; a constant array maps to all zeros
# so the division never sees a zero range.
pred_lo, pred_hi = physio_pred.min(), physio_pred.max()
if pred_hi == pred_lo:
    physio_stress_prob = np.zeros(len(physio_pred))
else:
    physio_stress_prob = (physio_pred - pred_lo) / (pred_hi - pred_lo)

# Labels: records whose reconstruction error exceeds the 75th percentile.
recon_cutoff = np.percentile(physio_recon, 75)
y_physio = (physio_recon > recon_cutoff).astype(int)

# No timestamps are loaded for this dataset; use the record index instead.
timestamps_physio = np.arange(len(physio_pred))
print(" PhysioNet data loaded")

# ----------------------------
#  PCA & t-SNE Visualizations
# ----------------------------
print("\n📈 Generating PCA & t-SNE plots...")
datasets = {
    "HRV": (X_hrv, y_hrv),
    "PhysioNet": (np.expand_dims(physio_stress_prob, axis=1), y_physio)
}


def _save_cluster_scatter(coords, labels, title, axis_labels, out_path):
    """Scatter a 2-D embedding coloured by label and save it as a PNG.

    coords      -- array with the two embedding coordinates in columns 0 and 1
    labels      -- per-point values used for the hue
    title       -- figure title
    axis_labels -- (x label, y label)
    out_path    -- destination file path
    """
    fig, ax = plt.subplots(figsize=(8, 6))
    sns.scatterplot(x=coords[:, 0], y=coords[:, 1], hue=labels,
                    palette="coolwarm", alpha=0.7, ax=ax)
    ax.set_title(title)
    ax.set_xlabel(axis_labels[0])
    ax.set_ylabel(axis_labels[1])
    ax.legend(title="Stress")
    fig.tight_layout()
    fig.savefig(out_path, dpi=300)
    plt.close(fig)  # free the figure; nothing is displayed inline


max_points = 5000  # subsample cap so t-SNE stays tractable
seed = 42          # shared by the subsample and t-SNE for reproducible figures
subsample_rng = np.random.default_rng(seed)
pca_tsne_dir = os.path.join(BASE_PATH, "figures/pca_tsne")

for name, (X, y) in datasets.items():
    if X.shape[1] < 2:
        print(f" Skipping PCA/t-SNE for {name} (only {X.shape[1]} feature)")
        continue
    if len(X) < 3:
        # Too few points for a 2-component projection and a valid perplexity.
        print(f" Skipping PCA/t-SNE for {name} (only {len(X)} samples)")
        continue

    if len(X) > max_points:
        idx = subsample_rng.choice(len(X), max_points, replace=False)
        X, y = X[idx], y[idx]

    # PCA
    # NOTE(review): features are projected unscaled, so columns with large
    # numeric ranges dominate the components — consider standardising first.
    X_pca = PCA(n_components=2).fit_transform(X)
    _save_cluster_scatter(
        X_pca, y, f"PCA Cluster Plot - {name}", ("PC1", "PC2"),
        os.path.join(pca_tsne_dir, f"pca_{name.lower()}.png"),
    )

    # t-SNE: perplexity must stay below the number of samples, so cap it for
    # small datasets instead of letting the fit raise.
    tsne = TSNE(n_components=2, random_state=seed, perplexity=min(30, len(X) - 1))
    X_tsne = tsne.fit_transform(X)
    _save_cluster_scatter(
        X_tsne, y, f"t-SNE Cluster Plot - {name}", ("t-SNE1", "t-SNE2"),
        os.path.join(pca_tsne_dir, f"tsne_{name.lower()}.png"),
    )

print(" PCA & t-SNE plots done (skipped 1D datasets safely).")

# ----------------------------
#  ROC Curves
# ----------------------------
print("\n Generating ROC curves...")
roc_datasets = {
    "HRV_LGBM": (y_hrv, hrv_stress_prob),
    "PhysioNet": (y_physio, physio_stress_prob)
}

for name, (y_true, y_prob) in roc_datasets.items():
    y_true = np.asarray(y_true)
    y_prob = np.asarray(y_prob)

    # Skip inputs that cannot yield a meaningful curve instead of saving a
    # misleading figure or failing halfway through the loop.
    if len(y_true) != len(y_prob):
        print(f" Skipping ROC for {name} (labels: {len(y_true)}, scores: {len(y_prob)})")
        continue
    if np.unique(y_true).size < 2:
        # e.g. the all-zeros label fallback: ROC/AUC is undefined for one class.
        print(f" Skipping ROC for {name} (labels contain a single class)")
        continue

    fpr, tpr, _ = roc_curve(y_true, y_prob)
    roc_auc = auc(fpr, tpr)

    fig, ax = plt.subplots(figsize=(7, 6))
    ax.plot(fpr, tpr, label=f'{name} (AUC = {roc_auc:.2f})')
    ax.plot([0, 1], [0, 1], 'k--')  # chance diagonal
    ax.set_title(f'ROC Curve: {name}')
    ax.set_xlabel('False Positive Rate')
    ax.set_ylabel('True Positive Rate')
    ax.legend(loc='lower right')
    fig.tight_layout()
    fig.savefig(os.path.join(BASE_PATH, "figures/roc_curves", f"roc_{name.lower()}.png"), dpi=300)
    plt.close(fig)
print(" ROC curves generated")

# ----------------------------
#  Time-Series Stress Trends
# ----------------------------
print("\n Generating time-series stress trends...")
time_series_datasets = {
    "HRV": (timestamps_hrv, hrv_stress_prob),
    "PhysioNet": (timestamps_physio, physio_stress_prob)
}

for name, (timestamps, stress_prob) in time_series_datasets.items():
    fig, ax = plt.subplots(figsize=(12, 5))
    # estimator=None draws every observation directly. The default estimator
    # aggregates y per distinct x value, which adds nothing when each x occurs
    # once and is needlessly slow on hundreds of thousands of points.
    # NOTE(review): if timestamps can repeat (e.g. several participants at the
    # same time), this plots the raw points rather than their per-time mean.
    sns.lineplot(x=timestamps, y=stress_prob, estimator=None, ax=ax)
    ax.set_title(f"Time-Series Stress Probability: {name}")
    ax.set_xlabel("Time")
    ax.set_ylabel("Stress Probability")
    fig.tight_layout()
    fig.savefig(os.path.join(BASE_PATH, "figures/time_series", f"stress_trend_{name.lower()}.png"), dpi=300)
    plt.close(fig)
print(" Time-series plots complete")

# ----------------------------
#  Dashboard CSV Export
# ----------------------------
# Writes one row per record with columns:
#   timestamp, participant_id, dataset, stress_prob
# HRV rows come first, followed by PhysioNet rows.
print("\n Exporting dashboard summary...")

# Export only as many rows as every input of a dataset can supply, and say so
# when the inputs disagree instead of truncating silently.
n_hrv = min(len(timestamps_hrv), len(participants_hrv), len(hrv_stress_prob))
if not (len(timestamps_hrv) == len(participants_hrv) == len(hrv_stress_prob)):
    print(f" Warning: HRV inputs differ in length; exporting the first {n_hrv} rows only")

n_physio = min(len(timestamps_physio), len(physio_stress_prob))
if len(timestamps_physio) != len(physio_stress_prob):
    print(f" Warning: PhysioNet inputs differ in length; exporting the first {n_physio} rows only")

# Build the table column-wise in one go rather than one dict per row.
# NOTE(review): the 'timestamp' column mixes whatever timestamps_hrv holds with
# the integer record index used for PhysioNet, so readers of the CSV see a
# mixed-type column.
dashboard_df = pd.DataFrame({
    "timestamp": list(timestamps_hrv)[:n_hrv] + list(timestamps_physio)[:n_physio],
    "participant_id": list(participants_hrv)[:n_hrv] + ["P_unknown"] * n_physio,
    "dataset": ["HRV"] * n_hrv + ["PhysioNet"] * n_physio,
    "stress_prob": list(hrv_stress_prob)[:n_hrv] + list(physio_stress_prob)[:n_physio],
})
dashboard_csv_path = os.path.join(BASE_PATH, "metrics", "dashboard_data.csv")
dashboard_df.to_csv(dashboard_csv_path, index=False)

print(f" Dashboard CSV saved: {dashboard_csv_path}")
print("\n All visualizations and dashboard data generated successfully!")


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
 All folders created successfully
 HRV Columns: ['HR_mean', 'HR_std', 'rmssd_mean', 'sdnn_mean', 'lf/hf_mean', 'recon_error', 'anomaly']
 No 'timestamp' column found — generating synthetic timestamps (1 per record)
 Using 'anomaly' column as HRV labels
 HRV stress probabilities computed
 HRV ready | Records: 343379 | Features: 5
 WESAD Keys: dict_keys(['Test Accuracy', 'Loss', 'AUROC_per_class', 'Notes'])

 WESAD Summary:
  - Test Accuracy: 0.8182
  - Loss: 0.423
  - AUROC (per class): [0.90515645, 0.99472014, 0.90798485]
 WESAD JSON lacks raw feature/prediction arrays — skipping direct visualizations.
 PhysioNet data loaded

📈 Generating PCA & t-SNE plots...
 Skipping PCA/t-SNE for PhysioNet (only 1 feature)
 PCA & t-SNE plots done (skipped 1D datasets safely).

 Generating ROC curves...
 ROC curves generated

 Generating time-series stress trends...
 Time-s

Preview the exported dashboard data

In [9]:
import pandas as pd
# Preview only: nrows=5 stops parsing after the first five data rows instead of
# loading the whole CSV just to display its head.
pd.read_csv("/content/drive/My Drive/stress-project/results/metrics/dashboard_data.csv", nrows=5)


  pd.read_csv("/content/drive/My Drive/stress-project/results/metrics/dashboard_data.csv").head()


Unnamed: 0,timestamp,participant_id,dataset,stress_prob
0,2025-01-01 00:00:00,P_unknown,HRV,0.0
1,2025-01-01 00:05:00,P_unknown,HRV,0.0
2,2025-01-01 00:10:00,P_unknown,HRV,0.0
3,2025-01-01 00:15:00,P_unknown,HRV,0.0
4,2025-01-01 00:20:00,P_unknown,HRV,0.0
