In [1]:
# 🌊 Generalized Physical Factors Flood Forecasting System - Jupyter Implementation
# Real-time predictions for any location based on physical data

import numpy as np
import pandas as pd
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report
from datetime import datetime
import warnings
warnings.filterwarnings('ignore')

print("🌊 GENERALIZED PHYSICAL FACTORS FLOOD FORECASTING SYSTEM")
print("=" * 60)

class PhysicalFloodPredictor:
    def __init__(self):
        self.model = None
        self.is_trained = False
        # Feature columns no longer include 'district_encoded'
        self.feature_columns = [
            'rainfall_mm', 'river_discharge_cumec', 'water_level_m',
            'soil_moisture_percent', 'temperature_c', 'humidity_percent', 'wind_speed_ms',
            'pressure_hpa', 'elevation_m', 'population_density', 'drainage_efficiency',
            'distance_to_coast_km', 'deforestation_index'
        ]

    def generate_physical_dataset(self, n_samples=10000):
        """Generate a generalized dataset with randomized physical factors."""
        print(f"🔄 Generating {n_samples} generalized samples...")
        np.random.seed(42)
        data = []

        # Define plausible ranges for generalized locations in a country like India
        MAX_ELEVATION = 2500 # Capping at a reasonable max for inhabited areas

        for _ in range(n_samples):
            # Generate location-agnostic physical properties
            elevation = np.random.uniform(0, MAX_ELEVATION)
            population_density = max(10, np.random.lognormal(mean=6, sigma=1.5)) # Skewed towards more common densities
            distance_to_coast = np.random.uniform(0, 1500)
            drainage_efficiency = np.random.uniform(0.1, 0.9)
            deforestation = np.random.uniform(0.05, 0.9)

            # --- The rest of the generation logic remains similar ---
            rainfall_scenario = np.random.random()
            if rainfall_scenario > 0.95:
                rainfall = np.random.uniform(150, 300)
            elif rainfall_scenario > 0.85:
                rainfall = np.random.uniform(50, 150)
            else:
                rainfall = max(0, np.random.exponential(15))

            river_discharge = max(0, (rainfall * 0.5 * (1 + elevation / 1000)) + np.random.normal(0, 30))
            water_level = max(0, 1.5 + (river_discharge / 300) + np.random.normal(0, 0.8))
            soil_moisture = min(100, max(15, 35 + rainfall * 1.5 + np.random.normal(0, 12)))
            temperature = np.random.normal(25, 6)
            humidity = min(100, max(40, 60 + rainfall * 0.7))
            wind_speed = max(0, np.random.normal(10, 8))
            pressure = np.random.normal(1010, 12) - (rainfall / 10) # Lower pressure with more rain

            # Calculate comprehensive flood risk score
            flood_risk_score = (
                (rainfall / 250) * 0.30 +
                (river_discharge / 800) * 0.25 +
                (water_level / 8) * 0.20 +
                (soil_moisture / 100) * 0.08 +
                ((MAX_ELEVATION - elevation) / MAX_ELEVATION) * 0.07 + # Low elevation risk
                (deforestation) * 0.05 +
                ((1 - drainage_efficiency)) * 0.03 +
                (min(population_density, 5000) / 5000) * 0.02 # Capped population pressure
            )

            flood_occurred = 1 if flood_risk_score > 0.40 else 0
            if np.random.random() < 0.03: # Add noise
                flood_occurred = 1 - flood_occurred

            sample = [
                rainfall, river_discharge, water_level, soil_moisture,
                temperature, humidity, wind_speed, pressure, elevation,
                population_density, drainage_efficiency, distance_to_coast,
                deforestation, flood_occurred
            ]
            data.append(sample)

        columns = self.feature_columns + ['flood_occurred']
        df = pd.DataFrame(data, columns=columns)
        print(f"✅ Generalized dataset created! Shape: {df.shape}, Floods: {df['flood_occurred'].sum()}")
        return df

    def train_model(self, df=None):
        """Train flood prediction model on the generalized dataset."""
        if df is None:
            df = self.generate_physical_dataset()

        print("🔧 Training model on generalized physical factors...")
        X = df[self.feature_columns]
        y = df['flood_occurred']

        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42, stratify=y)

        self.model = GradientBoostingClassifier(n_estimators=150, learning_rate=0.1, max_depth=8, random_state=42)
        self.model.fit(X_train, y_train)

        test_pred = self.model.predict(X_test)
        test_acc = accuracy_score(y_test, test_pred)
        print(f"✅ Model trained successfully! Test Accuracy: {test_acc:.3f}")
        print("\n📊 Detailed Performance Report:")
        print(classification_report(y_test, test_pred, target_names=['Normal', 'Flood']))
        
        self.is_trained = True
        return self

    def predict_flood_risk(self, **kwargs):
        """Predict flood risk from any location's physical conditions."""
        if not self.is_trained:
            raise ValueError("❌ Model not trained! Please run train_model() first.")

        # Validate that all required features are present
        for param in self.feature_columns:
            if param not in kwargs:
                raise ValueError(f"❌ Missing parameter: {param}")

        # Prepare feature vector in the exact order used for training
        features = np.array([kwargs[param] for param in self.feature_columns]).reshape(1, -1)

        prediction = self.model.predict(features)[0]
        probability = self.model.predict_proba(features)[0][1]

        # Risk assessment and recommendation logic
        if probability >= 0.85:
            risk_level = "🚨 CRITICAL"
            recommendation = "IMMEDIATE EVACUATION may be required! Contact authorities."
        elif probability >= 0.65:
            risk_level = "🔴 HIGH"
            recommendation = "Take immediate precautionary measures. Prepare for potential evacuation."
        elif probability >= 0.45:
            risk_level = "🟠 MEDIUM"
            recommendation = "Monitor conditions closely. Prepare emergency supplies."
        elif probability >= 0.25:
            risk_level = "🟡 LOW"
            recommendation = "Stay alert and monitor weather updates."
        else:
            risk_level = "🟢 SAFE"
            recommendation = "Current conditions appear normal."

        return {
            'prediction': 'FLOOD WARNING' if prediction == 1 else 'NORMAL CONDITIONS',
            'probability': round(probability, 3),
            'risk_level': risk_level,
            'recommendation': recommendation,
            'timestamp': datetime.now().strftime('%Y-%m-%d %H:%M:%S')
        }

# --- Main Execution ---
# Initialize and train the predictor
predictor = PhysicalFloodPredictor()
predictor.train_model()

print("\n" + "="*60)
print("🎉 SYSTEM READY FOR GENERALIZED REAL-TIME PREDICTIONS!")
print("="*60)

🌊 GENERALIZED PHYSICAL FACTORS FLOOD FORECASTING SYSTEM
🔄 Generating 10000 generalized samples...
✅ Generalized dataset created! Shape: (10000, 14), Floods: 966
🔧 Training model on generalized physical factors...
✅ Model trained successfully! Test Accuracy: 0.957

📊 Detailed Performance Report:
              precision    recall  f1-score   support

      Normal       0.96      0.99      0.98      2710
       Flood       0.88      0.65      0.75       290

    accuracy                           0.96      3000
   macro avg       0.92      0.82      0.86      3000
weighted avg       0.96      0.96      0.95      3000


🎉 SYSTEM READY FOR GENERALIZED REAL-TIME PREDICTIONS!


In [3]:
# --- Sample Prediction: High-Risk Coastal Scenario ---

# Define the physical conditions of the location you want to check
high_risk_conditions = {
    'rainfall_mm': 100.0,              # Very heavy rainfall
    'river_discharge_cumec': 250.0,    # High river flow
    'water_level_m': 4.5,              # Dangerously high water level
    'soil_moisture_percent': 95.0,     # Soil is completely saturated
    'temperature_c': 24.0,             # Cooler storm temperature
    'humidity_percent': 98.0,          # Extremely high humidity
    'wind_speed_ms': 20.0,             # Strong winds
    'pressure_hpa': 998.0,             # Low atmospheric pressure (storm)
    'elevation_m': 10.0,               # Very low-lying area
    'population_density': 1800.0,      # Densely populated urban area
    'drainage_efficiency': 0.4,        # Poor drainage system
    'distance_to_coast_km': 5.0,       # Very close to the coast
    'deforestation_index': 0.7         # High deforestation, causing more runoff
}

# Get the prediction by unpacking the dictionary as keyword arguments
try:
    result = predictor.predict_flood_risk(**high_risk_conditions)

    # Print the results in a user-friendly format
    print(f"🌊 Flood Risk Assessment Results")
    print("-" * 35)
    print(f"Prediction:     {result['prediction']}")
    print(f"Probability:    {result['probability'] * 100:.1f}%")
    print(f"Risk Level:     {result['risk_level']}")
    print(f"Recommendation: {result['recommendation']}")
    print(f"Timestamp:      {result['timestamp']}")
    print("-" * 35)

except ValueError as e:
    print(e)

🌊 Flood Risk Assessment Results
-----------------------------------
Prediction:     NORMAL CONDITIONS
Probability:    15.7%
Risk Level:     🟢 SAFE
Recommendation: Current conditions appear normal.
Timestamp:      2025-10-07 13:03:39
-----------------------------------


In [1]:
# =============================================
# 🌊 GENERALIZED PHYSICAL FACTORS FLOOD FORECASTING SYSTEM
# =============================================
# Real-time prediction version — ready for input/output demonstration
# Author: Team DTU TECH TRAILBLAZERS (Optimized for competition use)

import numpy as np
import pandas as pd
import joblib
from datetime import datetime
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler, PowerTransformer
from sklearn.ensemble import HistGradientBoostingClassifier
from sklearn.calibration import CalibratedClassifierCV
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, accuracy_score, roc_auc_score

import warnings
warnings.filterwarnings('ignore')

print("🌊 GENERALIZED PHYSICAL FACTORS FLOOD FORECASTING SYSTEM")
print("=" * 70)

# ---------------------------------------------
# CLASS DEFINITION
# ---------------------------------------------
class PhysicalFloodPredictor:
    def __init__(self):
        self.model = None
        self.is_trained = False
        self.feature_columns = [
            'rainfall_mm', 'river_discharge_cumec', 'water_level_m',
            'soil_moisture_percent', 'temperature_c', 'humidity_percent', 'wind_speed_ms',
            'pressure_hpa', 'elevation_m', 'population_density', 'drainage_efficiency',
            'distance_to_coast_km', 'deforestation_index'
        ]

    def generate_physical_dataset(self, n_samples=8000):
        np.random.seed(42)
        MAX_ELEVATION = 3000
        data = []

        for _ in range(n_samples):
            elevation = np.random.exponential(scale=300)
            elevation = min(elevation, MAX_ELEVATION)
            rainfall = np.random.exponential(20)
            if np.random.random() < 0.1:
                rainfall += np.random.uniform(100, 300)

            river_discharge = max(0, rainfall * 0.6 + np.random.normal(0, 20))
            water_level = max(0, 1.5 + river_discharge / 300 + np.random.normal(0, 0.4))
            soil_moisture = np.clip(25 + rainfall * 1.5 + np.random.normal(0, 10), 5, 100)
            temperature = np.random.normal(25 - (elevation / 800), 5)
            humidity = np.clip(50 + rainfall * 0.5 + np.random.normal(0, 10), 10, 100)
            wind_speed = abs(np.random.normal(8, 5))
            pressure = np.random.normal(1012 - rainfall / 10, 8)
            population_density = np.random.lognormal(mean=6, sigma=1.2)
            drainage_efficiency = np.clip(np.random.beta(2, 5), 0.1, 0.95)
            distance_to_coast = np.random.uniform(0, 1500)
            deforestation = np.random.uniform(0.05, 0.9)

            flood_score = (
                (rainfall / 300) * 0.35 +
                (river_discharge / 800) * 0.25 +
                (water_level / 10) * 0.15 +
                (soil_moisture / 100) * 0.08 +
                (1 - drainage_efficiency) * 0.05 +
                (deforestation) * 0.05 +
                ((MAX_ELEVATION - elevation) / MAX_ELEVATION) * 0.05 +
                (min(population_density, 5000) / 5000) * 0.02
            )

            flood = 1 if flood_score > 0.45 else 0
            if np.random.random() < 0.03:
                flood = 1 - flood

            data.append([
                rainfall, river_discharge, water_level, soil_moisture, temperature,
                humidity, wind_speed, pressure, elevation, population_density,
                drainage_efficiency, distance_to_coast, deforestation, flood
            ])

        df = pd.DataFrame(data, columns=self.feature_columns + ['flood_occurred'])
        print(f"✅ Dataset ready: {df.shape}, Flood cases: {df['flood_occurred'].sum()}")
        return df

    def train_model(self, df=None):
        if df is None:
            df = self.generate_physical_dataset()

        X = df[self.feature_columns]
        y = df['flood_occurred']

        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=42, stratify=y)

        # Preprocessing + Model Pipeline
        preprocessor = ColumnTransformer([
            ('num', Pipeline([
                ('power', PowerTransformer(method='yeo-johnson')),
                ('scale', StandardScaler())
            ]), self.feature_columns)
        ])

        base_model = HistGradientBoostingClassifier(max_iter=250, learning_rate=0.08, max_depth=6, random_state=42)
        model = CalibratedClassifierCV(base_model, method='isotonic', cv=3)
        pipeline = Pipeline([
            ('pre', preprocessor),
            ('clf', model)
        ])

        print("🔧 Training model on generalized data...")
        pipeline.fit(X_train, y_train)
        y_pred = pipeline.predict(X_test)
        y_prob = pipeline.predict_proba(X_test)[:, 1]

        acc = accuracy_score(y_test, y_pred)
        roc_auc = roc_auc_score(y_test, y_prob)
        print(f"✅ Model trained! Accuracy: {acc:.3f}, ROC-AUC: {roc_auc:.3f}")
        print(classification_report(y_test, y_pred, target_names=['Normal', 'Flood']))

        self.model = pipeline
        self.is_trained = True
        return self

    def predict_flood_risk(self, **kwargs):
        if not self.is_trained:
            raise ValueError("❌ Model not trained yet!")

        # ensure all parameters provided
        for col in self.feature_columns:
            if col not in kwargs:
                raise ValueError(f"Missing input: {col}")

        X = pd.DataFrame([[kwargs[col] for col in self.feature_columns]], columns=self.feature_columns)
        prob = self.model.predict_proba(X)[0, 1]
        pred = int(prob >= 0.5)

        if prob >= 0.85:
            risk = "🚨 CRITICAL"
            rec = "IMMEDIATE EVACUATION may be required!"
        elif prob >= 0.65:
            risk = "🔴 HIGH"
            rec = "Prepare for potential evacuation."
        elif prob >= 0.45:
            risk = "🟠 MEDIUM"
            rec = "Monitor conditions closely."
        elif prob >= 0.25:
            risk = "🟡 LOW"
            rec = "Stay alert and monitor weather updates."
        else:
            risk = "🟢 SAFE"
            rec = "Conditions normal."

        return {
            "prediction": "FLOOD WARNING" if pred == 1 else "NORMAL CONDITIONS",
            "probability": round(prob, 3),
            "risk_level": risk,
            "recommendation": rec,
            "timestamp": datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        }


🌊 GENERALIZED PHYSICAL FACTORS FLOOD FORECASTING SYSTEM


In [2]:
predictor = PhysicalFloodPredictor()
predictor.train_model()  # Generates synthetic data + trains

✅ Dataset ready: (8000, 14), Flood cases: 787
🔧 Training model on generalized data...
✅ Model trained! Accuracy: 0.970, ROC-AUC: 0.882
              precision    recall  f1-score   support

      Normal       0.97      0.99      0.98      1803
       Flood       0.93      0.76      0.83       197

    accuracy                           0.97      2000
   macro avg       0.95      0.87      0.91      2000
weighted avg       0.97      0.97      0.97      2000



<__main__.PhysicalFloodPredictor at 0x147ff3cf2d0>

In [5]:
# Example sample input (typical monsoon conditions)
sample_input = {
    'rainfall_mm': 100.0,
    'river_discharge_cumec': 400.0,
    'water_level_m': 3.2,
    'soil_moisture_percent': 70.0,
    'temperature_c': 27.5,
    'humidity_percent': 88.0,
    'wind_speed_ms': 12.0,
    'pressure_hpa': 995.0,
    'elevation_m': 200.0,
    'population_density': 1500.0,
    'drainage_efficiency': 0.3,
    'distance_to_coast_km': 100.0,
    'deforestation_index': 0.45
}

# Run prediction
output = predictor.predict_flood_risk(**sample_input)
print("\n🌍 Flood Prediction Output:")
for k, v in output.items():
    print(f"{k}: {v}")


🌍 Flood Prediction Output:
prediction: NORMAL CONDITIONS
probability: 0.492
risk_level: 🟠 MEDIUM
recommendation: Monitor conditions closely.
timestamp: 2025-10-26 12:09:49


In [1]:
# ================================================================
# 🌊 PHASE 4: SPATIO-TEMPORAL GRAPH NEURAL NETWORK (STGNN) SYSTEM
# ================================================================
#
# This model analyzes an entire network of sensors as a graph,
# learning how upstream events propagate downstream.
#
# REQUIRED: pip install tensorflow spektral joblib pandas numpy
#

import numpy as np
import pandas as pd
import joblib
import sqlite3
from datetime import datetime
import warnings, os
from collections import deque

# --- AI & Graph Libraries ---
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, LSTM, Dense, Dropout, TimeDistributed
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, accuracy_score, roc_auc_score

# Spektral is the GNN library
try:
    import spektral
    from spektral.layers import GraphConv
    from spektral.data import Graph, Dataset
    from spektral.transforms import GCNConv
except ImportError:
    print("="*75)
    print("ERROR: 'spektral' library not found. Please run: pip install spektral")
    print("="*75)
    exit()

warnings.filterwarnings('ignore')

print("🌊 PHASE 4: SPATIO-TEMPORAL GNN (STGNN) FORECASTING SYSTEM - DTU TECH TRAILBLAZERS")
print("=" * 75)


class GraphFloodPredictor:
    def _init_(self, model_dir="model_stgnn", sequence_length=72):
        self.model = None
        self.scaler = None
        self.is_trained = False
        self.sequence_length = sequence_length # Look at 72 hours
        
        # --- Node/Sensor Network Definition ---
        self.sensor_nodes = {
            0: "Prayagraj (Upstream)",
            1: "Varanasi (Midstream)",
            2: "Patna (Downstream)"
        }
        self.num_nodes = len(self.sensor_nodes)
        
        # Adjacency Matrix (Graph "Map"): Defines river flow
        # 1 means a connection exists (including self-loops)
        self.adjacency_matrix = np.array([
            [1, 1, 0],  # Prayagraj -> Varanasi
            [0, 1, 1],  # Varanasi -> Patna
            [0, 0, 1]   # Patna (no outflow in our model)
        ], dtype=float)
        # Add reverse connections (for GNN message passing) & normalize
        self.adjacency_matrix = GraphConv.preprocess(self.adjacency_matrix)

        # --- Base Features (per-sensor) ---
        self.base_feature_columns = [
            'rainfall_mm', 'river_discharge_cumec', 'water_level_m',
            'soil_moisture_percent', 'temperature_c', 'humidity_percent'
        ]
        self.n_features = len(self.base_feature_columns)
        
        # --- Persistence ---
        self.model_dir = model_dir
        self.model_path = os.path.join(model_dir, "flood_model_stgnn.h5")
        self.scaler_path = os.path.join(model_dir, "flood_scaler_stgnn.joblib")
        os.makedirs(self.model_dir, exist_ok=True)

        self.db_path = "flood_cache_graph.db"
        self._setup_database()

    # -------------------- DATABASE (OFFLINE LOGGING) --------------------
    def _setup_database(self):
        # (Same as before, logs predictions)
        conn = sqlite3.connect(self.db_path)
        conn.execute("""CREATE TABLE IF NOT EXISTS flood_logs (
                        timestamp TEXT, location TEXT, probability REAL,
                        risk_level TEXT, recommendation TEXT)""")
        conn.close()

    def log_prediction(self, location, probability, risk_level, recommendation):
        # (Same as before)
        conn = sqlite3.connect(self.db_path)
        conn.execute("INSERT INTO flood_logs VALUES (?, ?, ?, ?, ?)",
                     (datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
                      location, probability, risk_level, recommendation))
        conn.commit()
        conn.close()

    # -------------------- ADVANCED DATA GENERATION (GRAPH) --------------------
    def generate_graph_dataset(self, n_hours=10000):
        print(f"🧬 Generating {n_hours} hours of synthetic graph data for {self.num_nodes} nodes...")
        np.random.seed(42)
        
        # --- Create Time-Varying Features for ALL nodes ---
        # Shape: (n_hours, num_nodes, n_features)
        X_data = np.zeros((n_hours, self.num_nodes, self.n_features))
        
        # Store rainfall separately to calculate flow
        rain_data = np.zeros((n_hours, self.num_nodes))
        
        # --- Define Node Properties (e.g., lag time from upstream) ---
        # Lag (in hours) for discharge from upstream node
        flow_lag = { 1: 12, 2: 18 } # 12h: Pryj->Vns, 18h: Vns->Pat
        
        for n in range(self.num_nodes):
            time = np.arange(n_hours)
            temp = 25 + 10 * np.sin(2 * np.pi * time / (365 * 24)) + np.random.normal(0, 1.5, n_hours)
            
            rain = np.random.exponential(0.5, n_hours)
            storm_indices = np.random.randint(0, n_hours, n_hours // 100)
            rain[storm_indices] += np.random.uniform(20, 80, len(storm_indices))
            rain = np.clip(rain, 0, 200)
            rain_data[:, n] = rain
            
            humidity = np.clip(50 + rain * 0.5 + (temp - 25), 20, 100)
            
            soil = np.zeros(n_hours)
            soil[0] = 30
            for i in range(1, n_hours):
                soil[i] = np.clip(soil[i-1] + rain[i]*0.5 - 1.0, 10, 100)
                
            # --- Store in main matrix ---
            X_data[:, n, 0] = rain
            X_data[:, n, 3] = soil
            X_data[:, n, 4] = temp
            X_data[:, n, 5] = humidity
            
        # --- Simulate Hydrological Flow (The "Graph" part) ---
        print("Simulating downstream hydrological flow...")
        discharge_data = np.zeros((n_hours, self.num_nodes))
        water_level_data = np.zeros((n_hours, self.num_nodes))

        for n in range(self.num_nodes):
            # Base discharge from local rainfall
            local_discharge = pd.Series(rain_data[:, n]).rolling(48).sum() * 0.2 + 5
            
            # Add discharge from upstream node (with lag)
            upstream_discharge = 0
            if n == 1: # Varanasi
                upstream_rain = pd.Series(rain_data[:, 0]).shift(flow_lag[n])
                upstream_discharge = upstream_rain.rolling(48).sum() * 0.4
            elif n == 2: # Patna
                upstream_rain = pd.Series(rain_data[:, 1]).shift(flow_lag[n])
                upstream_discharge = upstream_rain.rolling(48).sum() * 0.5
            
            total_discharge = local_discharge.fillna(5) + upstream_discharge.fillna(0)
            discharge_data[:, n] = total_discharge
            water_level_data[:, n] = 1.5 + total_discharge / 50 + np.random.normal(0, 0.1, n_hours)

        X_data[:, :, 1] = discharge_data
        X_data[:, :, 2] = water_level_data

        # --- Define Flood Event (per-node) ---
        # Shape: (n_hours, num_nodes)
        water_level_risk = water_level_data / 8.0 # 8m = critical level
        rain_risk = pd.DataFrame(rain_data).rolling(24).sum().values / 150
        
        flood_risk = (water_level_risk * 0.7) + (rain_risk * 0.3)
        y_data = (flood_risk > 0.65).astype(int)
        
        # Clean NaNs from rolling/lag
        X_data = X_data[100:]
        y_data = y_data[100:]
        
        print(f"✅ Data generated. Total flood events: {np.sum(y_data, axis=0)}")
        return X_data, y_data, self.adjacency_matrix

    # --- Helper to create ST-GNN sequences ---
    def create_sequences(self, X_data, y_data):
        X, y = [], []
        # X_data shape: (time, nodes, features)
        # y_data shape: (time, nodes)
        for i in range(len(X_data) - self.sequence_length):
            # Input sequence: (seq_len, nodes, features)
            X.append(X_data[i:(i + self.sequence_length)])
            
            # Output label: (nodes, 1) - risk at the end of the sequence
            y.append(y_data[i + self.sequence_length - 1])
            
        return np.array(X), np.array(y)

    # -------------------- MASTER TRAINING PIPELINE (STGNN) --------------------
    def train_model(self, X_data=None, y_data=None, adj_matrix=None):
        if X_data is None:
            X_data, y_data, adj_matrix = self.generate_graph_dataset()
        
        # --- 1. Scale Features ---
        # We scale features before creating sequences
        # X_data shape: (n_hours, num_nodes, n_features)
        print("Scaling features...")
        self.scaler = StandardScaler()
        
        # Fit the scaler on (n_hours * num_nodes, n_features)
        X_data_reshaped = X_data.reshape(-1, self.n_features)
        self.scaler.fit(X_data_reshaped)
        
        # Transform back to original shape
        X_data_scaled = self.scaler.transform(X_data_reshaped).reshape(X_data.shape)
        
        # --- 2. Create Sequences ---
        print(f"🔄 Creating sequences of length {self.sequence_length}...")
        # X shape: (samples, seq_len, nodes, features)
        # y shape: (samples, nodes)
        X_seq, y_seq = self.create_sequences(X_data_scaled, y_data)
        
        # Reshape y to (samples, nodes, 1) for the model output
        y_seq = y_seq.reshape(y_seq.shape[0], y_seq.shape[1], 1)
        
        # --- 3. Build the STGNN Model ---
        print("🧠 Building Spatio-Temporal GNN (STGNN) model...")
        
        # Input shapes
        X_in = Input(shape=(self.sequence_length, self.num_nodes, self.n_features))
        A_in = Input(shape=(self.num_nodes, self.num_nodes), sparse=True)
        
        # We process the graph at each time step
        # Use TimeDistributed to apply the same GNN layer to each of the 72 timesteps
        
        # Spatial Layer 1 (GNN)
        graph_conv_1 = TimeDistributed(GraphConv(32, activation='relu'))([X_in, A_in])
        
        # Spatial Layer 2 (GNN)
        graph_conv_2 = TimeDistributed(GraphConv(16, activation='relu'))([graph_conv_1, A_in])
        
        # Temporal Layers (LSTM)
        # We need to reshape for LSTM. The LSTM should see the output of the GNNs
        # for each node as its features.
        # (samples, seq_len, nodes, 16) -> (samples, nodes, seq_len, 16)
        lstm_input = tf.transpose(graph_conv_2, perm=[0, 2, 1, 3])
        
        # Apply an LSTM to each node's timeline
        # TimeDistributed here applies the same LSTM to each of the 3 nodes
        lstm_out = TimeDistributed(LSTM(32, activation='relu', dropout=0.2))(lstm_input)
        
        # Output Layer
        # We need a risk probability (0-1) for each node
        output = TimeDistributed(Dense(1, activation='sigmoid'))(lstm_out)
        
        # Build Model
        self.model = Model(inputs=[X_in, A_in], outputs=output)
        self.model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
        self.model.summary()

        # --- 4. Train Model ---
        # Create a static Adjacency Matrix input for all samples
        A_train = np.array([adj_matrix] * X_seq.shape[0])
        
        X_train, X_test, A_train_s, A_test_s, y_train, y_test = train_test_split(
            X_seq, A_train, y_seq, test_size=0.2, shuffle=False
        )
        
        early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

        print("🔧 Training STGNN model...")
        self.model.fit(
            [X_train, A_train_s], y_train,
            epochs=30,
            batch_size=32,
            validation_data=([X_test, A_test_s], y_test),
            callbacks=[early_stopping],
            verbose=1
        )

        print("✅ STGNN Model Trained.")
        self.is_trained = True
        return self

    # -------------------- MODEL PERSISTENCE --------------------
    def save_model(self):
        if self.is_trained:
            print(f"💾 Saving model to {self.model_dir}...")
            self.model.save(self.model_path)
            joblib.dump(self.scaler, self.scaler_path)
            print("✅ Model and scaler saved.")
        else:
            print("⚠ Cannot save, model is not trained.")

    def load_model(self):
        try:
            print(f"Loading model from {self.model_dir}...")
            self.model = keras.models.load_model(self.model_path)
            self.scaler = joblib.load(self.scaler_path)
            self.is_trained = True
            print("✅ Model and scaler loaded.")
        except Exception as e:
            print(f"ℹ No model files found or error loading: {e}. Please train a new model.")

    # -------------------- PREDICTION (GRAPH-BASED) --------------------
    def predict(self, history_data_all_nodes=None):
        if not self.is_trained:
            raise ValueError("❌ Model not trained yet! Please run train_model() or load_model().")
        
        # history_data_all_nodes shape: (seq_len, num_nodes, num_features)
        if (not isinstance(history_data_all_nodes, np.ndarray) or
            history_data_all_nodes.shape != (self.sequence_length, self.num_nodes, self.n_features)):
            raise ValueError(f"❌ 'history_data_all_nodes' must be a NumPy array with shape "
                             f"({self.sequence_length}, {self.num_nodes}, {self.n_features})")

        # --- 1. Scale the input data ---
        X_scaled_reshaped = self.scaler.transform(history_data_all_nodes.reshape(-1, self.n_features))
        X_scaled = X_scaled_reshaped.reshape(self.sequence_length, self.num_nodes, self.n_features)
        
        # --- 2. Add batch dimension ---
        X_batch = np.expand_dims(X_scaled, axis=0) # (1, seq_len, nodes, features)
        A_batch = np.expand_dims(self.adjacency_matrix, axis=0) # (1, nodes, nodes)
        
        # --- 3. Predict ---
        # Output shape: (1, num_nodes, 1)
        probs = self.model.predict([X_batch, A_batch], verbose=0)[0]
        
        # --- 4. Format Output ---
        results = {}
        for n in range(self.num_nodes):
            prob = probs[n, 0]
            location_name = self.sensor_nodes[n]
            
            pred = "FLOOD WARNING" if prob > 0.5 else "NORMAL CONDITIONS"
            if prob >= 0.85: risk, rec = "🚨 CRITICAL", "IMMEDIATE EVACUATION may be required!"
            elif prob >= 0.65: risk, rec = "🔴 HIGH", "Prepare for evacuation."
            elif prob >= 0.45: risk, rec = "🟠 MEDIUM", "Monitor closely."
            elif prob >= 0.25: risk, rec = "🟡 LOW", "Stay alert."
            else: risk, rec = "🟢 SAFE", "Conditions normal."
            
            self.log_prediction(location_name, float(prob), risk, rec)
            
            results[location_name] = {
                "prediction": pred,
                "probability": round(float(prob), 3),
                "risk_level": risk,
                "recommendation": rec
            }
        
        return results

    # -------------------- REAL-TIME SIMULATION --------------------
    def simulate_real_time_feed(self, sim_hours=50):
        print("\n\n--- 🔴 STARTING REAL-TIME GRAPH (STGNN) SIMULATION ---")
        X_data, _, _ = self.generate_graph_dataset(n_hours=self.sequence_length + sim_hours)
        
        # Use deque for an efficient rolling window of graph snapshots
        # Each item in deque is a snapshot of shape (num_nodes, num_features)
        history = deque(maxlen=self.sequence_length)
        
        print(f"Bootstrapping model with first {self.sequence_length} hours of data...")
        for i in range(self.sequence_length):
            history.append(X_data[i])
        
        print("⚡ Simulation LIVE. Predicting graph state hour-by-hour...\n")
        
        for i in range(self.sequence_length, self.sequence_length + sim_hours):
            # Get the new data (snapshot for all nodes)
            new_graph_snapshot = X_data[i] # Shape: (num_nodes, num_features)
            history.append(new_graph_snapshot) 
            
            # Make prediction using the full 72-hour history of the graph
            # Convert deque to numpy array: (seq_len, num_nodes, num_features)
            current_history_array = np.array(history)
            
            # Get predictions for ALL nodes
            results = self.predict(history_data_all_nodes=current_history_array)
            
            print(f"--- HOUR {i} ---")
            for node_name, result in results.items():
                print(f"  📍 {node_name:<25} -> {result['risk_level']:<10} (Prob: {result['probability']})")
            print("-" * 60)


# -------------------- RUN DEMO --------------------
# -------------------- RUN DEMO --------------------
if __name__ == '__main__':
    predictor = GraphFloodPredictor()

    predictor.load_model()

    if not predictor.is_trained:
        print("\n--- No saved STGNN model found, training new system... ---")
        predictor.train_model()
        predictor.save_model()
        print("-----------------------------------------------------------\n")

    predictor.simulate_real_time_feed(sim_hours=50)



ERROR: 'spektral' library not found. Please run: pip install spektral
🌊 PHASE 4: SPATIO-TEMPORAL GNN (STGNN) FORECASTING SYSTEM - DTU TECH TRAILBLAZERS
ℹ No model files found or error loading: 'GraphFloodPredictor' object has no attribute 'model_dir'. Please train a new model.


AttributeError: 'GraphFloodPredictor' object has no attribute 'is_trained'

In [1]:
pip install spektral

Collecting spektral
  Downloading spektral-1.3.1-py3-none-any.whl.metadata (5.9 kB)
Collecting networkx (from spektral)
  Downloading networkx-3.5-py3-none-any.whl.metadata (6.3 kB)
Downloading spektral-1.3.1-py3-none-any.whl (140 kB)
Downloading networkx-3.5-py3-none-any.whl (2.0 MB)
   ---------------------------------------- 0.0/2.0 MB ? eta -:--:--
   ----- ---------------------------------- 0.3/2.0 MB ? eta -:--:--
   ----- ---------------------------------- 0.3/2.0 MB ? eta -:--:--
   --------------- ------------------------ 0.8/2.0 MB 1.4 MB/s eta 0:00:01
   -------------------- ------------------- 1.0/2.0 MB 1.5 MB/s eta 0:00:01
   ------------------------------ --------- 1.6/2.0 MB 1.6 MB/s eta 0:00:01
   ------------------------------------ --- 1.8/2.0 MB 1.6 MB/s eta 0:00:01
   ---------------------------------------- 2.0/2.0 MB 1.6 MB/s eta 0:00:00
Installing collected packages: networkx, spektral
Successfully installed networkx-3.5 spektral-1.3.1
Note: you may need to rest


[notice] A new release of pip is available: 24.2 -> 25.3
[notice] To update, run: C:\Users\yashv\AppData\Local\Programs\Python\Python311\python.exe -m pip install --upgrade pip


In [1]:
# Complete fixed STGNN-style flood predictor (no spektral dependency)
# - Fixes: __init__, __name__ main block, model save/load, EarlyStopping import
# - Spatial processing done with adjacency-matrix aggregation + Dense layers (GNN-like)
# - Requires: tensorflow, scikit-learn, joblib, pandas, numpy
#
# pip install tensorflow scikit-learn joblib pandas numpy

import os
import numpy as np
import pandas as pd
import joblib
import sqlite3
from datetime import datetime
from collections import deque
import warnings

warnings.filterwarnings("ignore")

# TensorFlow / Keras
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, LSTM, Dense, Dropout, TimeDistributed, Lambda
from tensorflow.keras.callbacks import EarlyStopping
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

# -------------------- GraphFloodPredictor Class --------------------
class GraphFloodPredictor:
    def __init__(self, model_dir="model_stgnn", sequence_length=72):
        # Basic attributes
        self.model = None
        self.scaler = None
        self.is_trained = False
        self.sequence_length = sequence_length  # lookback hours
        
        # Node/sensor network
        self.sensor_nodes = {
            0: "Prayagraj (Upstream)",
            1: "Varanasi (Midstream)",
            2: "Patna (Downstream)"
        }
        self.num_nodes = len(self.sensor_nodes)
        
        # Adjacency matrix (simple river-flow map)
        A = np.array([
            [1.0, 1.0, 0.0],  # Prayagraj -> Varanasi
            [0.0, 1.0, 1.0],  # Varanasi -> Patna
            [0.0, 0.0, 1.0]
        ], dtype=float)
        # Add self-loops and symmetrize / normalize (symmetric normalization)
        A = A + np.eye(self.num_nodes)
        deg = np.sum(A, axis=1)
        deg_inv_sqrt = np.diag(1.0 / np.sqrt(deg))
        self.adjacency_matrix = deg_inv_sqrt @ A @ deg_inv_sqrt  # normalized adjacency

        # Feature columns per node
        self.base_feature_columns = [
            'rainfall_mm', 'river_discharge_cumec', 'water_level_m',
            'soil_moisture_percent', 'temperature_c', 'humidity_percent'
        ]
        self.n_features = len(self.base_feature_columns)
        
        # Persistence paths
        self.model_dir = model_dir
        os.makedirs(self.model_dir, exist_ok=True)
        self.model_path = os.path.join(self.model_dir, "flood_model_stgnn.keras")
        self.scaler_path = os.path.join(self.model_dir, "flood_scaler_stgnn.joblib")
        
        # DB for logs
        self.db_path = "flood_cache_graph.db"
        self._setup_database()

    # -------------------- DB --------------------
    def _setup_database(self):
        conn = sqlite3.connect(self.db_path)
        conn.execute("""CREATE TABLE IF NOT EXISTS flood_logs (
                        timestamp TEXT, location TEXT, probability REAL,
                        risk_level TEXT, recommendation TEXT)""")
        conn.commit()
        conn.close()

    def log_prediction(self, location, probability, risk_level, recommendation):
        conn = sqlite3.connect(self.db_path)
        conn.execute("INSERT INTO flood_logs VALUES (?, ?, ?, ?, ?)",
                     (datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
                      location, probability, risk_level, recommendation))
        conn.commit()
        conn.close()

    # -------------------- Synthetic Graph Data --------------------
    def generate_graph_dataset(self, n_hours=10000):
        print(f"🧬 Generating {n_hours} hours of synthetic graph data for {self.num_nodes} nodes...")
        np.random.seed(42)

        X_data = np.zeros((n_hours, self.num_nodes, self.n_features))
        rain_data = np.zeros((n_hours, self.num_nodes))

        # Lag times for flow
        flow_lag = {1: 12, 2: 18}

        for n in range(self.num_nodes):
            time = np.arange(n_hours)
            temp = 25 + 10 * np.sin(2 * np.pi * time / (365 * 24)) + np.random.normal(0, 1.5, n_hours)

            rain = np.random.exponential(0.5, n_hours)
            storm_indices = np.random.randint(0, n_hours, n_hours // 100)
            rain[storm_indices] += np.random.uniform(20, 80, len(storm_indices))
            rain = np.clip(rain, 0, 200)
            rain_data[:, n] = rain

            humidity = np.clip(50 + rain * 0.5 + (temp - 25), 20, 100)

            soil = np.zeros(n_hours)
            soil[0] = 30
            for i in range(1, n_hours):
                soil[i] = np.clip(soil[i - 1] + rain[i] * 0.5 - 1.0, 10, 100)

            X_data[:, n, 0] = rain
            X_data[:, n, 3] = soil
            X_data[:, n, 4] = temp
            X_data[:, n, 5] = humidity

        # Simulate discharge and water level using upstream contributions
        print("Simulating downstream hydrological flow...")
        discharge_data = np.zeros((n_hours, self.num_nodes))
        water_level_data = np.zeros((n_hours, self.num_nodes))

        for n in range(self.num_nodes):
            local_discharge = pd.Series(rain_data[:, n]).rolling(48).sum() * 0.2 + 5

            upstream_discharge = 0
            if n == 1:  # Varanasi gets from Prayagraj
                upstream_rain = pd.Series(rain_data[:, 0]).shift(flow_lag[n])
                upstream_discharge = upstream_rain.rolling(48).sum() * 0.4
            elif n == 2:  # Patna gets from Varanasi
                upstream_rain = pd.Series(rain_data[:, 1]).shift(flow_lag[n])
                upstream_discharge = upstream_rain.rolling(48).sum() * 0.5

            total_discharge = local_discharge.fillna(5) + upstream_discharge.fillna(0)
            discharge_data[:, n] = total_discharge
            water_level_data[:, n] = 1.5 + total_discharge / 50 + np.random.normal(0, 0.1, n_hours)

        X_data[:, :, 1] = discharge_data
        X_data[:, :, 2] = water_level_data

        water_level_risk = water_level_data / 8.0  # 8m critical
        rain_risk = pd.DataFrame(rain_data).rolling(24).sum().values / 150.0

        flood_risk = (water_level_risk * 0.7) + (rain_risk * 0.3)
        y_data = (flood_risk > 0.65).astype(int)

        # Remove early NaN-ish rows
        X_data = X_data[100:]
        y_data = y_data[100:]

        print(f"✅ Data generated. Total flood events per node: {np.sum(y_data, axis=0)}")
        return X_data, y_data, self.adjacency_matrix

    # -------------------- Sequence creator --------------------
    def create_sequences(self, X_data, y_data):
        X, y = [], []
        for i in range(len(X_data) - self.sequence_length):
            X.append(X_data[i:(i + self.sequence_length)])
            y.append(y_data[i + self.sequence_length - 1])
        return np.array(X), np.array(y)

    # -------------------- Build & Train Model --------------------
    def build_model(self):
        # Inputs
        X_in = Input(shape=(self.sequence_length, self.num_nodes, self.n_features), name="X_in")
        A_in = Input(shape=(self.num_nodes, self.num_nodes), name="A_in")

        # Spatial aggregation per time-step: result shape -> (batch, seq, nodes, features)
        def spatial_aggregate(inputs):
            A, X = inputs  # A: (batch, nodes, nodes), X: (batch, seq, nodes, features)
            # einsum sums over j: out[b,t,i,f] = sum_j A[b,i,j] * X[b,t,j,f]
            return tf.einsum('bij,btjf->btif', A, X)

        agg = Lambda(spatial_aggregate, name="graph_aggregate")([A_in, X_in])

        # After aggregation, apply Dense feature transforms in time-distributed fashion (like GraphConv)
        td1 = TimeDistributed(Dense(32, activation='relu'), name="spatial_dense_1")(agg)
        td2 = TimeDistributed(Dense(16, activation='relu'), name="spatial_dense_2")(td1)

        # Temporal processing: transpose to (batch, nodes, seq, features) so each node has a sequence
        lstm_input = Lambda(lambda x: tf.transpose(x, perm=[0, 2, 1, 3]), name="transpose_for_lstm")(td2)

        # Apply same LSTM for each node (TimeDistributed wraps LSTM which sees (seq, features) per node)
        lstm_out = TimeDistributed(LSTM(32, activation='tanh', dropout=0.2, return_sequences=False),
                                   name="per_node_lstm")(lstm_input)

        # Output: per-node probability
        outputs = TimeDistributed(Dense(1, activation='sigmoid'), name="node_output")(lstm_out)

        model = Model(inputs=[X_in, A_in], outputs=outputs, name="STGNN_like")
        model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
        return model

    def train_model(self, X_data=None, y_data=None, adj_matrix=None, epochs=30, batch_size=32):
        if X_data is None or y_data is None or adj_matrix is None:
            X_data, y_data, adj_matrix = self.generate_graph_dataset()

        # Scale features
        print("Scaling features...")
        self.scaler = StandardScaler()
        X_data_reshaped = X_data.reshape(-1, self.n_features)
        self.scaler.fit(X_data_reshaped)
        X_data_scaled = self.scaler.transform(X_data_reshaped).reshape(X_data.shape)

        print(f"🔄 Creating sequences of length {self.sequence_length}...")
        X_seq, y_seq = self.create_sequences(X_data_scaled, y_data)
        y_seq = y_seq.reshape(y_seq.shape[0], y_seq.shape[1], 1)  # (samples, nodes, 1)

        print("🧠 Building model...")
        self.model = self.build_model()
        self.model.summary()

        # Prepare adjacency batch (same adj for all samples)
        A_batch = np.array([adj_matrix] * X_seq.shape[0])

        # Split (note: shuffle=False to respect time ordering)
        X_train, X_test, A_train_s, A_test_s, y_train, y_test = train_test_split(
            X_seq, A_batch, y_seq, test_size=0.2, shuffle=False
        )

        early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

        print("🔧 Training STGNN-like model...")
        self.model.fit(
            [X_train, A_train_s], y_train,
            epochs=epochs,
            batch_size=batch_size,
            validation_data=([X_test, A_test_s], y_test),
            callbacks=[early_stopping],
            verbose=1
        )

        print("✅ STGNN-like Model Trained.")
        self.is_trained = True
        return self

    # -------------------- Persistence --------------------
    def save_model(self):
        if self.is_trained and self.model is not None:
            os.makedirs(self.model_dir, exist_ok=True)
            print(f"💾 Saving model to {self.model_path} ...")
            try:
                # Keras native save
                self.model.save(self.model_path)
                # Save scaler
                joblib.dump(self.scaler, self.scaler_path)
                print("✅ Model and scaler saved.")
            except Exception as e:
                print(f"⚠ Error saving model: {e}")
        else:
            print("⚠ Cannot save, model is not trained or missing.")

    def load_model(self):
        try:
            if os.path.exists(self.model_path) and os.path.exists(self.scaler_path):
                print(f"Loading model from {self.model_path} ...")
                self.model = keras.models.load_model(self.model_path, compile=True)
                self.scaler = joblib.load(self.scaler_path)
                self.is_trained = True
                print("✅ Model and scaler loaded.")
            else:
                raise FileNotFoundError("Model or scaler file not found.")
        except Exception as e:
            print(f"ℹ No model files found or error loading: {e}. Please train a new model.")
            self.model = None
            self.scaler = None
            self.is_trained = False

    # -------------------- Prediction --------------------
    def predict(self, history_data_all_nodes=None):
        if not self.is_trained or self.model is None:
            raise ValueError("❌ Model not trained yet! Please run train_model() or load_model().")

        if (not isinstance(history_data_all_nodes, np.ndarray) or
            history_data_all_nodes.shape != (self.sequence_length, self.num_nodes, self.n_features)):
            raise ValueError(
                f"❌ 'history_data_all_nodes' must be a NumPy array with shape "
                f"({self.sequence_length}, {self.num_nodes}, {self.n_features})"
            )

        # Scale the input
        X_scaled_reshaped = self.scaler.transform(history_data_all_nodes.reshape(-1, self.n_features))
        X_scaled = X_scaled_reshaped.reshape(self.sequence_length, self.num_nodes, self.n_features)

        # Batch dimension
        X_batch = np.expand_dims(X_scaled, axis=0)  # (1, seq_len, nodes, features)
        A_batch = np.expand_dims(self.adjacency_matrix, axis=0)  # (1, nodes, nodes)

        probs = self.model.predict([X_batch, A_batch], verbose=0)[0]  # (nodes, 1)
        results = {}
        for n in range(self.num_nodes):
            prob = float(probs[n, 0])
            location_name = self.sensor_nodes[n]

            pred = "FLOOD WARNING" if prob > 0.5 else "NORMAL CONDITIONS"
            if prob >= 0.85:
                risk, rec = "🚨 CRITICAL", "IMMEDIATE EVACUATION may be required!"
            elif prob >= 0.65:
                risk, rec = "🔴 HIGH", "Prepare for evacuation."
            elif prob >= 0.45:
                risk, rec = "🟠 MEDIUM", "Monitor closely."
            elif prob >= 0.25:
                risk, rec = "🟡 LOW", "Stay alert."
            else:
                risk, rec = "🟢 SAFE", "Conditions normal."

            self.log_prediction(location_name, prob, risk, rec)
            results[location_name] = {
                "prediction": pred,
                "probability": round(prob, 3),
                "risk_level": risk,
                "recommendation": rec
            }
        return results

    # -------------------- Real-time simulation --------------------
    def simulate_real_time_feed(self, sim_hours=50):
        print("\n\n--- 🔴 STARTING REAL-TIME GRAPH (STGNN-like) SIMULATION ---")
        X_data, _, _ = self.generate_graph_dataset(n_hours=self.sequence_length + sim_hours)

        history = deque(maxlen=self.sequence_length)
        print(f"Bootstrapping model with first {self.sequence_length} hours of data...")
        for i in range(self.sequence_length):
            history.append(X_data[i])

        print("⚡ Simulation LIVE. Predicting hour-by-hour...\n")
        for i in range(self.sequence_length, self.sequence_length + sim_hours):
            new_graph_snapshot = X_data[i]  # (nodes, features)
            history.append(new_graph_snapshot)
            current_history_array = np.array(history)  # (seq_len, nodes, features)
            results = self.predict(history_data_all_nodes=current_history_array)

            print(f"--- HOUR {i} ---")
            for node_name, result in results.items():
                print(f"  📍 {node_name:<25} -> {result['risk_level']:<10} (Prob: {result['probability']})")
            print("-" * 60)

# -------------------- RUN DEMO --------------------
if __name__ == '__main__':
    predictor = GraphFloodPredictor()

    # Try to load a pre-trained model
    predictor.load_model()

    if not predictor.is_trained:
        print("\n--- No saved STGNN model found or couldn't load it; training new system... ---")
        predictor.train_model(epochs=15)  # shorten epochs for demo; raise for real training
        predictor.save_model()
        print("-----------------------------------------------------------\n")

    # Run a short real-time simulation (adjust sim_hours as needed)
    predictor.simulate_real_time_feed(sim_hours=10)



ℹ No model files found or error loading: Model or scaler file not found.. Please train a new model.

--- No saved STGNN model found or couldn't load it; training new system... ---
🧬 Generating 10000 hours of synthetic graph data for 3 nodes...
Simulating downstream hydrological flow...


AttributeError: 'int' object has no attribute 'fillna'

In [2]:
import os
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.layers import LSTM, Dense
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.optimizers import Adam

class GraphFloodPredictor:
    def __init__(self, model_dir="model_simple", sequence_length=72):
        self.model_dir = model_dir
        self.sequence_length = sequence_length
        self.model = None
        self.scaler = StandardScaler()
        self.is_trained = False

    def generate_dataset(self, n_hours=5000):
        print("🌧 Generating synthetic rainfall and water level data...")
        rng = np.random.default_rng(42)
        rainfall = rng.uniform(0, 30, n_hours)
        discharge = rainfall * 2 + rng.normal(0, 1, n_hours)
        water_level = 1.5 + discharge * 0.05 + rng.normal(0, 0.2, n_hours)

        data = np.stack([rainfall, discharge, water_level], axis=1)
        df = pd.DataFrame(data, columns=["rainfall", "discharge", "water_level"])
        flood_flag = (df["water_level"] > (df["water_level"].mean() + 2 * df["water_level"].std())).astype(int)

        return df.values, flood_flag.values

    def create_sequence_data(self, data, labels):
        X, y = [], []
        for i in range(len(data) - self.sequence_length):
            X.append(data[i:i+self.sequence_length])
            y.append(labels[i+self.sequence_length])
        return np.array(X), np.array(y)

    def build_model(self):
        model = Sequential([
            LSTM(32, return_sequences=True, input_shape=(self.sequence_length, 3)),
            LSTM(16),
            Dense(1, activation='sigmoid')
        ])
        model.compile(optimizer=Adam(0.001), loss='binary_crossentropy', metrics=['accuracy'])
        return model

    def train_model(self, epochs=8, batch_size=32):
        data, labels = self.generate_dataset()
        scaled_data = self.scaler.fit_transform(data)

        X, y = self.create_sequence_data(scaled_data, labels)

        self.model = self.build_model()

        es = EarlyStopping(monitor='loss', patience=2, restore_best_weights=True)
        print("🚀 Training model...")
        self.model.fit(X, y, epochs=epochs, batch_size=batch_size, verbose=1, callbacks=[es])
        self.is_trained = True
        print("✅ Training complete!")

    def save_model(self):
        os.makedirs(self.model_dir, exist_ok=True)
        self.model.save(os.path.join(self.model_dir, "simple_model.keras"))
        np.save(os.path.join(self.model_dir, "scaler.npy"), self.scaler.mean_)
        np.save(os.path.join(self.model_dir, "scale.npy"), self.scaler.scale_)
        print("📦 Model and scaler saved!")

    def load_model(self):
        try:
            self.model = load_model(os.path.join(self.model_dir, "simple_model.keras"))
            self.scaler.mean_ = np.load(os.path.join(self.model_dir, "scaler.npy"))
            self.scaler.scale_ = np.load(os.path.join(self.model_dir, "scale.npy"))
            self.is_trained = True
            print("✅ Model loaded successfully!")
        except:
            print("ℹ No saved model found. Will train a new one.")
            self.is_trained = False

    def predict_realtime(self, hours=50):
        print(f"\n📡 Simulating {hours} hours of real-time forecasting...")
        rng = np.random.default_rng()
        window = rng.uniform(0,1,(self.sequence_length,3))
        flood_alert = []

        for h in range(hours):
            sample = window.reshape(1,self.sequence_length,3)
            p = self.model.predict(sample, verbose=0)[0][0]
            flood_alert.append(int(p>0.5))
            new_data = rng.uniform(0,1,3)
            window = np.vstack([window[1:], new_data])

        print("🌊 Flood alerts:", flood_alert[-10:])
        print("✅ Real-time simulation done!")

# -------------------- RUN SCRIPT --------------------
predictor = GraphFloodPredictor()
predictor.load_model()

if not predictor.is_trained:
    print("\n--- Training new model... ---")
    predictor.train_model(epochs=8)
    predictor.save_model()

predictor.predict_realtime(hours=30)


ℹ No saved model found. Will train a new one.

--- Training new model... ---
🌧 Generating synthetic rainfall and water level data...

🚀 Training model...
Epoch 1/8


Epoch 2/8
Epoch 3/8
Epoch 4/8
Epoch 5/8
Epoch 6/8
✅ Training complete!
📦 Model and scaler saved!

📡 Simulating 30 hours of real-time forecasting...
🌊 Flood alerts: [0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
✅ Real-time simulation done!
