In [9]:
import pandas as pd
from statsmodels.stats.outliers_influence import variance_inflation_factor

# Load your dataset (replace 'your_data.csv' with the actual file path)
# Example for CSV; adjust if your data is in another format (e.g., Excel, SQL, etc.)
df = pd.read_csv(r"C:\Users\hp\Documents\code for internship 3rd year\CLEANED EEP DATASET 2017\data last\Amerti Neshi.csv")  # Update with your file path
# Print column names and their data types to diagnose issues
print("Column names and data types:")
print(df.dtypes)

# Ensure 'Date_GC' is parsed as datetime
df['Date_GC'] = pd.to_datetime(df['Date_GC'], errors='coerce')

# Columns to clean for numeric conversion
numeric_cols = ['U1_pr', 'U2_pr', 'Max_ALoad', 'Min_ALoad', 'Auxiliary', 'Daily discharge', 'Actuall energy']

# Clean numeric columns: apply .str.replace only to string columns
for col in numeric_cols:
    if df[col].dtype == 'object':  # Check if column is string (object)
        df[col] = pd.to_numeric(df[col].str.replace(',', ''), errors='coerce')
    else:
        # If already numeric, ensure it's float
        df[col] = pd.to_numeric(df[col], errors='coerce')

# Create lagged columns
df['PRECTOTCORR_lag110'] = df['PRECTOTCORR'].shift(110)  # Lag by 110 days
df['T2M_lag60'] = df['T2M'].shift(60)  # Lag by 60 days
df['ALLSKY_SFC_SW_DWN_lag90'] = df['ALLSKY_SFC_SW_DWN'].shift(90)  # Lag by 90 days
df['water_level_last_year'] = df['Water_Level'].shift(365)  # Lag by 365 days (1 year)

# Print column names to verify
print("\nColumns in DataFrame after cleaning and adding lagged columns:", df.columns.tolist())

# Define numerical columns for VIF calculation
numerical_columns = [
    'Water_Level', 'T2M', 'PRECTOTCORR', 'ALLSKY_SFC_SW_DWN', 
    'RH2M', 'WS2M', 'PRECTOTCORR_lag110', 'T2M_lag60', 
    'ALLSKY_SFC_SW_DWN_lag90', 'water_level_last_year',
    'U1_pr', 'U2_pr', 'Max_ALoad', 'Min_ALoad', 'Auxiliary', 
    'Daily discharge', 'Actuall energy'
]

# Check if all specified columns exist and are numeric
missing_columns = [col for col in numerical_columns if col not in df.columns]
if missing_columns:
    print(f"Error: The following columns are missing in the DataFrame: {missing_columns}")
    print("Please check the column names or update the 'numerical_columns' list.")
else:
    # Verify that all columns are numeric
    non_numeric_cols = [col for col in numerical_columns if not pd.api.types.is_numeric_dtype(df[col])]
    if non_numeric_cols:
        print(f"Error: The following columns are not numeric: {non_numeric_cols}")
    else:
        # Create a DataFrame with only numerical columns and drop rows with any missing values
        df_vif = df[numerical_columns].dropna()

        # Calculate VIF for each feature
        vif_data = pd.DataFrame()
        vif_data['Feature'] = numerical_columns
        vif_data['VIF'] = [variance_inflation_factor(df_vif.values, i) for i in range(df_vif.shape[1])]

        # Display VIF results
        print("\nVIF Results:")
        print(vif_data)

        # Interpretation of VIF values
        print("\nVIF Interpretation:")
        print("VIF < 5: Low multicollinearity")
        print("5 ≤ VIF < 10: Moderate multicollinearity")
        print("VIF ≥ 10: High multicollinearity")

Column names and data types:
Date_GC               object
Date_EC               object
U1_pr                 object
U2_pr                 object
Max_ALoad            float64
Min_ALoad            float64
Auxiliary            float64
Water_Level          float64
T2M                  float64
PRECTOTCORR          float64
Daily discharge       object
Actuall energy       float64
ALLSKY_SFC_SW_DWN    float64
RH2M                 float64
WS2M                 float64
dtype: object

Columns in DataFrame after cleaning and adding lagged columns: ['Date_GC', 'Date_EC', 'U1_pr', 'U2_pr', 'Max_ALoad', 'Min_ALoad', 'Auxiliary', 'Water_Level', 'T2M', 'PRECTOTCORR', 'Daily discharge', 'Actuall energy', 'ALLSKY_SFC_SW_DWN', 'RH2M', 'WS2M', 'PRECTOTCORR_lag110', 'T2M_lag60', 'ALLSKY_SFC_SW_DWN_lag90', 'water_level_last_year']

VIF Results:
                    Feature           VIF
0               Water_Level  7.317582e+05
1                       T2M  1.540211e+02
2               PRECTOTCORR  2.284075e+0

In [60]:
import pandas as pd
import numpy as np
import os

# --- Data Source Configurations ---
POWER_PLANT_DATA = {
    
    'Amerti': {
        'path': r"C:\Users\hp\Documents\code for internship 3rd year\CLEANED EEP DATASET 2017\data last\Amerti Neshi.csv",
        'target_vars': ['water_level', 'total-pr', 'max_load', 'min_load'],
        'date_col': 'Date_GC',
        'feature_cols': ['T2M', 'PRECTOTCORR', 'RH2M', 'PS', 'WS10M', 
                         'water level_last_year', 'total-pr_last_year', 
                         'max load_last_year', 'min load_last_year', 
                         'monthly_evap', 'seasonal_humidity']
    }
    # Add more plants if needed
}

# --- Data Preprocessing ---
def preprocess_data(df, date_col):
    if df.empty or date_col not in df.columns:
        return df
    df[date_col] = pd.to_datetime(df[date_col], errors='coerce')
    df = df.dropna(subset=[date_col]).sort_values(date_col)
    for col in df.columns:
        if col != date_col:
            if pd.api.types.is_numeric_dtype(df[col]):
                df[col] = df[col].fillna(df[col].median())
            else:
                df[col] = df[col].fillna(df[col].mode()[0] if not df[col].mode().empty else 'Unknown')
    if len(df) < 2:
        return pd.DataFrame()
    return df

def load_default_data(power_plant):
    if power_plant not in POWER_PLANT_DATA:
        return pd.DataFrame(), None, f"Power plant {power_plant} not found."
    config = POWER_PLANT_DATA[power_plant]
    file_path, date_col = config['path'], config['date_col']
    if not os.path.exists(file_path):
        return pd.DataFrame(), None, f"File not found for {power_plant} at {file_path}."
    try:
        df = pd.read_csv(file_path)
        df_filtered = df[df['Power_Plant_Name'] == power_plant].copy() if 'Power_Plant_Name' in df.columns else df.copy()
        df_filtered = preprocess_data(df_filtered, date_col)
        if df_filtered.empty:
            return pd.DataFrame(), None, f"No valid data after preprocessing for {power_plant}."
        return df_filtered, date_col, None
    except Exception as e:
        return pd.DataFrame(), None, f"Error loading data for {power_plant}: {e}"

# --- Function to Analyze Numeric Relationships Only ---
def analyze_variable_relationships(power_plant='Gibe 1'):
    # Load data
    df, date_col, error = load_default_data(power_plant)
    if df.empty:
        return f"Error: {error}"
    
    # Identify Water_Level column (case-insensitive)
    water_level_col = next((col for col in df.columns 
                            if 'water_level' in col.lower() and pd.api.types.is_numeric_dtype(df[col])), None)
    if not water_level_col:
        return f"No water level column found in {power_plant} dataset."
    
    # Add lag features if enough rows
    if len(df) > 110:
        t2m_cols = [col for col in df.columns if 't2m' in col.lower() and pd.api.types.is_numeric_dtype(df[col])]
        if t2m_cols:
            df['T2M_lag60'] = df[t2m_cols[0]].shift(60)
        precip_cols = [col for col in df.columns if 'prectotcorr' in col.lower() and pd.api.types.is_numeric_dtype(df[col])]
        if precip_cols:
            df['PRECTOTCORR_lag110'] = df[precip_cols[0]].shift(110)
        sw_cols = [col for col in df.columns if 'allsky_sfc_sw_dwn' in col.lower() and pd.api.types.is_numeric_dtype(df[col])]
        if sw_cols:
            df['ALLSKY_SFC_SW_DWN60'] = df[sw_cols[0]].shift(90)
    
    # Get numeric columns (excluding date + water level itself)
    numeric_cols = [col for col in df.columns if pd.api.types.is_numeric_dtype(df[col]) and col not in [date_col, water_level_col]]
    if not numeric_cols:
        return f"No numeric columns available to analyze relationships with {water_level_col}."
    
    # Compute correlations
    correlations = df[numeric_cols + [water_level_col]].corr()[water_level_col].drop(water_level_col)
    corr_df = pd.DataFrame({
        'Variable': correlations.index,
        'Correlation_with_Water_Level': correlations.values
    }).sort_values(by='Correlation_with_Water_Level', ascending=False)
    
    return corr_df

# --- Run Example ---
if __name__ == '__main__':
    results = analyze_variable_relationships('Amerti')
    if isinstance(results, str):
        print(results)
    else:
        print("\nCorrelation with Water_Level:")
        print(results.to_string(index=False))



Correlation with Water_Level:
           Variable  Correlation_with_Water_Level
          Min_ALoad                      0.454066
 PRECTOTCORR_lag110                      0.302771
     Actuall energy                      0.277016
          Max_ALoad                      0.141454
  ALLSKY_SFC_SW_DWN                      0.045445
               RH2M                      0.012227
                T2M                     -0.056987
               WS2M                     -0.062228
          Auxiliary                     -0.066924
        PRECTOTCORR                     -0.135443
          T2M_lag60                     -0.230205
ALLSKY_SFC_SW_DWN60                     -0.334184


In [1]:

from jupyter_dash import JupyterDash
import dash
from dash import dcc, html, Input, Output, State, dash_table
import dash_bootstrap_components as dbc
import pandas as pd
import numpy as np
import plotly.graph_objects as go
import plotly.express as px
from xgboost import XGBRegressor, XGBClassifier
from sklearn.linear_model import LinearRegression, LogisticRegression
from sklearn.svm import SVR, SVC
from sklearn.neighbors import KNeighborsRegressor, KNeighborsClassifier
from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier
from sklearn.tree import DecisionTreeRegressor, DecisionTreeClassifier
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error
from sklearn.metrics import accuracy_score, f1_score
from sklearn.model_selection import train_test_split
from datetime import datetime
import warnings
import base64
import io
import json
import os
from reportlab.lib.pagesizes import letter
from reportlab.lib.styles import getSampleStyleSheet
from reportlab.platypus import SimpleDocTemplate, Paragraph, Table, TableStyle, Spacer
from reportlab.lib import colors

warnings.filterwarnings('ignore')

# --- AI Model Placeholders ---
def get_gemini_response(prompt, context_data=None):
    return "AI response functionality is currently a placeholder."

def get_chatgpt_response(prompt, context_data=None):
    return "AI response functionality is currently a placeholder."

AI_MODELS = {
    'google_gemini': {'name': 'Google Gemini', 'function': get_gemini_response},
    'chat_gpt': {'name': 'ChatGPT (OpenAI)', 'function': get_chatgpt_response}
}

def query_ai_model(prompt, model_id='google_gemini', context_data=None):
    return AI_MODELS.get(model_id, {}).get('function', lambda p, c: "Invalid AI model selected.")(prompt, context_data)

# --- Data Source Configurations ---
POWER_PLANT_DATA = {
    'Gibe 1': {
        'path': r"C:\Users\hp\Documents\code for internship 3rd year\CLEANED EEP DATASET 2017\data last\Gibe1.csv",
        'target_vars': ['Water_Level', 'Total_pr', 'Max_ALoad', 'Min_ALoad'],
        'date_col': 'Date_GC',
        'feature_cols': ['T2M', 'PRECTOTCORR', 'RH2M', 'PS', 'WS10M', 'Water_Level_last_year', 'Total_pr_last_year', 'Max_ALoad_last_year', 'Min_ALoad_last_year', 'monthly_precip_avg', 'seasonal_temp_avg']
    },
    'Amerti': {
        'path': r"C:\Users\hp\Documents\code for internship 3rd year\CLEANED EEP DATASET 2017\data last\Amerti Neshi.csv",
        'target_vars': ['water_level', 'total-pr', 'max_load', 'min_load'],
        'date_col': 'Date_GC',
        'feature_cols': ['T2M', 'PRECTOTCORR', 'RH2M', 'PS', 'WS10M', 'water level_last_year', 'total-pr_last_year', 'max load_last_year', 'min load_last_year', 'monthly_evap', 'seasonal_humidity']
    },
    'GERD': {
        'path': r"C:\Users\hp\Documents\code for internship 3rd year\CLEANED EEP DATASET 2017\data last\GERD.csv",
        'target_vars': ['water_level', 'total-pr', 'max_load', 'min_load'],
        'date_col': 'Date_GC',
        'feature_cols': ['T2M', 'PRECTOTCORR', 'RH2M', 'PS', 'WS10M', 'water_level_last_year', 'total-pr_last_year', 'max_load_last_year', 'min_load_last_year', 'solar_radiation', 'cloud_cover_index']
    },
    'Gibe3': {
        'path': r"C:\Users\hp\Documents\code for internship 3rd year\CLEANED EEP DATASET 2017\data last\Gibe3.csv",
        'target_vars': ['Water_Level', 'total-pr', 'max load', 'min load'],
        'date_col': 'Date_GC',
        'feature_cols': ['T2M', 'PRECTOTCORR', 'RH2M', 'PS', 'WS10M', 'water level_last_year', 'total-pr_last_year', 'max load_last_year', 'min load_last_year', 'dew_point', 'evapotranspiration']
    },
    'Finchaa': {
        'path': r"C:\Users\hp\Documents\code for internship 3rd year\CLEANED EEP DATASET 2017\data last\fincha.csv",
        'target_vars': ['Water_Level', 'total-pr', 'max load', 'min load'],
        'date_col': 'Date_GC',
        'feature_cols': ['T2M', 'PRECTOTCORR', 'RH2M', 'PS', 'WS10M', 'water level_last_year', 'total-pr_last_year', 'max load_last_year', 'min load_last_year', 'temp_range', 'rain_days_count']
    },
    'Gibe 2': {
        'path': r"C:\Users\hp\Documents\code for internship 3rd year\CLEANED EEP DATASET 2017\data last\Gibe2.csv",
        'target_vars': ['water_level', 'total-pr', 'max_load', 'min_load'],
        'date_col': 'Date_GC',
        'feature_cols': ['T2M', 'PRECTOTCORR', 'RH2M', 'PS', 'WS10M', 'water level_last_year', 'total-pr_last_year', 'max load_last_year', 'min load_last_year', 'wind_direction', 'soil_moisture']
    },
    'Koka': {
        'path': r"C:\Users\hp\Documents\code for internship 3rd year\CLEANED EEP DATASET 2017\data last\Koka Plant.csv",
        'target_vars': ['water_level', 'total-pr', 'max_load', 'min_load'],
        'date_col': 'Date_GC',
        'feature_cols': ['T2M', 'PRECTOTCORR', 'RH2M', 'PS', 'WS10M', 'water level_last_year', 'total-pr_last_year', 'max load_last_year', 'min load_last_year', 'humidity_index', 'pressure_trend']
    },
    'Tana Beles': {
        'path': r"C:\Users\hp\Documents\code for internship 3rd year\CLEANED EEP DATASET 2017\data last\Tana_Beles.csv",
        'target_vars': ['water_level', 'total-pr', 'max load', 'min load'],
        'date_col': 'Date_GC',
        'feature_cols': ['T2M', 'PRECTOTCORR', 'RH2M', 'PS', 'WS10M', 'water level_last_year', 'total-pr_last_year', 'max load_last_year', 'min load_last_year', 'reservoir_inflow', 'upstream_level']
    },
    'Tekeze': {
        'path': r"C:\Users\hp\Documents\code for internship 3rd year\CLEANED EEP DATASET 2017\data last\Tekeze.csv",
        'target_vars': ['water_level', 'total-pr', 'max load', 'min load'],
        'date_col': 'Date_GC',
        'feature_cols': ['T2M', 'PRECTOTCORR', 'RH2M', 'PS', 'WS10M', 'water level_last_year', 'total-pr_last_year', 'max load_last_year', 'min_load_last_year', 'snow_melt_rate', 'glacier_index']
    },
    'Melka Wakena': {
        'path': r"C:\Users\hp\Documents\code for internship 3rd year\CLEANED EEP DATASET 2017\data last\wakena.csv",
        'target_vars': ['water_level', 'total-pr', 'max_load', 'min_load'],
        'date_col': 'Date_GC',
        'feature_cols': ['T2M', 'PRECTOTCORR', 'RH2M', 'PS', 'WS10M', 'water level_last_year', 'total-pr_last_year', 'max load_last_year', 'min load_last_year', 'sediment_level', 'turbidity_index']
    },
    'Awash2': {
        'path': r"C:\Users\hp\Documents\code for internship 3rd year\CLEANED EEP DATASET 2017\data last\Awash2.csv",
        'target_vars': ['water_level', 'total-pr', 'max load', 'min load'],
        'date_col': 'Date_GC',
        'feature_cols': ['T2M', 'PRECTOTCORR', 'RH2M', 'PS', 'WS10M', 'water level_last_year', 'total-pr_last_year', 'max load_last_year', 'min load_last_year', 'sediment_level', 'turbidity_index']
    },
    'Awash3': {
        'path': r"C:\Users\hp\Documents\code for internship 3rd year\CLEANED EEP DATASET 2017\data last\Awash3.csv",
        'target_vars': ['water_level', 'total-pr', 'max load', 'min load'],
        'date_col': 'Date_GC',
        'feature_cols': ['T2M', 'PRECTOTCORR', 'RH2M', 'PS', 'WS10M', 'water level_last_year', 'total-pr_last_year', 'max load_last_year', 'min load_last_year', 'sediment_level', 'turbidity_index']
    },
    'Genale': {
        'path': r"C:\Users\hp\Documents\code for internship 3rd year\CLEANED EEP DATASET 2017\data last\Genale .csv",
        'target_vars': ['water_level', 'total-pr', 'max load', 'min load'],
        'date_col': 'Date_GC',
        'feature_cols': ['T2M', 'PRECTOTCORR', 'RH2M', 'PS', 'WS2M', 'water level_last_year', 'total-pr_last_year', 'max load_last_year', 'min load_last_year', 'sediment_level', 'turbidity_index']
    }
}

DATE_COL_PREFIXES = ['Date', 'Record_Date', 'Time', 'Date GC', 'Date EC']
FEATURE_COL_PREFIXES = ['T2M', 'PRECTOT', 'Temp', 'Rain', 'Wind', 'Humidity', 'Pressure']
EXCLUDE_COLS = ['ID', 'Index', 'Key', 'Power_Plant_Name']
ALLOWED_TARGET_KEYWORDS = ['level', 'pr', 'energy', 'discharge', 'auxiliary', 'load', 'u1', 'u2', 'u3', 'u4', 'u5', 'u6']

data_cache = {
    'default': {'source': 'default', 'preprocessed_dfs': {}, 'power_plants': list(POWER_PLANT_DATA.keys()), 'date_col': None, 'target_col': None, 'task_type': None},
    'uploaded': {'source': None, 'type': None, 'df': None, 'preprocessed_dfs': {}, 'power_plants': [], 'date_col': None, 'target_col': None, 'task_type': None, 'pending_file': None, 'pending_filename': None}
}

# --- Data Processing Functions ---
def preprocess_data(df, date_col):
    if df.empty or date_col not in df.columns:
        return df
    df[date_col] = pd.to_datetime(df[date_col], errors='coerce')
    df = df.dropna(subset=[date_col]).sort_values(date_col)
    
    # Handle duplicate dates by aggregating numeric columns with mean
    if df[date_col].duplicated().any():
        df = df.groupby(date_col).mean(numeric_only=True).reset_index()
    
    for col in df.columns:
        if col != date_col:
            if pd.api.types.is_numeric_dtype(df[col]):
                df[col] = df[col].fillna(df[col].median())
            else:
                df[col] = df[col].fillna(df[col].mode()[0] if not df[col].mode().empty else 'Unknown')
    
    if len(df) < 2:
        return pd.DataFrame()
    
    # Reset index to ensure uniqueness
    df = df.reset_index(drop=True)
    return df

def load_default_data(power_plant):
    if power_plant not in POWER_PLANT_DATA:
        return pd.DataFrame(), None, f"Power plant {power_plant} not found."
    config = POWER_PLANT_DATA[power_plant]
    file_path, date_col = config['path'], config['date_col']
    if not os.path.exists(file_path):
        return pd.DataFrame(), None, f"File not found for {power_plant} at {file_path}."
    try:
        if power_plant in data_cache['default']['preprocessed_dfs']:
            return data_cache['default']['preprocessed_dfs'][power_plant], date_col, None
        df = pd.read_csv(file_path)
        df_filtered = df[df['Power_Plant_Name'] == power_plant].copy() if 'Power_Plant_Name' in df.columns else df.copy()
        df_filtered = preprocess_data(df_filtered, date_col)
        if df_filtered.empty:
            return pd.DataFrame(), None, f"No valid data after preprocessing for {power_plant}."
        data_cache['default']['preprocessed_dfs'][power_plant] = df_filtered
        data_cache['default']['date_col'] = date_col
        return df_filtered, date_col, None
    except Exception as e:
        return pd.DataFrame(), None, f"Error loading data for {power_plant}: {e}"

def parse_uploaded_file(contents, filename, power_plant):
    content_type, content_string = contents.split(',')
    decoded = base64.b64decode(content_string)
    try:
        if filename.endswith('.csv'):
            df = pd.read_csv(io.StringIO(decoded.decode('utf-8')))
        elif filename.endswith('.parquet'):
            df = pd.read_parquet(io.BytesIO(decoded))
        elif filename.endswith(('.xlsx', '.xls')):
            df = pd.read_excel(io.BytesIO(decoded))
        elif filename.endswith('.json'):
            df = pd.read_json(io.StringIO(decoded.decode('utf-8')))
        else:
            return None, None, [], None, f"Unsupported file format: {filename}."
        power_plants = df['Power_Plant_Name'].unique().tolist() if 'Power_Plant_Name' in df.columns else [power_plant or 'Uploaded Data']
        date_col = next((col for col in df.columns if any(p.lower() in col.lower() for p in DATE_COL_PREFIXES)), None)
        if not date_col:
            return None, None, [], None, "No valid date column found."
        df = preprocess_data(df, date_col)
        if df.empty:
            return None, None, [], None, "No valid data after preprocessing."
        for plant in power_plants:
            data_cache['uploaded']['preprocessed_dfs'][plant] = df[df['Power_Plant_Name'] == plant].copy() if 'Power_Plant_Name' in df.columns else df.copy()
        return filename.split('.')[-1], df, power_plants, date_col, None
    except Exception as e:
        return None, None, [], None, f"Error processing {filename}: {e}"

def load_plant_data(power_plant, source='default'):
    cache = data_cache[source]
    if power_plant in cache['preprocessed_dfs']:
        return cache['preprocessed_dfs'][power_plant], cache['date_col'], None
    if source == 'default':
        return load_default_data(power_plant)
    return pd.DataFrame(), None, f"No data available for {power_plant}."

def get_available_columns(df, date_col, power_plant, source='default'):
    if df.empty or date_col is None:
        return [], [], None, None
    
    # Ensure unique index before processing
    if df.index.duplicated().any():
        df = df.reset_index(drop=True)
    
    # Initialize feature columns
    if source == 'default' and power_plant in POWER_PLANT_DATA:
        # Start with predefined feature columns, excluding PRECTOTCORR, T2M, and ALLSKY_SFC_SW_DWN
        feature_cols = [col for col in POWER_PLANT_DATA[power_plant]['feature_cols'] 
                        if col in df.columns 
                        and pd.api.types.is_numeric_dtype(df[col]) 
                        and col not in ['PRECTOTCORR', 'T2M', 'ALLSKY_SFC_SW_DWN']]
    else:
        # Identify feature columns based on prefixes, excluding PRECTOTCORR, T2M, and ALLSKY_SFC_SW_DWN
        feature_cols = [col for col in df.columns 
                        if any(p.lower() in col.lower() for p in FEATURE_COL_PREFIXES) 
                        and pd.api.types.is_numeric_dtype(df[col]) 
                        and col not in ['PRECTOTCORR', 'T2M', 'ALLSKY_SFC_SW_DWN']]
    
    # Add lag features if columns exist and data is sufficient
    if len(df) > 110:  # Ensure enough data for the longest lag (110 days)
        # Add 110-day lag for PRECTOTCORR
        precip_cols = [col for col in df.columns if 'prectotcorr' in col.lower() and pd.api.types.is_numeric_dtype(df[col])]
        if precip_cols:
            df['PRECTOTCORR_lag110'] = df[precip_cols[0]].shift(110)
            if pd.api.types.is_numeric_dtype(df['PRECTOTCORR_lag110']):
                feature_cols.append('PRECTOTCORR_lag110')
        
        # Add 60-day lag for T2M
        if len(df) > 60:  # Check for T2M lag
            t2m_cols = [col for col in df.columns if 't2m' in col.lower() and pd.api.types.is_numeric_dtype(df[col])]
            if t2m_cols:
                df['T2M_lag60'] = df[t2m_cols[0]].shift(60)
                if pd.api.types.is_numeric_dtype(df['T2M_lag60']):
                    feature_cols.append('T2M_lag60')
        
        # Add 90-day lag for ALLSKY_SFC_SW_DWN
        if len(df) > 90:  # Check for ALLSKY lag
            allsky_cols = [col for col in df.columns if 'allsky_sfc_sw_dwn' in col.lower() and pd.api.types.is_numeric_dtype(df[col])]
            if allsky_cols:
                df['ALLSKY_SFC_SW_DWN_lag90'] = df[allsky_cols[0]].shift(90)
                if pd.api.types.is_numeric_dtype(df['ALLSKY_SFC_SW_DWN_lag90']):
                    feature_cols.append('ALLSKY_SFC_SW_DWN_lag90')
    
    # Add water_level_last_year if water_level or similar column exists
    water_level_cols = [col for col in df.columns if 'water_level' in col.lower() and pd.api.types.is_numeric_dtype(df[col])]
    if water_level_cols and len(df) > 365:  # Ensure enough data for yearly lag
        water_level_col = water_level_cols[0]  # Use the first matching water level column
        df['water_level_last_year'] = df[water_level_col].shift(365)
        if pd.api.types.is_numeric_dtype(df['water_level_last_year']):
            feature_cols.append('water_level_last_year')
    
    # Identify potential target columns
    potential_targets = [col for col in df.columns if col not in [date_col] + feature_cols + EXCLUDE_COLS and 'date ec' not in col.lower() and any(k in col.lower() for k in ALLOWED_TARGET_KEYWORDS)]
    numeric_targets = [col for col in potential_targets if pd.api.types.is_numeric_dtype(df[col])]
    categorical_targets = [col for col in potential_targets if not pd.api.types.is_numeric_dtype(df[col])]
    target_options = numeric_targets + categorical_targets
    
    # Select default target column
    if source == 'default' and power_plant in POWER_PLANT_DATA:
        target_cols = [col for col in POWER_PLANT_DATA[power_plant]['target_vars'] if col in target_options]
        target_col = target_cols[0] if target_cols else (numeric_targets[0] if numeric_targets else (categorical_targets[0] if categorical_targets else None))
    else:
        target_col = numeric_targets[0] if numeric_targets else (categorical_targets[0] if categorical_targets else None)
    
    # Determine task type
    task_type = 'regression' if target_col in numeric_targets else ('classification' if target_col in categorical_targets else None)
    
    return feature_cols, target_options, target_col, task_type

def create_features(df_input, date_col):
    df = df_input.copy()
    # Ensure unique index before feature creation
    if df.index.duplicated().any():
        df = df.reset_index(drop=True)
    df['year'] = df[date_col].dt.year
    df['month'] = df[date_col].dt.month
    df['day'] = df[date_col].dt.day
    df['dayofyear'] = df[date_col].dt.dayofyear
    df['weekday'] = df[date_col].dt.weekday
    return df

def evaluate_regression_models(X_train, X_test, y_train, y_test):
    models = {
        'Decision Tree': DecisionTreeRegressor(max_depth=5, random_state=42),
        'SVR': SVR(kernel='rbf', C=1.0),
        'KNN': KNeighborsRegressor(n_neighbors=5),
        'XGBoost': XGBRegressor(n_estimators=30, learning_rate=0.1, max_depth=5, n_jobs=-1, random_state=42),
        'Random Forest': RandomForestRegressor(n_estimators=30, max_depth=10, n_jobs=-1, random_state=42),
        'Linear Regression': LinearRegression(),
    }
    results = []
    for name, model in models.items():
        try:
            model.fit(X_train, y_train)
            y_train_pred, y_test_pred = model.predict(X_train), model.predict(X_test)
            results.append({
                'Model': name,
                'R2_Train': r2_score(y_train, y_train_pred),
                'R2_Test': r2_score(y_test, y_test_pred),
                'MAE_Train': mean_absolute_error(y_train, y_train_pred),
                'MAE_Test': mean_absolute_error(y_test, y_test_pred),
                'RMSE_Train': np.sqrt(mean_squared_error(y_train, y_train_pred)),
                'RMSE_Test': np.sqrt(mean_squared_error(y_test, y_test_pred)),
                '_model_obj': model,
                '_y_test_pred': y_test_pred,
            })
        except Exception as e:
            print(f"Model {name} failed: {e}")
    if not results:
        return (None, None), pd.DataFrame(), []
    results_df = pd.DataFrame(results).sort_values(by='R2_Test', ascending=False).reset_index(drop=True)
    best_model_row = results_df.iloc[0]
    best_model = (best_model_row['Model'], best_model_row['_model_obj'])
    y_test_predictions = best_model_row['_y_test_pred']
    return best_model, results_df.drop(columns=['_model_obj', '_y_test_pred']), y_test_predictions

def evaluate_classification_models(X_train, X_test, y_train, y_test):
    models = {
        'Logistic Regression': LogisticRegression(max_iter=1000, n_jobs=-1),
        'Decision Tree': DecisionTreeClassifier(max_depth=5, random_state=42),
        'Random Forest': RandomForestClassifier(n_estimators=30, max_depth=10, n_jobs=-1, random_state=42),
        'SVC': SVC(probability=True, random_state=42),
        'KNN': KNeighborsClassifier(n_neighbors=5, n_jobs=-1),
        'XGBoost': XGBClassifier(n_estimators=30, learning_rate=0.1, max_depth=5, n_jobs=-1, use_label_encoder=False, eval_metric='mlogloss', random_state=42),
    }
    results = []
    for name, model in models.items():
        try:
            model.fit(X_train, y_train)
            y_train_pred, y_test_pred = model.predict(X_train), model.predict(X_test)
            results.append({
                'Model': name,
                'Accuracy_Train': accuracy_score(y_train, y_train_pred),
                'Accuracy_Test': accuracy_score(y_test, y_test_pred),
                'F1_Train': f1_score(y_train, y_train_pred, average='weighted', zero_division=0),
                'F1_Test': f1_score(y_test, y_test_pred, average='weighted', zero_division=0),
                '_model_obj': model,
            })
        except Exception as e:
            print(f"Classification model {name} failed: {e}")
    if not results:
        return (None, None), pd.DataFrame()
    results_df = pd.DataFrame(results).sort_values(by='F1_Test', ascending=False).reset_index(drop=True)
    best_model_row = results_df.iloc[0]
    best_model = (best_model_row['Model'], best_model_row['_model_obj'])
    return best_model, results_df.drop(columns=['_model_obj'])

# --- Dash App Setup ---
app = JupyterDash(__name__, external_stylesheets=[dbc.themes.BOOTSTRAP, 'https://cdnjs.cloudflare.com/ajax/libs/font-awesome/5.15.4/css/all.min.css'])
app.title = "Ethiopian Power Plant Forecast Dashboard"

app.layout = html.Div([
    dcc.Store(id='intermediate-data-store'),
    dbc.Modal([
        dbc.ModalHeader("High Deviation Forecast Alerts"),
        dbc.ModalBody(id='notification-body'),
        dbc.ModalFooter(dbc.Button("Close", id="close-notification-modal", className="ml-auto")),
    ], id="notification-modal", is_open=False, size="xl"),
    
    html.Div([
        html.H2("📊 Ethiopian Electric Power Plant Forecast", style={'textAlign': 'left', 'fontWeight': 'bold', 'display': 'inline-block'}),
        html.Div([
            html.Label("Upload Dataset", style={'fontWeight': 'bold', 'color': 'red'}),
            dcc.Upload(
                id='upload-data',
                children=html.Button('Upload File', style={'padding': '5px', 'backgroundColor': '#007BFF', 'color': 'white', 'borderRadius': '5px'}),
                multiple=False, accept='.csv,.xlsx,.xls,.json,.parquet'
            ),
            html.Div(id='upload-status', style={'marginTop': '5px', 'color': 'green'}),
        ], style={'display': 'inline-block', 'marginLeft': 'auto', 'marginRight': '20px', 'textAlign': 'right'}),
        html.Div([
            dbc.Button(id='notification-icon', n_clicks=0, color="secondary", className="position-relative", style={'fontSize': '1.2rem', 'padding': '5px 10px'}),
        ], style={'display': 'inline-block', 'verticalAlign': 'top', 'paddingTop': '5px'})
    ], style={'display': 'flex', 'alignItems': 'center', 'padding': '20px'}),
    
    html.Div([
        html.Div([html.Label("Select Power Plant:", style={'fontWeight': 'bold'}), dcc.Dropdown(id='power-plant-selector', options=[{'label': p, 'value': p} for p in POWER_PLANT_DATA.keys()], value='Gibe 1', clearable=False)], style={'width': '24%', 'display': 'inline-block', 'marginRight': '1%'}),
        html.Div([html.Label("Select Target Variable:", style={'fontWeight': 'bold'}), dcc.Dropdown(id='target-selector', placeholder="Select a target variable")], style={'width': '24%', 'display': 'inline-block', 'marginRight': '1%'}),
        html.Div([html.Label("Train-Test Split:", style={'fontWeight': 'bold'}), dcc.Dropdown(id='split-ratio-selector', options=[{'label': f'{i}% Train', 'value': i/100} for i in range(60, 91, 10)], value=0.7, clearable=False)], style={'width': '24%', 'display': 'inline-block', 'marginRight': '1%'}),
        html.Div([html.Label("Forecast Period:", style={'fontWeight': 'bold'}), dcc.Dropdown(id='horizon-period-selector', options=[{'label': 'Daily', 'value': 'daily'}, {'label': 'Weekly', 'value': 'weekly'}, {'label': 'Monthly', 'value': 'monthly'}, {'label': 'Yearly', 'value': 'yearly'}], value='monthly', clearable=False), html.Div(id='daily-options', children=[dcc.Dropdown(id='day-of-month-selector', placeholder='Select Day(s) of Month', options=[{'label': str(d), 'value': d} for d in range(1, 32)], multi=True)], style={'display': 'none', 'marginTop': '5px'}), html.Div(id='month-year-options', children=[dcc.Dropdown(id='month-selector', placeholder='Select Months', options=[{'label': datetime(2000, i, 1).strftime('%B'), 'value': i} for i in range(1, 13)], multi=True)], style={'display': 'none', 'marginTop': '5px'}), html.Div(id='year-options', children=[dcc.Dropdown(id='year-selector', placeholder='Select Year', options=[{'label': str(y), 'value': y} for y in range(2025, 2031)], value=2025)], style={'marginTop': '5px'})], style={'width': '24%', 'display': 'inline-block'})
    ], style={'padding': '20px', 'display': 'flex', 'justifyContent': 'space-between', 'alignItems': 'flex-start'}),
    
    html.Hr(),
    html.H3("Action Center", className="text-center my-4"),
    dbc.Row([dbc.Col(dcc.Dropdown(id='action-selector', options=[
        {'label': '💬 Ask Any Questions (AI Assistant)', 'value': 'ask_questions'},
        {'label': '🛠️ Data Preprocessing', 'value': 'data_preprocessing'},
        {'label': '🔍 Feature Selection', 'value': 'feature_selection'},
        {'label': '📈 Data Visualization/Analysis', 'value': 'data_visualization'},
        {'label': '📄 Report Generating', 'value': 'report_generating'},
        {'label': '📄 Recommendation', 'value': 'recommendation'},
    ], placeholder="Select an action to perform..."), width=12)]),
    dbc.Card(dbc.CardBody(id='action-output'), className="mt-4"),
    
    dcc.Graph(id='forecast-graph'),
    
    html.Div([
        html.Button("Download Forecast (CSV)", id='download-btn', style={'padding': '10px 20px', 'backgroundColor': '#28A745', 'color': 'white', 'borderRadius': '5px', 'marginRight': '10px'}),
        dcc.Download(id="download-forecast"),
        html.Button("View Forecast Data", id='view-btn', style={'padding': '10px 20px', 'backgroundColor': '#007BFF', 'color': 'white', 'borderRadius': '5px', 'marginRight': '10px'}),
        html.Button("Download Report (PDF)", id='download-pdf-btn', style={'padding': '10px 20px', 'backgroundColor': '#FF5733', 'color': 'white', 'borderRadius': '5px'}),
        dcc.Download(id="download-pdf"),
    ], style={'padding': '10px', 'textAlign': 'center'}),
    
    html.Div(id='view-data-container', children=[
        html.Div(id='view-data-display', style={'marginTop': '10px', 'padding': '20px', 'overflowX': 'auto', 'backgroundColor': '#F8F9FA', 'borderRadius': '8px', 'boxShadow': '2px 2px 8px rgba(0,0,0,0.1)'}),
        html.Button("Close View", id='close-view-btn', style={'padding': '5px 10px', 'backgroundColor': '#DC3545', 'color': 'white', 'borderRadius': '5px', 'marginTop': '10px'})
    ], style={'display': 'none', 'padding': '20px'}),

    html.Div(id='model-metrics', style={'padding': '20px'}),
    html.Div([
        html.Label("Raw Dataset Inspector:", style={'fontWeight': 'bold', 'padding': '5px', 'backgroundColor': '#007BFF', 'color': 'white', 'borderRadius': '5px'}),
        dcc.Dropdown(id='data-display-selector', options=[{'label': 'Full Dataset', 'value': 'full'}, {'label': 'First 5 Rows', 'value': 'head'}, {'label': 'Last 5 Rows', 'value': 'tail'}, {'label': 'Info', 'value': 'info'}, {'label': 'Describe', 'value': 'describe'}], value='head', clearable=False)
    ], style={'padding': '10px', 'width': '30%', 'margin': '0 auto'}),
    html.Div(id='data-display', style={'padding': '20px', 'overflowX': 'auto', 'backgroundColor': '#F8F9FA', 'borderRadius': '8px', 'boxShadow': '2px 2px 8px rgba(0,0,0,0.1)'})
])

# --- Notification and Badge Callbacks ---
@app.callback(
    Output('notification-modal', 'is_open'),
    [Input('notification-icon', 'n_clicks'), Input('close-notification-modal', 'n_clicks')],
    [State('notification-modal', 'is_open')],
    prevent_initial_call=True
)
def toggle_notification_modal(n1, n2, is_open):
    ctx = dash.callback_context
    if not ctx.triggered:
        return is_open
    button_id = ctx.triggered[0]['prop_id'].split('.')[0]
    if button_id in ['notification-icon', 'close-notification-modal']:
        return not is_open
    return is_open

@app.callback(
    Output('notification-body', 'children'),
    Input('notification-modal', 'is_open'),
    State('intermediate-data-store', 'data')
)
def update_notification_body(is_open, stored_data):
    if not is_open or not stored_data or 'anomaly_details' not in stored_data or not stored_data['anomaly_details']:
        return html.P("No high-deviation alerts to display.")
    notifications = stored_data['anomaly_details']
    df = pd.DataFrame(notifications)
    df.rename(columns={'gc_date_actual': 'GC Date of Actual', 'actual_data': 'Actual Data', 'forecasted_data': 'Forecasted Data', 'error': 'Error', 'gc_date_forecast': 'GC Date of Forecasted Data'}, inplace=True)
    return dash_table.DataTable(data=df.to_dict('records'), columns=[{'name': i, 'id': i} for i in df.columns], page_size=10, style_cell={'textAlign': 'left'}, style_header={'fontWeight': 'bold', 'backgroundColor': '#f8f9fa'}, style_data_conditional=[{'if': {'row_index': 'odd'}, 'backgroundColor': 'rgb(240, 240, 240)'}])

@app.callback(
    Output('notification-icon', 'children'),
    Input('intermediate-data-store', 'data')
)
def update_notification_badge(stored_data):
    base_icon = html.I(className="fas fa-bell")
    if not stored_data or 'anomaly_details' not in stored_data or not stored_data['anomaly_details']:
        return base_icon
    count = len(stored_data['anomaly_details'])
    if count == 0:
        return base_icon
    return html.Span([base_icon, dbc.Badge(f"{count}", color="danger", pill=True, className="position-absolute top-0 start-100 translate-middle")])

# --- File Upload Callback ---
@app.callback(
    Output('power-plant-selector', 'options'),
    Output('power-plant-selector', 'value'),
    Output('upload-status', 'children'),
    Input('upload-data', 'contents'),
    State('upload-data', 'filename'),
    State('power-plant-selector', 'value')
)
def handle_file_upload(contents, filename, current_plant):
    if not contents:
        return [{'label': p, 'value': p} for p in POWER_PLANT_DATA.keys()], current_plant or 'Gibe 1', None
    file_type, df, power_plants, date_col, error = parse_uploaded_file(contents, filename, None)
    if error:
        return [{'label': p, 'value': p} for p in POWER_PLANT_DATA.keys()], current_plant or 'Gibe 1', html.P(error, style={'color': 'red'})
    data_cache['uploaded']['source'] = filename
    data_cache['uploaded']['type'] = file_type
    data_cache['uploaded']['df'] = df
    data_cache['uploaded']['power_plants'] = power_plants
    data_cache['uploaded']['date_col'] = date_col
    options = [{'label': p, 'value': p} for p in POWER_PLANT_DATA.keys()] + [{'label': p, 'value': f'uploaded_{p}'} for p in power_plants]
    return options, power_plants[0] if power_plants else current_plant, html.P(f"Uploaded {filename} successfully!", style={'color': 'green'})

# --- Main Forecasting Callback ---
@app.callback(
    Output('forecast-graph', 'figure'),
    Output('model-metrics', 'children'),
    Output('intermediate-data-store', 'data'),
    Input('power-plant-selector', 'value'),
    Input('target-selector', 'value'),
    Input('split-ratio-selector', 'value'),
    Input('horizon-period-selector', 'value'),
    Input('year-selector', 'value'),
    State('month-selector', 'value'),
    State('day-of-month-selector', 'value')
)
def update_forecast(power_plant, target, split, horizon, year, months, days):
    if not all([power_plant, target, year]):
        return go.Figure(), "Please select all required options.", {}

    source = 'uploaded' if power_plant.startswith('uploaded_') else 'default'
    actual_plant = power_plant.replace('uploaded_', '')
    df, date_col, err = load_plant_data(actual_plant, source)
    if df.empty:
        return go.Figure(), f"Error: {err}", {}

    try:
        # Ensure unique index before processing
        if df.index.duplicated().any():
            df = df.reset_index(drop=True)

        if pd.api.types.is_object_dtype(df[target]):
            df[target] = pd.to_numeric(df[target].astype(str).str.replace(',', ''), errors='coerce')
        df.dropna(subset=[target], inplace=True)
        
        features, _, _, task = get_available_columns(df, date_col, actual_plant, source)
        if not features:
            return go.Figure(), "No valid feature columns found.", {}
        df_model = create_features(df[[date_col] + features + [target]].dropna(), date_col)
        if len(df_model) < 20:
            return go.Figure(), "Insufficient data to train model.", {}
        
        ext_features = features + ['year', 'month', 'day', 'dayofyear', 'weekday']
        train, test = train_test_split(df_model, train_size=split, shuffle=False)
        X_train, X_test = train[ext_features], test[ext_features]
        y_train_raw, y_test_raw = train[target], test[target]

        fig, metrics_display, model_name, forecast_df, notifications = go.Figure(), html.Div(), "N/A", pd.DataFrame(), []
        metrics_data_for_pdf = []

        if task == 'classification':
            le = LabelEncoder()
            y_train, y_test = le.fit_transform(y_train_raw), le.transform(y_test_raw)
            best, results_df = evaluate_classification_models(X_train, X_test, y_train, y_test)
            if not best or best[1] is None or results_df.empty:
                return go.Figure(), "Classification models failed.", {}
            model_name, model = best
            best_model_name = results_df.iloc[0]['Model']

            metrics_data = []
            for i, row in results_df.iterrows():
                metrics_data.append({
                    "Model": row['Model'],
                    "Accuracy_Train": f"{row['Accuracy_Train']:.4f}", "Accuracy_Test": f"{row['Accuracy_Test']:.4f}",
                    "F1_Train": f"{row['F1_Train']:.4f}", "F1_Test": f"{row['F1_Test']:.4f}",
                    "Level": i + 1, "Best": '★' if row['Model'] == best_model_name else ''
                })
            metrics_data_for_pdf = metrics_data

            metrics_display = html.Div([
                html.H4(f"Model Performance (Best: {best_model_name})"),
                dash_table.DataTable(
                    columns=[
                        {"name": ["", "Model"], "id": "Model"},
                        {"name": ["Accuracy", "Train"], "id": "Accuracy_Train"}, {"name": ["Accuracy", "Test"], "id": "Accuracy_Test"},
                        {"name": ["F1 Score", "Train"], "id": "F1_Train"}, {"name": ["F1 Score", "Test"], "id": "F1_Test"},
                        {"name": ["", "Level"], "id": "Level"}, {"name": ["", "Best"], "id": "Best"}
                    ],
                    data=metrics_data, style_cell={'textAlign': 'center'},
                    style_header={'fontWeight': 'bold'}, merge_duplicate_headers=True,
                    style_data_conditional=[{'if': {'filter_query': '{Best} = "★"'}, 'backgroundColor': 'rgba(0, 255, 0, 0.2)'}]
                )
            ])
        else: # Regression
            best, results_df, y_test_pred = evaluate_regression_models(X_train, X_test, y_train_raw, y_test_raw)
            if not best or best[1] is None or results_df.empty:
                return go.Figure(), "Regression models failed.", {}
            model_name, model = best
            best_model_name = results_df.iloc[0]['Model']

            metrics_data = []
            for i, row in results_df.iterrows():
                metrics_data.append({
                    "Model": row['Model'],
                    "R2_Train": f"{row['R2_Train']:.4f}", "R2_Test": f"{row['R2_Test']:.4f}",
                    "MAE_Train": f"{row['MAE_Train']:.4f}", "MAE_Test": f"{row['MAE_Test']:.4f}",
                    "RMSE_Train": f"{row['RMSE_Train']:.4f}", "RMSE_Test": f"{row['RMSE_Test']:.4f}",
                    "Level": i + 1, "Best": '★' if row['Model'] == best_model_name else ''
                })
            metrics_data_for_pdf = metrics_data
            
            metrics_display = html.Div([
                html.H4(f"Model Performance (Best: {best_model_name})"),
                dash_table.DataTable(
                    columns=[
                        {"name": ["", "Model"], "id": "Model"},
                        {"name": ["R²", "Train"], "id": "R2_Train"}, {"name": ["R²", "Test"], "id": "R2_Test"},
                        {"name": ["MAE", "Train"], "id": "MAE_Train"}, {"name": ["MAE", "Test"], "id": "MAE_Test"},
                        {"name": ["RMSE", "Train"], "id": "RMSE_Train"}, {"name": ["RMSE", "Test"], "id": "RMSE_Test"},
                        {"name": ["", "Level"], "id": "Level"}, {"name": ["", "Best"], "id": "Best"}
                    ],
                    data=metrics_data, style_cell={'textAlign': 'center'},
                    style_header={'fontWeight': 'bold'}, merge_duplicate_headers=True,
                    style_data_conditional=[{'if': {'filter_query': '{Best} = "★"'}, 'backgroundColor': 'rgba(0, 255, 0, 0.2)'}]
                )
            ])

        future_dates = pd.to_datetime([])
        if horizon == 'yearly':
            future_dates = pd.date_range(start=f"{year}-01-01", end=f"{year}-12-31", freq='D')
        elif months and year:
            all_dates = pd.date_range(start=f"{year}-01-01", end=f"{year}-12-31", freq='D')
            future_dates = all_dates[all_dates.month.isin(months)]
            if horizon == 'daily' and days:
                future_dates = future_dates[future_dates.day.isin(days)]
        
        if not future_dates.empty:
            future_df_features = create_features(pd.DataFrame({date_col: future_dates}), date_col)
            for col in features:
                future_df_features[col] = df_model[col].median()
            future_pred = model.predict(future_df_features[ext_features])
            if task == 'classification':
                future_pred = le.inverse_transform(future_pred)
            forecast_df = pd.DataFrame({'Date': future_dates, f'Forecast_{target}': future_pred})
            if task == 'regression':
                actual_val = df_model[target].median()
                for idx, row in forecast_df.iterrows():
                    forecast_val = row[f'Forecast_{target}']
                    error = abs(actual_val - forecast_val)
                    if error > 5:
                        notifications.append({
                            'gc_date_actual': "Historical Median",
                            'actual_data': f"{actual_val:.2f}",
                            'forecasted_data': f"{forecast_val:.2f}",
                            'error': f"{error:.2f}",
                            'gc_date_forecast': row['Date'].strftime('%Y-%m-%d')
                        })

        if task == 'regression':
            fig.add_trace(go.Scatter(x=train[date_col], y=y_train_raw, name='Training Data', mode='lines', line={'color': 'blue'}))
            fig.add_trace(go.Scatter(x=test[date_col], y=y_test_raw, name='Test Data (Actual)', mode='lines', line={'color': 'green'}))
            fig.add_trace(go.Scatter(x=test[date_col], y=y_test_pred, name='Test Data (Predicted)', mode='lines', line={'color': 'orange', 'dash': 'dot'}))
            if not forecast_df.empty:
                fig.add_trace(go.Scatter(x=forecast_df['Date'], y=forecast_df[f'Forecast_{target}'], name='Forecast', mode='lines', line={'color': 'red', 'dash': 'dash'}))
        else:
            classes = le.classes_
            fig.add_trace(go.Bar(x=classes, y=y_train_raw.value_counts().reindex(classes, fill_value=0), name='Training Data'))
            fig.add_trace(go.Bar(x=classes, y=y_test_raw.value_counts().reindex(classes, fill_value=0), name='Test Data'))
            if not forecast_df.empty:
                fig.add_trace(go.Bar(x=classes, y=pd.Series(future_pred).value_counts().reindex(classes, fill_value=0), name='Forecast'))

        fig.update_layout(title=f'<b>{actual_plant} - {target} Forecast</b>', template='plotly_white')
        stored_data = {
            'forecast': forecast_df.to_json(orient='split', date_format='iso'),
            'metrics': json.dumps(metrics_data_for_pdf),
            'historical': test[[date_col, target]].to_json(orient='split', date_format='iso'),
            'best_model': best_model_name,
            'task_type': task,
            'anomaly_details': notifications
        }
        return fig, metrics_display, stored_data

    except Exception as e:
        return go.Figure(), f"An error occurred: {e}", {}

# --- Action Center Callback ---
@app.callback(
    Output('action-output', 'children'),
    Input('action-selector', 'value'),
    State('power-plant-selector', 'value'),
    State('target-selector', 'value')  # Ensure target variable is captured
)
def render_action_content(action, power_plant, target_var):
    if not action:
        return html.P("Please select an action from the dropdown above.", className="text-center text-muted")
    
    if action == 'ask_questions':
        return html.Div([
            html.H5("💬 AI Assistant", className="mb-3"),
            dbc.InputGroup([
                dbc.Input(id='question-input', placeholder='Ask about the data, models, or forecasts...'),
                dbc.Button("Send ➤", id='send-question-btn', color='primary')
            ]),
            dcc.Loading(html.Div(id='ai-response-area', className="mt-3 p-3 border rounded bg-light"))
        ])
    
    if action == 'data_preprocessing':
        if not power_plant:
            return html.P("Please select a power plant first.", className="text-muted")
        
        source = 'uploaded' if power_plant.startswith('uploaded_') else 'default'
        actual_plant = power_plant.replace('uploaded_', '')
        df, date_col, _ = load_plant_data(actual_plant, source)
        if df.empty:
            return html.P(f"No data available for {actual_plant}.", className="text-muted")
        
        # Get available target variables
        _, target_options, _, _ = get_available_columns(df, date_col, actual_plant, source)
        if not target_options:
            return html.P("No valid target variables found for preprocessing.", className="text-muted")
        
        return html.Div([
            html.H5(f"🛠️ Data Preprocessing for {actual_plant}", className="mb-3"),
            html.P(f"Preprocess the data for {actual_plant}. Select a target variable and preprocessing options below."),
            # Add target variable dropdown for preprocessing
            dbc.Row([
                dbc.Col(dbc.Label("Select Target Variable:"), width=4),
                dbc.Col(dcc.Dropdown(
                    id='preprocess-target-selector',
                    options=[{'label': col, 'value': col} for col in target_options],
                    value=target_var if target_var in target_options else target_options[0] if target_options else None,
                    placeholder="Select a target variable",
                    clearable=False
                ), width=8)
            ], className="mb-3"),
            dbc.Row([
                dbc.Col(dbc.Label("Handle Missing Values:"), width=4),
                dbc.Col(dcc.Dropdown(
                    id='missing-value-strat',
                    options=[
                        {'label': 'Drop Missing Values', 'value': 'drop'},
                        {'label': 'Impute with Mean', 'value': 'mean'},
                        {'label': 'Impute with Median', 'value': 'median'}
                    ],
                    value='median',
                    placeholder="Select a strategy"
                ), width=8)
            ], className="mb-2"),
            dbc.Label("Select Additional Preprocessing Steps:"),
            dbc.Checklist(
                options=[
                    {"label": "Remove Duplicate Rows", "value": "remove_duplicates"},
                    {"label": "Handle Outliers (IQR Method)", "value": "handle_outliers"}
                ],
                value=[], id="preprocess-options", inline=True, className="mb-3"
            ),
            dbc.Button("Run Preprocessing", id="run-preprocess-btn", color="primary", className="mt-3 w-100"),
            dcc.Loading(html.Div(id="preprocess-output-display", className="mt-4"))
        ])
    
    if action == 'feature_selection':
        return html.Div([
            html.H5("🔍 Feature Selection Analysis", className="mb-3"),
            html.P("Analyze the features of the currently selected power plant data to identify the most relevant and remove redundant ones."),
            dbc.Checklist(
                options=[{"label": "Target variables", "value": "target_variables"}, {"label": "Label variables", "value": "label_variables"}],
                id="feature-options", inline=True, className="mb-3"
            ),
            dbc.Button("Analyze Features", id="run-feature-select-btn", color="primary", className="w-100"),
            dcc.Loading(html.Div(id="feature-select-output", className="mt-4"))
        ])
    
    if action == 'data_visualization':
        if not power_plant:
            return html.P("Please select a power plant first.")
        source = 'uploaded' if power_plant.startswith('uploaded_') else 'default'
        actual_plant = power_plant.replace('uploaded_', '')
        df, date_col, _ = load_plant_data(actual_plant, source)
        if df.empty:
            return html.P("No data available for visualization.")
        numeric_cols = df.select_dtypes(include=np.number).columns.tolist()
        return html.Div([
            html.H5("📈 Exploratory Data Analysis", className="mb-3"),
            dbc.Row([
                dbc.Col(dbc.Label("Plot Type:"), width=2),
                dbc.Col(dcc.Dropdown(
                    id='plot-type-selector',
                    options=[
                        {"label": "Histogram", "value": "Histogram"},
                        {"label": "Scatter Plot", "value": "Scatter Plot"},
                        {"label": "Correlation Heatmap", "value": "Correlation Heatmap"}
                    ],
                    value='Histogram'
                ), width=10)
            ]),
            html.Div(id='plot-controls', children=[
                dcc.Dropdown(id='hist-col', options=[{'label': col, 'value': col} for col in numeric_cols],
                             placeholder="Select column", value=numeric_cols[0] if numeric_cols else None, style={'marginTop': '10px'}),
                html.Div([
                    dcc.Dropdown(id='scatter-x', options=[{'label': col, 'value': col} for col in numeric_cols],
                                 placeholder="X-axis", value=numeric_cols[0] if len(numeric_cols) > 0 else None),
                    dcc.Dropdown(id='scatter-y', options=[{'label': col, 'value': col} for col in numeric_cols],
                                 placeholder="Y-axis", value=numeric_cols[1] if len(numeric_cols) > 1 else None),
                ], style={'display': 'none'}, id='scatter-controls')
            ]),
            dcc.Loading(dcc.Graph(id='eda-plot'))
        ])
    
    if action == 'report_generating':
        return html.Div([
            html.H5("📄 Report Generator", className="mb-3"),
            html.P("Generate a summary report for the selected power plant including recent data and forecasts."),
            dbc.Row([
                dbc.Col(dbc.Label("Report Period:"), width=3),
                dbc.Col(dcc.Dropdown(id='report-period', options=['Monthly', 'Quarterly', 'Yearly'], value='Monthly'), width=9),
            ], className="mb-2"),
            dbc.Row([
                dbc.Col(dbc.Label("File format:"), width=3),
                dbc.Col(dcc.Dropdown(id='file-format', options=['pdf'], value='pdf'), width=9),
            ], className="mb-2"),
            dbc.Button("Generate Report", id="generate-report-btn", color="primary", className="w-100 mt-3"),
            html.Div(id='report-status', className="mt-3")
        ])
    
    if action == 'recommendation':
        return html.Div([
            html.H5("📄 Recommendations", className="mb-3"),
            html.P("Automated recommendations based on forecast trends will appear here.")
        ])
    
    return html.P(f"Content for {action} will be loaded here.")

# --- AI and Preprocessing Callbacks ---
@app.callback(
    Output('ai-response-area', 'children'),
    Input('send-question-btn', 'n_clicks'),
    [State('question-input', 'value'), State('power-plant-selector', 'value'), State('target-selector', 'value'), State('intermediate-data-store', 'data')]
)
def answer_question(n_clicks, question, power_plant, target_var, model_results):
    if not n_clicks or not question:
        return "Please type a question and click 'Send'."
    source = 'uploaded' if power_plant.startswith('uploaded_') else 'default'
    actual_plant = power_plant.replace('uploaded_', '')
    df, _, _ = load_plant_data(actual_plant, source)
    context = {
        'question': question,
        'power_plant': actual_plant,
        'target_var': target_var,
        'model_results': model_results or {},
        'df_head': df.head().to_string() if not df.empty else "No data.",
        'df_describe': df.describe().to_string() if not df.empty else "No data."
    }
    ai_answer = query_ai_model(question, 'google_gemini', context)
    return dcc.Markdown(ai_answer, dangerously_allow_html=True)

@app.callback(
    Output('preprocess-output-display', 'children'),
    Input('run-preprocess-btn', 'n_clicks'),
    [
        State('power-plant-selector', 'value'),
        State('preprocess-target-selector', 'value'),  # Updated to use preprocess-target-selector
        State('missing-value-strat', 'value'),
        State('preprocess-options', 'value')
    ]
)
def run_preprocessing(n_clicks, power_plant, target_var, missing_val_strat, options):
    if not n_clicks or not power_plant or not target_var:
        return html.P("Please select a power plant, target variable, and click the button to start.", className="text-muted")
    
    try:
        source = 'uploaded' if power_plant.startswith('uploaded_') else 'default'
        actual_plant = power_plant.replace('uploaded_', '')
        df, date_col, _ = load_plant_data(actual_plant, source)
        if df.empty:
            return html.P(f"No data available for {actual_plant}.", className="text-muted")
        
        processing_log = [f"Original shape: {df.shape}"]
        rows_before = len(df)
        
        # Handle missing values
        if missing_val_strat == 'drop':
            df = df.dropna(subset=[target_var])
            processing_log.append(f"Dropped rows with missing values in {target_var}. New shape: {df.shape}")
        elif missing_val_strat in ['mean', 'median']:
            if pd.api.types.is_numeric_dtype(df[target_var]):
                impute_value = df[target_var].mean() if missing_val_strat == 'mean' else df[target_var].median()
                df[target_var] = df[target_var].fillna(impute_value)
                processing_log.append(f"Imputed missing values in {target_var} with {missing_val_strat}: {impute_value:.2f}")
            else:
                return html.P(f"Cannot impute {target_var} with {missing_val_strat} as it is not numeric.", className="text-danger")
        
        # Handle duplicate rows
        if 'remove_duplicates' in options:
            df = df.drop_duplicates()
            processing_log.append(f"Removed {rows_before - len(df)} duplicate rows. New shape: {df.shape}")
        
        # Handle outliers using IQR method
        if 'handle_outliers' in options and pd.api.types.is_numeric_dtype(df[target_var]):
            Q1 = df[target_var].quantile(0.25)
            Q3 = df[target_var].quantile(0.75)
            IQR = Q3 - Q1
            lower_bound = Q1 - 1.5 * IQR
            upper_bound = Q3 + 1.5 * IQR
            rows_before_outliers = len(df)
            df = df[(df[target_var] >= lower_bound) & (df[target_var] <= upper_bound)]
            processing_log.append(f"Removed {rows_before_outliers - len(df)} outliers in {target_var}. New shape: {df.shape}")
        
        # Update cache
        data_cache[source]['preprocessed_dfs'][actual_plant] = df
        
        return html.Div([
            dbc.Alert(f"Preprocessing Complete for {actual_plant} - {target_var}!", color="success"),
            html.P(f"Rows before preprocessing: {rows_before}"),
            html.P(f"Rows after preprocessing: {len(df)}"),
            html.Pre('\n'.join(processing_log)),
            dash_table.DataTable(
                data=df.head().to_dict('records'),
                columns=[{'name': i, 'id': i} for i in df.columns],
                page_size=10,
                style_table={'overflowX': 'auto'}
            )
        ])
    except Exception as e:
        return dbc.Alert(f"An error occurred during preprocessing: {e}", color="danger")

# --- Feature Selection and Visualization Callbacks ---
@app.callback(
    Output('feature-select-output', 'children'),
    Input('run-feature-select-btn', 'n_clicks'),
    [State('power-plant-selector', 'value'), State('target-selector', 'value')]
)
def run_feature_selection(n_clicks, power_plant, target_col):
    if not n_clicks or not power_plant or not target_col:
        return html.P("Please select a power plant and target variable, then click Analyze.", className="text-muted")
    source = 'uploaded' if power_plant.startswith('uploaded_') else 'default'
    actual_plant = power_plant.replace('uploaded_', '')
    df, date_col, _ = load_plant_data(actual_plant, source)
    if df.empty:
        return dbc.Alert("No data available to analyze.", color="warning")
    feature_cols, _, _, task_type = get_available_columns(df, date_col, actual_plant, source)
    if not feature_cols:
        return dbc.Alert("No feature columns found for analysis.", color="warning")
    df_model = df[feature_cols + [target_col]].dropna()
    X, y = df_model[feature_cols], df_model[target_col]
    corr_matrix = X.corr().abs()
    upper_tri = corr_matrix.where(np.triu(np.ones(corr_matrix.shape), k=1).astype(bool))
    redundant_features = [col for col in upper_tri.columns if any(upper_tri[col] > 0.9)]
    model = XGBRegressor(objective='reg:squarederror', n_estimators=30, max_depth=5, n_jobs=-1) if task_type == 'regression' else XGBClassifier(use_label_encoder=False, eval_metric='logloss', n_estimators=30, max_depth=5, n_jobs=-1)
    if task_type == 'classification':
        y = LabelEncoder().fit_transform(y)
    model.fit(X, y)
    importances = pd.Series(model.feature_importances_, index=X.columns).sort_values(ascending=False)
    return html.Div([
        dbc.Row([
            dbc.Col([html.H6("Feature Importance (from XGBoost)"), dbc.Table.from_dataframe(importances.reset_index().rename(columns={'index': 'Feature', 0: 'Importance'}).head(10), striped=True)], width=6),
            dbc.Col([html.H6("Potentially Redundant Features (Correlation > 0.9)"), dbc.ListGroup([dbc.ListGroupItem(f) for f in redundant_features] if redundant_features else [dbc.ListGroupItem("None found.")])], width=6)
        ])
    ])

@app.callback(
    Output('plot-controls', 'children'),
    Input('plot-type-selector', 'value'),
    State('power-plant-selector', 'value')
)
def update_plot_controls(plot_type, power_plant):
    if not power_plant:
        return None
    source = 'uploaded' if power_plant.startswith('uploaded_') else 'default'
    actual_plant = power_plant.replace('uploaded_', '')
    df, _, _ = load_plant_data(actual_plant, source)
    if df.empty:
        return None
    numeric_cols = df.select_dtypes(include=np.number).columns.tolist()
    if plot_type == 'Histogram':
        return dcc.Dropdown(id='hist-col', options=[{'label': col, 'value': col} for col in numeric_cols], value=numeric_cols[0] if numeric_cols else None, placeholder="Select column")
    if plot_type == 'Scatter Plot':
        return [
            dcc.Dropdown(id='scatter-x', options=[{'label': col, 'value': col} for col in numeric_cols], value=numeric_cols[0] if len(numeric_cols) > 0 else None, placeholder="X-axis"),
            dcc.Dropdown(id='scatter-y', options=[{'label': col, 'value': col} for col in numeric_cols], value=numeric_cols[1] if len(numeric_cols) > 1 else None, placeholder="Y-axis")
        ]
    return None

@app.callback(
    Output('eda-plot', 'figure'),
    [Input('plot-type-selector', 'value'), Input('hist-col', 'value'), Input('scatter-x', 'value'), Input('scatter-y', 'value')],
    [State('power-plant-selector', 'value')],
    prevent_initial_call=True
)
def update_eda_plot(plot_type, hist_col, scatter_x, scatter_y, power_plant):
    try:
        if not power_plant:
            return go.Figure()
        source = 'uploaded' if power_plant.startswith('uploaded_') else 'default'
        actual_plant = power_plant.replace('uploaded_', '')
        df, _, _ = load_plant_data(actual_plant, source)
        if df.empty:
            return go.Figure()
        if plot_type == 'Histogram' and hist_col:
            return px.histogram(df, x=hist_col, title=f'Distribution of {hist_col}')
        if plot_type == 'Scatter Plot' and scatter_x and scatter_y:
            return px.scatter(df, x=scatter_x, y=scatter_y, title=f'{scatter_x} vs. {scatter_y}')
        if plot_type == 'Correlation Heatmap':
            numeric_df = df.select_dtypes(include=np.number)
            if numeric_df.empty:
                return go.Figure()
            return px.imshow(numeric_df.corr(), text_auto=True, title='Feature Correlation Heatmap')
        return go.Figure()
    except Exception as e:
        print(f"Error in update_eda_plot: {e}")
        return go.Figure()

# --- UI and Download Callbacks ---
@app.callback(
    Output('target-selector', 'options'),
    Output('target-selector', 'value'),
    Input('power-plant-selector', 'value')
)
def update_target_dropdown(pp):
    if not pp:
        return [], None
    s = 'uploaded' if pp.startswith('uploaded_') else 'default'
    ap = pp.replace('uploaded_', '')
    df, dc, _ = load_plant_data(ap, s)
    if df.empty:
        return [], None
    _, to, tc, _ = get_available_columns(df, dc, ap, s)
    return [{'label': c, 'value': c} for c in to], tc

@app.callback(
    Output('daily-options', 'style'),
    Output('month-year-options', 'style'),
    Input('horizon-period-selector', 'value')
)
def toggle_horizon_options(p):
    ds = {'display': 'block' if p == 'daily' else 'none', 'marginTop': '5px'}
    ms = {'display': 'block' if p in ['daily', 'weekly', 'monthly'] else 'none', 'marginTop': '5px'}
    return ds, ms

@app.callback(
    Output('view-data-container', 'style'),
    Output('view-data-display', 'children'),
    Input('view-btn', 'n_clicks'),
    Input('close-view-btn', 'n_clicks'),
    State('view-data-container', 'style'),
    State('intermediate-data-store', 'data'),
    prevent_initial_call=True
)
def toggle_and_display_forecast_data(view_clicks, close_clicks, current_style, stored_data):
    ctx = dash.callback_context
    triggered_id = ctx.triggered[0]['prop_id'].split('.')[0]
    if triggered_id == 'view-btn' and view_clicks and current_style.get('display') == 'none':
        if not stored_data or 'forecast' not in stored_data:
            return {'display': 'block', 'padding': '20px'}, html.P("Forecast data not available.")
        try:
            df = pd.read_json(stored_data['forecast'], orient='split')
            df['Date'] = pd.to_datetime(df['Date']).dt.strftime('%Y-%m-%d')
            children = dash_table.DataTable(data=df.to_dict('records'), columns=[{'name': i, 'id': i} for i in df.columns], page_size=10, style_table={'overflowX': 'auto'})
            return {'display': 'block', 'padding': '20px'}, children
        except Exception as e:
            return {'display': 'block', 'padding': '20px'}, html.P(f"Could not display data. Error: {e}")
    if triggered_id == 'close-view-btn' and close_clicks:
        return {'display': 'none'}, None
    return current_style, dash.no_update

@app.callback(
    Output('data-display', 'children'),
    Input('power-plant-selector', 'value'),
    Input('data-display-selector', 'value')
)
def update_data_display(pp, o):
    if not pp:
        return "Select a plant."
    s = 'uploaded' if pp.startswith('uploaded_') else 'default'
    ap = pp.replace('uploaded_', '')
    df, _, e = load_plant_data(ap, s)
    if df.empty:
        return f"Error: {e}"
    t = f"Inspector for {ap}"
    if o == 'head':
        return html.Div([html.H5(t), dash_table.DataTable(data=df.head().to_dict('records'), columns=[{'name': i, 'id': i} for i in df.columns])])
    if o == 'tail':
        return html.Div([html.H5(t), dash_table.DataTable(data=df.tail().to_dict('records'), columns=[{'name': i, 'id': i} for i in df.columns])])
    if o == 'info':
        b = io.StringIO()
        df.info(buf=b)
        return html.Div([html.H5(t), html.Pre(b.getvalue())])
    if o == 'describe':
        return html.Div([html.H5(t), dash_table.DataTable(data=df.describe().reset_index().to_dict('records'), columns=[{'name': i, 'id': i} for i in df.describe().reset_index().columns])])
    if o == 'full':
        df_c = df.copy()
        for col in df_c.select_dtypes(include=['datetime64[ns]']).columns:
            df_c[col] = df_c[col].dt.strftime('%Y-%m-%d %H:%M:%S')
        return html.Div([html.H5(t), dash_table.DataTable(data=df_c.to_dict('records'), columns=[{'name': i, 'id': i} for i in df_c.columns], page_size=15)])
    return None

@app.callback(
    Output("download-forecast", "data"),
    Input("download-btn", "n_clicks"),
    State('intermediate-data-store', 'data'),
    State('power-plant-selector', 'value'),
    State('target-selector', 'value'),
    prevent_initial_call=True
)
def download_csv(n, d, pp, t):
    if not d or 'forecast' not in d:
        return None
    df = pd.read_json(d['forecast'], orient='split')
    return dcc.send_data_frame(df.to_csv, f"{pp.replace('uploaded_','')}_{t}_forecast.csv", index=False)

@app.callback(
    Output("download-pdf", "data"),
    Input("download-pdf-btn", "n_clicks"),
    State('intermediate-data-store', 'data'),
    State('power-plant-selector', 'value'),
    State('target-selector', 'value'),
    prevent_initial_call=True
)
def download_analysis_pdf(n_clicks, stored_data, power_plant, target):
    if not n_clicks or not stored_data or 'forecast' not in stored_data:
        return None
    try:
        forecast_df = pd.read_json(stored_data['forecast'], orient='split')
        forecast_df['Date'] = pd.to_datetime(forecast_df['Date']).dt.strftime('%Y-%m-%d')
        metrics_data = json.loads(stored_data['metrics'])
        historical_df = pd.read_json(stored_data['historical'], orient='split')
        historical_df.columns = ['Date', 'Actual']
        historical_df['Date'] = pd.to_datetime(historical_df['Date']).dt.strftime('%Y-%m-%d')
        best_model_name = stored_data['best_model']
        task_type = stored_data['task_type']

        pdf_buffer = io.BytesIO()
        doc = SimpleDocTemplate(pdf_buffer, pagesize=letter)
        styles = getSampleStyleSheet()
        elements = []

        elements.append(Paragraph(f"Analysis & Decision-Making Report", styles['h1']))
        elements.append(Paragraph(f"<b>Power Plant:</b> {power_plant.replace('uploaded_','')} | <b>Target:</b> {target}", styles['h2']))
        elements.append(Spacer(1, 12))
        elements.append(Paragraph(f"<b>Date Generated:</b> {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}", styles['Normal']))
        elements.append(Paragraph(f"<b>Best Performing Model:</b> {best_model_name}", styles['Normal']))
        elements.append(Spacer(1, 24))

        elements.append(Paragraph("Model Performance Metrics", styles['h3']))
        if metrics_data:
            headers = list(metrics_data[0].keys())
            table_data = [headers] + [[str(row.get(col, '')) for col in headers] for row in metrics_data]
            style = TableStyle([
                ('BACKGROUND', (0, 0), (-1, 0), colors.darkblue), ('TEXTCOLOR', (0, 0), (-1, 0), colors.whitesmoke),
                ('ALIGN', (0, 0), (-1, -1), 'CENTER'), ('FONTNAME', (0, 0), (-1, 0), 'Helvetica-Bold'),
                ('BOTTOMPADDING', (0, 0), (-1, 0), 12), ('BACKGROUND', (0, 1), (-1, -1), colors.beige),
                ('GRID', (0, 0), (-1, -1), 1, colors.black)
            ])
            for i, row in enumerate(metrics_data):
                if row.get('Best') == '★':
                    style.add('BACKGROUND', (0, i + 1), (-1, i + 1), colors.lightgreen)
            metrics_table = Table(table_data)
            metrics_table.setStyle(style)
            elements.append(metrics_table)
        elements.append(Spacer(1, 24))

        elements.append(Paragraph("Forecast Data Summary", styles['h3']))
        forecast_table_data = [forecast_df.columns.tolist()] + forecast_df.head().values.tolist()
        forecast_table = Table(forecast_table_data)
        forecast_table.setStyle(TableStyle([
            ('BACKGROUND', (0, 0), (-1, 0), colors.grey), ('TEXTCOLOR', (0, 0), (-1, 0), colors.whitesmoke),
            ('ALIGN', (0, 0), (-1, -1), 'CENTER'), ('FONTNAME', (0, 0), (-1, 0), 'Helvetica-Bold'),
            ('GRID', (0, 0), (-1, -1), 1, colors.black),
        ]))
        elements.append(forecast_table)
        elements.append(Paragraph(f"(Showing first 5 of {len(forecast_df)} forecasted points)", styles['Italic']))
        elements.append(Spacer(1, 24))

        elements.append(Paragraph("Automated Recommendations", styles['h3']))
        if task_type == 'regression':
            forecast_values = forecast_df[f'Forecast_{target}'].values
            slope, _ = np.polyfit(np.arange(len(forecast_values)), forecast_values, 1)
            if slope > 0.05:
                elements.append(Paragraph("• <b>Uptrend Detected:</b> Forecast shows an increasing trend. Plan for higher levels/loads.", styles['Normal']))
            elif slope < -0.05:
                elements.append(Paragraph("• <b>Downtrend Detected:</b> Forecast indicates a decreasing trend. Prepare for reduced levels/loads.", styles['Normal']))
            else:
                elements.append(Paragraph("• <b>Stable Trend:</b> Forecast shows stable values. Maintain current operational plans.", styles['Normal']))
            forecast_variance = np.var(forecast_values)
            if forecast_variance > np.var(historical_df['Actual']):
                elements.append(Paragraph("• <b>High Variance Alert:</b> Forecasted values show higher variability than historical data. Consider risk mitigation strategies.", styles['Normal']))     
            if 'anomaly_details' in stored_data and stored_data['anomaly_details']:
                elements.append(Paragraph("• <b>Anomaly Alerts:</b> High-deviation forecasts detected. Review the following for operational adjustments:", styles['Normal']))
                anomaly_data = stored_data['anomaly_details']
                anomaly_table_data = [['GC Date Actual', 'Actual Data', 'Forecasted Data', 'Error', 'GC Date Forecast']] + [
                    [row['gc_date_actual'], row['actual_data'], row['forecasted_data'], row['error'], row['gc_date_forecast']]
                    for row in anomaly_data
                ]
                anomaly_table = Table(anomaly_table_data)
                anomaly_table.setStyle(TableStyle([
                    ('BACKGROUND', (0, 0), (-1, 0), colors.grey),
                    ('TEXTCOLOR', (0, 0), (-1, 0), colors.whitesmoke),
                    ('ALIGN', (0, 0), (-1, -1), 'CENTER'),
                    ('FONTNAME', (0, 0), (-1, 0), 'Helvetica-Bold'),
                    ('GRID', (0, 0), (-1, -1), 1, colors.black),
                ]))
                elements.append(anomaly_table)
            else:
                elements.append(Paragraph("• <b>No Anomalies:</b> No significant deviations detected in the forecast.", styles['Normal']))

        elements.append(Spacer(1, 24))
        elements.append(Paragraph("Historical Data Summary", styles['h3']))
        historical_table_data = [historical_df.columns.tolist()] + historical_df.head().values.tolist()
        historical_table = Table(historical_table_data)
        historical_table.setStyle(TableStyle([
            ('BACKGROUND', (0, 0), (-1, 0), colors.grey),
            ('TEXTCOLOR', (0, 0), (-1, 0), colors.whitesmoke),
            ('ALIGN', (0, 0), (-1, -1), 'CENTER'),
            ('FONTNAME', (0, 0), (-1, 0), 'Helvetica-Bold'),
            ('GRID', (0, 0), (-1, -1), 1, colors.black),
        ]))
        elements.append(historical_table)
        elements.append(Paragraph(f"(Showing first 5 of {len(historical_df)} historical points)", styles['Italic']))

        doc.build(elements)
        pdf_buffer.seek(0)
        return dcc.send_bytes(pdf_buffer.getvalue(), f"{power_plant.replace('uploaded_','')}_{target}_analysis_report.pdf")
    except Exception as e:
        print(f"Error generating PDF: {e}")
        return None
if __name__ == '__main__':
    try:
        app.run(mode='inline', port=5021, debug=True)
    except Exception as e:
        print(f"Error running Dash server: {e}")

In [3]:
# Install required packages (run this in a separate cell if not already installed)
# !pip install xgboost jupyter_dash dash dash-bootstrap-components pandas numpy plotly scikit-learn

import pandas as pd
import numpy as np
import os
import warnings
from io import StringIO
from xgboost import XGBRegressor
from sklearn.linear_model import LinearRegression
from sklearn.svm import SVR
from sklearn.neighbors import KNeighborsRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from jupyter_dash import JupyterDash
import dash
from dash import dcc, html, Input, Output, State, dash_table, callback_context
import dash_bootstrap_components as dbc
import plotly.graph_objects as go
import plotly.express as px
from datetime import datetime
import base64
import json

warnings.filterwarnings('ignore')

# =================================================================================
# === HARD-CODED POWER PLANT DATA CONFIGURATION (FULLY FUNCTIONAL) ===
# =================================================================================
POWER_PLANT_DATA_FULL_ANALYSIS = {
    'Gibe 1': {
        'path': r"C:\Users\hp\Documents\code for internship 3rd year\CLEANED EEP DATASET 2017\data last\Gibe1.csv",
        'target_vars': ['Water_Level', 'Total_pr', 'Max_ALoad', 'Min_ALoad'],
        'date_col': 'Date_GC'
    },
    'Gibe 2': {
        'path': r"C:\Users\hp\Documents\code for internship 3rd year\CLEANED EEP DATASET 2017\data last\Gibe2.csv",
        'target_vars': ['water_level', 'total-pr', 'max_load', 'min_load'],
        'date_col': 'Date_GC'
    },
    'Gibe3': {
        'path': r"C:\Users\hp\Documents\code for internship 3rd year\CLEANED EEP DATASET 2017\data last\Gibe3.csv",
        'target_vars': ['water_level', 'total-pr', 'max_load', 'min_load'],
        'date_col': 'Date_GC'
    },
    'Amerti': {
        'path': r"C:\Users\hp\Documents\code for internship 3rd year\CLEANED EEP DATASET 2017\data last\Amerti Neshi.csv",
        'target_vars': ['water_level', 'total-pr', 'max_load', 'min_load'],
        'date_col': 'Date_GC'
    },
    'GERD': {
        'path': r"C:\Users\hp\Documents\code for internship 3rd year\CLEANED EEP DATASET 2017\data last\GERD.csv",
        'target_vars': ['water_level', 'total-pr', 'max_load', 'min_load'],
        'date_col': 'Date_GC'
    },
    'Finchaa': {
        'path': r"C:\Users\hp\Documents\code for internship 3rd year\CLEANED EEP DATASET 2017\data last\fincha.csv",
        'target_vars': ['water level', 'total-pr', 'max load', 'min load'],
        'date_col': 'Date_GC'
    },
    'Koka': {
        'path': r"C:\Users\hp\Documents\code for internship 3rd year\CLEANED EEP DATASET 2017\data last\Koka Plant.csv",
        'target_vars': ['water_level', 'total-pr', 'max_load', 'min_load'],
        'date_col': 'Date_GC'
    },
    'Tana Beles': {
        'path': r"C:\Users\hp\Documents\code for internship 3rd year\CLEANED EEP DATASET 2017\data last\Tana_Beles.csv",
        'target_vars': ['water_level', 'total-pr', 'max load', 'min load'],
        'date_col': 'Date_GC'
    },
    'Tekeze': {
        'path': r"C:\Users\hp\Documents\code for internship 3rd year\CLEANED EEP DATASET 2017\data last\Tekeze.csv",
        'target_vars': ['water_level', 'total-pr', 'max load', 'min load'],
        'date_col': 'Date_GC'
    },
    'Awash 2': {
        'path': r"C:\Users\hp\Documents\code for internship 3rd year\CLEANED EEP DATASET 2017\data last\Awash2.csv",
        'target_vars': ['water_level', 'total-pr', 'max load', 'min load'],
        'date_col': 'Date_GC'
    },
    'Awash 3': {
        'path': r"C:\Users\hp\Documents\code for internship 3rd year\CLEANED EEP DATASET 2017\data last\Awash3.csv",
        'target_vars': ['water_level', 'total-pr', 'max load', 'min load'],
        'date_col': 'Date_GC'
    },
    'Melka Wakena': {
        'path': r"C:\Users\hp\Documents\code for internship 3rd year\CLEANED EEP DATASET 2017\data last\wakena.csv",
        'target_vars': ['water_level', 'total-pr', 'max load', 'min load'],
        'date_col': 'Date_GC'
    },
    'Genale': {
        'path': r"C:\Users\hp\Documents\code for internship 3rd year\CLEANED EEP DATASET 2017\data last\Genale .csv",
        'target_vars': ['water_level', 'total-pr', 'max load', 'min load'],
        'date_col': 'Date_GC'
    }
}

ALLOWED_TARGET_KEYWORDS = ['level', 'pr', 'energy', 'discharge', 'auxiliary', 'load', 'u1', 'u2', 'u3', 'u4', 'u5', 'u6']

# Global cache
data_cache = {}
GLOBAL_BEST_RESULTS = {'R2_Test': -np.inf, 'Power Plant': 'N/A', 'Model': 'N/A', 'Split': 'N/A', 'Imputation Method': 'N/A'}
ALL_PLANTS_RESULTS = {}

# =================================================================================
# === CORE DATA LOADING & PREPROCESSING FUNCTIONS ===
# =================================================================================
def load_plant_data(plant_name):
    """Load and preprocess data for a specific power plant"""
    if plant_name not in POWER_PLANT_DATA_FULL_ANALYSIS:
        return pd.DataFrame(), None, "Plant not found"
    
    config = POWER_PLANT_DATA_FULL_ANALYSIS[plant_name]
    file_path = config['path']
    
    if not os.path.exists(file_path):
        return pd.DataFrame(), None, f"File not found: {file_path}"
    
    try:
        df = pd.read_csv(file_path)
        date_col = config['date_col']
        
        if date_col in df.columns:
            df[date_col] = pd.to_datetime(df[date_col], errors='coerce')
            df = df.dropna(subset=[date_col]).sort_values(date_col).reset_index(drop=True)
        
        # Auto-detect target columns from hard-coded list first, then keywords
        target_candidates = []
        for target in config['target_vars']:
            if target in df.columns:
                target_candidates.append(target)
        
        if not target_candidates:
            target_candidates = [col for col in df.columns 
                               if any(keyword in col.lower() for keyword in ALLOWED_TARGET_KEYWORDS) 
                               and pd.api.types.is_numeric_dtype(df[col])]
        
        return df, date_col, target_candidates[0] if target_candidates else None
        
    except Exception as e:
        return pd.DataFrame(), None, f"Error loading data: {str(e)}"

def preprocess_data(df, target_col, imputation_method='median'):
    """Preprocess data with selected imputation method"""
    df_processed = df.copy()
    
    # Drop columns with >90% missing values
    missing_pct = df_processed.isnull().sum() / len(df_processed)
    cols_to_drop = missing_pct[missing_pct > 0.9].index
    df_processed.drop(columns=cols_to_drop, inplace=True, errors='ignore')
    
    # Impute missing values
    numeric_cols = df_processed.select_dtypes(include=[np.number]).columns
    for col in numeric_cols:
        if col == target_col:
            continue
        if imputation_method == 'mean':
            df_processed[col].fillna(df_processed[col].mean(), inplace=True)
        elif imputation_method == 'median':
            df_processed[col].fillna(df_processed[col].median(), inplace=True)
        elif imputation_method == 'auto':
            if abs(df_processed[col].skew()) > 1.0:
                df_processed[col].fillna(df_processed[col].median(), inplace=True)
            else:
                df_processed[col].fillna(df_processed[col].mean(), inplace=True)
        else:
            df_processed[col].fillna(0, inplace=True)
    
    # Impute target column
    if target_col in df_processed.columns:
        non_null_target = df_processed[target_col].dropna()
        if not non_null_target.empty:
            if imputation_method == 'mean':
                df_processed[target_col].fillna(non_null_target.mean(), inplace=True)
            elif imputation_method == 'median':
                df_processed[target_col].fillna(non_null_target.median(), inplace=True)
    
    return df_processed.dropna(subset=[target_col]).reset_index(drop=True)

# =================================================================================
# === MODEL TRAINING FUNCTION (FROM ORIGINAL JUPYTER CODE) ===
# =================================================================================
def run_regression_analysis_detailed(X, y, split_ratio):
    models = {
        'Decision Tree': DecisionTreeRegressor(max_depth=5, random_state=42),
        'SVR': SVR(kernel='rbf', C=1.0),
        'KNN': KNeighborsRegressor(n_neighbors=5),
        'XGBoost': XGBRegressor(n_estimators=30, learning_rate=0.1, max_depth=5, n_jobs=-1, random_state=42),
        'Random Forest': RandomForestRegressor(n_estimators=30, max_depth=10, n_jobs=-1, random_state=42),
        'Linear Regression': LinearRegression()
    }
    results = []
    
    if len(y) < 20:
        return pd.DataFrame()

    scaler_X = MinMaxScaler()
    X_normalized = pd.DataFrame(scaler_X.fit_transform(X), columns=X.columns, index=X.index)
    scaler_y = MinMaxScaler()
    y_normalized = pd.Series(scaler_y.fit_transform(y.values.reshape(-1, 1)).flatten(), index=y.index)

    X_train, X_test, y_train, y_test = train_test_split(
        X_normalized, y_normalized, test_size=1 - split_ratio, shuffle=False
    )
    
    if len(y_train) == 0 or len(y_test) == 0:
        return pd.DataFrame()
    
    for name, model in models.items():
        try:
            model.fit(X_train, y_train)
            y_train_pred = model.predict(X_train)
            y_test_pred = model.predict(X_test)
            
            results.append({
                'Model': name,
                'R2_Train': max(0, r2_score(y_train, y_train_pred)),
                'R2_Test': max(0, r2_score(y_test, y_test_pred)),
                'MAE_Train': mean_absolute_error(y_train, y_train_pred),
                'MAE_Test': mean_absolute_error(y_test, y_test_pred),
                'RMSE_Train': np.sqrt(mean_squared_error(y_train, y_train_pred)),
                'RMSE_Test': np.sqrt(mean_squared_error(y_test, y_test_pred)),
                'Corr_Train': pd.Series(y_train).corr(pd.Series(y_train_pred)) if np.var(y_train) > 0 and np.var(y_train_pred) > 0 else 0,
                'Corr_Test': pd.Series(y_test).corr(pd.Series(y_test_pred)) if np.var(y_test) > 0 and np.var(y_test_pred) > 0 else 0
            })
        except Exception as e:
            continue
    
    results_df = pd.DataFrame(results)
    if not results_df.empty:
        results_df['Rank (R2_Test)'] = results_df['R2_Test'].rank(method='min', ascending=False).astype(int)
        results_df = results_df.sort_values('R2_Test', ascending=False).reset_index(drop=True)
    
    return results_df

# =================================================================================
# === DASH APP INITIALIZATION ===
# =================================================================================
app = JupyterDash(__name__, external_stylesheets=[dbc.themes.BOOTSTRAP])
app.title = "🚀 Ethiopian Power Plant ML Dashboard - FULLY FUNCTIONAL"

# =================================================================================
# === MAIN DASHBOARD LAYOUT ===
# =================================================================================
app.layout = html.Div([
    # Header
    dbc.Container([
        html.H1("🚀 Ethiopian Power Plant ML Dashboard", 
                className="text-center mb-4", 
                style={'color': '#2c3e50', 'fontWeight': 'bold'}),
        html.P("Advanced Machine Learning Analysis with Auto-Target Detection & Model Comparison",
               className="text-center text-muted", style={'fontSize': '1.2rem'})
    ], className="mb-5"),
    
    # Main Controls Row
    dbc.Row([
        # Left Column - Plant & Target Selection
        dbc.Col([
            dbc.Card([
                dbc.CardHeader("🏭 Power Plant Selection", className="bg-primary text-white"),
                dbc.CardBody([
                    dcc.Dropdown(
                        id='plant-selector',
                        options=[{'label': plant, 'value': plant} for plant in POWER_PLANT_DATA_FULL_ANALYSIS.keys()],
                        value='Gibe 1',
                        clearable=False,
                        className="mb-3"
                    ),
                    html.Hr(),
                    html.H6("🎯 Auto-Detected Target Variables", className="text-success"),
                    html.Div(id='target-variables-display')
                ])
            ], className="mb-4")
        ], width=4),
        
        # Right Column - Analysis Controls
        dbc.Col([
            dbc.Card([
                dbc.CardHeader("⚙️ Analysis Configuration", className="bg-success text-white"),
                dbc.CardBody([
                    dcc.Dropdown(
                        id='imputation-selector',
                        options=[
                            {'label': 'Median (Recommended)', 'value': 'median'},
                            {'label': 'Mean', 'value': 'mean'},
                            {'label': 'Auto (Skew-Aware)', 'value': 'auto'}
                        ],
                        value='median',
                        clearable=False,
                        className="mb-3"
                    ),
                    dcc.Dropdown(
                        id='split-selector',
                        options=[
                            {'label': '70% Train / 30% Test', 'value': 0.7},
                            {'label': '80% Train / 20% Test', 'value': 0.8},
                            {'label': '90% Train / 10% Test', 'value': 0.9}
                        ],
                        value=0.8,
                        clearable=False
                    ),
                    dbc.Button("🚀 RUN ANALYSIS", id='run-analysis-btn', color="warning", className="mt-3 w-100", size="lg")
                ])
            ], className="mb-4")
        ], width=4),
        
        # Results Summary
        dbc.Col([
            dbc.Card([
                dbc.CardHeader("🥇 BEST MODEL RESULTS", className="bg-info text-white"),
                dbc.CardBody([
                    html.Div(id='best-model-results', className="text-center")
                ])
            ], className="mb-4")
        ], width=4)
    ], className="mb-5"),
    
    # Results Tabs
    dbc.Card([
        dbc.CardHeader([
            html.I(className="fas fa-chart-line me-2"),
            html.Span("📊 ANALYSIS RESULTS")
        ], className="bg-dark text-white"),
        dbc.CardBody([
            dbc.Tabs([
                dbc.Tab(label="📈 Model Performance", tab_id="models"),
                dbc.Tab(label="📋 All Plants Summary", tab_id="summary"),
                dbc.Tab(label="🔍 Data Preview", tab_id="data")
            ], id="results-tabs", active_tab="models"),
            html.Div(id="tab-content")
        ])
    ], className="mb-4"),
    
    # Global Best Results
    dbc.Card([
        dbc.CardHeader("🌟 GLOBAL BEST RESULT ACROSS ALL PLANTS", className="bg-danger text-white"),
        dbc.CardBody([
            html.Div(id='global-best-result')
        ])
    ])
], fluid=True)

# =================================================================================
# === MAIN CALLBACKS ===
# =================================================================================

@app.callback(
    Output('target-variables-display', 'children'),
    Input('plant-selector', 'value')
)
def update_target_display(plant_name):
    if not plant_name:
        return html.P("Select a power plant to see available targets", className="text-muted")
    
    df, date_col, default_target = load_plant_data(plant_name)
    config = POWER_PLANT_DATA_FULL_ANALYSIS.get(plant_name, {})
    targets = config.get('target_vars', [])
    
    if not targets:
        return html.P("No predefined targets found", className="text-warning")
    
    target_buttons = []
    for i, target in enumerate(targets):
        color = "success" if i == 0 else "secondary"
        target_buttons.append(
            dbc.Button(
                f"🎯 {target}",
                id={'type': 'target-btn', 'index': i},
                color=color,
                className="me-2 mb-2",
                size="sm"
            )
        )
    
    return html.Div([
        html.P(f"Default Target: <strong>{default_target}</strong>", className="text-primary fw-bold"),
        html.Div(target_buttons, className="mt-3")
    ])

@app.callback(
    Output('tab-content', 'children'),
    Output('best-model-results', 'children'),
    Output('global-best-result', 'children'),
    Input('run-analysis-btn', 'n_clicks'),
    State('plant-selector', 'value'),
    State('imputation-selector', 'value'),
    State('split-selector', 'value')
)
def run_full_analysis(n_clicks, plant_name, imputation_method, split_ratio):
    if not n_clicks or not plant_name:
        return (html.P("Click 'RUN ANALYSIS' to start", className="text-muted"), 
                html.P("No results yet", className="text-muted"), 
                html.P("No global best yet", className="text-muted"))
    
    # Load and preprocess data
    df_raw, date_col, default_target = load_plant_data(plant_name)
    if df_raw.empty or default_target is None:
        return (dbc.Alert("❌ No valid data or target found", color="danger"),
                html.P("", className="text-muted"),
                html.P("", className="text-muted"))
    
    df_processed = preprocess_data(df_raw, default_target, imputation_method)
    
    if len(df_processed) < 20:
        return (dbc.Alert("❌ Insufficient data for modeling", color="warning"),
                html.P("", className="text-muted"),
                html.P("", className="text-muted"))
    
    # Prepare features
    feature_cols = [col for col in df_processed.select_dtypes(include=np.number).columns 
                    if col != default_target]
    X = df_processed[feature_cols]
    y = df_processed[default_target]
    
    # Run model analysis
    results_df = run_regression_analysis_detailed(X, y, split_ratio)
    
    if results_df.empty:
        return (dbc.Alert("❌ No models could be trained", color="danger"),
                html.P("", className="text-muted"),
                html.P("", className="text-muted"))
    
    # Update global best
    global GLOBAL_BEST_RESULTS, ALL_PLANTS_RESULTS
    best_row = results_df.iloc[0]
    if best_row['R2_Test'] > GLOBAL_BEST_RESULTS['R2_Test']:
        GLOBAL_BEST_RESULTS = {
            'R2_Test': best_row['R2_Test'],
            'Power Plant': plant_name,
            'Model': best_row['Model'],
            'Split': f"{int(split_ratio*100)}% Train",
            'Imputation Method': imputation_method
        }
    
    ALL_PLANTS_RESULTS[plant_name] = {
        'Plant': plant_name,
        'Best Model': best_row['Model'],
        'R2 Test': best_row['R2_Test'],
        'Split': f"{int(split_ratio*100)}%",
        'Imputation': imputation_method
    }
    
    # Model results table
    model_table = dash_table.DataTable(
        data=results_df.round(4).to_dict('records'),
        columns=[{'name': i, 'id': i} for i in results_df.columns],
        page_size=10,
        style_cell={'textAlign': 'left', 'fontSize': '12px'},
        style_data_conditional=[
            {'if': {'row_index': 0}, 'backgroundColor': '#d4edda', 'color': 'darkgreen', 'fontWeight': 'bold'},
            {'if': {'column_id': 'R2_Test'}, 'fontWeight': 'bold'}
        ],
        style_header={'backgroundColor': '#007bff', 'color': 'white', 'fontWeight': 'bold'}
    )
    
    # Current plant best result
    best_result_card = dbc.Card([
        dbc.CardBody([
            html.H5(f"🥇 {best_row['Model']}", className="text-success"),
            html.H6(f"R² Test: {best_row['R2_Test']:.4f}", className="text-primary"),
            html.P(f"Train/Test Split: {int(split_ratio*100)}% / {100-int(split_ratio*100)}%"),
            html.P(f"Imputation: {imputation_method}")
        ])
    ], className="text-center")
    
    # Global best result
    global_best_card = dbc.Card([
        dbc.CardBody([
            html.H5(f"🌟 {GLOBAL_BEST_RESULTS['Model']}", className="text-danger"),
            html.H6(f"R² Test: {GLOBAL_BEST_RESULTS['R2_Test']:.4f}", className="text-warning"),
            html.P(f"Plant: {GLOBAL_BEST_RESULTS['Power Plant']}"),
            html.P(f"Split: {GLOBAL_BEST_RESULTS['Split']}")
        ])
    ], className="text-center")
    
    # All plants summary
    if ALL_PLANTS_RESULTS:
        summary_df = pd.DataFrame(list(ALL_PLANTS_RESULTS.values()))
        summary_table = dash_table.DataTable(
            data=summary_df.round(4).to_dict('records'),
            columns=[{'name': i, 'id': i} for i in summary_df.columns],
            page_size=10,
            style_cell_conditional=[
                {'if': {'column_id': 'R2 Test'}, 'fontWeight': 'bold'}
            ]
        )
    else:
        summary_table = html.P("No other plants analyzed yet")
    
    tab_content = html.Div([
        dbc.Tabs([
            dbc.Tab(label="📈 Model Performance", tab_id="models", children=[model_table]),
            dbc.Tab(label="📋 All Plants Summary", tab_id="summary", children=[summary_table]),
            dbc.Tab(label="🔍 Data Preview", tab_id="data", children=[
                html.H6(f"Plant: {plant_name} | Target: {default_target}"),
                dash_table.DataTable(
                    data=df_processed[[default_target] + feature_cols[:5]].head(10).round(2).to_dict('records'),
                    columns=[{'name': i, 'id': i} for i in df_processed[[default_target] + feature_cols[:5]].head(10).columns],
                    page_size=10
                )
            ])
        ])
    ])
    
    return tab_content, best_result_card, global_best_card

# Run the app
if __name__ == '__main__':
    app.run_server(mode='inline', debug=True, port=6051)

TypeError: The `html.Div` component (version 3.1.1) received an unexpected keyword argument: `fluid`
Allowed arguments: accessKey, aria-*, children, className, contentEditable, data-*, dir, disable_n_clicks, draggable, hidden, id, key, lang, n_clicks, n_clicks_timestamp, role, spellCheck, style, tabIndex, title