In [None]:
import pandas as pd
import geopandas as gpd
import numpy as np
from scipy import stats
import warnings
import os
import gc
from datetime import datetime
import json
import logging

from statsmodels.tsa.arima.model import ARIMA
from statsmodels.tsa.stattools import adfuller
from statsmodels.tsa.holtwinters import ExponentialSmoothing

from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error, mean_absolute_error

warnings.filterwarnings('ignore')
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
os.environ['CMDSTAN_VERBOSE'] = 'false'

logging.getLogger('cmdstanpy').setLevel(logging.CRITICAL)
logging.getLogger('prophet').setLevel(logging.CRITICAL)
logging.getLogger('fbprophet').setLevel(logging.CRITICAL)
logging.getLogger('tensorflow').setLevel(logging.CRITICAL)

import tensorflow as tf
tf.get_logger().setLevel('ERROR')
tf.autograph.set_verbosity(0)

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout, Bidirectional
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
from tensorflow.keras.optimizers import Adam
import tensorflow.keras.backend as K

try:
    from prophet import Prophet
    PROPHET_AVAILABLE = True
except ImportError:
    try:
        from fbprophet import Prophet
        PROPHET_AVAILABLE = True
    except ImportError:
        PROPHET_AVAILABLE = False

try:
    import pymannkendall as mk
    MK_AVAILABLE = True
except ImportError:
    MK_AVAILABLE = False

# Thresholds below are not referenced in the visible part of this file;
# presumably they filter which wells are analysed (confirm against the caller).
MIN_RECENT_YEAR = 2020
MIN_RECORDS = 24
MIN_YEARS_SPAN = 5
# Calendar year used as the projection horizon by the trend and forecast code.
PREDICTION_YEAR = 2030

# Window length, epoch cap and batch size used when training the LSTM models.
LSTM_LOOKBACK = 12
LSTM_EPOCHS = 150
LSTM_BATCH_SIZE = 16

# p-value threshold for flagging a trend as significant, and the confidence
# level used for slope and forecast intervals.
SIGNIFICANCE_LEVEL = 0.05
CONFIDENCE_LEVEL = 0.95

output_folder = r"\assessment_of_wells_chile\data\DGA\Trend_Analysis_Predictions_Complete_v2"

# exist_ok=True creates each folder without a separate existence check, so a
# folder that appears between "check" and "create" cannot raise an error.
for folder in ['Excel', 'Figures', 'Text_Output', 'Models', 'Individual_Wells']:
    path = os.path.join(output_folder, folder)
    os.makedirs(path, exist_ok=True)

# Files used for the run log and for checkpoint/resume bookkeeping.
log_file_path = os.path.join(output_folder, 'Text_Output', 'process_log_realtime.txt')
checkpoint_file = os.path.join(output_folder, 'checkpoint_well_results.csv')
progress_file = os.path.join(output_folder, 'progress_tracker.json')

def load_progress():
    """Load resume state written by ``save_progress``.

    Reads the JSON tracker (``progress_file``) and the CSV checkpoint
    (``checkpoint_file``) if they exist. A file that cannot be read is
    treated as absent rather than aborting the run.

    Returns:
        tuple: ``(processed_wells, well_results)`` where ``processed_wells``
        is a set of station codes and ``well_results`` is a list of per-well
        result dicts (one per checkpoint row).
    """
    processed_wells = set()
    well_results = []
    if os.path.exists(progress_file):
        try:
            with open(progress_file, 'r') as f:
                progress_data = json.load(f)
            processed_wells = set(progress_data.get('processed_wells', []))
        except Exception:
            # Unreadable/corrupt tracker: start with an empty set.
            processed_wells = set()
    if os.path.exists(checkpoint_file):
        try:
            # Station codes must stay text: letting pandas infer the dtype
            # turns a zero-padded code such as "05410001" into the integer
            # 5410001, which no longer matches the string codes stored in the
            # JSON tracker.
            df_checkpoint = pd.read_csv(checkpoint_file, dtype={'Station_Code': str})
            well_results = df_checkpoint.to_dict('records')
            for result in well_results:
                processed_wells.add(result['Station_Code'])
        except Exception:
            # Unreadable checkpoint: discard its rows, keep the tracker's set.
            well_results = []
    return processed_wells, well_results

def save_progress(processed_wells, well_results, current_idx, total_wells):
    """Persist resume state: a JSON tracker plus a CSV of per-well results.

    Args:
        processed_wells: Iterable of station codes already handled.
        well_results: List of per-well result dicts; the CSV checkpoint is
            only rewritten when this is non-empty.
        current_idx: Index of the well most recently processed.
        total_wells: Total number of wells in the run.

    Any failure is reported on stdout and otherwise ignored.
    """
    try:
        snapshot = {'processed_wells': list(processed_wells)}
        snapshot['last_index'] = current_idx
        snapshot['total_wells'] = total_wells
        snapshot['last_update'] = datetime.now().strftime('%Y-%m-%d %H:%M:%S')

        with open(progress_file, 'w') as tracker:
            json.dump(snapshot, tracker)

        # Nothing to checkpoint yet: leave any existing CSV untouched.
        if not well_results:
            return
        results_frame = pd.DataFrame(well_results)
        results_frame.to_csv(checkpoint_file, index=False)
    except Exception as e:
        print(f"Error saving progress: {e}")

def write_output(text):
    """Echo ``text`` to stdout and append it, newline-terminated, to the log.

    A failure while writing the log file is reported on stdout and does not
    propagate to the caller.
    """
    print(text)
    try:
        with open(log_file_path, mode='a', encoding='utf-8') as log_handle:
            line = text + "\n"
            log_handle.write(line)
    except Exception as err:
        print(f"Error writing to log file: {err}")

# Start a fresh log on the first run; on later runs append a "resuming"
# banner so the earlier log content is kept.
if not os.path.exists(log_file_path):
    with open(log_file_path, 'w', encoding='utf-8') as f:
        f.write(f"STARTING PROCESS AT: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n")
        f.write("="*80 + "\n")
else:
    with open(log_file_path, 'a', encoding='utf-8') as f:
        f.write(f"\nRESUMING PROCESS AT: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n")
        f.write("="*80 + "\n")

# Shapefile holding the piezometric (water level) records.
path_piezometric = r"\assessment_of_wells_chile\data\DGA_aguas_subterraneas_12_2025\niveles_estaticos_pozos_historico\output\niveles_estaticos_todos.shp"

# Attribute names looked up in the piezometric layer. Meanings are inferred
# from the variable names (station code, station name, date, level, latitude,
# longitude) -- confirm against the shapefile schema.
piez_codigo_col = 'COD_EST'
piez_nombre_col = 'NOM_EST'
piez_fecha_col = 'FECHA_US'
piez_nivel_col = 'NIVEL'
piez_lat_col = 'LAT_WGS84'
piez_lon_col = 'LON_WGS84'

def normalize_id_with_padding(id_value, target_length=8):
    """Return a station ID as a canonical string, zero-padded when numeric.

    The value is stringified and stripped, scientific notation is expanded to
    a plain integer, any decimal part is dropped, and purely numeric results
    are left-padded with zeros to ``target_length``. Non-numeric IDs are
    returned unchanged apart from the stripping/truncation above.

    Args:
        id_value: Raw identifier (str, int, float, ...).
        target_length: Minimum width for all-digit IDs.

    Returns:
        str: The normalised identifier.
    """
    def _expand_scientific(text):
        # Only text containing an exponent marker is a candidate; anything
        # that does not parse as a finite number is returned untouched.
        if 'e' not in text.lower():
            return text
        try:
            return str(int(float(text)))
        except (ValueError, OverflowError):
            return text

    # Expand scientific notation BEFORE cutting at the decimal point;
    # otherwise "5.41e6" is truncated to "5" and the exponent is lost.
    id_str = _expand_scientific(str(id_value).strip())
    if '.' in id_str:
        id_str = _expand_scientific(id_str.split('.')[0])
    if id_str.isdigit():
        id_str = id_str.zfill(target_length)
    return id_str

def nash_sutcliffe_efficiency(observed, predicted):
    """Compute the Nash-Sutcliffe efficiency of ``predicted`` against ``observed``.

    Both inputs are flattened to 1-D and positions where either value is NaN
    are ignored.

    Returns:
        ``1 - SSE / SST`` where SSE is the sum of squared errors and SST the
        sum of squared deviations of the observations from their mean.
        ``np.nan`` when fewer than two valid pairs remain or when the
        observations have zero variance.
    """
    obs = np.asarray(observed).ravel()
    sim = np.asarray(predicted).ravel()

    keep = ~np.isnan(obs) & ~np.isnan(sim)
    obs, sim = obs[keep], sim[keep]
    if obs.size < 2:
        return np.nan

    spread = np.sum(np.square(obs - obs.mean()))
    if spread == 0:
        return np.nan
    squared_error = np.sum(np.square(obs - sim))
    return 1 - (squared_error / spread)

def parse_date(date_val):
    """Parse a single date value into a ``pandas.Timestamp``.

    Missing values give ``pd.NaT``. Values that are already datetime-like are
    converted directly. Strings are tried against a fixed list of formats in
    order (day-first variants before month-first ones), then handed to
    pandas' free-form parser, first with ``dayfirst=True`` and then with the
    defaults.

    Args:
        date_val: Raw date (str, datetime, numpy datetime64, number, ...).

    Returns:
        pandas.Timestamp, or ``pd.NaT`` when nothing could parse the value.
    """
    if pd.isna(date_val):
        return pd.NaT
    # Already a date: convert directly instead of round-tripping through
    # str() and the heuristic day-first parser below.
    if isinstance(date_val, (datetime, np.datetime64)):
        try:
            return pd.Timestamp(date_val)
        except Exception:
            pass  # e.g. out of Timestamp range; fall back to string parsing
    date_str = str(date_val).strip()
    formats = ['%d/%m/%Y', '%m/%d/%Y', '%Y-%m-%d', '%d-%m-%Y', '%Y/%m/%d',
               '%d/%m/%y', '%m/%d/%y', '%Y%m%d']
    for fmt in formats:
        try:
            return pd.to_datetime(date_str, format=fmt)
        except Exception:
            continue
    # Free-form fallbacks: day-first interpretation first, then the default.
    for parser_options in ({'dayfirst': True}, {}):
        try:
            return pd.to_datetime(date_str, **parser_options)
        except Exception:
            continue
    return pd.NaT

def calculate_linear_trend(dates, values):
    """Fit an ordinary-least-squares line of level against elapsed years.

    Args:
        dates: pandas Series of datetimes (the ``.dt`` accessor is used).
        values: pandas Series of levels, positionally paired with ``dates``.

    Returns:
        dict with the slope (per year), intercept, R^2, p-value, standard
        error, slope confidence bounds at ``CONFIDENCE_LEVEL``, a
        significance flag at ``SIGNIFICANCE_LEVEL``, a trend label and the
        change projected to ``PREDICTION_YEAR``; ``None`` when fewer than
        five valid pairs are available or the fit fails.
    """
    try:
        if len(dates) < 5 or len(values) < 5:
            return None
        # Drop pairs with a missing date or level. A single NaN would make
        # the regression return NaN, and the NaN slope would then be
        # labelled 'Increasing' by the comparison further down.
        valid = dates.notna().values & values.notna().values
        dates = dates[valid]
        values = values[valid]
        if len(values) < 5:
            return None
        years = (dates - dates.min()).dt.days / 365.25
        slope, intercept, r_value, p_value, std_err = stats.linregress(years.values, values.values)
        n = len(years)
        # Two-sided t quantile with n - 2 degrees of freedom.
        t_critical = stats.t.ppf((1 + CONFIDENCE_LEVEL) / 2, n - 2)
        slope_ci_lower = slope - t_critical * std_err
        slope_ci_upper = slope + t_critical * std_err
        # Whole calendar years between the last observation and the horizon.
        years_to_prediction = PREDICTION_YEAR - dates.max().year
        predicted_change = slope * years_to_prediction
        return {
            'method': 'Linear_OLS',
            'slope_m_per_year': slope,
            'intercept': intercept,
            'r_squared': r_value**2,
            'p_value': p_value,
            'std_error': std_err,
            'slope_ci_lower': slope_ci_lower,
            'slope_ci_upper': slope_ci_upper,
            'significant': p_value < SIGNIFICANCE_LEVEL,
            # A positive slope is labelled 'Decreasing' -- presumably the
            # level is a depth to water, so larger means lower (confirm).
            'trend_direction': 'Decreasing' if slope > 0 else 'Increasing',
            'predicted_change': predicted_change
        }
    except Exception:
        # Best effort: a failed fit yields "no result", but Ctrl-C and
        # SystemExit are no longer swallowed.
        return None

def mann_kendall_test(values):
    """Run pymannkendall's ``original_test`` on a level series.

    Args:
        values: pandas Series of levels; NaNs are dropped first.

    Returns:
        dict with the test's trend label, ``h``, p-value, z-score, Tau, S,
        a significance flag (``h``) and a direction label derived from the
        sign of the reported slope; ``None`` when pymannkendall is missing,
        fewer than ten values remain, or the test fails.
    """
    if not MK_AVAILABLE:
        return None
    try:
        values_clean = values.dropna()
        if len(values_clean) < 10:
            return None
        result = mk.original_test(values_clean)
        return {
            'method': 'Mann_Kendall',
            'trend': result.trend,
            'h': result.h,
            'p_value': result.p,
            'z_score': result.z,
            'tau': result.Tau,
            's': result.s,
            'significant': result.h,
            # A positive slope is labelled 'Decreasing' -- presumably the
            # level is a depth to water (confirm with the data owner).
            'trend_direction': 'Decreasing' if result.slope > 0 else 'Increasing' if result.slope < 0 else 'No Trend'
        }
    except Exception:
        # Best effort, but do not swallow KeyboardInterrupt/SystemExit.
        return None

def sens_slope_estimator(dates, values):
    """Estimate Sen's (Theil-Sen) slope of level against elapsed years.

    The slope is always computed against the real observation dates. The
    previous shortcut through ``mk.original_test`` returned a slope per
    observation *index*, which is not a per-year rate for irregularly
    spaced records, yet it was stored as ``slope_m_per_year`` and multiplied
    by a number of years.

    Args:
        dates: pandas Series of datetimes (the ``.dt`` accessor is used).
        values: pandas Series of levels, positionally paired with ``dates``.

    Returns:
        dict with the slope per year, a trend label and the change projected
        to ``PREDICTION_YEAR``; ``None`` when fewer than five valid pairs
        remain, all dates coincide, or the computation fails.
    """
    try:
        valid = dates.notna().values & values.notna().values
        values_clean = values[valid]
        dates_clean = dates[valid]
        if len(values_clean) < 5:
            return None
        years = (dates_clean - dates_clean.min()).dt.days / 365.25
        # Median of all pairwise slopes between points with distinct dates.
        # NOTE: pairwise work grows quadratically with the record count.
        sen_slope = stats.theilslopes(
            values_clean.values.astype(float), years.values
        )[0]
        if np.isnan(sen_slope):
            # No pair of distinct dates, so no slope can be formed.
            return None
        years_to_prediction = PREDICTION_YEAR - dates.max().year
        predicted_change = sen_slope * years_to_prediction
        return {
            'method': 'Sens_Slope',
            'slope_m_per_year': sen_slope,
            # A positive slope is labelled 'Decreasing' -- presumably the
            # level is a depth to water (confirm with the data owner).
            'trend_direction': 'Decreasing' if sen_slope > 0 else 'Increasing',
            'predicted_change': predicted_change
        }
    except Exception:
        # Best effort, but do not swallow KeyboardInterrupt/SystemExit.
        return None

def seasonal_kendall_test(values, period=12):
    """Run pymannkendall's ``seasonal_test`` on a level series.

    Args:
        values: pandas Series of levels; NaNs are dropped first.
            NOTE(review): the test is given values only, so it presumably
            assumes evenly spaced samples with ``period`` per cycle -- confirm
            that gaps in the series are acceptable.
        period: Number of observations per seasonal cycle.

    Returns:
        dict with the test's trend label, ``h``, p-value, z-score, slope,
        a significance flag (``h``) and a direction label derived from the
        sign of the slope; ``None`` when pymannkendall is missing, fewer than
        two full periods of data remain, or the test fails.
    """
    if not MK_AVAILABLE:
        return None
    try:
        values_clean = values.dropna()
        if len(values_clean) < period * 2:
            return None
        result = mk.seasonal_test(values_clean, period=period)
        return {
            'method': 'Seasonal_Kendall',
            'trend': result.trend,
            'h': result.h,
            'p_value': result.p,
            'z_score': result.z,
            'significant': result.h,
            'slope': result.slope,
            'trend_direction': 'Decreasing' if result.slope > 0 else 'Increasing' if result.slope < 0 else 'No Trend'
        }
    except Exception:
        # Best effort, but do not swallow KeyboardInterrupt/SystemExit.
        return None

def get_last_value(data):
    """Return the final element of ``data`` as a float.

    ``None`` and empty Series/arrays/lists/tuples give ``np.nan``. Anything
    that is not one of those container types is passed straight to
    ``float()``.
    """
    if data is None:
        return np.nan
    if not isinstance(data, (pd.Series, np.ndarray, list, tuple)):
        return float(data)
    if len(data) == 0:
        return np.nan
    # A Series needs positional access; its labels may not include -1.
    tail = data.iloc[-1] if isinstance(data, pd.Series) else data[-1]
    return float(tail)

def fit_arima_model(series, forecast_periods=60):
    """Select an ARIMA order by AIC and forecast ``forecast_periods`` steps.

    The differencing order ``d`` is 0 when the ADF test rejects a unit root
    at the 0.05 level and 1 otherwise (also 1 if the test itself fails). A
    fixed list of ``(p, d, q)`` candidates is fitted and the lowest-AIC model
    is kept.

    Args:
        series: pandas Series of levels; NaNs are dropped first.
        forecast_periods: Number of steps to forecast.

    Returns:
        dict with forecast arrays (mean, lower, upper at
        ``CONFIDENCE_LEVEL``), AIC, BIC, the chosen order and in-sample
        RMSE/MAE/NSE; ``None`` when fewer than 24 values are available or
        no candidate could be fitted.
    """
    try:
        series = series.dropna().astype(float)
        if len(series) < 24:
            return None
        try:
            adf_result = adfuller(series, autolag='AIC')
            d = 0 if adf_result[1] < 0.05 else 1
        except Exception:
            d = 1
        best_aic = np.inf
        best_model = None
        best_order = None
        orders_to_try = [
            (2, d, 2), (1, d, 1), (2, d, 1), (1, d, 2),
            (0, d, 1), (1, d, 0), (0, d, 2), (2, d, 0),
            (3, d, 1), (1, d, 3), (3, d, 2), (2, d, 3)
        ]
        for order in orders_to_try:
            try:
                fitted = ARIMA(series, order=order).fit()
            except Exception:
                # This candidate failed to fit; try the next one.
                continue
            if fitted.aic < best_aic:
                best_aic = fitted.aic
                best_model = fitted
                best_order = order
        if best_model is None:
            return None
        forecast_result = best_model.get_forecast(steps=forecast_periods)
        forecast = forecast_result.predicted_mean
        conf_int = forecast_result.conf_int(alpha=1-CONFIDENCE_LEVEL)
        # In-sample metrics skip the first observation.
        observed = series.iloc[1:]
        in_sample_pred = best_model.fittedvalues.iloc[1:]
        rmse = np.sqrt(mean_squared_error(observed, in_sample_pred))
        mae = mean_absolute_error(observed, in_sample_pred)
        nse = nash_sutcliffe_efficiency(observed, in_sample_pred)
        return {
            'forecast': np.asarray(forecast),
            'forecast_lower': np.asarray(conf_int.iloc[:, 0]),
            'forecast_upper': np.asarray(conf_int.iloc[:, 1]),
            'aic': best_aic,
            'bic': best_model.bic,
            'order': best_order,
            'rmse': rmse,
            'mae': mae,
            'nse': nse
        }
    except Exception:
        # Best effort per well. Catching Exception (not a bare except) keeps
        # Ctrl-C able to stop the batch while a model is being fitted.
        return None

def fit_holtwinters_model(series, forecast_periods=60, seasonal_periods=12):
    """Fit additive exponential smoothing and forecast ``forecast_periods`` steps.

    An additive-trend, additive-seasonal model is tried when at least two
    full seasonal cycles are available; if that is not possible or fails, an
    additive-trend model without seasonality is used instead.

    Args:
        series: pandas Series of levels; NaNs are dropped first.
        forecast_periods: Number of steps to forecast.
        seasonal_periods: Observations per seasonal cycle.

    Returns:
        dict with forecast arrays (mean and +/- 1.96 residual standard
        deviations), AIC, residual standard deviation and in-sample
        RMSE/MAE/NSE; ``None`` when no model could be fitted.
    """
    try:
        series = series.dropna().astype(float)
        fitted = None
        if len(series) >= seasonal_periods * 2:
            try:
                fitted = ExponentialSmoothing(
                    series,
                    trend='add',
                    seasonal='add',
                    seasonal_periods=seasonal_periods
                ).fit()
            except Exception:
                fitted = None  # fall through to the non-seasonal model
        if fitted is None:
            try:
                fitted = ExponentialSmoothing(
                    series,
                    trend='add',
                    seasonal=None
                ).fit()
            except Exception:
                return None
        forecast = fitted.forecast(steps=forecast_periods)
        # Constant-width band from the in-sample residual spread.
        residual_std = fitted.resid.std()
        forecast_lower = forecast - 1.96 * residual_std
        forecast_upper = forecast + 1.96 * residual_std
        in_sample_pred = fitted.fittedvalues
        rmse = np.sqrt(mean_squared_error(series, in_sample_pred))
        mae = mean_absolute_error(series, in_sample_pred)
        nse = nash_sutcliffe_efficiency(series, in_sample_pred)
        return {
            'forecast': np.asarray(forecast),
            'forecast_lower': np.asarray(forecast_lower),
            'forecast_upper': np.asarray(forecast_upper),
            'aic': getattr(fitted, 'aic', np.nan),
            'residual_std': residual_std,
            'rmse': rmse,
            'mae': mae,
            'nse': nse
        }
    except Exception:
        # Best effort per well, but do not swallow KeyboardInterrupt.
        return None

def fit_prophet_model(df_series, forecast_periods=60):
    """Fit a Prophet model and forecast ``forecast_periods`` month starts.

    Args:
        df_series: pandas Series whose index holds the dates and whose values
            hold the levels (it is reset into ``ds``/``y`` columns).
        forecast_periods: Number of month-start dates to forecast after the
            last observation.

    Returns:
        dict with forecast arrays (``yhat`` and its bounds at
        ``CONFIDENCE_LEVEL``), the trend component and in-sample
        RMSE/MAE/NSE; ``None`` when Prophet is unavailable, fewer than 24
        rows remain after dropping NaNs, or fitting fails.
    """
    if not PROPHET_AVAILABLE:
        return None
    try:
        df_prophet = df_series.reset_index()
        df_prophet.columns = ['ds', 'y']
        df_prophet['ds'] = pd.to_datetime(df_prophet['ds'])
        df_prophet = df_prophet.dropna()
        if len(df_prophet) < 24:
            return None

        # Silence the fitting back-end's loggers for this process.
        logging.getLogger('cmdstanpy').disabled = True
        logging.getLogger('prophet').disabled = True
        logging.getLogger('stan').disabled = True

        model = Prophet(
            yearly_seasonality=True,
            weekly_seasonality=False,
            daily_seasonality=False,
            seasonality_mode='multiplicative',
            interval_width=CONFIDENCE_LEVEL
        )
        model.fit(df_prophet)
        last_date = df_prophet['ds'].max()
        # Generate one extra date and drop the first element so the forecast
        # horizon starts after the range's initial date.
        future_dates = pd.date_range(start=last_date, periods=forecast_periods+1, freq='MS')[1:]
        future = pd.DataFrame({'ds': future_dates})
        forecast = model.predict(future)
        in_sample = model.predict(df_prophet[['ds']])
        rmse = np.sqrt(mean_squared_error(df_prophet['y'], in_sample['yhat']))
        mae = mean_absolute_error(df_prophet['y'], in_sample['yhat'])
        nse = nash_sutcliffe_efficiency(df_prophet['y'], in_sample['yhat'])
        return {
            'forecast': forecast['yhat'].values,
            'forecast_lower': forecast['yhat_lower'].values,
            'forecast_upper': forecast['yhat_upper'].values,
            'trend': forecast['trend'].values,
            'rmse': rmse,
            'mae': mae,
            'nse': nse
        }
    except Exception:
        # Best effort per well, but do not swallow KeyboardInterrupt.
        return None

def fit_lstm_model(series, forecast_periods=60, lookback=12):
    """Train a two-layer LSTM on the series and forecast recursively.

    The series is min-max scaled, cut into sliding windows of ``lookback``
    values, split 80/20 in time order into train and test windows, and the
    network is trained with early stopping and learning-rate reduction. The
    forecast is produced one step at a time, feeding each prediction back in
    as input.

    Args:
        series: pandas Series of levels; NaNs are dropped first.
        forecast_periods: Number of steps to forecast.
        lookback: Window length fed to the network.

    Returns:
        dict with forecast arrays (mean and +/- 1.96 x test RMSE) and
        test-set RMSE/MAE/NSE; ``None`` when there is too little data or
        training fails.
    """
    try:
        series = series.dropna().astype(float)
        if len(series) < lookback + 36:
            return None
        scaler = MinMaxScaler(feature_range=(0, 1))
        scaled_data = scaler.fit_transform(series.values.reshape(-1, 1))
        # Sliding windows: `lookback` inputs -> the next value as target.
        X, y = [], []
        for i in range(lookback, len(scaled_data)):
            X.append(scaled_data[i-lookback:i, 0])
            y.append(scaled_data[i, 0])
        X, y = np.array(X), np.array(y)
        X = np.reshape(X, (X.shape[0], X.shape[1], 1))
        if len(X) < 20:
            return None
        # Chronological split: the last 20% of windows are held out.
        train_size = int(len(X) * 0.8)
        X_train, X_test = X[:train_size], X[train_size:]
        y_train, y_test = y[:train_size], y[train_size:]
        model = Sequential([
            LSTM(64, return_sequences=True, input_shape=(lookback, 1)),
            Dropout(0.2),
            LSTM(32, return_sequences=False),
            Dropout(0.2),
            Dense(16, activation='relu'),
            Dense(1)
        ])
        model.compile(
            optimizer=Adam(learning_rate=0.001),
            loss='mean_squared_error'
        )
        early_stop = EarlyStopping(monitor='val_loss', patience=15, restore_best_weights=True)
        reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=5, min_lr=0.0001)
        model.fit(
            X_train, y_train,
            batch_size=LSTM_BATCH_SIZE,
            epochs=LSTM_EPOCHS,
            validation_split=0.15,
            callbacks=[early_stop, reduce_lr],
            verbose=0
        )
        if len(X_test) > 0:
            predictions_test = model.predict(X_test, verbose=0)
            predictions_test = scaler.inverse_transform(predictions_test)
            y_test_inv = scaler.inverse_transform(y_test.reshape(-1, 1))
            rmse = np.sqrt(mean_squared_error(y_test_inv, predictions_test))
            mae = mean_absolute_error(y_test_inv, predictions_test)
            nse = nash_sutcliffe_efficiency(y_test_inv, predictions_test)
        else:
            rmse = np.nan
            mae = np.nan
            nse = np.nan
        # Recursive forecast: each prediction is appended to the input
        # sequence and becomes part of the next window.
        last_sequence = scaled_data[-lookback:]
        forecasts = []
        current_seq = last_sequence.flatten()
        for _ in range(forecast_periods):
            input_seq = current_seq[-lookback:].reshape(1, lookback, 1)
            pred = model.predict(input_seq, verbose=0)[0, 0]
            forecasts.append(pred)
            current_seq = np.append(current_seq, pred)
        forecasts = scaler.inverse_transform(np.array(forecasts).reshape(-1, 1)).flatten()
        # Without a test RMSE the bounds collapse onto the forecast itself.
        forecast_lower = forecasts - 1.96 * rmse if not np.isnan(rmse) else forecasts
        forecast_upper = forecasts + 1.96 * rmse if not np.isnan(rmse) else forecasts
        K.clear_session()
        del model
        return {
            'forecast': forecasts,
            'forecast_lower': forecast_lower,
            'forecast_upper': forecast_upper,
            'rmse': rmse,
            'mae': mae,
            'nse': nse
        }
    except Exception:
        # Best effort per well. Catching Exception (not a bare except) lets
        # Ctrl-C interrupt a long training run instead of being swallowed.
        K.clear_session()
        return None

def fit_bilstm_model(series, forecast_periods=60, lookback=12):
    """Train a two-layer bidirectional LSTM and forecast recursively.

    The series is min-max scaled, cut into sliding windows of ``lookback``
    values, split 80/20 in time order into train and test windows, and the
    network is trained with early stopping. The forecast is produced one
    step at a time, feeding each prediction back in as input.

    Args:
        series: pandas Series of levels; NaNs are dropped first.
        forecast_periods: Number of steps to forecast.
        lookback: Window length fed to the network.

    Returns:
        dict with forecast arrays (mean and +/- 1.96 x test RMSE) and
        test-set RMSE/MAE/NSE; ``None`` when there is too little data or
        training fails.
    """
    try:
        series = series.dropna().astype(float)
        if len(series) < lookback + 48:
            return None
        scaler = MinMaxScaler(feature_range=(0, 1))
        scaled_data = scaler.fit_transform(series.values.reshape(-1, 1))
        # Sliding windows: `lookback` inputs -> the next value as target.
        X, y = [], []
        for i in range(lookback, len(scaled_data)):
            X.append(scaled_data[i-lookback:i, 0])
            y.append(scaled_data[i, 0])
        X, y = np.array(X), np.array(y)
        X = np.reshape(X, (X.shape[0], X.shape[1], 1))
        if len(X) < 30:
            return None
        # Chronological split: the last 20% of windows are held out.
        train_size = int(len(X) * 0.8)
        X_train, X_test = X[:train_size], X[train_size:]
        y_train, y_test = y[:train_size], y[train_size:]
        model = Sequential([
            Bidirectional(LSTM(64, return_sequences=True), input_shape=(lookback, 1)),
            Dropout(0.3),
            Bidirectional(LSTM(32)),
            Dropout(0.3),
            Dense(16, activation='relu'),
            Dense(1)
        ])
        model.compile(optimizer=Adam(learning_rate=0.001), loss='mse')
        early_stop = EarlyStopping(monitor='val_loss', patience=15, restore_best_weights=True)
        model.fit(
            X_train, y_train,
            batch_size=LSTM_BATCH_SIZE,
            epochs=LSTM_EPOCHS,
            validation_split=0.15,
            callbacks=[early_stop],
            verbose=0
        )
        if len(X_test) > 0:
            pred_test = model.predict(X_test, verbose=0)
            pred_test = scaler.inverse_transform(pred_test)
            y_test_inv = scaler.inverse_transform(y_test.reshape(-1, 1))
            rmse = np.sqrt(mean_squared_error(y_test_inv, pred_test))
            mae = mean_absolute_error(y_test_inv, pred_test)
            nse = nash_sutcliffe_efficiency(y_test_inv, pred_test)
        else:
            rmse = np.nan
            mae = np.nan
            nse = np.nan
        # Recursive forecast: each prediction is appended to the input
        # sequence and becomes part of the next window.
        last_seq = scaled_data[-lookback:]
        forecasts = []
        current_seq = last_seq.flatten()
        for _ in range(forecast_periods):
            input_seq = current_seq[-lookback:].reshape(1, lookback, 1)
            pred = model.predict(input_seq, verbose=0)[0, 0]
            forecasts.append(pred)
            current_seq = np.append(current_seq, pred)
        forecasts = scaler.inverse_transform(np.array(forecasts).reshape(-1, 1)).flatten()
        # Without a test RMSE the bounds collapse onto the forecast itself.
        forecast_lower = forecasts - 1.96 * rmse if not np.isnan(rmse) else forecasts
        forecast_upper = forecasts + 1.96 * rmse if not np.isnan(rmse) else forecasts
        K.clear_session()
        del model
        return {
            'forecast': forecasts,
            'forecast_lower': forecast_lower,
            'forecast_upper': forecast_upper,
            'rmse': rmse,
            'mae': mae,
            'nse': nse
        }
    except Exception:
        # Best effort per well. Catching Exception (not a bare except) lets
        # Ctrl-C interrupt a long training run instead of being swallowed.
        K.clear_session()
        return None

def calculate_ensemble_prediction(predictions_dict):
    """Combine the final forecast value of each model into an ensemble.

    Args:
        predictions_dict: Mapping of model name to a result dict with a
            ``'forecast'`` entry and, optionally, ``'forecast_lower'`` and
            ``'forecast_upper'``. ``None`` entries are skipped.

    Returns:
        dict with the ensemble mean, the spread between models
        (``ensemble_std``), lower/upper bounds at +/- 1.96 combined standard
        deviations, the number of contributing models and their individual
        final values; ``None`` when no model produced a usable value or the
        computation fails.
    """
    # Multiplier the forecasting functions in this file use for their bands.
    z_score = 1.96
    try:
        valid_predictions = {}
        valid_sigmas = {}
        for model_name, pred_data in predictions_dict.items():
            if pred_data is None or 'forecast' not in pred_data:
                continue
            final_value = get_last_value(pred_data['forecast'])
            if np.isnan(final_value):
                continue
            valid_predictions[model_name] = final_value
            if 'forecast_lower' in pred_data and 'forecast_upper' in pred_data:
                lower = get_last_value(pred_data['forecast_lower'])
                upper = get_last_value(pred_data['forecast_upper'])
                if not np.isnan(lower) and not np.isnan(upper):
                    # The band half-width is z * sigma. Convert it back to a
                    # standard deviation so it can be combined with
                    # ensemble_std; otherwise the z multiplier is applied
                    # twice to the per-model uncertainty when the bounds are
                    # formed below.
                    valid_sigmas[model_name] = (upper - lower) / (2 * z_score)
        if len(valid_predictions) == 0:
            return None
        values = list(valid_predictions.values())
        ensemble_mean = np.mean(values)
        ensemble_std = np.std(values) if len(values) > 1 else 0
        if valid_sigmas:
            # Root-sum-of-squares of the model sigmas divided by their count,
            # then added in quadrature to the between-model spread.
            combined_sigma = np.sqrt(np.sum([s**2 for s in valid_sigmas.values()])) / len(valid_sigmas)
            total_sigma = np.sqrt(ensemble_std**2 + combined_sigma**2)
        else:
            total_sigma = ensemble_std
        return {
            'ensemble_mean': ensemble_mean,
            'ensemble_std': ensemble_std,
            'ensemble_lower': ensemble_mean - z_score * total_sigma,
            'ensemble_upper': ensemble_mean + z_score * total_sigma,
            'n_models': len(valid_predictions),
            'model_predictions': valid_predictions
        }
    except Exception:
        # Best effort, but do not swallow KeyboardInterrupt/SystemExit.
        return None

def process_single_well(well_dict, df_piez_subset, piez_codigo_col_local):
    try:
        station_code = well_dict['Station_Code']
        
        well_data = df_piez_subset[df_piez_subset[piez_codigo_col_local] == station_code].copy()
        well_data = well_data.sort_values('Date')
        
        if len(well_data) < 10:
            return None
        
        monthly_data = well_data.groupby('YearMonth').agg({
            'WaterLevel_m': 'mean',
            'Date': 'first'
        }).reset_index()
        monthly_data['Date'] = monthly_data['YearMonth'].dt.to_timestamp()
        monthly_data = monthly_data.sort_values('Date')
        monthly_data = monthly_data.set_index('Date')
        
        result = {
            'Station_Code': station_code,
            'Station_Name': well_dict['Station_Name'],
            'Latitude': well_dict['Latitude'],
            'Longitude': well_dict['Longitude'],
            'N_Records': int(well_dict['N_Records']),
            'N_Monthly_Records': len(monthly_data),
            'Year_Start': int(well_dict['Year_Start']),
            'Year_End': int(well_dict['Year_End']),
            'Years_Span': int(well_dict['Years_Span']),
            'Records_Per_Year': float(well_dict['Records_Per_Year']),
            'WL_Mean': float(well_dict['WL_Mean']),
            'WL_Std': float(well_dict['WL_Std']) if pd.notna(well_dict['WL_Std']) else np.nan,
            'WL_Min': float(well_dict['WL_Min']),
            'WL_Max': float(well_dict['WL_Max']),
            'WL_Current': float(monthly_data['WaterLevel_m'].iloc[-1]) if len(monthly_data) > 0 else np.nan,
            'WL_First': float(monthly_data['WaterLevel_m'].iloc[0]) if len(monthly_data) > 0 else np.nan,
            'Total_Change_m': float(monthly_data['WaterLevel_m'].iloc[-1] - monthly_data['WaterLevel_m'].iloc[0]) if len(monthly_data) > 1 else np.nan
        }
        
        linear_result = calculate_linear_trend(well_data['Date'], well_data['WaterLevel_m'])
        if linear_result:
            result.update({
                'Linear_Slope_m_yr': linear_result['slope_m_per_year'],
                'Linear_R2': linear_result['r_squared'],
                'Linear_PValue': linear_result['p_value'],
                'Linear_StdErr': linear_result['std_error'],
                'Linear_CI_Lower': linear_result['slope_ci_lower'],
                'Linear_CI_Upper': linear_result['slope_ci_upper'],
                'Linear_Significant': linear_result['significant'],
                'Linear_Trend': linear_result['trend_direction']
            })
        else:
            result.update({
                'Linear_Slope_m_yr': np.nan, 'Linear_R2': np.nan, 'Linear_PValue': np.nan,
                'Linear_StdErr': np.nan, 'Linear_CI_Lower': np.nan, 'Linear_CI_Upper': np.nan,
                'Linear_Significant': False, 'Linear_Trend': 'Unknown'
            })
        
        mk_result = mann_kendall_test(monthly_data['WaterLevel_m'])
        if mk_result:
            result.update({
                'MK_Trend': mk_result['trend'],
                'MK_PValue': mk_result['p_value'],
                'MK_ZScore': mk_result['z_score'],
                'MK_Tau': mk_result['tau'],
                'MK_Significant': mk_result['significant'],
                'MK_Direction': mk_result['trend_direction']
            })
        else:
            result.update({
                'MK_Trend': 'N/A', 'MK_PValue': np.nan, 'MK_ZScore': np.nan,
                'MK_Tau': np.nan, 'MK_Significant': False, 'MK_Direction': 'Unknown'
            })
        
        sens_result = sens_slope_estimator(well_data['Date'], well_data['WaterLevel_m'])
        if sens_result:
            result.update({
                'Sens_Slope_m_yr': sens_result['slope_m_per_year'],
                'Sens_Trend': sens_result['trend_direction'],
                'Sens_Predicted_Change': sens_result['predicted_change']
            })
        else:
            result.update({
                'Sens_Slope_m_yr': np.nan, 'Sens_Trend': 'Unknown', 'Sens_Predicted_Change': np.nan
            })
        
        if len(monthly_data) >= 24:
            sk_result = seasonal_kendall_test(monthly_data['WaterLevel_m'])
            if sk_result:
                result.update({
                    'SK_Trend': sk_result['trend'],
                    'SK_PValue': sk_result['p_value'],
                    'SK_Significant': sk_result['significant']
                })
            else:
                result.update({'SK_Trend': 'N/A', 'SK_PValue': np.nan, 'SK_Significant': False})
        else:
            result.update({'SK_Trend': 'N/A', 'SK_PValue': np.nan, 'SK_Significant': False})
        
        trends = []
        if result.get('Linear_Trend') in ['Decreasing', 'Increasing']:
            trends.append(result['Linear_Trend'])
        if result.get('MK_Direction') in ['Decreasing', 'Increasing']:
            trends.append(result['MK_Direction'])
        if result.get('Sens_Trend') in ['Decreasing', 'Increasing']:
            trends.append(result['Sens_Trend'])
        
        if trends:
            result['Consensus_Trend'] = max(set(trends), key=trends.count)
        else:
            result['Consensus_Trend'] = 'Unknown'
        
        last_date = monthly_data.index.max()
        target_date = pd.Timestamp(f'{PREDICTION_YEAR}-12-31')
        months_to_forecast = max(1, (target_date.year - last_date.year) * 12 + (target_date.month - last_date.month))
        
        predictions = {}
        
        arima_result = fit_arima_model(monthly_data['WaterLevel_m'], months_to_forecast)
        if arima_result:
            predictions['ARIMA'] = arima_result
            result.update({
                'ARIMA_Pred_2030': float(arima_result['forecast'][-1]),
                'ARIMA_Lower_2030': float(arima_result['forecast_lower'][-1]),
                'ARIMA_Upper_2030': float(arima_result['forecast_upper'][-1]),
                'ARIMA_Order': str(arima_result['order']),
                'ARIMA_AIC': float(arima_result['aic']),
                'ARIMA_RMSE': float(arima_result['rmse']),
                'ARIMA_MAE': float(arima_result['mae']),
                'ARIMA_NSE': float(arima_result['nse'])
            })
        else:
            result.update({
                'ARIMA_Pred_2030': np.nan, 'ARIMA_Lower_2030': np.nan, 'ARIMA_Upper_2030': np.nan,
                'ARIMA_Order': 'Failed', 'ARIMA_AIC': np.nan, 'ARIMA_RMSE': np.nan,
                'ARIMA_MAE': np.nan, 'ARIMA_NSE': np.nan
            })
        
        hw_result = fit_holtwinters_model(monthly_data['WaterLevel_m'], months_to_forecast)
        if hw_result:
            predictions['HoltWinters'] = hw_result
            result.update({
                'HW_Pred_2030': float(hw_result['forecast'][-1]),
                'HW_Lower_2030': float(hw_result['forecast_lower'][-1]),
                'HW_Upper_2030': float(hw_result['forecast_upper'][-1]),
                'HW_RMSE': float(hw_result['rmse']),
                'HW_MAE': float(hw_result['mae']),
                'HW_NSE': float(hw_result['nse'])
            })
        else:
            result.update({
                'HW_Pred_2030': np.nan, 'HW_Lower_2030': np.nan, 'HW_Upper_2030': np.nan,
                'HW_RMSE': np.nan, 'HW_MAE': np.nan, 'HW_NSE': np.nan
            })
        
        if PROPHET_AVAILABLE and len(monthly_data) >= 24:
            prophet_result = fit_prophet_model(monthly_data['WaterLevel_m'], months_to_forecast)
            if prophet_result:
                predictions['Prophet'] = prophet_result
                result.update({
                    'Prophet_Pred_2030': float(prophet_result['forecast'][-1]),
                    'Prophet_Lower_2030': float(prophet_result['forecast_lower'][-1]),
                    'Prophet_Upper_2030': float(prophet_result['forecast_upper'][-1]),
                    'Prophet_RMSE': float(prophet_result['rmse']),
                    'Prophet_MAE': float(prophet_result['mae']),
                    'Prophet_NSE': float(prophet_result['nse'])
                })
            else:
                result.update({
                    'Prophet_Pred_2030': np.nan, 'Prophet_Lower_2030': np.nan, 'Prophet_Upper_2030': np.nan,
                    'Prophet_RMSE': np.nan, 'Prophet_MAE': np.nan, 'Prophet_NSE': np.nan
                })
        else:
            result.update({
                'Prophet_Pred_2030': np.nan, 'Prophet_Lower_2030': np.nan, 'Prophet_Upper_2030': np.nan,
                'Prophet_RMSE': np.nan, 'Prophet_MAE': np.nan, 'Prophet_NSE': np.nan
            })
        
        if len(monthly_data) >= 48:
            lstm_result = fit_lstm_model(monthly_data['WaterLevel_m'], months_to_forecast, LSTM_LOOKBACK)
            if lstm_result:
                predictions['LSTM'] = lstm_result
                result.update({
                    'LSTM_Pred_2030': float(lstm_result['forecast'][-1]),
                    'LSTM_Lower_2030': float(lstm_result['forecast_lower'][-1]),
                    'LSTM_Upper_2030': float(lstm_result['forecast_upper'][-1]),
                    'LSTM_RMSE': float(lstm_result['rmse']) if not np.isnan(lstm_result['rmse']) else np.nan,
                    'LSTM_MAE': float(lstm_result['mae']) if not np.isnan(lstm_result['mae']) else np.nan,
                    'LSTM_NSE': float(lstm_result['nse']) if not np.isnan(lstm_result['nse']) else np.nan
                })
            else:
                result.update({
                    'LSTM_Pred_2030': np.nan, 'LSTM_Lower_2030': np.nan, 'LSTM_Upper_2030': np.nan,
                    'LSTM_RMSE': np.nan, 'LSTM_MAE': np.nan, 'LSTM_NSE': np.nan
                })
        else:
            result.update({
                'LSTM_Pred_2030': np.nan, 'LSTM_Lower_2030': np.nan, 'LSTM_Upper_2030': np.nan,
                'LSTM_RMSE': np.nan, 'LSTM_MAE': np.nan, 'LSTM_NSE': np.nan
            })
        
        if len(monthly_data) >= 60:
            bilstm_result = fit_bilstm_model(monthly_data['WaterLevel_m'], months_to_forecast, LSTM_LOOKBACK)
            if bilstm_result:
                predictions['BiLSTM'] = bilstm_result
                result.update({
                    'BiLSTM_Pred_2030': float(bilstm_result['forecast'][-1]),
                    'BiLSTM_Lower_2030': float(bilstm_result['forecast_lower'][-1]),
                    'BiLSTM_Upper_2030': float(bilstm_result['forecast_upper'][-1]),
                    'BiLSTM_RMSE': float(bilstm_result['rmse']) if not np.isnan(bilstm_result['rmse']) else np.nan,
                    'BiLSTM_MAE': float(bilstm_result['mae']) if not np.isnan(bilstm_result['mae']) else np.nan,
                    'BiLSTM_NSE': float(bilstm_result['nse']) if not np.isnan(bilstm_result['nse']) else np.nan
                })
            else:
                result.update({
                    'BiLSTM_Pred_2030': np.nan, 'BiLSTM_Lower_2030': np.nan, 'BiLSTM_Upper_2030': np.nan,
                    'BiLSTM_RMSE': np.nan, 'BiLSTM_MAE': np.nan, 'BiLSTM_NSE': np.nan
                })
        else:
            result.update({
                'BiLSTM_Pred_2030': np.nan, 'BiLSTM_Lower_2030': np.nan, 'BiLSTM_Upper_2030': np.nan,
                'BiLSTM_RMSE': np.nan, 'BiLSTM_MAE': np.nan, 'BiLSTM_NSE': np.nan
            })
        
        ensemble = calculate_ensemble_prediction(predictions)
        if ensemble:
            result.update({
                'Ensemble_Pred_2030': float(ensemble['ensemble_mean']),
                'Ensemble_Lower_2030': float(ensemble['ensemble_lower']),
                'Ensemble_Upper_2030': float(ensemble['ensemble_upper']),
                'Ensemble_Std': float(ensemble['ensemble_std']),
                'Ensemble_N_Models': int(ensemble['n_models'])
            })
        else:
            result.update({
                'Ensemble_Pred_2030': np.nan, 'Ensemble_Lower_2030': np.nan,
                'Ensemble_Upper_2030': np.nan, 'Ensemble_Std': np.nan, 'Ensemble_N_Models': 0
            })
        
        current_level = result['WL_Current']
        for model in ['ARIMA', 'HW', 'Prophet', 'LSTM', 'BiLSTM', 'Ensemble']:
            pred_col = f'{model}_Pred_2030'
            change_col = f'{model}_Change_2030'
            if pred_col in result and not np.isnan(result.get(pred_col, np.nan)):
                result[change_col] = float(result[pred_col] - current_level)
            else:
                result[change_col] = np.nan
        
        return result
    
    except Exception as e:
        print(f"Error processing well: {e}")
        return None

# ---------------------------------------------------------------------------
# Run header, resume state, and loading of the raw piezometric layer.
# ---------------------------------------------------------------------------
for header_line in ("="*80, "GROUNDWATER LEVEL TREND ANALYSIS AND PREDICTION", "="*80):
    write_output(header_line)
write_output(f"Analysis started: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")

# load_progress() supplies the station codes already handled and the result
# records gathered so far; both are extended further down in the script.
processed_wells, well_results = load_progress()
n_resumed = len(processed_wells)
if n_resumed > 0:
    write_output(f"\nRESUMING FROM PREVIOUS RUN: {n_resumed} wells already processed")

for header_line in ("\n" + "="*80, "SECTION 1: DATA LOADING AND PREPROCESSING", "="*80):
    write_output(header_line)

write_output("\nLoading piezometric data...")
try:
    gdf_piezometric = gpd.read_file(path_piezometric)
    n_loaded = len(gdf_piezometric)
    write_output(f"   Loaded {n_loaded:,} piezometric records")
    write_output(f"   Detected CRS: {gdf_piezometric.crs}")
except Exception as load_error:
    # Log the failure to the run log, then let it propagate: nothing below
    # can work without the input layer.
    write_output(f"   Error loading piezometric data: {load_error}")
    raise

for header_line in ("\n" + "-"*60, "DATA PREPROCESSING", "-"*60):
    write_output(header_line)

# Work on a copy so the loaded layer itself is never modified.
df_piez = gdf_piezometric.copy()
df_piez['Date'] = df_piez[piez_fecha_col].apply(parse_date)
# Unparseable level values become NaN (errors='coerce') and are dropped later.
df_piez['WaterLevel_m'] = pd.to_numeric(df_piez[piez_nivel_col], errors='coerce')

# Prefer explicit coordinate columns; fall back to the geometry when either
# one is absent.
has_coordinate_columns = (piez_lat_col in df_piez.columns) and (piez_lon_col in df_piez.columns)
if not has_coordinate_columns:
    df_piez['Longitude'] = df_piez.geometry.x
    df_piez['Latitude'] = df_piez.geometry.y
else:
    df_piez['Latitude'] = pd.to_numeric(df_piez[piez_lat_col], errors='coerce')
    df_piez['Longitude'] = pd.to_numeric(df_piez[piez_lon_col], errors='coerce')

# Station name comes from the name column when present, otherwise the code.
name_source_col = piez_nombre_col if piez_nombre_col in df_piez.columns else piez_codigo_col
df_piez['Station_Name'] = df_piez[name_source_col]

# ---------------------------------------------------------------------------
# Record cleaning: drop rows without a date/level, rows outside the
# geographic bounding box, and rows with implausible water levels, then add
# the calendar helper columns used by the rest of the script.
# ---------------------------------------------------------------------------
write_output("Cleaning data...")
initial_count = len(df_piez)

df_piez_clean = df_piez.dropna(subset=['Date', 'WaterLevel_m']).copy()
write_output(f"After removing null dates/levels: {len(df_piez_clean):,}")

# Bounding box in decimal degrees (presumably continental Chile, given the
# project path -- confirm if the study area changes).
df_piez_clean = df_piez_clean[
    (df_piez_clean['Latitude'] >= -56) & (df_piez_clean['Latitude'] <= -17) &
    (df_piez_clean['Longitude'] >= -76) & (df_piez_clean['Longitude'] <= -66)
]
write_output(f"After geographic filtering: {len(df_piez_clean):,}")

# Keep strictly positive levels below 500; .copy() so the column assignments
# below write to an independent frame rather than to a slice.
df_piez_clean = df_piez_clean[
    (df_piez_clean['WaterLevel_m'] > 0) &
    (df_piez_clean['WaterLevel_m'] < 500)
].copy()
write_output(f"After water level filtering: {len(df_piez_clean):,}")

# Fail fast with an explicit message: with zero surviving rows the summary
# below would otherwise die on a division by zero or on NaT.strftime().
if df_piez_clean.empty:
    write_output("ERROR: No piezometric records remain after cleaning. Please check data quality.")
    raise ValueError(
        f"No piezometric records remain after cleaning ({initial_count} records loaded)"
    )

df_piez_clean['Year'] = df_piez_clean['Date'].dt.year
df_piez_clean['Month'] = df_piez_clean['Date'].dt.month
df_piez_clean['YearMonth'] = df_piez_clean['Date'].dt.to_period('M')
df_piez_clean['DayOfYear'] = df_piez_clean['Date'].dt.dayofyear

write_output(f"\nTotal cleaned records: {len(df_piez_clean):,} ({len(df_piez_clean)/initial_count*100:.1f}% retained)")
write_output(f"Date range: {df_piez_clean['Date'].min().strftime('%Y-%m-%d')} to {df_piez_clean['Date'].max().strftime('%Y-%m-%d')}")

for header_line in ("\n" + "-"*60, "ID NORMALIZATION", "-"*60):
    write_output(header_line)

# The most frequent stripped string length among the station codes is taken
# as the length every ID is normalized to.
id_lengths = (
    df_piez_clean[piez_codigo_col]
    .astype(str)
    .str.strip()
    .str.len()
    .value_counts()
)
target_length = id_lengths.idxmax()
write_output(f"ID length distribution: {id_lengths.to_dict()}")
write_output(f"Using target length: {target_length}")


def _pad_station_id(raw_id):
    # Thin wrapper binding the chosen target length for Series.apply.
    return normalize_id_with_padding(raw_id, target_length)


df_piez_clean['ID_Normalized'] = df_piez_clean[piez_codigo_col].apply(_pad_station_id)

unique_before = df_piez_clean[piez_codigo_col].nunique()
unique_after = df_piez_clean['ID_Normalized'].nunique()
write_output(f"Unique IDs before normalization: {unique_before:,}")
write_output(f"Unique IDs after normalization: {unique_after:,}")

# From here on the normalized column is the station key for all grouping.
piez_codigo_col = 'ID_Normalized'

write_output("ID normalization complete")

for header_line in ("\n" + "-"*60, "FILTERING WELLS WITH RECENT DATA", "-"*60):
    write_output(header_line)

write_output("Calculating well statistics...")

# One row per station: record period, record counts, first-seen coordinates
# and name, and basic water-level statistics. Named aggregation yields the
# final column names directly, in the same order as the output columns.
well_stats = (
    df_piez_clean
    .groupby(piez_codigo_col)
    .agg(
        Date_Start=('Date', 'min'),
        Date_End=('Date', 'max'),
        N_Records=('Date', 'count'),
        Year_Start=('Year', 'min'),
        Year_End=('Year', 'max'),
        N_Years_With_Data=('Year', 'nunique'),
        Latitude=('Latitude', 'first'),
        Longitude=('Longitude', 'first'),
        Station_Name=('Station_Name', 'first'),
        WL_Mean=('WaterLevel_m', 'mean'),
        WL_Std=('WaterLevel_m', 'std'),
        WL_Min=('WaterLevel_m', 'min'),
        WL_Max=('WaterLevel_m', 'max'),
        WL_Median=('WaterLevel_m', 'median'),
    )
    .reset_index()
    .rename(columns={piez_codigo_col: 'Station_Code'})
)

# Span is last year minus first year; the "+ 1" counts both end years so a
# single-year record has one calendar year rather than zero.
well_stats['Years_Span'] = well_stats['Year_End'] - well_stats['Year_Start']
calendar_years = well_stats['Years_Span'] + 1
well_stats['Records_Per_Year'] = well_stats['N_Records'] / calendar_years
well_stats['Data_Completeness'] = well_stats['N_Years_With_Data'] / calendar_years

# ---------------------------------------------------------------------------
# Select the wells to analyze. Start from the configured thresholds; when
# fewer than 10 wells qualify, step through progressively looser levels.
# MIN_RECENT_YEAR / MIN_RECORDS / MIN_YEARS_SPAN are always rebound to the
# criteria actually applied, because they are reported later in the summary.
# ---------------------------------------------------------------------------
write_output(f"\nTotal unique wells in database: {len(well_stats):,}")

write_output("\nDistribution of wells by last year of data:")
year_dist = well_stats.groupby('Year_End').size()
for year in sorted(year_dist.index)[-10:]:
    write_output(f"   {year}: {year_dist[year]:,} wells")

write_output("\nApplying filters:")
write_output(f"   - Data extending to >= {MIN_RECENT_YEAR}")
write_output(f"   - Minimum {MIN_RECORDS} records")
write_output(f"   - Minimum {MIN_YEARS_SPAN} years span")


def _select_wells(min_year, min_records, min_span):
    """Return a copy of the well_stats rows meeting all three thresholds."""
    return well_stats[
        (well_stats['Year_End'] >= min_year) &
        (well_stats['N_Records'] >= min_records) &
        (well_stats['Years_Span'] >= min_span)
    ].copy()


wells_filtered = _select_wells(MIN_RECENT_YEAR, MIN_RECORDS, MIN_YEARS_SPAN)

write_output(f"\nWells meeting all criteria: {len(wells_filtered):,}")

if len(wells_filtered) < 10:
    write_output("\nWARNING: Too few wells meet criteria. Relaxing constraints...")
    # (min last year, min records, min years span), strictest first.
    relaxation_levels = [
        (2018, 20, 4),
        (2016, 15, 3),
        (2015, 12, 3),
        (2010, 10, 2)
    ]
    for min_year, min_rec, min_span in relaxation_levels:
        wells_filtered = _select_wells(min_year, min_rec, min_span)
        if len(wells_filtered) >= 10:
            write_output(f"Using relaxed criteria: year>={min_year}, records>={min_rec}, span>={min_span}")
            write_output(f"Wells meeting relaxed criteria: {len(wells_filtered):,}")
            break
    else:
        # No level reached 10 wells: the loosest level's selection is kept,
        # so say so instead of proceeding silently.
        write_output(
            f"WARNING: No relaxation level yields 10 wells. "
            f"Keeping loosest criteria: year>={min_year}, records>={min_rec}, span>={min_span}"
        )
        write_output(f"Wells meeting loosest criteria: {len(wells_filtered):,}")
    # After the loop the loop variables hold the level that produced
    # wells_filtered (the first sufficient one, or the loosest one).
    MIN_RECENT_YEAR = min_year
    MIN_RECORDS = min_rec
    MIN_YEARS_SPAN = min_span

if len(wells_filtered) == 0:
    write_output("ERROR: No wells meet any criteria. Please check data quality.")
    wells_filtered = well_stats[well_stats['N_Records'] >= 5].copy()
    write_output(f"Using all wells with >= 5 records: {len(wells_filtered):,}")
    # Last resort applies only a record-count threshold; reflect that in the
    # reported criteria. The recent-year value becomes the earliest final
    # year among the wells actually used.
    MIN_RECORDS = 5
    MIN_YEARS_SPAN = 0
    if len(wells_filtered) > 0:
        MIN_RECENT_YEAR = int(wells_filtered['Year_End'].min())

for header_line in ("\n" + "="*80, "SECTION 2: INDIVIDUAL WELL ANALYSIS", "="*80):
    write_output(header_line)

# Counters for the run; results restored from a previous run count as
# successes when they carry a station code.
total_wells = len(wells_filtered)
successful_analyses = sum(1 for record in well_results if 'Station_Code' in record)
failed_analyses = 0
skipped_wells = 0

write_output(f"\nAnalyzing {total_wells} wells with multiple models...")
write_output(f"Previously processed: {len(processed_wells)} wells")

# Queue every selected well that has not been handled yet, as a plain dict.
wells_to_process = []
for _, well in wells_filtered.iterrows():
    station_code = well['Station_Code']
    if station_code in processed_wells:
        skipped_wells += 1
        continue
    wells_to_process.append(well.to_dict())

write_output(f"Wells to process: {len(wells_to_process)}")
write_output(f"Wells skipped (already processed): {skipped_wells}")

if len(wells_to_process) > 0:
    # Only the columns handed to the per-well routine are carried along.
    df_piez_subset = df_piez_clean[[piez_codigo_col, 'Date', 'WaterLevel_m', 'YearMonth']].copy()

    write_output("\nProcessing wells...")
    n_pending = len(wells_to_process)

    for idx, well in enumerate(wells_to_process):
        station_code = well['Station_Code']
        position = idx + 1
        at_checkpoint = position % 10 == 0

        # Report on the first well and on every tenth one.
        if at_checkpoint or idx == 0:
            pct = position / n_pending * 100
            write_output(f"Progress: {position}/{n_pending} ({pct:.1f}%) - Current: {station_code}")

        try:
            result = process_single_well(well, df_piez_subset, piez_codigo_col)
        except Exception as e:
            write_output(f"Error processing {station_code}: {e}")
            result = None

        if result is None:
            failed_analyses += 1
        else:
            well_results.append(result)
            successful_analyses += 1
        # Failed wells are marked as processed too, so a resumed run does
        # not retry them.
        processed_wells.add(station_code)

        # Every tenth well: persist progress and release memory held by the
        # garbage collector and the Keras backend session.
        if at_checkpoint:
            save_progress(processed_wells, well_results, position, n_pending)
            gc.collect()
            K.clear_session()

# Final checkpoint, written even when nothing was left to process.
save_progress(processed_wells, well_results, len(wells_to_process), len(wells_to_process))

# ---------------------------------------------------------------------------
# Collect the per-well result records into one table and report how many
# wells have a usable output from each method.
# ---------------------------------------------------------------------------
df_well_results = pd.DataFrame(well_results)

write_output("\nAnalysis complete!")
write_output(f"Successfully analyzed: {successful_analyses:,} wells")
write_output(f"Failed analyses: {failed_analyses:,} wells")
write_output(f"Skipped (already processed): {skipped_wells:,} wells")

# With no result records the table has no columns at all, so every lookup
# below would raise an opaque KeyError. Stop with an explicit message.
if df_well_results.empty:
    write_output("\nERROR: No well produced a result; nothing to summarize or save.")
    raise RuntimeError("No successful well analyses; aborting before summary statistics")


def _n_available(column, missing_marker=None):
    """Count rows with a usable value in `column` (0 when the column is absent).

    A value is usable when it is not null and, if `missing_marker` is given,
    differs from that marker.
    """
    if column not in df_well_results.columns:
        return 0
    values = df_well_results[column]
    usable = values.notna()
    if missing_marker is not None:
        # Null entries are excluded as well: a plain `!= marker` test would
        # count NaN as an available result.
        usable = usable & (values != missing_marker)
    return int(usable.sum())


write_output("\nModel availability summary:")
availability_rows = (
    ("Linear trend", 'Linear_Slope_m_yr', None),
    ("Mann-Kendall", 'MK_Trend', 'N/A'),
    ("Sen's Slope", 'Sens_Slope_m_yr', None),
    ("ARIMA", 'ARIMA_Pred_2030', None),
    ("Holt-Winters", 'HW_Pred_2030', None),
    ("Prophet", 'Prophet_Pred_2030', None),
    ("LSTM", 'LSTM_Pred_2030', None),
    ("BiLSTM", 'BiLSTM_Pred_2030', None),
    ("Ensemble", 'Ensemble_Pred_2030', None),
)
for method_label, method_column, method_marker in availability_rows:
    write_output(f"   - {method_label}: {_n_available(method_column, method_marker):,} wells")

for header_line in ("\n" + "="*80, "SECTION 3: SUMMARY STATISTICS", "="*80):
    write_output(header_line)

# Headline counts: number of wells flagged by each trend test.
total_wells_analyzed = len(df_well_results)
wells_with_decreasing_linear = int((df_well_results['Linear_Trend'] == 'Decreasing').sum())
wells_with_decreasing_mk = int((df_well_results['MK_Direction'] == 'Decreasing').sum())
wells_with_decreasing_consensus = int((df_well_results['Consensus_Trend'] == 'Decreasing').sum())
wells_with_significant_linear = int((df_well_results['Linear_Significant'] == True).sum())
wells_with_significant_mk = int((df_well_results['MK_Significant'] == True).sum())

write_output("\nOVERALL STATISTICS:")
write_output(f"Total wells analyzed: {total_wells_analyzed:,}")
write_output("\nTrend Detection Results:")
trend_rows = (
    ("Linear Regression - Decreasing", wells_with_decreasing_linear),
    ("Linear Regression - Significant", wells_with_significant_linear),
    ("Mann-Kendall - Decreasing", wells_with_decreasing_mk),
    ("Mann-Kendall - Significant", wells_with_significant_mk),
    ("Consensus - Decreasing", wells_with_decreasing_consensus),
)
for trend_label, n_wells in trend_rows:
    write_output(f"{trend_label}: {n_wells:,} ({n_wells/total_wells_analyzed*100:.1f}%)")

# Slope statistics use only wells where the respective slope exists.
valid_linear_slopes = df_well_results['Linear_Slope_m_yr'].dropna()
valid_sens_slopes = df_well_results['Sens_Slope_m_yr'].dropna()

write_output("\nTREND MAGNITUDE STATISTICS:")
write_output(f"Linear OLS Trend (n={len(valid_linear_slopes)}):")
linear_stats = (
    ("Mean", valid_linear_slopes.mean()),
    ("Median", valid_linear_slopes.median()),
    ("Std Dev", valid_linear_slopes.std()),
)
for stat_label, stat_value in linear_stats:
    write_output(f"   {stat_label}: {stat_value:.4f} m/year")

write_output(f"\nSen's Slope (robust, n={len(valid_sens_slopes)}):")
sens_stats = (
    ("Mean", valid_sens_slopes.mean()),
    ("Median", valid_sens_slopes.median()),
)
for stat_label, stat_value in sens_stats:
    write_output(f"   {stat_label}: {stat_value:.4f} m/year")

# Per-model fit quality; models without a column or without values are
# skipped silently.
write_output("\nMODEL PERFORMANCE SUMMARY (NSE):")
for model in ('ARIMA', 'HW', 'Prophet', 'LSTM', 'BiLSTM'):
    nse_col = f'{model}_NSE'
    if nse_col not in df_well_results.columns:
        continue
    valid_nse = df_well_results[nse_col].dropna()
    if valid_nse.empty:
        continue
    write_output(f"   {model}: Mean NSE = {valid_nse.mean():.3f}, Median NSE = {valid_nse.median():.3f} (n={len(valid_nse)})")

# Predicted change per model, again only where values exist.
write_output(f"\nPREDICTION SUMMARY (to {PREDICTION_YEAR}):")
for model_name in ('ARIMA', 'HW', 'Prophet', 'LSTM', 'BiLSTM', 'Ensemble'):
    change_col = f'{model_name}_Change_2030'
    if change_col not in df_well_results.columns:
        continue
    valid_preds = df_well_results[change_col].dropna()
    if valid_preds.empty:
        continue
    write_output(f"\n{model_name} (n={len(valid_preds)}):")
    write_output(f"   Mean change: {valid_preds.mean():+.2f} m")
    write_output(f"   Median change: {valid_preds.median():+.2f} m")

for header_line in ("\n" + "="*80, "SECTION 4: SAVING RESULTS", "="*80):
    write_output(header_line)

excel_path = os.path.join(output_folder, 'Excel', 'Groundwater_Trend_Analysis_Complete.xlsx')

write_output(f"\nSaving Excel file: {os.path.basename(excel_path)}")


def _mean_or_na(column, spec):
    # Formatted column mean, or "N/A" when the column holds no values.
    series = df_well_results[column]
    return format(series.mean(), spec) if series.notna().any() else "N/A"


# Workbook layout: a 'Summary' sheet of parameter/value pairs followed by the
# full per-well table.
with pd.ExcelWriter(excel_path, engine='openpyxl') as writer:
    summary_rows = [
        ('Analysis Date', datetime.now().strftime('%Y-%m-%d %H:%M:%S')),
        ('Total Wells Analyzed', total_wells_analyzed),
        ('Wells with Decreasing Trend (Linear)', wells_with_decreasing_linear),
        ('Wells with Decreasing Trend (Mann-Kendall)', wells_with_decreasing_mk),
        ('Wells with Decreasing Trend (Consensus)', wells_with_decreasing_consensus),
        ('Percentage Decreasing (Consensus)',
         f"{wells_with_decreasing_consensus/total_wells_analyzed*100:.2f}%"),
        ('Wells with Significant Linear Trend', wells_with_significant_linear),
        ('Wells with Significant MK Trend', wells_with_significant_mk),
        ('Mean Linear Slope (m/year)', format(valid_linear_slopes.mean(), '.4f')),
        ('Median Linear Slope (m/year)', format(valid_linear_slopes.median(), '.4f')),
        ('Mean Sens Slope (m/year)', format(valid_sens_slopes.mean(), '.4f')),
        ('Mean ARIMA NSE', _mean_or_na('ARIMA_NSE', '.3f')),
        ('Mean LSTM NSE', _mean_or_na('LSTM_NSE', '.3f')),
        ('Mean Ensemble Predicted Change (m)', _mean_or_na('Ensemble_Change_2030', '.2f')),
        ('Minimum Recent Year Required', MIN_RECENT_YEAR),
        ('Minimum Records Required', MIN_RECORDS),
        ('Minimum Years Span Required', MIN_YEARS_SPAN),
        ('Prediction Target Year', PREDICTION_YEAR),
        ('Significance Level', SIGNIFICANCE_LEVEL),
        ('Confidence Level', CONFIDENCE_LEVEL),
    ]
    parameter_names, parameter_values = zip(*summary_rows)
    summary_data = {
        'Parameter': list(parameter_names),
        'Value': list(parameter_values),
    }
    pd.DataFrame(summary_data).to_excel(writer, sheet_name='Summary', index=False)
    df_well_results.to_excel(writer, sheet_name='All_Wells_Details', index=False)

write_output("Excel file saved successfully")

# The same per-well table as a flat CSV next to the workbook.
csv_path = os.path.join(output_folder, 'Excel', 'Groundwater_Trend_Analysis_Complete.csv')
df_well_results.to_csv(csv_path, index=False)
write_output(f"CSV file saved: {os.path.basename(csv_path)}")

# ---------------------------------------------------------------------------
# Final housekeeping: copy the run log to the report location, remove the
# progress tracker, and print the closing banner. Both file operations are
# best-effort, but failures are reported instead of being swallowed.
# ---------------------------------------------------------------------------
import shutil

txt_path = os.path.join(output_folder, 'Text_Output', 'Groundwater_Trend_Analysis_Complete_Report.txt')
try:
    shutil.copy(log_file_path, txt_path)
except OSError as copy_error:
    # Only file-system errors are tolerated here; anything else (including
    # KeyboardInterrupt) propagates.
    write_output(f"WARNING: Could not save text report: {copy_error}")
else:
    write_output(f"Text report saved: {os.path.basename(txt_path)}")

try:
    os.remove(progress_file)
except FileNotFoundError:
    # No tracker on disk: nothing to clean up.
    pass
except OSError as cleanup_error:
    write_output(f"WARNING: Could not remove progress tracker file: {cleanup_error}")
else:
    write_output("Progress tracker file cleaned up")

write_output("\n" + "="*80)
write_output("ANALYSIS COMPLETED SUCCESSFULLY")
write_output("="*80)
write_output(f"Output Directory: {output_folder}")
write_output(f"Analysis completed: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
write_output("="*80)