In [24]:
## Daylength
import numpy as np

def day_length(day_of_year, latitude):
    """Return the number of daylight hours for the given day(s) and latitude.

    Parameters
    ----------
    day_of_year : scalar or array-like
        Day number(s); converted with ``np.array`` so the computation is
        vectorised.
    latitude : scalar or array-like
        Latitude in degrees (converted to radians internally).

    Returns
    -------
    numpy scalar or ndarray
        Daylight duration in hours, between 0 and 24.

    Notes
    -----
    The constants look like those of the CBM daylength model
    (Forsythe et al., 1995) with a 0.8333 degree twilight offset --
    TODO confirm against the intended reference.
    """
    doy = np.array(day_of_year)
    lat = np.deg2rad(latitude)

    # Seasonal angle, then the declination-like angle P derived from it.
    seasonal_angle = 0.2163108 + 2 * np.arctan(0.9671396 * np.tan(0.00860 * (doy - 186)))
    P = np.arcsin(0.39795 * np.cos(seasonal_angle))

    hour_angle_cos = (
        (np.sin(np.deg2rad(0.8333)) + np.sin(lat) * np.sin(P))
        / (np.cos(lat) * np.cos(P))
    )
    # Outside [-1, 1] arccos is undefined; clipping there yields 24 h or 0 h
    # instead of NaN.
    hour_angle_cos = np.clip(hour_angle_cos, -1.0, 1.0)

    return 24 - (24 / np.pi) * np.arccos(hour_angle_cos)


In [25]:
## CDD
def CDD_model(par, data):
    """Cold-degree-day model: predict the day on which accumulated chilling
    reaches a critical value.

    Parameters
    ----------
    par : sequence of length 2
        ``(T_base, F_crit)`` -- base temperature and the (non-positive)
        cumulative chilling threshold.
    data : mapping
        Must contain ``'Tmini'``, a 2-D array indexed ``[day_row, site]``.

    Returns
    -------
    numpy.ndarray
        One value per site column: 1-based row number of the first day whose
        cumulative chilling is ``<= F_crit``, or NaN when never reached.

    Raises
    ------
    ValueError
        If ``par`` does not hold exactly two values.
    """
    if len(par) != 2:
        raise ValueError("model parameter(s) out of range (too many, too few)")

    T_base, F_crit = par
    Tmini = data['Tmini']

    # Daily chilling rate: temperature deficit below T_base, never positive.
    Rf = np.minimum(Tmini - T_base, 0)

    is_cold = Tmini < T_base
    doy = []
    for site in range(Tmini.shape[1]):
        # Accumulation starts at the first cold row with 0-based index > 173;
        # when there is none, row 174 is used as the start.
        late_cold = np.flatnonzero(is_cold[174:, site])
        start_row = 174 + late_cold[0] if late_cold.size else 174
        Rf[:start_row, site] = 0

        # First row where the running chilling sum reaches the threshold.
        reached = np.flatnonzero(np.cumsum(Rf[:, site]) <= F_crit)
        doy.append(reached[0] + 1 if reached.size else np.nan)

    return np.array(doy)

In [26]:
## DM
def DM_model(par, data):
    """Temperature-driven chilling model taking three parameters.

    The photoperiod factor ``Li / P_base`` is disabled in this implementation,
    so ``P_base`` and ``data['Li']`` are read but do not influence the result.

    Parameters
    ----------
    par : sequence of length 3
        ``(T_base, P_base, F_crit)``.
    data : mapping
        Must contain ``'Tmini'`` and ``'Li'``; ``'Tmini'`` is a 2-D array
        indexed ``[day_row, site]``.

    Returns
    -------
    numpy.ndarray
        One value per site column: 1-based row number of the first day whose
        cumulative chilling is ``<= F_crit``, or NaN when never reached.

    Raises
    ------
    ValueError
        If ``par`` does not hold exactly three values.
    """
    if len(par) != 3:
        raise ValueError("model parameter(s) out of range (too many, too few)")

    T_base, P_base, F_crit = par
    Tmini = data['Tmini']
    Li = data['Li']  # looked up (KeyError if missing) but currently unused

    # Chilling rate: only temperature deficits below T_base contribute.
    Rf = np.minimum(Tmini - T_base, 0)

    is_cold = Tmini < T_base
    doy = []
    for site in range(Tmini.shape[1]):
        # Start accumulating at the first cold row with 0-based index > 173,
        # or at row 174 when no such row exists.
        late_cold = np.flatnonzero(is_cold[174:, site])
        start_row = 174 + late_cold[0] if late_cold.size else 174
        Rf[:start_row, site] = 0

        reached = np.flatnonzero(np.cumsum(Rf[:, site]) <= F_crit)
        doy.append(reached[0] + 1 if reached.size else np.nan)

    return np.array(doy)

In [27]:
## SIAM
def SIAM_model(par, predictor, data):
    """Chilling model whose critical threshold depends on a per-site predictor.

    The daily rate is ``(Tmini - T_base) * (Li / P_base)`` truncated at zero.
    For site ``i`` the threshold is ``-(a + b * S_i)`` where ``S_i`` is the
    predictor minus its NaN-ignoring mean.

    Parameters
    ----------
    par : sequence of length 4
        ``(T_base, P_base, a, b)``.
    predictor : numpy.ndarray
        Flattened internally; element ``i`` belongs to site column ``i``.
    data : mapping
        Must contain ``'Tmini'`` and ``'Li'``, both indexed ``[day_row, site]``.

    Returns
    -------
    numpy.ndarray
        One value per site column: 1-based row number of the first day whose
        cumulative rate is at or below the site threshold, or NaN.

    Raises
    ------
    ValueError
        If ``par`` does not hold exactly four values.
    """
    if len(par) != 4:
        raise ValueError("model parameter(s) out of range (too many, too few)")

    T_base, P_base, a, b = par
    Tmini = data["Tmini"]
    Li = data["Li"]

    # Photoperiod-weighted chilling rate; positive values are discarded.
    Rf = np.minimum((Tmini - T_base) * (Li / P_base), 0)

    # Ignore everything before the first cold row with 0-based index > 173
    # (row 174 when no such row exists).
    is_cold = Tmini < T_base
    for site in range(Tmini.shape[1]):
        late_cold = np.flatnonzero(is_cold[174:, site])
        start_row = 174 + late_cold[0] if late_cold.size else 174
        Rf[:start_row, site] = 0

    # Centre the predictor so that `a` is the threshold at the mean predictor.
    anomalies = predictor.flatten()
    anomalies = anomalies - np.nanmean(anomalies)

    doy = []
    for site in range(Rf.shape[1]):
        threshold = -(a + b * anomalies[site])
        reached = np.flatnonzero(np.cumsum(Rf[:, site]) <= threshold)
        doy.append(reached[0] + 1 if reached.size else np.nan)
    return np.array(doy)

In [28]:
## SIAMN
def SIAMN_model(par, predictor, data):
    """Sigmoid-response chilling model with a predictor-dependent threshold.

    The daily rate is ``-(1 - 1 / (1 + exp(-a * (Tmini - T_b)))) * (Li / P_base)``.
    For site ``i`` the threshold is ``-(a1 + b1 * S_i)`` where ``S_i`` is the
    predictor minus its NaN-ignoring mean.

    Parameters
    ----------
    par : sequence of length 5
        ``(T_b, P_base, a, a1, b1)``.
    predictor : numpy.ndarray
        Flattened internally; element ``i`` belongs to site column ``i``.
    data : mapping
        Must contain ``'Tmini'`` and ``'Li'``, both indexed ``[day_row, site]``.

    Returns
    -------
    numpy.ndarray
        One value per site column: 1-based row number of the first day whose
        cumulative rate is at or below the site threshold, or NaN.

    Raises
    ------
    ValueError
        If ``par`` does not hold exactly five values.
    """
    if len(par) != 5:
        # The message now matches the check (it previously claimed 3 parameters).
        raise ValueError("Expected 5 parameters: T_b, P_base, a, a1, b1")

    T_b, P_base, a, a1, b1 = par
    Tmini = data['Tmini']  # (days, sites)
    Li = data['Li']        # (days, sites)

    # Chilling response function (sigmoid form, negative values only)
    Rf = -(1 - 1 / (1 + np.exp(-a * (Tmini - T_b)))) * (Li / P_base)
    Rf[Rf > 0] = 0  # only reachable when Li / P_base is negative

    for site in range(Tmini.shape[1]):
        # t0: first row with 0-based index > 173 where Tmini < T_b.
        below_base = np.where(Tmini[:, site] < T_b)[0]
        after_173 = below_base[below_base > 173]
        # With no such row the whole column is zeroed (no chilling period).
        t0_day = after_173[0] if len(after_173) else Tmini.shape[0]
        Rf[:t0_day + 1, site] = 0  # t0 itself is excluded from the sum

    # Centre the predictor so that a1 is the threshold at the mean predictor.
    predictor = predictor.flatten()
    predictor = predictor - np.nanmean(predictor)

    doy = []
    for col in range(Rf.shape[1]):
        cumulative = np.cumsum(Rf[:, col])
        valid_days = np.where(cumulative <= -(a1 + b1 * predictor[col]))[0]
        doy.append(valid_days[0] + 1 if len(valid_days) > 0 else np.nan)
    return np.array(doy)


In [29]:
## SIAMNP
def SIAMNP_model(par, predictor, data):
    """Sigmoid chilling model whose threshold is modulated by precipitation.

    The daily rate is ``-(1 - 1 / (1 + exp(-a * (Tmini - T_b)))) * (Li / P_base)``.
    For site ``i`` the threshold is ``-(a1 + b1 * S_i) * (1 - P_mod_i)`` with
    ``P_mod = 1 - exp(-(Pmean - p_opt)**2 / (2 * c**2))`` and ``S_i`` the
    predictor minus its NaN-ignoring mean.

    Parameters
    ----------
    par : sequence of length 7
        ``(T_b, P_base, a, a1, b1, c, p_opt)``.
    predictor : numpy.ndarray
        Flattened internally; element ``i`` belongs to site column ``i``.
    data : mapping
        Must contain ``'Tmini'`` and ``'Li'`` (indexed ``[day_row, site]``) and
        ``'Pmean'``. ``Pmean`` is flattened and indexed by site column, so it
        presumably holds one value per site (e.g. shape ``(1, sites)``) --
        verify against the caller.

    Returns
    -------
    numpy.ndarray
        One value per site column: 1-based row number of the first day whose
        cumulative rate is at or below the site threshold, or NaN.

    Raises
    ------
    ValueError
        If ``par`` does not hold exactly seven values.
    """
    if len(par) != 7:
        # The message now matches the check (it previously claimed 3 parameters).
        raise ValueError("Expected 7 parameters: T_b, P_base, a, a1, b1, c, p_opt")

    T_b, P_base, a, a1, b1, c, p_opt = par
    Tmini = data['Tmini']
    Li = data['Li']
    Pmean = data['Pmean']

    # Precipitation modifier: 0 at p_opt, approaching 1 far away from it.
    P_mod = 1 - np.exp(-((Pmean - p_opt) ** 2) / (2 * c ** 2))

    # Chilling response function (sigmoid form, negative values only)
    Rf = -(1 - 1 / (1 + np.exp(-a * (Tmini - T_b)))) * (Li / P_base)
    Rf[Rf > 0] = 0  # only reachable when Li / P_base is negative

    # `site` is used as the loop variable so the model parameter `c` is not
    # overwritten.
    for site in range(Tmini.shape[1]):
        # t0: first row with 0-based index > 173 where Tmini < T_b.
        below_base = np.where(Tmini[:, site] < T_b)[0]
        after_173 = below_base[below_base > 173]
        # With no such row the whole column is zeroed (no chilling period).
        t0_day = after_173[0] if len(after_173) else Tmini.shape[0]
        Rf[:t0_day + 1, site] = 0  # t0 itself is excluded from the sum

    predictor = predictor.flatten()
    P_mod = P_mod.flatten()

    # nanmean, as in SIAM_model/SIAMN_model: one missing predictor must not
    # turn every site's threshold into NaN.
    predictor = predictor - np.nanmean(predictor)

    doy = []
    for col in range(Rf.shape[1]):
        cumulative = np.cumsum(Rf[:, col])
        threshold = -(a1 + b1 * predictor[col]) * (1 - P_mod[col])
        valid_days = np.where(cumulative <= threshold)[0]
        doy.append(valid_days[0] + 1 if len(valid_days) > 0 else np.nan)
    return np.array(doy)


In [None]:
## Prepare data
def read_data():
    """Load and pre-process the SPRUCE phenology, environment and precipitation tables.

    Reads three CSV files under ``../data/tables/spruce/`` and returns them as
    DataFrames that all carry a ``site_index`` column.

    Returns
    -------
    tuple of (eos_sos_data, envir_data, prcp_data)
        * ``eos_sos_data`` -- transition-date table without the 2014/2015
          columns, without plots 4, 10, 11, 16, 19 and without rows containing
          NaN; ``site_index`` is the retained row index.
        * ``envir_data`` -- daily temperature table with an added ``Li``
          (day length) column; each row is repeated once for every
          ``site_index`` that shares its ``plot``.
        * ``prcp_data`` -- precipitation table repeated once per row of
          ``eos_sos_data`` with ``site_index`` values ``0 .. n-1``.

    Notes
    -----
    Side effect: seeds NumPy's global random generator with 123.

    NOTE(review): ``eos_sos_data['site_index']`` keeps the original (filtered,
    non-reset) index while the precipitation ``site_index`` is a fresh
    ``0 .. n-1`` range, so the two labelings may differ -- confirm that
    downstream code only relies on their relative order.
    """
    import pandas as pd
    import numpy as np
    from scipy.optimize import dual_annealing
    from sklearn.metrics import mean_squared_error, r2_score
    from sklearn.linear_model import LinearRegression
    
    # ---------------------------------
    # 1. Read and Prepare Data
    # Set random seeds for reproducibility
    np.random.seed(123)
    
    eos_sos_data = pd.read_csv('../data/tables/spruce/sos_eos_2016_2023_ORNL.csv')

    # Drop columns matching eos_2014, eos_2015, sos_2014, sos_2015
    eos_sos_data = eos_sos_data.drop(
        columns=[col for col in eos_sos_data.columns if any(x in col for x in ['eos_2014', 'eos_2015', 'sos_2014', 'sos_2015'])])
 
    # Remove the excluded plots, then any row that still has a missing value
    exclude_plots = [4, 10, 11, 16, 19]
    eos_sos_data = eos_sos_data[~eos_sos_data['plot'].isin(exclude_plots)].dropna()
    # print(eos_sos_data)
    
    envir_data = pd.read_csv('../data/tables/spruce/envir_data.csv')
    # Drop columns matching certain years and variables
    drop_patterns = ['TA_2_0_1_min_2014', 'TA_2_0_1_min_2015', 'TA_2_0_1_min_2024', 'TA_2_0_1_min_2025',
                     'TA_2_0_1_max_2014', 'TA_2_0_1_max_2015', 'TA_2_0_1_max_2024', 'TA_2_0_1_max_2025']
  
    cols_to_drop = [col for col in envir_data.columns if any(p in col for p in drop_patterns)]
    envir_data = envir_data.drop(columns=cols_to_drop)
    # Rename the sensor prefixes to daily_min_t_<year> / daily_max_t_<year>
    envir_data.columns = (envir_data.columns
                  .str.replace(r'^TA_2_0_1_min_', 'daily_min_t_', regex=True)
                  .str.replace(r'^TA_2_0_1_max_', 'daily_max_t_', regex=True))
    # print(envir_data)
    
    # Add day length (Li) column, computed from `doy` at a fixed latitude of 47.51 degrees
    envir_data['Li'] = envir_data.apply(lambda row: day_length(row['doy'], 47.51), axis=1)
    # site_index is the DataFrame index left after filtering (not reset)
    eos_sos_data['site_index'] = eos_sos_data.index
    # print(eos_sos_data)
    # plot -> list of site_index values belonging to that plot
    site_index_by_plot = eos_sos_data.groupby('plot')['site_index'].apply(list)
    # Duplicate every environmental row once per site_index of its plot;
    # rows whose plot has no site are dropped.
    rows = []
    for _, row in envir_data.iterrows():
        plot = row['plot']
        if plot in site_index_by_plot:
            for site_index in site_index_by_plot[plot]:
                new_row = row.copy()
                new_row['site_index'] = site_index
                rows.append(new_row)
    repeated_envir_data = pd.DataFrame(rows).sort_values(by=['site_index', 'doy']).reset_index(drop=True)
    # print(repeated_envir_data)
    envir_data = repeated_envir_data
    # print(envir_data)
    
    prcp_data = pd.read_csv('../data/tables/spruce/spruce_prcp.csv')#.to_numpy()
    # Exclude columns ending in _2015 or _2024 and the coordinate columns
    exclude_mask = (
        prcp_data.columns.str.contains(r'_(?:2015|2024)$', regex=True) |
        prcp_data.columns.isin(['latitude', 'longitude'])
    )

    prcp_data = prcp_data.loc[:, ~exclude_mask]
    # print(prcp_data)
    # Stack one copy of the precipitation table per phenology row and label
    # the copies 0 .. repeats-1
    repeats = len(eos_sos_data)
    repeated_df = pd.concat([prcp_data] * repeats, ignore_index=True)
    site_index = pd.Series(range(repeats)).repeat(len(prcp_data)).reset_index(drop=True)
    repeated_df['site_index'] = site_index
    prcp_data = repeated_df
    # print(prcp_data)
    return eos_sos_data, envir_data, prcp_data

In [None]:
## Split data
def split_data(eos_sos_data_train, eos_sos_data_test, envir_data_train, envir_data_test, prcp_data_train, prcp_data_test):
    """Reshape the train/test tables into the NumPy arrays used by the models.

    Every table is turned into a wide layout with one column per
    ``pixel_<site_index>_<variable>`` combination, the columns are ordered by
    (pixel number, four-digit year) and the result is converted with
    ``to_numpy()``.

    Parameters
    ----------
    eos_sos_data_train, eos_sos_data_test : pandas.DataFrame
        Transition-date tables with a ``site_index`` column and columns whose
        names contain ``eos`` / ``sos``.
    envir_data_train, envir_data_test : pandas.DataFrame
        Daily tables with ``site_index``, ``doy`` and columns whose names
        contain ``daily_min_t``, ``daily_max_t`` and ``Li``.
    prcp_data_train, prcp_data_test : pandas.DataFrame
        Precipitation tables with a ``site_index`` column and columns whose
        names contain ``annual_p``.

    Returns
    -------
    dict
        Keys ``eos_*``, ``sos_*``, ``prcp_*`` (single-row arrays),
        ``min_t_*``, ``max_t_*`` (one row per ``doy``) and ``Li_*`` (the first
        ``Li`` column repeated once per ``eos`` column), each for ``train``
        and ``test``.

    Notes
    -----
    ``Li_test`` is built from ``envir_data_train``'s ``Li`` column, not from
    the test table.
    """
    import numpy as np
    import pandas as pd
    import re
    from scipy.optimize import dual_annealing
    from scipy.stats import pearsonr
    from scipy.optimize import differential_evolution
    from scipy.optimize import minimize
    from sklearn.model_selection import train_test_split
    
    ## Sort columns
    def sort_pixel_year_columns(columns):
        """Return ``columns`` sorted by (pixel number, year) parsed from each name."""
        def extract_pixel_year(col):
            # Try to extract pixel and year
            match = re.search(r'pixel_(\d+).*_(\d{4})', col)
            if match:
                pixel = int(match.group(1))
                year = int(match.group(2))
                return (pixel, year)
            elif col == "var_new":
                return (-1, -1)  # Keep var_new at the front
            else:
                return (float('inf'), float('inf'))  # Other columns go last
    
        return sorted(columns, key=extract_pixel_year)
        
    def merge_eos_sos_data(df):
        """Flatten ``df`` into one row with columns named ``pixel_<site_index>_<column>``."""
        df_flat = df.copy()
        df_flat.set_index("site_index", inplace=True)
        # Row-major flattening: all columns of the first site, then the next site, ...
        new_columns = [f"pixel_{idx}_{col}" for idx in df_flat.index for col in df_flat.columns]
        df_single_row = pd.DataFrame([df_flat.values.flatten()], columns=new_columns)
        return df_single_row
    # print(eos_sos_data_train)
    eos_sos_train = merge_eos_sos_data(eos_sos_data_train)
    eos_sos_test = merge_eos_sos_data(eos_sos_data_test)

    ## EOS & SOS train
    # Columns are selected by substring match on 'eos' / 'sos'
    eos_cols_train = [col for col in eos_sos_train.columns if 'eos' in col]
    eos_cols_test = [col for col in eos_sos_test.columns if 'eos' in col]
    sos_cols_train = [col for col in eos_sos_train.columns if 'sos' in col]
    sos_cols_test = [col for col in eos_sos_test.columns if 'sos' in col]
    eos_train = eos_sos_train[eos_cols_train]
    eos_test = eos_sos_test[eos_cols_test]
    sos_train = eos_sos_train[sos_cols_train]
    sos_test = eos_sos_test[sos_cols_test]
    
    # print(eos[sort_pixel_year_columns(eos.columns)])
    eos_train = eos_train[sort_pixel_year_columns(eos_train.columns)].to_numpy()
    eos_test = eos_test[sort_pixel_year_columns(eos_test.columns)].to_numpy()
    
    sos_train = sos_train[sort_pixel_year_columns(sos_train.columns)].to_numpy()
    sos_test = sos_test[sort_pixel_year_columns(sos_test.columns)].to_numpy()
    
    # print(sos_train)
    
    # Precipitation: same flattening, keeping only the 'annual_p' columns
    prcp_train = merge_eos_sos_data(prcp_data_train)
    prcp_cols_train = [col for col in prcp_train.columns if 'annual_p' in col]
    prcp_train = prcp_train[prcp_cols_train]#.to_numpy()
    prcp_train = prcp_train[sort_pixel_year_columns(prcp_train.columns)].to_numpy()
    
    prcp_test = merge_eos_sos_data(prcp_data_test)
    prcp_cols_test = [col for col in prcp_test.columns if 'annual_p' in col]
    prcp_test = prcp_test[prcp_cols_test]#.to_numpy()
    prcp_test = prcp_test[sort_pixel_year_columns(prcp_test.columns)].to_numpy()
    
    def merge_envir_data(df):
        """Pivot ``df`` to one row per ``doy`` and one column per ``pixel_<site_index>_<variable>``."""
        df_long = df.melt(
            id_vars=['site_index', 'doy'],
            var_name='variable',
            value_name='value'
        )
        df_long['var_new'] =  'pixel_' + df_long['site_index'].astype(str) + '_' + df_long['variable']
        df_wide = df_long.pivot(index='doy', columns='var_new', values='value')
        return df_wide.reset_index()
    # print(envir_data_train)
    envir_data_train = merge_envir_data(envir_data_train)
    envir_data_test = merge_envir_data(envir_data_test)
    
    ## T train
    min_t_cols_train = [col for col in envir_data_train.columns if 'daily_min_t' in col]
    max_t_cols_train = [col for col in envir_data_train.columns if 'daily_max_t' in col]
    min_t_train = envir_data_train[min_t_cols_train]
    max_t_train = envir_data_train[max_t_cols_train]
    min_t_train = min_t_train[sort_pixel_year_columns(min_t_train.columns)].to_numpy()
    max_t_train = max_t_train[sort_pixel_year_columns(max_t_train.columns)].to_numpy()
    
    ## T test
    min_t_cols_test = [col for col in envir_data_test.columns if 'daily_min_t' in col]
    max_t_cols_test = [col for col in envir_data_test.columns if 'daily_max_t' in col]
    min_t_test = envir_data_test[min_t_cols_test]
    max_t_test = envir_data_test[max_t_cols_test]
    min_t_test = min_t_test[sort_pixel_year_columns(min_t_test.columns)].to_numpy()
    max_t_test = max_t_test[sort_pixel_year_columns(max_t_test.columns)].to_numpy()
    
    # Day length: take the first column whose name contains 'Li' and repeat it
    # once per eos column so its width matches the temperature arrays.
    # The training table is the source for both the train and the test copies.
    Li_cols = [col for col in envir_data_train.columns if 'Li' in col]
    Li = envir_data_train[Li_cols]
    Li = Li.iloc[:, [0]].rename(columns={Li.columns[0]: 'Li'})
    Li_train = pd.concat([Li] * eos_train.shape[1], axis=1)
    Li_train.columns = [f'Li_{i}' for i in range(eos_train.shape[1])]
    Li_train = Li_train.to_numpy()
    Li_test = pd.concat([Li] * eos_test.shape[1], axis=1)
    Li_test.columns = [f'Li_{i}' for i in range(eos_test.shape[1])]
    Li_test = Li_test.to_numpy()

    
    # Return everything as a dictionary
    return {
        'eos_train': eos_train,
        'eos_test': eos_test,
        'sos_train': sos_train,
        'sos_test': sos_test,
        'prcp_train': prcp_train,
        'prcp_test': prcp_test,
        'min_t_train': min_t_train,
        'min_t_test': min_t_test,
        'max_t_train': max_t_train,
        'max_t_test': max_t_test,
        'Li_train': Li_train,
        'Li_test': Li_test
    }

In [None]:
## Split year by year
def kfold_split_years(eos_sos_data, envir_data, prcp_data, n_folds, random_seed=42):
    """Build year-wise K-fold train/test datasets.

    The years 2016-2023 are shuffled into ``n_folds`` folds with scikit-learn's
    ``KFold``. For each fold, columns whose name contains a held-out year are
    removed from the training tables (and vice versa for the test tables)
    before the six tables are handed to ``split_data``.

    Parameters
    ----------
    eos_sos_data, envir_data, prcp_data : pandas.DataFrame
        Tables whose year-specific columns carry the year in their name.
    n_folds : int
        Number of folds passed to ``KFold``.
    random_seed : int, default 42
        Seed for the fold shuffling.

    Returns
    -------
    list of dict
        One entry per fold with keys ``"fold"`` (1-based), ``"train_years"``,
        ``"test_years"`` and ``"data"`` (the ``split_data`` result).
    """
    import numpy as np
    import pandas as pd
    from sklearn.model_selection import KFold

    all_years = np.arange(2016, 2024)

    def remove_year_cols(df, years_to_remove):
        # A column is dropped when its name contains the year as a substring.
        doomed = [col for year in years_to_remove for col in df.columns if str(year) in col]
        return df.drop(columns=doomed)

    splitter = KFold(n_splits=n_folds, shuffle=True, random_state=random_seed)
    folds = list(splitter.split(all_years))

    all_fold_data = []
    for fold_number, (train_idx, test_idx) in enumerate(folds, start=1):
        train_years, test_years = all_years[train_idx], all_years[test_idx]

        # Ordered as split_data expects: (train, test) for eos/sos, environment, precipitation.
        tables = []
        for table in (eos_sos_data, envir_data, prcp_data):
            tables.append(remove_year_cols(table, test_years))
            tables.append(remove_year_cols(table, train_years))

        all_fold_data.append({
            "fold": fold_number,
            "train_years": train_years,
            "test_years": test_years,
            "data": split_data(*tables),
        })

    return all_fold_data

In [None]:
## Train and test model
def train(model_list, eos_train, sos_train, min_t_train, prcp_train, Li_train, max_t_train, maxiter=None):
    """Fit every model in ``model_list`` by minimising RMSE with dual annealing.

    Parameters
    ----------
    model_list : dict
        ``{model_name: {'fun': callable, 'lower': [...], 'upper': [...]}}``.
        Models named SIAM, SIAMN, SIAMNP or SIAMP are called as
        ``fun(par, sos_train, data)``; all others as ``fun(par, data)``.
    eos_train : numpy.ndarray
        Observed transition dates the predictions are compared with.
    sos_train : numpy.ndarray
        Predictor passed to the SIAM-family models.
    min_t_train, max_t_train, Li_train, prcp_train : numpy.ndarray
        Driver arrays stored in the data dict under ``'Tmini'``, ``'Tmaxi'``,
        ``'Li'`` and ``'Pmean'`` (the latter two only for models that use them).
    maxiter : int, optional
        Number of dual-annealing iterations. When omitted, the notebook-level
        variable ``maxiteration`` is used, as before.

    Returns
    -------
    dict
        ``{model_name: {'params', 'fun', 'success', 'message'}}`` taken from
        the optimiser result.
    """
    # Imported here so the function does not rely on an import made in a
    # later notebook cell.
    from scipy.optimize import dual_annealing

    if maxiter is None:
        maxiter = maxiteration  # notebook-level setting (backward compatible)

    sos_driven = ('SIAM', 'SIAMN', 'SIAMNP', 'SIAMP')
    model_results = {}

    for model_name, model_info in model_list.items():
        # Every model gets the minimum-temperature, day-length and target arrays.
        data_list_train = {
            'Tmini': min_t_train,
            'Li': Li_train,
            'transition_dates': eos_train
        }
        if model_name in ('NCDD2', 'PC_NCDD2', 'SIAMP', 'SIAMNP'):
            data_list_train['Tmaxi'] = max_t_train
        if model_name in ('SIAMP', 'SIAMNP'):
            data_list_train['Pmean'] = prcp_train

        def cost_function(par):
            # Non-finite parameters or predictions get a large penalty so the
            # optimiser moves away from them.
            if not np.all(np.isfinite(par)):
                return 1e10
            if model_name in sos_driven:
                predicted = model_info['fun'](par, sos_train, data_list_train)
            else:
                predicted = model_info['fun'](par, data_list_train)
            if predicted is None or not np.all(np.isfinite(predicted)):
                return 1e10
            return np.sqrt(np.mean((predicted - eos_train) ** 2))

        bounds = list(zip(model_info['lower'], model_info['upper']))
        result = dual_annealing(cost_function, bounds=bounds, maxiter=maxiter, seed=42)
        model_results[model_name] = {
            'params': result.x,
            'fun': result.fun,
            'success': result.success,
            'message': result.message
        }
    return model_results
from scipy.stats import pearsonr, linregress
pixel_results = []

def test(model_list, eos_test, sos_test, min_t_test, prcp_test, Li_test, max_t_test):
    pixel_test_rmse_all_folds = {model: [] for model in model_list}
    pixel_test_r2_all_folds = {model: [] for model in model_list}
    pixel_test_slope_all_folds = {model: [] for model in model_list}
    pixel_test_p_all_folds = {model: [] for model in model_list}
    ## Test model
    site_results = []
    eos_test = eos_test.flatten()
    pixel_rmse = {}
    pixel_test_aic = {}
    for model_name, model_info in model_list.items():
        # print(model_list)
        # print(model_name)
        if model_name in ['NCDD2', 'PC_NCDD2', 'SIAMN']:
            # print('PC_NCDD2')
            data_list_test = {
                'Tmaxi': max_t_test,
                'Tmini': min_t_test,
                'Li': Li_test,
                'transition_dates': eos_test
            }
        elif model_name in ['SIAMP', 'SIAMNP']:
            data_list_test = {
                'Tmaxi': max_t_test,
                'Tmini': min_t_test,
                'Pmean': prcp_test,
                'Li': Li_test,
                'transition_dates': eos_test
            }
        else:
            data_list_test = {
                'Tmini': min_t_test,
                'Li': Li_test,
                'transition_dates': eos_test
            }
        params = model_results[model_name]['params']
    
        # predicted_test = model_info['fun'](result.x, data_list_test)
        if model_name in ['SIAM', 'SIAMN', 'SIAMNP', 'SIAMP']:
            predicted_test = model_info['fun'](params, sos_test, data_list_test)
            # print(params)
            # print(predicted_test)
        else:
            predicted_test = model_info['fun'](params, data_list_test)
        # print(predicted_test)
        shape = predicted_test.shape  # or len(arr)
        nan_count = np.isnan(predicted_test).sum()
        # print("Shape:", shape)
        # print("Number of NaNs:", nan_count)
        # print(predicted_test)
        valid_mask = (~np.isnan(eos_test)) & (~np.isnan(predicted_test))
        # print(eos_test)
        # print(predicted_test)
        if np.any(valid_mask):
            obs = eos_test[valid_mask]
            pred = predicted_test[valid_mask]
            residuals = pred - obs
            if np.array_equal(pred, obs):
                print("Arrays are identical:", pred, obs)

            rmse = np.sqrt(np.mean(residuals ** 2))
            # r, p_value = pearsonr(pred, obs)
            if np.std(pred) == 0 or np.std(obs) == 0:
                # print(predictor1)
                # print(model_name, obs, pred)
                # print(observed_DOY_test, predicted_test)
                r2 = np.nan
                slope = np.nan
            else:
                slope, intercept, r_value, p_val, std_err = linregress(pred, obs)
                r2 = r_value**2
        else:
            rmse, r2, slope, p_val = np.nan, np.nan, np.nan, np.nan

        pixel_test_rmse_all_folds[model_name].append(rmse)
        pixel_test_r2_all_folds[model_name].append(r2)
        pixel_test_slope_all_folds[model_name].append(slope)
        pixel_test_p_all_folds[model_name].append(p_val)

    avg_rmse = {k: np.nanmean(v) for k, v in pixel_test_rmse_all_folds.items()}
    avg_r2 = {k: np.nanmean(v) for k, v in pixel_test_r2_all_folds.items()}
    avg_slope = {k: np.nanmean(v) for k, v in pixel_test_slope_all_folds.items()}
    avg_p = {k: np.nanmean(v) for k, v in pixel_test_p_all_folds.items()}

    pixel_results.append({
        "fold_index": float(fold_num),
        'test_rmse': avg_rmse,
        'test_r2': avg_r2,
        'test_slope': avg_slope,
        'test_p': avg_p
    })
    return pixel_results

In [None]:
# 1. Define models dictionary
# Each entry maps a model name to its function ('fun'), an initial guess
# ('init') and the optimiser bounds ('lower' / 'upper'). The list positions
# follow the order in which the model function unpacks `par`:
#   CDD:    T_base, F_crit
#   DM:     T_base, P_base, F_crit
#   SIAM:   T_base, P_base, a, b
#   SIAMN:  T_b, P_base, a, a1, b1
#   SIAMNP: T_b, P_base, a, a1, b1, c, p_opt
# Only 'lower' and 'upper' are read by train(); 'init' is not used there.
model_list = {
    'CDD': {'fun': CDD_model, 'init': [20, -20], 'lower': [10, -5000], 'upper': [40, 0]},
    'DM': {'fun': DM_model, 'init': [20, 15, -20], 'lower': [10, 10, -5000], 'upper': [40, 16, 0]},
    'SIAM': {'fun': SIAM_model, 'init': [20, 15, 20, 0], 'lower': [10, 10, 0, -1], 'upper': [40, 16, 5000, 1]},
    'SIAMN': {'fun': SIAMN_model, 'init': [20, 15, 2, 20, 0], 'lower': [10, 10, 0.1, 0.1, -1], 'upper': [40, 16, 8, 500, 1]},  
    'SIAMNP': {'fun': SIAMNP_model, 'init': [20, 12, 0.5, 20, 0, 2, 1], 'lower': [15, 10, 0.01, 1, -1, 1, 0.05], 'upper': [30, 16, 2, 500, 1, 10, 4]},  
}

# train() looks up dual_annealing at call time, so this import must run before train() is called.
from scipy.optimize import dual_annealing

# Number of dual-annealing iterations; read as a global by train().
maxiteration = 100
# Load the three pre-processed tables from disk.
eos_sos_data, envir_data, prcp_data = read_data()

In [35]:
## Run models
# Train on each fold's training years and evaluate on its held-out years.
n_folds = 8  # equals the number of years (2016-2023) in kfold_split_years, so each fold holds out one year

all_fold_data = kfold_split_years(eos_sos_data, envir_data, prcp_data, n_folds)
results = []
# print(all_fold_data)
for fold_info in all_fold_data:
    # print(fold_info)
    # NOTE: `fold_num` and `model_results` are read by test() as notebook
    # globals, so these names must be assigned before test() is called.
    fold_num = fold_info["fold"]
    train_years = fold_info["train_years"]
    test_years = fold_info["test_years"]
    data = fold_info["data"]

    # Training arrays for this fold
    eos_train = data["eos_train"]
    sos_train = data["sos_train"]
    min_t_train = data["min_t_train"]
    prcp_train = data["prcp_train"]
    Li_train = data["Li_train"]
    max_t_train = data["max_t_train"]

    model_results = train(model_list, eos_train, sos_train, min_t_train, prcp_train, Li_train, max_t_train)

    # Held-out arrays for this fold
    eos_test = data["eos_test"]
    sos_test = data["sos_test"]
    min_t_test = data["min_t_test"]
    prcp_test = data["prcp_test"]
    Li_test = data["Li_test"]
    max_t_test = data["max_t_test"]
    # print(eos_test)
    # test() returns the shared module-level `pixel_results` list, so every
    # element appended to `results` is that same list object; after the loop
    # results[0] therefore holds the records of all folds.
    site_results = test(model_list, eos_test, sos_test, min_t_test, prcp_test, Li_test, max_t_test)
    results.append(site_results)
# print(results)

In [37]:
import pandas as pd
# Persist the per-fold evaluation records. Forward slashes are used, as in the
# read_csv paths, so the location resolves on every OS; with backslashes a
# POSIX system would treat the whole string as a single file name.
pd.to_pickle(results[0], "../data/tables/params/spruce/evaluation_results_spruce.pkl")