##### Loading and Processing Data

In [None]:
import os
import sys
import pandas as pd
# Resolve the directory five levels above the current working directory and
# put it on the import path (once), so modules located there can be imported.
module_path = os.path.abspath('../../../../..')
if module_path not in sys.path:
    sys.path.append(module_path)

### Define paths to experiments

In [None]:
# Result folder of the selected run for each experiment, keyed by experiment
# name and resolved relative to the current working directory.
paths = {
    'covid': os.path.join(os.getcwd(), "covid/multivariate/run-6"),
    'rotifers_algae_coherent': os.path.join(os.getcwd(), "algae-rotifers/multivariate/run-4"),
    'rotifers_algae_incoherent': os.path.join(os.getcwd(), "algae-rotifers/multivariate/run-8"),
    'lynx_hares': os.path.join(os.getcwd(), "lynx-hares/multivariate/run-18"),
}

# Individual names for the same run folders.
covid_path = paths['covid']
rotifers_algae_coherent_path = paths['rotifers_algae_coherent']
rotifers_algae_incoherent_path = paths['rotifers_algae_incoherent']
lynx_hares_path = paths['lynx_hares']

# Folders holding the ODE predictions (note: these point at different runs
# than the experiment folders above).
lynx_hares_ode_prediction_path = os.path.join(
    os.getcwd(), "lynx-hares/multivariate/run-10/ODE_prediction"
)
covid_ode_prediction_path = os.path.join(
    os.getcwd(), "covid/multivariate/run-1/ODE_prediction"
)
rotifers_algae_coherent_ode_prediction_path = os.path.join(
    os.getcwd(), "algae-rotifers/multivariate/run-1/ODE_prediction"
)
rotifers_algae_incoherent_ode_prediction_path = os.path.join(
    os.getcwd(), "algae-rotifers/multivariate/run-5/ODE_prediction"
)

In [None]:
# Sub-folder (inside each experiment's run folder) holding the transfer-learning runs.
experiment_folder = 'runs_transfer_learning'

# Metric column names as they appear in the result CSV files.
metrics = [
    'mean_absolute_scaled_error',
    'mean_directional_accuracy',
    'mean_absolute_error',
    'normalized_root_mean_squared_error',
]

# Hex colour palette.
COLORS = [
    "#596065",
    "#B1053A",
    "#E3681D",
    "#FAA719",
    "#00799D",
]

### Create DL baseline results

In [None]:
# Aggregated deep-learning baseline metrics per experiment:
# experiment name -> one row per model with mean / median / std of each metric
# computed over all seed folders found in "<run folder>/runs_baseline".
baselines = {}

for exp in paths:
    baseline_root = os.path.join(paths[exp], "runs_baseline")

    # Load one baseline.csv per entry of the baseline folder.
    seed_frames = []
    for seed_dir in os.listdir(baseline_root):
        try:
            current_baseline = pd.read_csv(os.path.join(baseline_root, seed_dir, "baseline.csv"))
        except (OSError, UnicodeDecodeError, pd.errors.EmptyDataError, pd.errors.ParserError):
            # Best effort: skip entries that have no readable baseline.csv
            # (stray files, incomplete runs). Only I/O and parsing failures are
            # ignored, so KeyboardInterrupt and programming errors still surface.
            continue
        # The folder name carries the seed; drop the "S" characters to keep the number.
        current_baseline["S"] = seed_dir.replace("S", "")
        seed_frames.append(current_baseline)

    # Concatenate once instead of growing a frame inside the loop.
    # NOTE: if nothing could be loaded, `baseline` is an empty frame and the
    # positional rename below raises IndexError.
    baseline = pd.concat(seed_frames) if seed_frames else pd.DataFrame()

    # The first CSV column holds the model names.
    baseline = baseline.rename(columns={baseline.columns[0]: 'Models'})
    baseline = baseline.rename(columns={"mean_absolute_error": "MAE", "prediction_trend_accuracy": "PTA", "mean_absolute_scaled_error": "MASE", "mean_directional_accuracy":"MDA", "normalized_root_mean_squared_error": "NRMSE"})

    # Report the accuracy-style metrics as 1 - value.
    baseline['MDA'] = 1 - baseline['MDA']
    baseline = baseline.rename(columns={'MDA': '1-MDA'})
    baseline['PTA'] = 1 - baseline['PTA']
    baseline = baseline.rename(columns={'PTA': '1-PTA'})

    # Plain substring removal (no regex needed).
    baseline['Models'] = baseline['Models'].str.replace('PyTorch_Lightning_', '', regex=False)
    baseline['TYPE'] = 'Baseline'

    # Group by 'Models' and 'TYPE' and calculate mean, median and standard
    # deviation of every metric across the seeds.
    baseline = baseline.groupby(['Models', 'TYPE'], as_index=False).agg({
        'MASE': ['mean', 'median', 'std'],
        '1-MDA': ['mean', 'median', 'std'],
        '1-PTA': ['mean', 'median', 'std'],
        'MAE': ['mean', 'median', 'std'],
        'NRMSE': ['mean', 'median', 'std'],
    })

    # Flatten the two-level column index produced by agg(). The order must match
    # the aggregation dict above; the mean keeps the bare metric name.
    baseline.columns = ['Models', 'TYPE', 'MASE', 'MASE_median', 'MASE_std', '1-MDA', '1-MDA_median', '1-MDA_std', '1-PTA', '1-PTA_median', '1-PTA_std',
                        'MAE', 'MAE_median', 'MAE_std', 'NRMSE', 'NRMSE_median', 'NRMSE_std']
    baselines[exp] = baseline

In [None]:
# Display the aggregated DL baseline table of the 'covid' experiment.
baselines['covid']

### Create ODE baseline results

In [None]:
# ODE baseline metrics per experiment (experiment name -> DataFrame).
# Each prediction CSV gets its metric columns relabelled to the short names and
# a constant 'Models' column naming the ODE model.
ODE_baselines = {}

# Lynx-hares ODE prediction, labelled 'LV'.
ODE_baseline_lynx_hares = (
    pd.read_csv(os.path.join(lynx_hares_ode_prediction_path, "ODE_prediction.csv"))
    .rename(columns={"mean_absolute_error": "MAE", "1-prediction_trend_accuracy": "1-PTA"})
    .assign(Models='LV')
)

# Covid ODE prediction, labelled 'SIR'.
ODE_baseline_covid = (
    pd.read_csv(os.path.join(covid_ode_prediction_path, "ODE_prediction_1.csv"))
    .rename(columns={"mean_absolute_error": "MAE", "1-prediction_trend_accuracy": "1-PTA"})
    .assign(Models='SIR')
)

# Algae-rotifers ODE prediction read from the coherent *experiment* run folder,
# labelled 'SAR'.
# NOTE(review): this frame is not added to ODE_baselines in the visible code —
# confirm whether it is still needed.
ODE_baseline_algae_rotifers = (
    pd.read_csv(os.path.join(rotifers_algae_coherent_path, "ODE_prediction", "ODE_prediction.csv"))
    .rename(columns={"mean_absolute_error": "MAE", "1-prediction_trend_accuracy": "1-PTA"})
    .assign(Models='SAR')
)

# Incoherent algae-rotifers ODE prediction, labelled 'SAR'.
# NOTE(review): read from rotifers_algae_incoherent_path, not from
# rotifers_algae_incoherent_ode_prediction_path like the other experiments —
# confirm this is intended.
ODE_baseline_algae_rotifers_incoherent = (
    pd.read_csv(os.path.join(rotifers_algae_incoherent_path, "ODE_prediction", "ODE_prediction.csv"))
    .rename(columns={"mean_absolute_error": "MAE", "1-prediction_trend_accuracy": "1-PTA"})
    .assign(Models='SAR')
)

# Coherent algae-rotifers ODE prediction, labelled 'SAR'.
ODE_baseline_algae_rotifers_coherent = (
    pd.read_csv(os.path.join(rotifers_algae_coherent_ode_prediction_path, "ODE_prediction.csv"))
    .rename(columns={"mean_absolute_error": "MAE", "1-prediction_trend_accuracy": "1-PTA"})
    .assign(Models='SAR')
)

# Register the frames under the same experiment keys used elsewhere.
ODE_baselines.update({
    'covid': ODE_baseline_covid,
    'lynx_hares': ODE_baseline_lynx_hares,
    'rotifers_algae_coherent': ODE_baseline_algae_rotifers_coherent,
    'rotifers_algae_incoherent': ODE_baseline_algae_rotifers_incoherent,
})


### Collect and preprocess transfer learning results

In [None]:
import re

def extract_numbers(string):
    """Parse the numeric run parameters encoded in an underscore-separated name.

    The name is split on "_" and only tokens starting with "IC", "P", "TS" or
    "S" are considered. For each such token:

    * the column name is the token with digits and dashes removed;
    * the value is the token with ASCII letters removed, converted to ``int``;
    * for "IC" tokens of the form ``IC<a>-<b>`` the value is ``b - a``.

    Args:
        string: Name to parse, e.g. ``"TS10_IC0-5_P3_S1"``.

    Returns:
        A one-row ``pandas.DataFrame`` with one column per recognised token, in
        token order, or an empty DataFrame when no token is recognised.

    Raises:
        ValueError: If a recognised token does not reduce to an integer.
    """
    parsed = pd.DataFrame()
    for token in string.split("_"):
        # The four prefixes are mutually exclusive, so one check covers them all.
        if not token.startswith(("IC", "P", "TS", "S")):
            continue

        digits = re.sub('[a-zA-Z]', '', token)
        if token.startswith("IC") and "-" in digits:
            # A range "a-b" is stored as its width.
            bounds = digits.split("-")
            value = int(bounds[1]) - int(bounds[0])
        else:
            value = int(digits)

        label = re.sub('[0-9]', '', token).replace("-", "")
        parsed[str(label)] = [value]
    return parsed

In [None]:
# Aggregated transfer-learning metrics per experiment:
# experiment name -> one row per (model, TS, IC, P) combination with
# mean / median / std of every metric.
transfer_learning = {}

for exp in paths.keys():
    # Parallel lists: data[i] is the result frame read from a CSV file and
    # experiment_names[i] is the name of the run folder it was found in.
    data = []
    experiment_names = []
    for experiment in sorted(os.listdir(os.path.join(paths[exp], experiment_folder))):
        if os.path.isdir(os.path.join(paths[exp], experiment_folder, experiment)):
            for file in os.listdir(os.path.join(paths[exp], experiment_folder, experiment)):
                if file.endswith('.csv'):
                    df = pd.read_csv(os.path.join(paths[exp], experiment_folder, experiment, file))
                    df = df.rename(columns={"mean_absolute_error": "MAE", "mean_absolute_scaled_error": "MASE", "mean_directional_accuracy":"MDA", "prediction_trend_accuracy": "PTA", "normalized_root_mean_squared_error": "NRMSE"})
                    # Report the accuracy-style metrics as 1 - value.
                    df['MDA'] = 1 - df['MDA']
                    df['PTA'] = 1 - df['PTA']
                    # The first CSV column holds the model names.
                    df = df.rename(columns={df.columns[0]: 'Models', 'MDA': '1-MDA', 'PTA': '1-PTA'})
                    df['Models'] = df['Models'].str.replace('PyTorch_Lightning_', '')
                    experiment_names.append(experiment)
                    data.append(df)

    # Parse the run parameters from each folder name with extract_numbers() and
    # attach them as extra columns to the corresponding result frame.
    transfer_learning_data = pd.DataFrame()
    for exp_idx, experiment_name in enumerate(experiment_names):
        df = extract_numbers(experiment_name)
        # Repeat the single parameter row once per result row so both frames
        # line up for the column-wise concat below.
        df = df.loc[df.index.repeat(len(data[exp_idx]))].reset_index(drop=True)
        transfer_learning_data = pd.concat([transfer_learning_data, pd.concat([pd.DataFrame(data[exp_idx]).reset_index(drop=True), df], axis=1)], axis=0)

    transfer_learning_data['TYPE'] = 'Transfer Learning Run'
    # Requires the 'TS', 'IC', 'P' and 'S' columns, i.e. every folder name must
    # contain those tokens; a missing column raises KeyError here.
    transfer_learning_data = transfer_learning_data[['Models', 'MAE', 'MASE', '1-MDA', '1-PTA', 'NRMSE', 'TS', 'IC', 'P', 'S', 'TYPE']]
    transfer_learning_data = transfer_learning_data.sort_values(['Models', 'TS', 'IC', 'P'])
    # Un-aggregated copy that still has the per-run 'S' column. It is rebound on
    # every iteration and is not used further in the code visible here.
    transfer_learning_data_with_seeds = transfer_learning_data.copy()

    # Group by 'Models', 'TYPE', 'TS', 'IC' and 'P' and calculate the mean, median and standard deviation for each group
    transfer_learning_data = transfer_learning_data.groupby(['Models', 'TYPE', 'TS', 'IC', 'P'], as_index=False).agg({
                                                                            'MASE': ['mean', 'median', 'std'],
                                                                            '1-MDA': ['mean', 'median', 'std'],
                                                                            '1-PTA': ['mean', 'median', 'std'],
                                                                            'MAE': ['mean', 'median', 'std'],
                                                                            'NRMSE': ['mean', 'median', 'std']})

    # Flatten the two-level column index positionally: the order must match the
    # group keys and the aggregation dict above. The mean keeps the bare metric
    # name, and the 'P' group key is relabelled 'KP'.
    transfer_learning_data.columns = ['Models', 'TYPE', 'TS', 'IC', 'KP', 'MASE', 'MASE_median', 'MASE_std', '1-MDA', '1-MDA_median', '1-MDA_std', '1-PTA', '1-PTA_median', '1-PTA_std',
                        'MAE', 'MAE_median', 'MAE_std', 'NRMSE', 'NRMSE_median', 'NRMSE_std']

    transfer_learning[exp] = transfer_learning_data

### Preview transfer learning results

In [None]:
# Display the aggregated transfer-learning table of the 'covid' experiment.
transfer_learning['covid']