Research for:

1. Applied Mathematics MSc master's thesis at Kaunas University of Technology.
2. P000M015 Research Project 3 course at Kaunas University of Technology, study programme - Applied Mathematics MSc.
3. Student research carried out in free time and funded by the Research Council of Lithuania: “Development and application of machine learning methods in assessing the risk of mortality of prostate cancer patients” (P-ST-22-28), 2022-2023.
Project GitHub repo - https://github.com/vytautas9/Tiriamasis_Projektas_3

Author:
Vytautas Kraujalis

LinkedIn - https://www.linkedin.com/in/vytautaskraujalis/  
Email - vytautas.kraujalis2@gmail.com

In [1]:
import pickle
import pandas as pd
import numpy as np
from pathlib import Path

from sklearn.metrics import roc_curve, auc

import logging

from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier

In [2]:
# Paths to the pickled inputs that are loaded further down:
# the cleaned data file and the already prepared train/test datasets.
dataFilePath = 'data/data_clean.pkl'
trainFilePath = 'data/data_train.pkl'
testFilePath = 'data/data_test.pkl'

In [3]:
# We log training execution information to a log file
# to check that everything goes as expected.
logger = logging.getLogger()

# Re-running this cell would otherwise attach one more FileHandler to the root
# logger each time and every record would be written to the file repeatedly,
# so handlers from earlier runs that point at the same file are detached first.
_log_file = Path('logs.log').absolute()
for _stale_handler in list(logger.handlers):
    if (isinstance(_stale_handler, logging.FileHandler)
            and Path(_stale_handler.baseFilename) == _log_file):
        logger.removeHandler(_stale_handler)
        _stale_handler.close()

# mode='w' truncates the log file each time the handler is created
fhandler = logging.FileHandler(filename='logs.log', mode='w')
formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
fhandler.setFormatter(formatter)
logger.addHandler(fhandler)
logger.setLevel(logging.DEBUG)

In [4]:
def _load_pickle(path):
    """Return the object stored in the pickle file at `path`."""
    with open(path, 'rb') as handle:
        return pickle.load(handle)


# Cleaned data prepared and saved earlier
data = _load_pickle(dataFilePath)

# Train/test datasets prepared and saved earlier
data_train = _load_pickle(trainFilePath)
data_test = _load_pickle(testFilePath)

In [5]:
# Create dummy variables for categorical data
categorical_columns = ['clinical_stage', 'biopsy_gleason_gg', 'pathological_gleason_gg',
                       'pathologic_stage', 'lni', 'surgical_margin_status', 'persistent_psa',
                       'TRYSgrupes', 'PLNDO1']
data_train = pd.get_dummies(data_train, columns=categorical_columns)
data_test = pd.get_dummies(data_test, columns=categorical_columns)

# The two sets are encoded independently, so a category level that is missing
# from one of them would produce a different set of dummy columns. Align the
# test set to the training columns (same columns, same order): levels absent
# from the test set become all-zero columns, and columns that do not exist in
# the training set are dropped because a fitted model has no use for them.
data_test = data_test.reindex(columns=data_train.columns, fill_value=0)

In [6]:
# Report the dimensions and column names of both datasets, train first
for split_name, split_frame in (('train', data_train), ('test', data_test)):
    print(f'Shape of {split_name}: ', split_frame.shape, '\n')
    print(f'Columns of {split_name}: ', split_frame.columns, '\n')

Shape of train:  (1251, 40) 

Columns of train:  Index(['age', 'psa', 'biopsy_gleason', 'survival_months', 'pathologic_gleason',
       'survival_months_bcr', 'survival_months_mts', 'patient_id', 'bcr',
       'mts', 'death_from_other_causes', 'cancer_specific_mortality',
       'clinical_stage_1', 'clinical_stage_2', 'clinical_stage_3',
       'biopsy_gleason_gg_1', 'biopsy_gleason_gg_2', 'biopsy_gleason_gg_3',
       'biopsy_gleason_gg_4', 'biopsy_gleason_gg_5',
       'pathological_gleason_gg_1', 'pathological_gleason_gg_2',
       'pathological_gleason_gg_3', 'pathological_gleason_gg_4',
       'pathological_gleason_gg_5', 'pathologic_stage_0', 'pathologic_stage_1',
       'pathologic_stage_2', 'lni_0.0', 'lni_1.0', 'lni_unknown',
       'surgical_margin_status_0', 'surgical_margin_status_1',
       'persistent_psa_0', 'persistent_psa_1', 'TRYSgrupes_0', 'TRYSgrupes_1',
       'TRYSgrupes_2', 'PLNDO1_0', 'PLNDO1_1'],
      dtype='object') 

Shape of test:  (313, 40) 

Columns of te

In [7]:
"""
Explodes the provided "df" dataset based on provided survival column "time" and
clips the data to be in a range [min_time; max_time] (). A new discrete survival column
will be created with name set as variable "time_discrete". "cum_event" boolean determines
if cumulative event column will be created or no.

clip(lower, upper) function will help us create a new discrete survival time column. 
If we specify lower=1 and upper=200, patients who experienced event earlier than 200th 
month will only have records till their event, on other side, if a patient survived past 
200th month, we will clip this information and will only keep information about him til 200th month.
Another example, if we specify lower=140 and upper=200, and if the person experienced event 
at 100th month, we will create records for him till 140th (lower boundary) month.
"""
def explode_data(df,max_time,time,target_column,min_time=1,
                 time_discrete='survival_time_discrete',cum_event=False):
    
    logging.info('Exploding the data......')
    logging.debug(f'Exploding will happen for target column: {target_column}')
    logging.debug(f'Data will be exploded with min {min_time} and max {max_time} survival times')
    logging.debug(f'Survival time range BEFORE exploding: min {df[time].min()}, max {df[time].max()}')

    target_column_discrete = target_column + '_discrete'

    # We create a new time column and clip the data by provided min and max survival times
    df[time_discrete] = df[time].clip(min_time,max_time).apply(range)

    # Exploding the dataset with the created range value in new time column
    data_exploded = df.explode(time_discrete)
    data_exploded.reset_index(drop=True, inplace=True)

    # New column starts at 0, we'll increase each value by 1
    data_exploded[time_discrete] = pd.to_numeric(data_exploded[time_discrete]) + 1
    logging.debug(f'Survival time range AFTER exploding: min {data_exploded[time_discrete].min()}, max {data_exploded[time_discrete].max()}')

    # New event column, which will indicate the last event date
    data_exploded[target_column_discrete] = (data_exploded[time_discrete] >= data_exploded[time]) * pd.to_numeric(data_exploded[target_column])
    
    if cum_event == True:
        logging.info('Cumulative event will be added.....')
        target_column_cumulative = target_column + '_cumulative'

        # Create new event column with duplicated event values from discrete column
        data_exploded[target_column_cumulative] = data_exploded[target_column_discrete]
        
        # For cumulative events, after end_time we will have NA values, we'll replace those with event indicator
        after_survival_time = data_exploded[time_discrete] > data_exploded[time]
        data_exploded.loc[after_survival_time, target_column_discrete] = -1
        data_exploded[target_column_discrete] = data_exploded[target_column_discrete].replace(-1,np.NaN)
        data_exploded.loc[(after_survival_time & (data_exploded[target_column]==0)), target_column_cumulative] = -1
        data_exploded[target_column_cumulative] = data_exploded[target_column_cumulative].replace(-1,np.NaN)

    return data_exploded


"""
Trains a single model on "df_train" data with response column "target_column". Model will be 
trained on data ranging to "max_time" (max, inclusive) or "min_time" (mininum max time, inclusive)
and before training, the dataset will be exploded. 
"""
def train_model(model, df_train, target_column, time, max_time, min_time=1, is_homogenous_dataset=False, experiment_name=None):
    df_train_copy = df_train.copy()
    
    # Extract model name
    model_name = type(model).__name__
    logging.info(f'Starting fitting a {model_name} model...')
    
    logging.debug(f'Survival time column to be used: {time}')

    # Explode the dataset
    df_train_copy_exploded = explode_data(df_train_copy, min_time=min_time, max_time=max_time, time=time, target_column=target_column)
    logging.debug(f'Column names of exploded train set: {df_train_copy_exploded.columns}')
    target_column_discrete = target_column + '_discrete'

    # Drop targets/features from feature set
    x_columns_to_drop = [target_column, target_column_discrete, 'survival_months', 'survival_months_bcr', 'survival_months_mts', 'patient_id']
    X_train = df_train_copy_exploded.drop(x_columns_to_drop, axis=1)    
    y_train = df_train_copy_exploded[target_column_discrete]
    
    # Fit model to training data
    logging.debug(f'Fitting will happen on those features: {X_train.columns}')
    logging.debug(f'Fitting response variable: {y_train.name}')
    model.fit(X_train, y_train)
    logging.info(f'Finished fitting.')

    # Save model
    if is_homogenous_dataset:
        main_path = 'results/homogenous_data'
    else:
        main_path = 'results/non_homogenous_data'
    model_path = f'{main_path}/{target_column}/{max_time}/{model_name}'
    if experiment_name is None:
        model_path = model_path + '/defaultExperiment'
    else:
        model_path = model_path + f'/{experiment_name}'
    logging.info(f'Saving model to "{model_path}"...')
    # Create directory if does not exist
    Path(model_path).mkdir(parents=True, exist_ok=True)
    
    with open(f'{model_path}/model.sav', 'wb') as f:
        pickle.dump(model, f)
    logging.info(f'Finished saving model.')
    
    return model


"""
Given an exploded dataset with instant mortality probabilities "event_probability_column"
and "id_column" for grouping (optional), cumulative hazard column will be calculated
"""
def cumulative_hazard(df, event_probability_column, id_column):
    data_copy = df.copy()
    logging.info('Starting calculated cumulative hazards......')
    if id_column is not None:
        logging.debug(f'Cumulative hazards will be grouped by: {id_column}')
        data_copy = data_copy[ [id_column, event_probability_column] ]
    else:
        logging.warning('Cumulative hazards will NOT be grouped by any ID column.')
        data_copy = data_copy[ [event_probability_column] ]
    data_copy['negative_log_prob'] = np.log( 1 - data_copy[event_probability_column] )
    if id_column is not None:
        data_copy['cumulative_hazard'] = 1 - np.exp(data_copy.groupby(id_column)['negative_log_prob'].transform(pd.Series.cumsum))
    else:
        data_copy['cumulative_hazard'] = 1 - np.exp(data_copy['negative_log_prob'].cumsum())
    return data_copy['cumulative_hazard']


"""
Given non exploded dataset, explodes the dataset based on "max_time", lower boundary for exploding
a dataset will be the same as upper, so for each patient we will create "max_time" records.
Adds predictent instant mortality probabilities as well as cumulative ones.
"""
def add_predict_probabilities(df, model, max_time, target_column, time, is_homogenous_dataset=False):
    logging.info('Starting addition of predicted probabilities......')

    df_exploded = explode_data(df, max_time=max_time, min_time=max_time, cum_event=True, time=time, target_column=target_column)

    x_columns_to_drop = [target_column, target_column+'_discrete', target_column+'_cumulative', 'survival_months', 'survival_months_bcr', 'survival_months_mts', 'patient_id']
    X_df = df_exploded.drop(x_columns_to_drop, axis=1)

    # probabilities
    y_pred = model.predict_proba(X_df)[:,1]
    df_exploded['mortality_instant_prob'] = y_pred

    # Cumulative hazard for each patient
    df_exploded['cumulative_hazard'] = cumulative_hazard(df_exploded,'mortality_instant_prob','patient_id')
    
    # TODO - homogenous dataset
    # if is_homogenous_dataset:
        
    #     # get adjustment parameters
    #     match target_column:
    #         case 'cancer_specific_mortality':
    #             pi_0 = pi_0_csm
    #             pi_1 = pi_1_csm
    #             rho_0 = rho_0_csm
    #             rho_1 = rho_1_csm
    #         case 'death_from_other_causes':
    #             pi_0 = pi_0_doc
    #             pi_1 = pi_1_doc
    #             rho_0 = rho_0_doc
    #             rho_1 = rho_1_doc
    #         case 'mts':
    #             pi_0 = pi_0_mts
    #             pi_1 = pi_1_mts
    #             rho_0 = rho_0_mts
    #             rho_1 = rho_1_mts
    #         case 'bcr':
    #             pi_0 = pi_0_bcr
    #             pi_1 = pi_1_bcr
    #             rho_0 = rho_0_bcr
    #             rho_1 = rho_1_bcr
    #         case _:
    #             pi_0 = None
    #             pi_1 = None
    #             rho_0 = None
    #             rho_1 = None
        
        
    #     # adjusted probabilities
    #     df_exploded['mortality_instant_prob_adjusted'] = \
    #         (df_exploded.mortality_instant_prob*(pi_1/rho_1)) / \
    #         ((1-df_exploded.mortality_instant_prob)*(pi_0/rho_0) + \
    #          df_exploded.mortality_instant_prob*(pi_1/rho_1))
        
    #     # Cumulative hazard for each patient (adjusted)
    #     df_exploded['cumulative_hazard_adjusted'] = cumulative_hazard(df_exploded,'mortality_instant_prob_adjusted','patient_id')
    
    return df_exploded


"""
Wrapper for all above functions to be executed at once. Runs an experiment for single model - single slice of trainable months.
"""
def run_experiment(df_train, df_test, model, max_time, target_column, min_time=1, is_homogenous_dataset=False, experiment_name=None):
    
    model_name = type(model).__name__

    logging.info('\n|-------------------------------------------------------|')
    logging.info(f'Running an experiment on model: {model_name}, experiment name: {experiment_name}')
    logging.debug(f'Training dataset type: {"homogenous" if is_homogenous_dataset else "non-homogenous"}')
    logging.debug(f'Target column: {target_column}')
    logging.debug(f'Min trainalbe time: {min_time}, Max trainable time: {max_time}')

    # mts and bcr have different survival months columns
    match target_column:
        case 'mts':
            time = 'survival_months_mts'
        case 'bcr':
            time = 'survival_months_bcr'
        case _:
            time = 'survival_months'

    model = train_model(model=model, df_train=df_train, target_column=target_column,
                   max_time=max_time, min_time=min_time, is_homogenous_dataset=is_homogenous_dataset, 
                   experiment_name=experiment_name, time=time)
    
    # Test on training data
    df_train_predicted = add_predict_probabilities(df_train, model=model, max_time=max_time, 
                                                     target_column=target_column, time=time)
    
    # Test on testing data
    df_test_predicted = add_predict_probabilities(df_test, model=model, max_time=max_time, 
                                                    target_column=target_column, time=time,
                                                  is_homogenous_dataset=is_homogenous_dataset)
    
    # AUC for each cumulative slice
    # Months at which we'll check the AUC's
    months = list(range(6, max_time, 6))

    train_auc_stats = []
    test_auc_stats = []
    test_adjusted_auc_stats = []
    for month in months:
        # --- Training data ---
        # Selecting a subset of data based on the months
        select = (df_train_predicted['survival_time_discrete'] == month) & pd.notna(df_train_predicted[target_column+'_cumulative'])
        sub_dat = df_train_predicted[select]

        # If in the sliced data there's a event, calculate AUC metric,
        # otherwise assign NaN value
        if sub_dat[target_column+'_cumulative'].max() == 1:
            fpr, tpr, thresholds = roc_curve(sub_dat[target_column+'_cumulative'], sub_dat['cumulative_hazard'])
            auc_stat = auc(fpr, tpr)
        else:
            auc_stat = float('NaN')
        train_auc_stats.append(auc_stat)

        # --- Testing data ---
        # Selecting a subset of data based on the months
        select = (df_test_predicted['survival_time_discrete'] == month) & pd.notna(df_test_predicted[target_column+'_cumulative'])
        sub_dat = df_test_predicted[select]

        # If in the sliced data there's a event, calculate AUC metric,
        # otherwise assign NaN value
        if sub_dat[target_column+'_cumulative'].max() == 1:
            fpr, tpr, thresholds = roc_curve(sub_dat[target_column+'_cumulative'], sub_dat['cumulative_hazard'])
            auc_stat = auc(fpr, tpr)
        else:
            auc_stat = float('NaN')
        test_auc_stats.append(auc_stat)
        
        # --- Testing adjusted data ---
        # TODO - homogenous dataset
        # if is_homogenous_dataset:
        #     # Selecting a subset of data based on the months
        #     select = (df_test_predicted['survival_time_discrete'] == month) & pd.notna(df_test_predicted[target_column+'_cumulative'])
        #     sub_dat = df_test_predicted[select]

        #     # If in the sliced data there's a event, calculate AUC metric,
        #     # otherwise assign NaN value
        #     if sub_dat[target_column+'_cumulative'].max() == 1:
        #         fpr, tpr, thresholds = roc_curve(sub_dat[target_column+'_cumulative'], sub_dat['cumulative_hazard_adjusted'])
        #         auc_stat = auc(fpr, tpr)
        #     else:
        #         auc_stat = float('NaN')
        #     test_adjusted_auc_stats.append(auc_stat)
            

    if is_homogenous_dataset:
        # auc_stats = pd.DataFrame(zip(months, train_auc_stats, test_auc_stats, test_adjusted_auc_stats),
        #                      columns=['month', 'train_auc', 'test_auc', 'test_adjusted_auc'])
        # TODO - homogenous dataset
        pass
    else:
        auc_stats = pd.DataFrame(zip(months, train_auc_stats, test_auc_stats),
                             columns=['month', 'train_auc', 'test_auc'])
    
    
    # Save the auc statistics
    if is_homogenous_dataset:
        main_path = 'results/homogenous_data'
    else:
        main_path = 'results/non_homogenous_data'
    model_path = f'{main_path}/{target_column}/{max_time}/{model_name}'
    if experiment_name is None:
        model_path = model_path + '/defaultExperiment'
    else:
        model_path = model_path + f'/{experiment_name}'
    logging.info(f'Saving results to "{model_path}"...')
    # Create directory if does not exist
    Path(model_path).mkdir(parents=True, exist_ok=True)

    with open(f'{model_path}/auc_statistics.pkl', 'wb') as f:
        pickle.dump(auc_stats, f)
    
    logging.info('\n|-------------------------------------------------------|')


"""
Function to read a single experiment from file
"""
def read_experiment(model_name, target_column, max_time, is_homogenous_dataset=False, experiment_name=None):
    if is_homogenous_dataset:
        main_path = 'results/homogenous_data'
    else:
        main_path = 'results/non_homogenous_data'
        
    model_path = f'{main_path}/{target_column}/{max_time}/{model_name}'

    if experiment_name is None:
        model_path = model_path + '/defaultExperiment'
    else:
        model_path = model_path + f'/{experiment_name}'
        
    # Read model
    with open(f'{model_path}/model.sav', 'rb') as f:
        model = pickle.load(f)
    
    # Read auc statistic
    with open(f'{model_path}/auc_statistics.pkl', 'rb') as f:
        auc_stats = pickle.load(f)
    return model, auc_stats

## Logistic Regression

In [10]:
# All outcome columns of the dataset. For every experiment all of them except
# the one being modelled are dropped from the feature set before fitting the model.
target_columns = ['cancer_specific_mortality', 'death_from_other_causes', 'bcr', 'mts']

# list of months which will be tried out
trainable_months = list(range(24, 225, 12))

is_homogenous_dataset = False
experiment_name = None

for target_column in ['cancer_specific_mortality', 'death_from_other_causes', 'mts', 'bcr']:
    print(f'target_column: {target_column}')

    # Remove the current target from a COPY of the list. Removing it from
    # target_columns itself shrinks the list on every iteration, so from the
    # second target onwards the earlier outcome columns would no longer be
    # dropped and would leak into the features.
    target_columns_drop = target_columns.copy()
    target_columns_drop.remove(target_column)
    df_train = data_train.copy().drop(target_columns_drop, axis=1)
    df_test = data_test.copy().drop(target_columns_drop, axis=1)

    for month in trainable_months:
        print(f'\tTrainable month: {month}')

        # Define the logistic regression model
        model = LogisticRegression(solver='liblinear', random_state=0)
        model_name = type(model).__name__
        
        # Run the experiment
        run_experiment(df_train=df_train, df_test=df_test, model=model, 
                    max_time=month, target_column=target_column, 
                    is_homogenous_dataset=is_homogenous_dataset,
                    experiment_name=experiment_name)

target_column: cancer_specific_mortality
	Trainable month: 24
	Trainable month: 36
	Trainable month: 48
	Trainable month: 60
	Trainable month: 72
	Trainable month: 84
	Trainable month: 96
	Trainable month: 108
	Trainable month: 120
	Trainable month: 132
	Trainable month: 144
	Trainable month: 156
	Trainable month: 168
	Trainable month: 180
	Trainable month: 192
	Trainable month: 204
	Trainable month: 216
target_column: death_from_other_causes
	Trainable month: 24
	Trainable month: 36
	Trainable month: 48
	Trainable month: 60
	Trainable month: 72
	Trainable month: 84
	Trainable month: 96
	Trainable month: 108
	Trainable month: 120
	Trainable month: 132
	Trainable month: 144
	Trainable month: 156
	Trainable month: 168
	Trainable month: 180
	Trainable month: 192
	Trainable month: 204
	Trainable month: 216
target_column: mts
	Trainable month: 24
	Trainable month: 36
	Trainable month: 48
	Trainable month: 60
	Trainable month: 72
	Trainable month: 84
	Trainable month: 96
	Trainable month: 10

## Random Forest

In [10]:
# All outcome columns of the dataset. For every experiment all of them except
# the one being modelled are dropped from the feature set before fitting the model.
target_columns = ['cancer_specific_mortality', 'death_from_other_causes', 'bcr', 'mts']

# list of months which will be tried out
trainable_months = list(range(24, 225, 12))

is_homogenous_dataset = False
experiment_name = None

for target_column in ['cancer_specific_mortality', 'death_from_other_causes', 'mts', 'bcr']:
    print(f'target_column: {target_column}')

    # Remove the current target from a COPY of the list. Removing it from
    # target_columns itself shrinks the list on every iteration, so from the
    # second target onwards the earlier outcome columns would no longer be
    # dropped and would leak into the features.
    target_columns_drop = target_columns.copy()
    target_columns_drop.remove(target_column)
    df_train = data_train.copy().drop(target_columns_drop, axis=1)
    df_test = data_test.copy().drop(target_columns_drop, axis=1)

    for month in trainable_months:
        print(f'\tTrainable month: {month}')

        # Define the Random Forest model
        model = RandomForestClassifier(n_estimators=400, random_state=0, criterion='entropy', max_depth=3,
                              max_samples=0.3, max_features=0.1)
        model_name = type(model).__name__
        
        # Run the experiment
        run_experiment(df_train=df_train, df_test=df_test, model=model, 
                    max_time=month, target_column=target_column, 
                    is_homogenous_dataset=is_homogenous_dataset,
                    experiment_name=experiment_name)

target_column: cancer_specific_mortality
	Trainable month: 24
	Trainable month: 36
	Trainable month: 48
	Trainable month: 60
	Trainable month: 72
	Trainable month: 84
	Trainable month: 96
	Trainable month: 108
	Trainable month: 120
	Trainable month: 132
	Trainable month: 144
	Trainable month: 156
	Trainable month: 168
	Trainable month: 180
	Trainable month: 192
	Trainable month: 204
	Trainable month: 216
target_column: death_from_other_causes
	Trainable month: 24
	Trainable month: 36
	Trainable month: 48
	Trainable month: 60
	Trainable month: 72
	Trainable month: 84
	Trainable month: 96
	Trainable month: 108
	Trainable month: 120
	Trainable month: 132
	Trainable month: 144
	Trainable month: 156
	Trainable month: 168
	Trainable month: 180
	Trainable month: 192
	Trainable month: 204
	Trainable month: 216
target_column: mts
	Trainable month: 24
	Trainable month: 36
	Trainable month: 48
	Trainable month: 60
	Trainable month: 72
	Trainable month: 84
	Trainable month: 96
	Trainable month: 10

## XGBoost

In [19]:
# Every outcome column of the dataset; all but the modelled one are removed
# from the feature set before fitting the model
target_columns = ['cancer_specific_mortality', 'death_from_other_causes', 'bcr', 'mts']

# Training horizons (in months) which will be tried out
trainable_months = list(range(24, 225, 12))

is_homogenous_dataset = False
experiment_name = None

# Hyper-parameters shared by every XGBoost model fitted below
xgb_params = dict(
    objective="binary:logistic",
    random_state=0,
    booster='gbtree',
    colsample_bytree=0.2,
    gamma=0.1,
    learning_rate=0.1,
    max_depth=4,
    n_estimators=500,
    subsample=0.2,
    min_child_weight=3,
    scale_pos_weight=0.7,
    reg_lambda=1,
    reg_alpha=0.001,
)

for target_column in ['cancer_specific_mortality', 'death_from_other_causes', 'mts', 'bcr']:
    print(f'target_column: {target_column}')

    # Outcome columns other than the current target must not be used as features
    target_columns_drop = [column for column in target_columns if column != target_column]
    df_train = data_train.drop(columns=target_columns_drop)
    df_test = data_test.drop(columns=target_columns_drop)

    for month in trainable_months:
        print(f'\tTrainable month: {month}')

        # A fresh XGBoost model for every target / horizon combination
        model = XGBClassifier(**xgb_params)
        model_name = type(model).__name__

        # Run the experiment
        run_experiment(
            df_train=df_train,
            df_test=df_test,
            model=model,
            max_time=month,
            target_column=target_column,
            is_homogenous_dataset=is_homogenous_dataset,
            experiment_name=experiment_name,
        )

target_column: cancer_specific_mortality
	Trainable month: 24
	Trainable month: 36
	Trainable month: 48
	Trainable month: 60
	Trainable month: 72
	Trainable month: 84
	Trainable month: 96
	Trainable month: 108
	Trainable month: 120
	Trainable month: 132
	Trainable month: 144
	Trainable month: 156
	Trainable month: 168
	Trainable month: 180
	Trainable month: 192
	Trainable month: 204
	Trainable month: 216
target_column: death_from_other_causes
	Trainable month: 24
	Trainable month: 36
	Trainable month: 48
	Trainable month: 60
	Trainable month: 72
	Trainable month: 84
	Trainable month: 96
	Trainable month: 108
	Trainable month: 120
	Trainable month: 132
	Trainable month: 144
	Trainable month: 156
	Trainable month: 168
	Trainable month: 180
	Trainable month: 192
	Trainable month: 204
	Trainable month: 216
target_column: mts
	Trainable month: 24
	Trainable month: 36
	Trainable month: 48
	Trainable month: 60
	Trainable month: 72
	Trainable month: 84
	Trainable month: 96
	Trainable month: 10