In [1]:
import ipynb.fs.full.training as training
import ipynb.fs.full.splitting as splitting
import ipynb.fs.full.misc as misc

In [2]:
import numpy as np
import pandas as pd

### ENSEMBLE CLASS

In [3]:
class create_ensemble():
    """Collection of trained models that are queried together.

    Each entry of ``self.models`` is either a single model object or a
    list/tuple of models (e.g. one model per cross-validation fold). Every
    model is expected to expose a ``name`` attribute and a
    ``predict(dataset)`` method.
    """

    # STATIC VARS
    def __init__(self):
        self.models = []

    # ADD A MODEL TO THE ENSEMBLE
    def add_model(self, model):
        """Register a single model, or a list/tuple of fold models, with the ensemble."""
        self.models.append(model)

    # PREDICT WITH ALL ENSEMBLE MODELS
    def predict(self, dataset, show_mean=False):
        """Predict with every registered entry and tabulate the results.

        Args:
            dataset: Passed unchanged to each model's ``predict``.
            show_mean: When truthy, a ``'mean'`` column holding the row-wise
                mean of all prediction columns is appended.

        Returns:
            pandas.DataFrame with one column per ensemble entry, keyed by the
            model's ``name`` (the first sub-model's name for a group).

        Raises:
            ValueError: If a registered group of sub-models is empty.
        """

        # CONTAINER FOR UNBALANCED PREDICTIONS
        unbalanced = {}

        # LOOP THROUGH MODELS
        for model in self.models:

            # FETCH AVERAGE PREDICTION FROM CV MODELS
            # isinstance ALSO ACCEPTS TUPLES & LIST SUBCLASSES, WHICH AN EXACT
            # TYPE COMPARISON WOULD TREAT AS A SINGLE MODEL
            if isinstance(model, (list, tuple)):
                if not model:
                    raise ValueError('cannot predict with an empty group of models')

                collection = [sub_model.predict(dataset) for sub_model in model]

                # ELEMENT-WISE MEAN ACROSS SUB-MODELS (AXIS 0 OF THE STACK), SO
                # THE RESULT HAS THE SAME SHAPE AS ONE SUB-MODEL'S OUTPUT
                averages = np.mean(np.array(collection), axis=0)

                # ADD TO THE UNBALANCED COLLECTION
                unbalanced[model[0].name] = averages

            # OTHERWISE, PREDICT NORMALLY & ADD
            else:
                unbalanced[model.name] = model.predict(dataset)

        # BALANCE PREDICTIONS AROUND WINDOW SIZE
        balanced = misc.balance_predictions(unbalanced)

        # CREATE NEW DATAFRAME
        dataframe = pd.DataFrame()

        # ADD PREDICTIONS TO DATAFRAME
        for key in balanced.keys():
            dataframe[key] = balanced[key]

        # IF SHOW MEAN IS TRUTHY
        if show_mean:
            dataframe['mean'] = dataframe.to_numpy().mean(axis=1)

        return dataframe

### REGRESSION ENSEMBLE

In [4]:
def regression(primary_dataset, config):
    """Train every requested regression model on time-series folds.

    For each entry of ``config['regression_ensemble']['models']`` the training
    data is split with ``splitting.timeseries``, one model is trained per fold
    with ``training.start``, and each fold model predicts on its own test
    split.

    Args:
        primary_dataset: Mapping whose ``'train'`` entry is handed to
            ``splitting.timeseries``.
        config: Mapping providing ``['regression_ensemble']['models']`` and
            ``['splitting']['validation_folds']``. A model with truthy
            settings must provide ``settings['morph']['window']``.

    Returns:
        Tuple ``(ensemble, predictions_table)``: the ensemble holds one list
        of fold models per requested model; the table holds one column of
        concatenated fold predictions per model plus a ``'label'`` column.
        With no requested models the ensemble and the table are both empty.
    """

    # REQUESTED MODELS
    models = config['regression_ensemble']['models']

    # CREATE ENSEMBLE OUTLINE
    ensemble = create_ensemble()

    # CREATE NEW DATAFRAME
    predictions_table = pd.DataFrame()

    # LABELS OF THE MOST RECENTLY PROCESSED MODEL
    # DEFINED UP FRONT SO AN EMPTY MODEL LIST DOES NOT CRASH BELOW
    labels = []

    # LOOP THROUGH MODELS
    for index, item in enumerate(models):

        # MODEL PROPS
        name, settings = misc.key_value(item)

        # ADD INDEX TO THE NAME
        name = '{}_{}'.format(name, index)

        # IF THE MODEL HAS EXTRA SETTINGS, SPLIT WITH ITS WINDOW
        if settings:
            window = settings['morph']['window']
            folds = splitting.timeseries(
                primary_dataset['train'],
                config['splitting']['validation_folds'],
                window=window
            )

        # OTHERWISE, SPLIT WITH THE DEFAULTS
        else:
            window = None
            folds = splitting.timeseries(
                primary_dataset['train'],
                config['splitting']['validation_folds']
            )

        # RESULTS
        temp_models = []
        temp_predictions = []
        temp_labels = []

        # LOOP THROUGH FOLDS
        for fold_index, fold in enumerate(folds):

            # PRINT A MESSAGE
            print('TRAINING {} FOLD #{}'.format(name.upper(), fold_index + 1))

            # TRAIN THE MODEL & PREDICT ON THE FOLD'S TEST SPLIT
            model = training.start(fold['train'], name, settings)
            predictions = model.predict(fold['test'])

            # APPEND TO COLLECTIONS
            temp_models.append(model)
            temp_predictions.append(predictions)

            # WINDOWED FOLDS SKIP THE FIRST `window` LABELS
            fold_labels = fold['test']['labels']
            if settings:
                fold_labels = fold_labels[window:]
            temp_labels.append(fold_labels)

        # ADD THE MODEL COLLECTION TO THE ENSEMBLE
        ensemble.add_model(temp_models)

        # ADD DATAFRAME COLUMNS
        predictions_table[name] = np.concatenate(temp_predictions)

        # REMEMBER THIS MODEL'S LABELS; THE LAST MODEL'S LABELS END UP IN THE TABLE
        labels = temp_labels

        # PRINT A SPACE WHEN NECESSARY
        if index < len(models) - 1:
            print()

    # FINALLY, ADD IN LABELS (SKIPPED WHEN NO MODEL PRODUCED ANY)
    if labels:
        predictions_table['label'] = np.concatenate(labels)

    return ensemble, predictions_table

### CLASSIFIER ENSEMBLE

In [3]:
def classifier(dataframe, config):
    """Train every requested classification model on a predictions table.

    Each model listed under ``config['classification_ensemble']['models']`` is
    trained with ``training.start`` on the full dataset and then asked to
    predict on that same dataset.

    Args:
        dataframe: Table with a ``'label'`` column; every other column is
            used as a feature.
        config: Mapping providing ``['classification_ensemble']['models']``.

    Returns:
        Tuple ``(ensemble, predictions_table)``: the ensemble holds the
        trained models, the table holds one prediction column per model,
        named ``<model name>_<position>``.
    """

    # SPLIT THE TABLE INTO A FEATURE MATRIX & A LABEL VECTOR
    is_feature = dataframe.columns != 'label'
    dataset = {
        'features': dataframe.loc[:, is_feature].to_numpy(),
        'labels': dataframe['label'].to_numpy(),
    }

    # REQUESTED MODELS
    requested = config['classification_ensemble']['models']

    # EMPTY ENSEMBLE & EMPTY RESULT TABLE
    ensemble = create_ensemble()
    predictions_table = pd.DataFrame()

    # TRAIN EACH REQUESTED MODEL IN ORDER
    for position, entry in enumerate(requested):

        # MODEL PARAMS, WITH THE POSITION APPENDED TO THE NAME
        base_name, settings = misc.key_value(entry)
        name = '{}_{}'.format(base_name, position)

        # PRINT A MESSAGE
        print('TRAINING {} MODEL'.format(name.upper()))

        # TRAIN ON THE FULL DATASET, THEN PREDICT ON THE SAME DATA
        trained = training.start(dataset, name, settings)
        column = trained.predict(dataset)

        # STORE THE MODEL & ITS PREDICTIONS
        ensemble.add_model(trained)
        predictions_table[name] = column

    return ensemble, predictions_table