In [1]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix

### DECONSTRUCT KEY & VALUE

In [4]:
def key_value(blob):
    """Split a mapping into its first key and the value stored under it.

    Parameters
    ----------
    blob : mapping
        Any object that can be iterated for keys and indexed by them.
        Only the first key in iteration order is used.

    Returns
    -------
    tuple
        ``(key, value)`` for the first key of ``blob``.

    Raises
    ------
    IndexError
        If ``blob`` has no keys.
    """
    # FIRST KEY IN ITERATION ORDER
    first_key = list(blob)[0]

    return first_key, blob[first_key]

### DECISION MACHINE FOR DATASET LABELS

In [1]:
class decision_machine():
    """Turn the 'label' column of a dataframe into BUY/SELL/HOLD tags.

    The log return between consecutive 'label' values is compared against
    two thresholds. `calibrate` derives the thresholds as quantiles of one
    dataframe's log returns; `convert` reuses the stored thresholds on
    another dataframe.

    Tag encoding:
        0 = BUY   (log return below the lower threshold)
        1 = SELL  (log return above the upper threshold)
        2 = HOLD  (everything else, including NaN log returns)
    """

    # CALIBRATE QUANTILE THRESHOLDS & RETURN NEW LABELS
    def calibrate(self, dataframe, settings):
        """Set the thresholds from `dataframe` and return its tags.

        Parameters
        ----------
        dataframe : DataFrame with a 'label' column.
        settings : mapping with 'lower' and 'upper' entries, each passed
            to `Series.quantile` on the log returns.

        Returns
        -------
        list of int
            One tag per dataframe row (see the class docstring).
        """

        # GET LABEL LOG RETURN VALUES
        labels = self.log_returns(dataframe)

        # SET QUANTILE THRESHOLDS
        self.lower = labels.quantile(settings['lower'])
        self.upper = labels.quantile(settings['upper'])

        return self.vote(labels)

    # CONVERT & RETURN NEW LABELS
    def convert(self, dataframe):
        """Return tags for `dataframe` using the stored thresholds.

        `calibrate` must have been called first; otherwise `self.lower`
        and `self.upper` do not exist and an AttributeError is raised.
        """

        # GET LABEL LOG RETURN VALUES
        labels = self.log_returns(dataframe)
        return self.vote(labels)

    # DECIDE LABEL TAG VIA VOTE
    def vote(self, labels):
        """Map each log return to a tag and return them as a list of ints.

        BUY is tested before SELL, so a value satisfying both conditions
        (only possible when lower > upper) is tagged BUY. NaN values fail
        both comparisons and are therefore tagged HOLD.
        """

        # BUY   = 0
        # SELL  = 1
        # HOLD  = 2

        # VECTORISED COMPARISON INSTEAD OF A PER-ELEMENT PYTHON LOOP
        values = np.asarray(labels, dtype=float)

        # np.select USES THE FIRST MATCHING CONDITION, WHICH KEEPS THE
        # BUY -> SELL -> HOLD PRIORITY OF AN IF / ELIF / ELSE CHAIN
        tags = np.select(
            [values < self.lower, values > self.upper],
            [0, 1],
            default=2
        )

        # PLAIN PYTHON INTS, ONE PER INPUT VALUE
        return tags.tolist()

    # GET LOG RETURN VALUE DF COLUMN
    def log_returns(self, dataframe):
        """Return log(label[t] / label[t-1]) for the 'label' column.

        The first element is NaN because `shift(1)` leaves the first row
        without a predecessor.
        """
        return np.log(dataframe['label'] / dataframe['label'].shift(1))

### CLONE DATAFRAME & REPLACE LABEL COLUMN

In [5]:
def replace_labels(old_dataframe, labels):
    """Return a copy of `old_dataframe` whose 'label' column is `labels`.

    The input dataframe is not modified; the assignment happens on a
    copy, which is what gets returned.
    """
    # WORK ON A CLONE SO THE CALLER'S FRAME STAYS UNTOUCHED
    clone = old_dataframe.copy()
    clone['label'] = labels

    return clone

### RENAME DICT KEYS

In [70]:
def rename_settings(old_settings):
    """Return a new dict with every key of `old_settings` prefixed by 'model__'.

    Values are carried over unchanged and the input dict is not modified.
    """
    # PREFIX EVERY KEY, KEEP EVERY VALUE
    return {
        'model__' + name: setting
        for name, setting in old_settings.items()
    }

### BALANCE PREDICTIONS

In [94]:
def balance_predictions(predictions):
    """Trim every prediction sequence to the length of the shortest one.

    Longer sequences lose their leading elements, so only the trailing
    part of each is kept. The dict is modified in place and also
    returned.

    Parameters
    ----------
    predictions : dict
        Maps a name to a sliceable sequence supporting ``len()``.

    Returns
    -------
    dict
        The same ``predictions`` object, with every value cut to the
        shortest length. An empty dict is returned unchanged.
    """

    # NOTHING TO BALANCE
    if not predictions:
        return predictions

    # FIND THE SMALLEST SIZE
    smallest = min(len(values) for values in predictions.values())

    # CUT OFF EXTRA LEADING ELEMENTS
    # AN EXPLICIT START INDEX IS USED BECAUSE [-smallest:] BECOMES [-0:]
    # WHEN THE SHORTEST ENTRY IS EMPTY, WHICH SELECTS THE WHOLE SEQUENCE
    for key, values in predictions.items():
        predictions[key] = values[len(values) - smallest:]

    return predictions

### GRAPH FITTING MSE

In [3]:
def graph_mse(data):
    """Plot `data` with pyplot and display the figure.

    The x axis is labelled 'epochs' and the y axis 'mean squared error'.
    Nothing is returned.
    """
    # DRAW THE CURVE
    plt.plot(data)

    # LABEL BOTH AXES
    plt.xlabel('epochs')
    plt.ylabel('mean squared error')

    plt.show()

### CREATE TRAINING & VALIDATION CONFUSION MATRICES

In [2]:
def train_matrixes(label_table, classifier_table):
    """Build one training confusion matrix per classifier column.

    Parameters
    ----------
    label_table : DataFrame
        Its 'label' column is passed as the first argument of
        `confusion_matrix` (the true labels).
    classifier_table : DataFrame
        Each column is passed as the second argument of
        `confusion_matrix` (one model's predictions).

    Returns
    -------
    dict
        ``{column: {'training': {'graph': 'matrix', 'data': rows}}}``
        where ``rows`` is the matrix for classes 0, 1, 2 as nested lists.
    """
    # TRUE LABELS, SHARED BY EVERY COLUMN
    truth = label_table['label'].to_numpy()

    results = {}

    # ONE MATRIX PER MODEL COLUMN
    for name in classifier_table.columns:
        counts = confusion_matrix(
            truth,
            classifier_table[name].to_numpy(),
            labels=[0, 1, 2]
        )

        results[name] = {
            'training': {'graph': 'matrix', 'data': counts.tolist()}
        }

    return results

In [9]:
def validation_matrixes(container, prediction_table, labels):
    """Add a validation confusion matrix to each column's container entry.

    Parameters
    ----------
    container : dict
        Must already hold a dict under every column name of
        `prediction_table`; a missing name raises KeyError. It is
        modified in place.
    prediction_table : DataFrame
        Each column is passed as the second argument of
        `confusion_matrix` (one model's predictions).
    labels : array-like
        Passed as the first argument of `confusion_matrix` (the true
        labels).

    Returns
    -------
    dict
        The same `container`, with a 'validation' entry of the form
        ``{'graph': 'matrix', 'data': rows}`` added per column.
    """
    # ONE MATRIX PER PREDICTION COLUMN
    for name in prediction_table.columns:
        counts = confusion_matrix(
            labels,
            prediction_table[name].to_numpy(),
            labels=[0, 1, 2]
        )

        # ATTACH IT NEXT TO THE EXISTING ENTRY FOR THIS COLUMN
        container[name]['validation'] = {
            'graph': 'matrix',
            'data': counts.tolist()
        }

    return container

### REGRESSION FITTING METRICS

In [10]:
def regression_fitting_metrics(regression_ensemble):
    """Collect per-fold fitting scores for every model group of an ensemble.

    Parameters
    ----------
    regression_ensemble : object
        Must expose ``models``: an iterable of groups, each group an
        iterable of objects with ``name`` and ``score`` attributes.
        The position inside a group is used as the fold index.

    Returns
    -------
    dict
        ``{name: {'graph': kind, 'data': collection}}`` where ``name`` is
        taken from the last model of the group.

        * A score that is a mapping containing 'R2' is spread out as
          ``collection[metric]['fold_<i>'] = value`` and sets ``kind`` to
          'bar'.
        * Any other score is stored whole as
          ``collection['fold_<i>'] = score`` and sets ``kind`` to 'line'.

        ``kind`` reflects the last fold of the group. Groups without any
        model are skipped.
    """

    # STITCH TOGETHER REGRESSION FITTING METRICS
    fitting = {}

    for blob in regression_ensemble.models:
        collection = {}

        # DEFAULT BAR TYPE
        bar_type = 'line'

        # REMEMBER THE LAST MODEL EXPLICITLY INSTEAD OF RELYING ON THE
        # LOOP VARIABLE, WHICH IS UNDEFINED OR STALE FOR AN EMPTY GROUP
        last_model = None

        for index, model in enumerate(blob):
            last_model = model
            fold = 'fold_' + str(index)
            score = model.score

            # IF THE SCORE IS A MAPPING WITH A R2 PROPERTY
            # (THE keys CHECK KEEPS SCALAR SCORES OUT OF THE `in` TEST)
            if hasattr(score, 'keys') and 'R2' in score:

                # CHANGE BAR TYPE
                bar_type = 'bar'

                # LOOP IN VALUES, CREATING EACH SUB-PROPERTY ON FIRST USE
                # SO A METRIC THAT ONLY SHOWS UP IN A LATER FOLD STILL FITS
                for key in score.keys():
                    collection.setdefault(key, {})[fold] = score[key]

            # OTHERWISE, INJECT NORMALLY
            else:
                bar_type = 'line'
                collection[fold] = score

        # EMPTY GROUP: NO MODEL TO NAME THE ENTRY AFTER
        if last_model is None:
            continue

        fitting[last_model.name] = {
            'graph': bar_type,
            'data': collection
        }

    return fitting