### IMPORTS

In [7]:
import numpy as np

In [15]:
from sklearn.linear_model import LinearRegression, SGDClassifier, RidgeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.naive_bayes import ComplementNB

In [2]:
import tensorflow as tf
from tensorflow.python.keras import Sequential, Input, Model
from tensorflow.python.keras.layers import Dense, LSTM, Dropout

In [3]:
# GPU SESSION SETUP: REQUEST A 0.8 PER-PROCESS GPU MEMORY FRACTION AND
# ENABLE allow_growth ON THE SAME GPU OPTIONS.
# NOTE: A `device_count = {'GPU': 1}` OPTION WAS LEFT DISABLED HERE.
config = tf.compat.v1.ConfigProto(
    gpu_options=tf.compat.v1.GPUOptions(
        per_process_gpu_memory_fraction=0.8,
        allow_growth=True
    )
)

# OPEN A V1-COMPAT SESSION WITH THAT CONFIG & REGISTER IT WITH KERAS
session = tf.compat.v1.Session(config=config)
tf.compat.v1.keras.backend.set_session(session)

In [6]:
import ipynb.fs.full.splitting as splitting
import ipynb.fs.full.misc as misc
import ipynb.fs.full.storage as storage

In [7]:
from tcn import TCN, tcn_full_summary

In [8]:
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression

In [9]:
from sklearn.model_selection import GridSearchCV

In [10]:
# SET THE TENSORFLOW (V1-COMPAT) LOGGER VERBOSITY TO THE ERROR LEVEL
tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR)

In [11]:
from sklearn.pipeline import Pipeline

In [12]:
from sklearn.preprocessing import MinMaxScaler

In [1]:
from sklearn.metrics import r2_score
import math

### MODEL TYPES

In [6]:
class basic_model():
    """Bundle an estimator with the feature scaler used to train it.

    The wrapped estimator must expose ``fit(X, y)`` and ``predict(X)``;
    the scaler must expose ``fit_transform(X)`` and ``transform(X)``.

    Attributes:
        model: the wrapped estimator.
        name: identifier given by the caller.
        settings: the settings object given by the caller (stored and
            saved as-is; not interpreted by this class).
        scaler: feature scaler applied before fitting and predicting.
        score: ``None`` until ``train`` has run, afterwards the dict
            returned by ``reg_scores`` for the training data.
    """

    def __init__(self, _model, _name, _settings, scaler=False):
        """Store the estimator, its name/settings and a feature scaler.

        Args:
            _model: estimator to wrap.
            _name: identifier for this model.
            _settings: settings object to keep alongside the model.
            scaler: an existing scaler to reuse. ``False`` (the default)
                or ``None`` means "create a fresh MinMaxScaler(0, 1)".
        """
        self.model = _model
        self.name = _name
        self.settings = _settings

        # DEFINED UP FRONT SO READING IT BEFORE TRAINING DOES NOT RAISE
        self.score = None

        # CREATE SCALER IF NONE WAS GIVEN (IDENTITY CHECK, SO A REAL
        # SCALER OBJECT IS NEVER COMPARED BY VALUE)
        if scaler is None or scaler is False:
            self.scaler = MinMaxScaler(feature_range=(0, 1))
        else:
            self.scaler = scaler

    def train(self, dataset):
        """Fit the scaler and the estimator, then score the training fit.

        Args:
            dataset: mapping with ``'features'`` and ``'labels'`` entries.

        Side effects:
            Refits ``self.scaler`` and ``self.model`` and sets
            ``self.score`` to the result of ``reg_scores``.
        """
        # SCALE DATASET FEATURES & FIT THE MODEL
        scaled_features = self.scaler.fit_transform(dataset['features'])
        self.model.fit(scaled_features, dataset['labels'])

        # PREDICT ON THE TRAINING DATA & GET SCORES
        predictions = np.asarray(self.model.predict(scaled_features)).ravel()
        self.score = reg_scores(predictions, dataset)

    def predict(self, dataset):
        """Return flattened predictions for ``dataset['features']``.

        The features are transformed with the already-fitted scaler.
        The estimator output is converted with ``np.asarray`` first so
        list-like outputs are flattened as well.

        Returns:
            A one-dimensional numpy array (a copy) of the predictions.
        """
        # SCALE DATASET FEATURES & PREDICT
        scaled_features = self.scaler.transform(dataset['features'])
        predictions = self.model.predict(scaled_features)
        return np.asarray(predictions).flatten()

    def save(self, path):
        """Pickle the scaler, settings and model under ``path``.

        The three files are written as ``path + '/scaler.pickle'``,
        ``path + '/settings.pickle'`` and ``path + '/model.pickle'``
        through ``storage.save_pickle``.
        """
        storage.save_pickle(self.scaler, path + '/scaler.pickle')
        storage.save_pickle(self.settings, path + '/settings.pickle')
        storage.save_pickle(self.model, path + '/model.pickle')

In [17]:
class generator_model():
    """Bundle a Keras-style model with its scaler and batch generators.

    Batches are produced by ``splitting.generator`` from scaled features,
    using ``settings['morph']``; training runs for ``settings['epochs']``
    epochs.

    Attributes:
        name: identifier given by the caller.
        settings: settings mapping; this class reads the ``'morph'`` and
            ``'epochs'`` keys.
        scaler: feature scaler applied before batching.
        model: the attached model, or ``None`` until one is supplied via
            the constructor or ``add_model``.
        score: ``None`` until ``train`` has run, afterwards the list of
            per-epoch ``'loss'`` values from the training history.
    """

    def __init__(self, _name, _settings, scaler=False, model=False):
        """Store name/settings and optionally reuse a scaler and a model.

        Args:
            _name: identifier for this model.
            _settings: settings mapping (see class docstring).
            scaler: existing scaler to reuse; ``False`` or ``None``
                creates a fresh MinMaxScaler(0, 1).
            model: existing model to attach; ``False`` or ``None`` leaves
                ``self.model`` as ``None``.
        """
        self.name = _name
        self.settings = _settings

        # DEFINED UP FRONT SO READING IT BEFORE TRAINING DOES NOT RAISE
        self.score = None

        # CREATE NEW SCALER IF NONE WAS GIVEN
        if scaler is None or scaler is False:
            self.scaler = MinMaxScaler(feature_range=(0, 1))
        else:
            self.scaler = scaler

        # IF A MODEL WAS PASSED, ATTACH IT; OTHERWISE LEAVE A NONE PLACEHOLDER
        if model is None or model is False:
            self.model = None
        else:
            self.model = model

    def train_generator(self, dataset):
        """Fit the scaler on the features and return a shuffled generator.

        Args:
            dataset: mapping with ``'features'`` and ``'labels'`` entries.
        """
        # SCALE THE DATASETS FEATURES (THIS REFITS THE SCALER)
        features = self.scaler.fit_transform(dataset['features'])
        labels = dataset['labels']

        # CREATE THE GENERATOR
        return splitting.generator(
            features,
            labels,
            self.settings['morph'],
            shuffle=True
        )

    def test_generator(self, dataset):
        """Return an unshuffled generator over the scaled features.

        The already-fitted scaler is reused. Labels are not read from the
        dataset; a list of zeros of matching length is passed instead.
        """
        # SCALE THE DATASETS FEATURES
        features = self.scaler.transform(dataset['features'])

        # PLACEHOLDER LABELS, ONE PER FEATURE ROW
        labels = [0] * len(dataset['features'])

        return splitting.generator(
            features,
            labels,
            self.settings['morph'],
            shuffle=False
        )

    def add_model(self, _model):
        """Attach (or replace) the wrapped model."""
        self.model = _model

    def train(self, generator):
        """Fit the attached model on ``generator`` and record the loss.

        ``Model.fit`` is used instead of the deprecated ``fit_generator``;
        ``fit`` accepts generators directly. One epoch covers
        ``len(generator)`` steps.

        Side effects:
            Sets ``self.score`` to the ``'loss'`` history of this run.
        """
        history = self.model.fit(
            generator,
            steps_per_epoch=len(generator),
            epochs=self.settings['epochs'],
            verbose=1
        )

        # SET THE TRAINING LOSS SCORE (ONE VALUE PER EPOCH)
        self.score = history.history['loss']

    def predict(self, dataframe):
        """Return flattened model predictions for ``dataframe``.

        Args:
            dataframe: mapping with a ``'features'`` entry (it is handed
                to ``test_generator``).

        Returns:
            A one-dimensional numpy array (a copy) of the predictions.
        """
        # CREATE GENERATOR & PREDICT
        generator = self.test_generator(dataframe)
        predictions = self.model.predict(generator)

        return np.asarray(predictions).flatten()

    def save(self, path):
        """Persist the scaler, settings and model under ``path``.

        Scaler and settings go through ``storage.save_pickle`` as
        ``/scaler.pickle`` and ``/settings.pickle``; the model is written
        with its own ``save`` method to ``path + '/model.keras'``.
        """
        storage.save_pickle(self.scaler, path + '/scaler.pickle')
        storage.save_pickle(self.settings, path + '/settings.pickle')
        self.model.save(path + '/model.keras')

### METRIC FUNCTIONS

In [2]:
def reg_scores(predictions, dataset):
    """Return R-squared and adjusted R-squared for a set of predictions.

    Args:
        predictions: predicted values, aligned with ``dataset['labels']``.
        dataset: mapping with ``'labels'`` and a two-dimensional
            ``'features'`` entry (rows x features).

    Returns:
        ``{'R2': ..., 'R2 ADJ': ...}``. ``'R2 ADJ'`` is ``nan`` when
        ``n_rows - n_features - 1 <= 0``, because the adjustment is
        undefined without positive residual degrees of freedom (the
        unguarded formula divides by zero or flips sign there).
    """
    # GET R-SQUARED SCORE
    R2 = r2_score(dataset['labels'], predictions)

    # CHECK DIMENSION OF FEATURES; np.shape READS (ROWS, COLUMNS) FOR
    # NESTED LISTS, ARRAYS AND DATAFRAMES ALIKE, WHEREAS INDEXING [0]
    # WOULD BE A COLUMN LOOKUP ON A DATAFRAME
    shape = np.shape(dataset['features'])
    n_rows = shape[0]
    n_features = shape[1]

    # GET ADJUSTED R-SQUARED SCORE
    freedom = n_rows - n_features - 1
    if freedom > 0:
        adjusted = 1 - (1 - R2) * ((n_rows - 1) / freedom)
    else:
        adjusted = float('nan')

    return {
        'R2': R2,
        'R2 ADJ': adjusted
    }

### LINEAR REGRESSION

In [16]:
def linear_regression(dataset, name, settings):
    """Train a LinearRegression wrapped in a basic_model and return it.

    Args:
        dataset: training data handed to the wrapper's ``train``.
        name: identifier stored on the wrapper.
        settings: settings stored on the wrapper.
    """
    # WRAP A FRESH ESTIMATOR TOGETHER WITH A DEFAULT SCALER
    wrapper = basic_model(LinearRegression(), name, settings)

    # FIT ON THE TRAIN DATA & HAND BACK THE WRAPPER
    wrapper.train(dataset)
    return wrapper

### LONG SHORT-TERM MEMORY

In [17]:
def long_short_term(dataset, name, settings):
    """Build, compile and train a Sequential network from ``settings``.

    Args:
        dataset: training data passed to the wrapper's train generator.
        name: identifier stored on the returned wrapper.
        settings: mapping read for ``'layers'`` (iterated in order),
            ``'loss'`` and ``'optimizer'``; the wrapper reads further
            keys of its own.

    Returns:
        The trained ``generator_model`` wrapper holding the network.
    """
    # EMPTY NETWORK & THE WRAPPER THAT OWNS THE SCALER
    network = Sequential()
    wrapper = generator_model(name, settings)

    # TRAINING BATCHES (CREATING THEM ALSO FITS THE WRAPPER'S SCALER)
    batches = wrapper.train_generator(dataset)

    # APPEND EVERY REQUESTED LAYER; THE FIRST BATCH'S FEATURE SHAPE IS
    # PASSED ALONG SO THE FIRST LAYER CAN RECEIVE ITS INPUT DIMENSIONS
    for position, spec in enumerate(settings['layers']):
        layer_kind, layer_params = misc.key_value(spec)
        add_lstm_layer(
            layer_kind,
            layer_params,
            network,
            position,
            batches[0][0].shape
        )

    # COMPILE WITH THE CONFIGURED LOSS & OPTIMIZER
    compile_args = {
        'loss': settings['loss'],
        'optimizer': settings['optimizer']
    }
    network.compile(**compile_args)

    # ATTACH THE NETWORK TO THE WRAPPER, TRAIN & RETURN THE WRAPPER
    wrapper.add_model(network)
    wrapper.train(batches)
    return wrapper

In [18]:
def add_lstm_layer(name, settings, model, index, shape):
    """Instantiate one layer by name and append it to ``model``.

    Args:
        name: layer key, one of ``'lstm'``, ``'dropout'`` or ``'dense'``.
        settings: keyword arguments for the layer constructor. This dict
            is copied before use, so the caller's mapping is not modified.
        model: object exposing ``add(layer)``.
        index: position of the layer; ``0`` marks the first layer.
        shape: shape of one feature batch; elements 1 and 2 become the
            first layer's ``input_shape`` (presumably timesteps and
            features -- confirm against the batch generator).

    Raises:
        KeyError: if ``name`` is not a supported layer key.
    """
    # AVAILABLE LSTM LAYERS
    available = {
        'lstm': LSTM,
        'dropout': Dropout,
        'dense': Dense
    }

    # SELECT THE CORRECT FUNCTION
    func = available[name]

    # WORK ON A COPY SO THE INJECTED KEY DOES NOT LEAK INTO THE
    # CALLER'S SETTINGS
    layer_kwargs = dict(settings)

    # INJECT INPUT LAYER DIMENSIONS FOR THE FIRST LAYER ONLY
    if index == 0:
        layer_kwargs['input_shape'] = (shape[1], shape[2])

    model.add(func(**layer_kwargs))

### TEMPORAL CONVOLUTIONAL NETWORK

In [19]:
def temporal_convolutional_network(dataset, name, settings):
    """Build, compile and train a functional-API network from ``settings``.

    Args:
        dataset: training data passed to the wrapper's train generator.
        name: identifier stored on the returned wrapper.
        settings: mapping read for ``'layers'`` (chained in order),
            ``'optimizer'`` and ``'loss'``; the wrapper reads further
            keys of its own.

    Returns:
        The trained ``generator_model`` wrapper holding the network.
    """
    # WRAPPER OWNING THE SCALER, PLUS ITS TRAINING BATCHES
    wrapper = generator_model(name, settings)
    batches = wrapper.train_generator(dataset)

    # INPUT TENSOR SHAPED LIKE THE FIRST FEATURE BATCH
    entry = Input(batch_shape=batches[0][0].shape)

    # CHAIN THE REQUESTED LAYERS; STARTS FROM AN EMPTY PLACEHOLDER THAT
    # THE FIRST LAYER REPLACES
    tensor = []
    for position, spec in enumerate(settings['layers']):
        layer_kind, layer_params = misc.key_value(spec)
        tensor = add_tcn_layers(
            layer_kind,
            layer_params,
            position,
            entry,
            tensor
        )

    # ASSEMBLE & COMPILE THE NETWORK
    network = Model(inputs=[entry], outputs=[tensor])
    compile_args = {
        'optimizer': settings['optimizer'],
        'loss': settings['loss']
    }
    network.compile(**compile_args)

    # ATTACH THE NETWORK TO THE WRAPPER, TRAIN & RETURN THE WRAPPER
    wrapper.add_model(network)
    wrapper.train(batches)
    return wrapper

In [20]:
def add_tcn_layers(name, settings, index, model_input, old_output):
    """Create one layer by name and apply it to the right upstream tensor.

    Args:
        name: layer key, one of ``'tcn'``, ``'dense'`` or ``'dropout'``.
        settings: keyword arguments for the layer constructor.
        index: position of the layer; ``0`` marks the first layer.
        model_input: tensor fed to the first layer.
        old_output: output of the previous layer, fed to later layers.

    Returns:
        The output of the newly created layer.
    """
    # AVAILABLE TCN LAYERS
    layer_types = {
        'tcn': TCN,
        'dense': Dense,
        'dropout': Dropout
    }

    # BUILD THE REQUESTED LAYER
    layer = layer_types[name](**settings)

    # FIRST LAYER CONSUMES THE MODEL INPUT, LATER ONES THE PREVIOUS OUTPUT
    upstream = model_input if index == 0 else old_output
    return layer(upstream)

### GRIDSEARCH FUNCTIONS

In [21]:
def search(model, dataset, settings):
    """Grid-search ``model`` inside a scaler pipeline; return the best fit.

    Args:
        model: estimator instance placed as the pipeline's ``'model'`` step.
        dataset: mapping with ``'features'`` and ``'labels'`` entries.
        settings: parameter grid; it is passed through
            ``misc.rename_settings`` before use (presumably to prefix the
            keys for the ``'model'`` step -- see that helper).

    Returns:
        The ``'model'`` step of the best estimator found.
    """
    # MINI-PIPELINE: MIN-MAX SCALER FOLLOWED BY THE MODEL
    steps = [
        ('scaler', MinMaxScaler(feature_range=(0, 1))),
        ('model', model)
    ]
    scaled_pipeline = Pipeline(steps)

    # PARAMETER GRID WITH RENAMED KEYS
    param_grid = misc.rename_settings(settings)

    # 3-FOLD GRID SEARCH WITH n_jobs=-1
    grid = GridSearchCV(scaled_pipeline, param_grid, cv=3, n_jobs=-1)
    grid.fit(dataset['features'], dataset['labels'])

    # PICK THE MODEL STEP OUT OF THE BEST PIPELINE
    winner = grid.best_estimator_
    return winner['model']

In [22]:
def construct_base(model, dataset, settings):
    """Instantiate ``model`` and optionally tune it with a grid search.

    Args:
        model: callable (typically an estimator class) to instantiate.
        dataset: data forwarded to ``search`` when a grid search runs.
        settings: mapping; ``'static'`` (if present) supplies constructor
            keyword arguments and ``'grid_search'`` (if present) supplies
            the grid handed to ``search``.

    Returns:
        The fresh instance, or the result of ``search`` when a
        ``'grid_search'`` entry exists.
    """
    # INSTANTIATE WITH STATIC PARAMETERS WHEN GIVEN, BARE OTHERWISE
    estimator = model(**settings['static']) if 'static' in settings else model()

    # NOTHING TO TUNE: RETURN THE PLAIN INSTANCE
    if 'grid_search' not in settings:
        return estimator

    # OTHERWISE LET THE GRID SEARCH PICK THE BEST VARIANT
    return search(estimator, dataset, settings['grid_search'])

### SUPPORT VECTOR MACHINE

In [23]:
def support_vector_classifier(dataset, name, settings):
    """Train an SVC wrapped in a basic_model and return the wrapper.

    Args:
        dataset: training data, used for construction and fitting.
        name: identifier stored on the wrapper.
        settings: settings forwarded to ``construct_base`` and stored on
            the wrapper.
    """
    # BUILD THE ESTIMATOR FROM THE SETTINGS
    estimator = construct_base(SVC, dataset, settings)

    # WRAP IT WITH A SCALER, FIT ON THE TRAIN DATASET & RETURN
    wrapper = basic_model(estimator, name, settings)
    wrapper.train(dataset)
    return wrapper

### LOGISTIC REGRESSION

In [24]:
def logistic_regression(dataset, name, settings):
    """Train a LogisticRegression wrapped in a basic_model and return it.

    Args:
        dataset: training data, used for construction and fitting.
        name: identifier stored on the wrapper.
        settings: settings forwarded to ``construct_base`` and stored on
            the wrapper.
    """
    # BUILD THE ESTIMATOR FROM THE SETTINGS
    estimator = construct_base(LogisticRegression, dataset, settings)

    # WRAP IT WITH A SCALER, FIT ON THE TRAIN DATA & RETURN
    wrapper = basic_model(estimator, name, settings)
    wrapper.train(dataset)
    return wrapper

### RANDOM FOREST CLASSIFIER

In [25]:
def random_forest(dataset, name, settings):
    """Train a RandomForestClassifier wrapped in a basic_model and return it.

    Args:
        dataset: training data, used for construction and fitting.
        name: identifier stored on the wrapper.
        settings: settings forwarded to ``construct_base`` and stored on
            the wrapper.
    """
    # BUILD THE ESTIMATOR FROM THE SETTINGS
    estimator = construct_base(RandomForestClassifier, dataset, settings)

    # WRAP IT WITH A SCALER, FIT ON THE TRAIN DATA & RETURN
    wrapper = basic_model(estimator, name, settings)
    wrapper.train(dataset)
    return wrapper

### SGD CLASSIFIER

In [9]:
def sgd(dataset, name, settings):
    """Train an SGDClassifier wrapped in a basic_model and return it.

    Args:
        dataset: training data, used for construction and fitting.
        name: identifier stored on the wrapper.
        settings: settings forwarded to ``construct_base`` and stored on
            the wrapper.
    """
    # BUILD THE ESTIMATOR FROM THE SETTINGS
    estimator = construct_base(SGDClassifier, dataset, settings)

    # WRAP IT WITH A SCALER, FIT ON THE TRAIN DATA & RETURN
    wrapper = basic_model(estimator, name, settings)
    wrapper.train(dataset)
    return wrapper

### RIDGE CLASSIFIER

In [10]:
def ridge(dataset, name, settings):
    """Train a RidgeClassifier wrapped in a basic_model and return it.

    Args:
        dataset: training data, used for construction and fitting.
        name: identifier stored on the wrapper.
        settings: settings forwarded to ``construct_base`` and stored on
            the wrapper.
    """
    # BUILD THE ESTIMATOR FROM THE SETTINGS
    estimator = construct_base(RidgeClassifier, dataset, settings)

    # WRAP IT WITH A SCALER, FIT ON THE TRAIN DATA & RETURN
    wrapper = basic_model(estimator, name, settings)
    wrapper.train(dataset)
    return wrapper

### COMPLEMENT NB

In [13]:
def comp_nb(dataset, name, settings):
    """Train a ComplementNB wrapped in a basic_model and return it.

    Args:
        dataset: training data, used for construction and fitting.
        name: identifier stored on the wrapper.
        settings: settings forwarded to ``construct_base`` and stored on
            the wrapper.
    """
    # BUILD THE ESTIMATOR FROM THE SETTINGS
    estimator = construct_base(ComplementNB, dataset, settings)

    # WRAP IT WITH A SCALER, FIT ON THE TRAIN DATA & RETURN
    wrapper = basic_model(estimator, name, settings)
    wrapper.train(dataset)
    return wrapper

### START TRAINING A MODEL

In [14]:
def start(dataset, name, settings):
    """Dispatch to the training function selected by the prefix of ``name``.

    The text before the first underscore in ``name`` selects the trainer;
    the full ``name`` is still passed on unchanged.

    Args:
        dataset: training data forwarded to the trainer.
        name: model name whose prefix is one of the keys below.
        settings: settings forwarded to the trainer.

    Returns:
        Whatever the selected training function returns.

    Raises:
        KeyError: if the prefix is not a known model key.
    """
    # TRAINING FUNCTION PER MODEL KEY
    trainers = {
        'linreg': linear_regression,
        'lstm': long_short_term,
        'tcn': temporal_convolutional_network,
        'svc': support_vector_classifier,
        'logreg': logistic_regression,
        'randforest': random_forest,
        'sgd': sgd,
        'ridge': ridge,
        'naivebays': comp_nb
    }

    # MODEL KEY = EVERYTHING BEFORE THE FIRST UNDERSCORE
    prefix, _, _ = name.partition('_')

    # RUN THE MATCHING TRAINER
    trainer = trainers[prefix]
    return trainer(dataset, name, settings)