<h1>Table of Contents<span class="tocSkip"></span></h1>
<div class="toc"><ul class="toc-item"><li><span><a href="#Read-in-Data" data-toc-modified-id="Read-in-Data-1"><span class="toc-item-num">1&nbsp;&nbsp;</span>Read in Data</a></span></li><li><span><a href="#Generate-Models" data-toc-modified-id="Generate-Models-2"><span class="toc-item-num">2&nbsp;&nbsp;</span>Generate Models</a></span><ul class="toc-item"><li><span><a href="#Support-Vector-Machines" data-toc-modified-id="Support-Vector-Machines-2.1"><span class="toc-item-num">2.1&nbsp;&nbsp;</span>Support Vector Machines</a></span></li><li><span><a href="#Neural-Network" data-toc-modified-id="Neural-Network-2.2"><span class="toc-item-num">2.2&nbsp;&nbsp;</span>Neural Network</a></span></li></ul></li><li><span><a href="#Run-Models" data-toc-modified-id="Run-Models-3"><span class="toc-item-num">3&nbsp;&nbsp;</span>Run Models</a></span></li></ul></div>

In [23]:
import pandas as pd 
import numpy as np 
import plotly.graph_objects as go 
import seaborn as sns 
import matplotlib.pyplot as plt 
from tqdm import tqdm 
import tensorflow as tf

## Read in csv 
tot_df_clean_year = pd.read_csv('model_data.csv', 
                                index_col = ['year', 'ID']) 
tot_df_clean_year.head() 

Unnamed: 0_level_0,Unnamed: 1_level_0,lead_rating,rating,IQ_TOTAL_ASSETS,IQ_TOTAL_REV,IQ_NI_CF,IQ_AR,IQ_GP,IQ_AP,IQ_TOTAL_LIAB,IQ_TOTAL_DEBT,IQ_CASH_FINAN,IQ_TOTAL_EQUITY,IQ_CASH_EQUIV
year,ID,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
2001,JPM,-1.0,1.0,-0.945447,-0.002409,-0.000409,0.0,-0.002409,0.004809,0.001093,-0.005029,-0.009855,-0.001093,-0.026098
2001,F,-1.0,-1.0,0.143388,-0.042495,-0.004145,-0.005853,-0.008635,0.002457,0.020101,0.018932,-0.015464,-0.020101,0.004941
2001,AKAM,-1.0,-0.0,,0.068819,-1.117473,0.036566,0.020109,0.061503,0.660347,0.497948,0.001225,-0.660347,0.1646
2001,CHTR,-1.0,-0.0,1.445774,-0.000547,-0.001261,0.000173,0.003984,0.023845,0.00728,0.048779,0.004572,-0.00728,0.000449
2001,DOV,-0.0,-0.0,-0.958897,-0.013956,-0.014696,-0.01974,-0.014525,-0.008472,-0.020854,-0.027863,-0.015357,0.020854,0.010926


## Read in Data

In [24]:
## Test and train split for time series
train = tot_df_clean_year.head(
    int(tot_df_clean_year.shape[0] * 0.8 ) 
) 

test = tot_df_clean_year.tail(
    int(tot_df_clean_year.shape[0] * 0.2) 
)

## Split into x and y 
def SplitData(df): 
    y = df.lead_rating 
    x = df.drop('lead_rating', axis = 1) 
    
    ## Standardize entire data 
    x = (x  - x.mean()) / x.std() 
    return x, y 

train_x, train_y = SplitData(train) 
test_x, test_y = SplitData(test) 

## Generate Models 
### Support Vector Machines

In [25]:
from sklearn.model_selection import TimeSeriesSplit 
from sklearn.svm import SVC
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import accuracy_score


def SVM_Fit(train_x, train_y, kernel,
            params = [10**x for x in np.arange(-1,3,0.9)]): 
    '''Fit the SVM Machine given the kernel type, parameters, 
    data''' 
    
    if kernel == 'linear': 
        parameters = {'C': params} 
    else: 
        parameters = {'C': params, 
                     'gamma': params} 
    
    cv = TimeSeriesSplit(n_splits = 5) 
    
    model = GridSearchCV(estimator = SVC(kernel = kernel), 
                        param_grid = parameters, 
                        cv = cv, 
                        verbose = 1) 
    
    model.fit(train_x, train_y) 
    return model

In [26]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import RandomizedSearchCV

def RandomForestsModel(train_x, train_y): 
    '''Random Forest Model'''
    rf = RandomForestClassifier(random_state = 200) 
    rf.get_params()
    
    # Number of trees in random forest
    n_estimators = [int(x) for x in np.linspace(start = 200, stop = 2000, num = 10)]
    # Number of features to consider at every split
    max_features = ['auto', 'sqrt']
    # Maximum number of levels in tree
    max_depth = [int(x) for x in np.linspace(10, 110, num = 11)]
    max_depth.append(None)
    # Minimum number of samples required to split a node
    min_samples_split = [2, 5, 10]
    # Minimum number of samples required at each leaf node
    min_samples_leaf = [1, 2, 4]
    # Method of selecting samples for training each tree
    bootstrap = [True, False]
    # Create the random grid
    random_grid = {'n_estimators': n_estimators,
               'max_features': max_features,
               'max_depth': max_depth,
               'min_samples_split': min_samples_split,
               'min_samples_leaf': min_samples_leaf,
               'bootstrap': bootstrap}
    
    cv = TimeSeriesSplit(n_splits = 5) 
    
    rf_random = RandomizedSearchCV(estimator = rf, 
                              param_distributions = random_grid, 
                              n_iter = 100, cv = cv, verbose = 2, 
                              random_state = 200, n_jobs = -1)
    
    rf_random.fit(train_x, train_y) 
    return rf_random 

In [27]:
def Predict(fitted_model, test_x, test_y, 
           name):
    '''Prediction Accuracy'''
    prediction = fitted_model.predict(test_x) 
    score = accuracy_score(prediction, test_y) 
    prediction = pd.DataFrame({'prediction_{}'.format(name): prediction})
    print('The {} Model Score is: {}'.format(name, score)) 
    return prediction, score    

### Neural Network

In [28]:
model = tf.keras.Sequential([
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(128, activation='relu'),
    tf.keras.layers.Dropout(0.2),
    tf.keras.layers.Dense(5)
])

model.stop_training = True

In [29]:
loss_fn = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
model.compile(optimizer='adam',
              loss=loss_fn,
              metrics=['accuracy'])

## Run Models

In [None]:
# Category range must go from 0 to 5 instead of -2 to 2
y_train = train_y +2 
y_test = test_y +2

model.fit(train_x.to_numpy(), 
          y_train.to_numpy(),
          epochs=1000)


Train on 1452 samples
Epoch 1/1000
Epoch 2/1000
Epoch 3/1000
Epoch 4/1000
Epoch 5/1000
Epoch 6/1000
Epoch 7/1000
Epoch 8/1000
Epoch 9/1000
Epoch 10/1000
Epoch 11/1000
Epoch 12/1000
Epoch 13/1000
Epoch 14/1000
Epoch 15/1000
Epoch 16/1000
Epoch 17/1000
Epoch 18/1000
Epoch 19/1000
Epoch 20/1000
Epoch 21/1000
Epoch 22/1000
Epoch 23/1000
Epoch 24/1000
Epoch 25/1000
Epoch 26/1000
Epoch 27/1000
Epoch 28/1000
Epoch 29/1000
Epoch 30/1000
Epoch 31/1000
Epoch 32/1000
Epoch 33/1000
Epoch 34/1000
Epoch 35/1000
Epoch 36/1000
Epoch 37/1000
Epoch 38/1000
Epoch 39/1000
Epoch 40/1000
Epoch 41/1000
Epoch 42/1000
Epoch 43/1000
Epoch 44/1000
Epoch 45/1000
Epoch 46/1000
Epoch 47/1000
Epoch 48/1000
Epoch 49/1000
Epoch 50/1000
Epoch 51/1000
Epoch 52/1000
Epoch 53/1000
Epoch 54/1000
Epoch 55/1000
Epoch 56/1000
Epoch 57/1000
Epoch 58/1000
Epoch 59/1000
Epoch 60/1000
Epoch 61/1000
Epoch 62/1000
Epoch 63/1000
Epoch 64/1000
Epoch 65/1000
Epoch 66/1000
Epoch 67/1000
Epoch 68/1000
Epoch 69/1000
Epoch 70/1000
Epoch 7

Epoch 77/1000
Epoch 78/1000
Epoch 79/1000
Epoch 80/1000
Epoch 81/1000
Epoch 82/1000
Epoch 83/1000
Epoch 84/1000
Epoch 85/1000
Epoch 86/1000
Epoch 87/1000
Epoch 88/1000
Epoch 89/1000
Epoch 90/1000
Epoch 91/1000
Epoch 92/1000
Epoch 93/1000
Epoch 94/1000
Epoch 95/1000
Epoch 96/1000
Epoch 97/1000
Epoch 98/1000
Epoch 99/1000
Epoch 100/1000
Epoch 101/1000
Epoch 102/1000
Epoch 103/1000
Epoch 104/1000
Epoch 105/1000
Epoch 106/1000
Epoch 107/1000
Epoch 108/1000
Epoch 109/1000
Epoch 110/1000
Epoch 111/1000
Epoch 112/1000
Epoch 113/1000
Epoch 114/1000
Epoch 115/1000
Epoch 116/1000
Epoch 117/1000
Epoch 118/1000
Epoch 119/1000
Epoch 120/1000
Epoch 121/1000
Epoch 122/1000
Epoch 123/1000
Epoch 124/1000
Epoch 125/1000
Epoch 126/1000
Epoch 127/1000
Epoch 128/1000
Epoch 129/1000
Epoch 130/1000
Epoch 131/1000
Epoch 132/1000
Epoch 133/1000
Epoch 134/1000
Epoch 135/1000
Epoch 136/1000
Epoch 137/1000
Epoch 138/1000
Epoch 139/1000
Epoch 140/1000
Epoch 141/1000
Epoch 142/1000
Epoch 143/1000
Epoch 144/1000
Epo

Epoch 151/1000
Epoch 152/1000
Epoch 153/1000
Epoch 154/1000
Epoch 155/1000
Epoch 156/1000
Epoch 157/1000
Epoch 158/1000
Epoch 159/1000
Epoch 160/1000
Epoch 161/1000
Epoch 162/1000
Epoch 163/1000
Epoch 164/1000
Epoch 165/1000
Epoch 166/1000
Epoch 167/1000
Epoch 168/1000
Epoch 169/1000
Epoch 170/1000
Epoch 171/1000
Epoch 172/1000
Epoch 173/1000
Epoch 174/1000
Epoch 175/1000
Epoch 176/1000
Epoch 177/1000
Epoch 178/1000
Epoch 179/1000
Epoch 180/1000
Epoch 181/1000
Epoch 182/1000
Epoch 183/1000
Epoch 184/1000
Epoch 185/1000
Epoch 186/1000
Epoch 187/1000
Epoch 188/1000
Epoch 189/1000
Epoch 190/1000
Epoch 191/1000
Epoch 192/1000
Epoch 193/1000
Epoch 194/1000
Epoch 195/1000
Epoch 196/1000
Epoch 197/1000
Epoch 198/1000
Epoch 199/1000
Epoch 200/1000
Epoch 201/1000
Epoch 202/1000
Epoch 203/1000
Epoch 204/1000
Epoch 205/1000
Epoch 206/1000
Epoch 207/1000
Epoch 208/1000
Epoch 209/1000
Epoch 210/1000
Epoch 211/1000
Epoch 212/1000
Epoch 213/1000
Epoch 214/1000
Epoch 215/1000
Epoch 216/1000
Epoch 217/

Epoch 225/1000
Epoch 226/1000
Epoch 227/1000
Epoch 228/1000
Epoch 229/1000
Epoch 230/1000
Epoch 231/1000
Epoch 232/1000
Epoch 233/1000
Epoch 234/1000
Epoch 235/1000
Epoch 236/1000
Epoch 237/1000
Epoch 238/1000
Epoch 239/1000
Epoch 240/1000
Epoch 241/1000
Epoch 242/1000
Epoch 243/1000
Epoch 244/1000
Epoch 245/1000
Epoch 246/1000
Epoch 247/1000
Epoch 248/1000
Epoch 249/1000
Epoch 250/1000
Epoch 251/1000
Epoch 252/1000
Epoch 253/1000
Epoch 254/1000
Epoch 255/1000
Epoch 256/1000
Epoch 257/1000
Epoch 258/1000
Epoch 259/1000
Epoch 260/1000
Epoch 261/1000
Epoch 262/1000
Epoch 263/1000
Epoch 264/1000
Epoch 265/1000
Epoch 266/1000
Epoch 267/1000
Epoch 268/1000
Epoch 269/1000
Epoch 270/1000
Epoch 271/1000
Epoch 272/1000
Epoch 273/1000
Epoch 274/1000
Epoch 275/1000
  32/1452 [..............................] - ETA: 0s - loss: nan - accuracy: 0.0000e+00

In [None]:
results = model.evaluate(test_x.to_numpy(),  y_test.to_numpy(), verbose=2)
probability_model = tf.keras.Sequential([model, 
                                         tf.keras.layers.Softmax()])
predictions = np.argmax(probability_model.predict(test_x.to_numpy()),axis=1)
confusion_matrix = tf.math.confusion_matrix(y_test, predictions)
# np.argmax(predictions,axis=1)
print("Tensorflow",confusion_matrix.numpy(),sep="\n")

In [None]:
## Make predictions
sigmoid = SVM_Fit(train_x, train_y, 'sigmoid') 
rbf = SVM_Fit(train_x, train_y, 'rbf') 
linear = SVM_Fit(train_x, train_y,'linear')
#poly = SVM_Fit(train_x, train_y, 'poly') 
poly = SVC(kernel = 'poly').fit(train_x, train_y)

rf = RandomForestsModel(train_x, train_y) 

In [None]:
## SVM Score
sigmoid_predict, sigmoid_score = Predict(sigmoid, test_x, test_y, 'sigmoid') 
lin_predict, lin_score = Predict(linear, test_x, test_y, 'linear') 
poly_predict, poly_score = Predict(poly, test_x, test_y, 'poly') 
rbf_predict, rbf_score = Predict(rbf, test_x, test_y, 'radial')

## TF Score 
tf_score = accuracy_score(predictions, y_test.to_numpy()) 

## Random Forest Score
random_predict, random_score = Predict(rf, test_x, test_y, 
                                      'Random Forest')

In [None]:
from sklearn.metrics import confusion_matrix
print("Sigmoid",confusion_matrix(test_y,sigmoid_predict,normalize="true"),sep="\n")
print("Linear",confusion_matrix(test_y,lin_predict,normalize="true"),sep="\n")
print("Poly",confusion_matrix(test_y,poly_predict,normalize="true"),sep="\n")
print("Radial",confusion_matrix(test_y,rbf_predict,normalize="true"),sep="\n")
print("Random Forest",confusion_matrix(test_y,random_predict,normalize="true"),sep="\n")

In [None]:
fig = go.Figure() 
model_names = ['Sigmoid SVM', 'Radial SVM', 'Linear SVM', 'Polynomial SVM', 'Neural Network']

model_accuracy = [sigmoid_score, rbf_score, lin_score, poly_score, tf_score]

fig.add_trace(go.Bar(x = model_names, 
                    y = model_accuracy, 
                    text = model_accuracy, 
                    textposition = 'auto'))
fig.update_layout(title = 'Model Accuracy Scores Numeric Prediction')

fig.update_yaxes(title_text = 'Accuracy Score') 
fig.update_xaxes(title_text = "Model")
fig.show()