## Loading and Cleaning the Corporate Credit Rating Data

In [None]:
import numpy as np 
import pandas as pd
from matplotlib import pyplot as plt 

In [392]:
df = pd.read_csv('corporate_rating.csv')

In [393]:
print(df.shape[0], "records, each with", df.shape[1], "features")

2029 records, each with 31 features


In [394]:
df.Rating.value_counts()

BBB    671
BB     490
A      398
B      302
AA      89
CCC     64
AAA      7
CC       5
C        2
D        1
Name: Rating, dtype: int64

We have an unbalanced dataset, rebalance by separating between IG (1) and high-yield (0) bonds

In [395]:
grade_dict = {'AAA': 1,
              'AA': 1, 
              'A': 1,
              'BBB': 1,
              'BB': 0,
              'B': 0,
              'CCC': 0,
              'CC': 0,
              'C':0,
              'D':0}

df['Grade'] = df.Rating.map(grade_dict)

In [396]:
df.Grade.value_counts()

1    1165
0     864
Name: Grade, dtype: int64

In [397]:
df.reset_index(inplace = True, drop = True)

In [398]:
df.head()

Unnamed: 0,Rating,Name,Symbol,Rating Agency Name,Date,Sector,currentRatio,quickRatio,cashRatio,daysOfSalesOutstanding,...,freeCashFlowOperatingCashFlowRatio,freeCashFlowPerShare,cashPerShare,companyEquityMultiplier,ebitPerRevenue,enterpriseValueMultiple,operatingCashFlowPerShare,operatingCashFlowSalesRatio,payablesTurnover,Grade
0,A,Whirlpool Corporation,WHR,Egan-Jones Ratings Company,11/27/2015,Consumer Durables,0.945894,0.426395,0.09969,44.203245,...,0.437551,6.810673,9.809403,4.008012,0.049351,7.057088,15.565438,0.058638,3.906655,1
1,BBB,Whirlpool Corporation,WHR,Egan-Jones Ratings Company,2/13/2014,Consumer Durables,1.033559,0.498234,0.20312,38.991156,...,0.541997,8.625473,17.40227,3.156783,0.048857,6.460618,15.91425,0.067239,4.002846,1
2,BBB,Whirlpool Corporation,WHR,Fitch Ratings,3/6/2015,Consumer Durables,0.963703,0.451505,0.122099,50.841385,...,0.513185,9.693487,13.103448,4.094575,0.044334,10.49197,18.888889,0.074426,3.48351,1
3,BBB,Whirlpool Corporation,WHR,Fitch Ratings,6/15/2012,Consumer Durables,1.019851,0.510402,0.176116,41.161738,...,-0.14717,-1.015625,14.440104,3.63095,-0.012858,4.080741,6.901042,0.028394,4.58115,1
4,BBB,Whirlpool Corporation,WHR,Standard & Poor's Ratings Services,10/24/2016,Consumer Durables,0.957844,0.495432,0.141608,47.761126,...,0.451372,7.135348,14.257556,4.01278,0.05377,8.293505,15.808147,0.058065,3.85779,1


In [399]:
df.describe()

Unnamed: 0,currentRatio,quickRatio,cashRatio,daysOfSalesOutstanding,netProfitMargin,pretaxProfitMargin,grossProfitMargin,operatingProfitMargin,returnOnAssets,returnOnCapitalEmployed,...,freeCashFlowOperatingCashFlowRatio,freeCashFlowPerShare,cashPerShare,companyEquityMultiplier,ebitPerRevenue,enterpriseValueMultiple,operatingCashFlowPerShare,operatingCashFlowSalesRatio,payablesTurnover,Grade
count,2029.0,2029.0,2029.0,2029.0,2029.0,2029.0,2029.0,2029.0,2029.0,2029.0,...,2029.0,2029.0,2029.0,2029.0,2029.0,2029.0,2029.0,2029.0,2029.0,2029.0
mean,3.529607,2.653986,0.667364,333.795606,0.278447,0.431483,0.497968,0.587322,-37.517928,-73.974193,...,0.40955,5094.719,4227.549,3.323579,0.437454,48.287985,6515.123,1.447653,38.002718,0.574174
std,44.052361,32.944817,3.583943,4447.839583,6.064134,8.984982,0.525307,11.224622,1166.17222,2350.275719,...,3.796488,146915.6,122400.0,87.529866,8.984299,529.118961,177529.0,19.483294,758.923588,0.494589
min,-0.932005,-1.893266,-0.192736,-811.845623,-101.845815,-124.343612,-14.800817,-124.343612,-40213.17829,-87162.16216,...,-120.91601,-4912.742,-19.15035,-2555.419643,-124.343612,-3749.921337,-11950.49,-4.461837,-76.66285,0.0
25%,1.07193,0.602825,0.13063,22.905093,0.021006,0.025649,0.233127,0.04461,0.019176,0.028112,...,0.271478,0.4119924,1.566038,2.046822,0.028057,6.238066,2.356735,0.073886,2.205912,0.0
50%,1.493338,0.985679,0.297493,42.37412,0.064753,0.084965,0.414774,0.107895,0.045608,0.074421,...,0.644529,2.131742,3.686513,2.652456,0.087322,9.274398,4.352584,0.13305,5.759722,1.0
75%,2.166891,1.45382,0.624906,59.323563,0.114807,0.144763,0.849693,0.176181,0.077468,0.135036,...,0.836949,4.230253,8.086152,3.658331,0.149355,12.911759,7.319759,0.240894,9.480892,1.0
max,1725.505005,1139.541703,125.917417,115961.6374,198.517873,309.694856,2.702533,410.182214,0.487826,2.439504,...,34.594086,5753380.0,4786803.0,2562.871795,309.694856,11153.60709,6439270.0,688.526591,20314.8804,1.0


In [410]:
data = df.select_dtypes(exclude=["bool","object_"])
data.head()

Unnamed: 0,currentRatio,quickRatio,cashRatio,daysOfSalesOutstanding,netProfitMargin,pretaxProfitMargin,grossProfitMargin,operatingProfitMargin,returnOnAssets,returnOnCapitalEmployed,...,freeCashFlowOperatingCashFlowRatio,freeCashFlowPerShare,cashPerShare,companyEquityMultiplier,ebitPerRevenue,enterpriseValueMultiple,operatingCashFlowPerShare,operatingCashFlowSalesRatio,payablesTurnover,Grade
0,0.945894,0.426395,0.09969,44.203245,0.03748,0.049351,0.176631,0.06151,0.041189,0.091514,...,0.437551,6.810673,9.809403,4.008012,0.049351,7.057088,15.565438,0.058638,3.906655,1
1,1.033559,0.498234,0.20312,38.991156,0.044062,0.048857,0.175715,0.066546,0.053204,0.1048,...,0.541997,8.625473,17.40227,3.156783,0.048857,6.460618,15.91425,0.067239,4.002846,1
2,0.963703,0.451505,0.122099,50.841385,0.032709,0.044334,0.170843,0.059783,0.032497,0.075955,...,0.513185,9.693487,13.103448,4.094575,0.044334,10.49197,18.888889,0.074426,3.48351,1
3,1.019851,0.510402,0.176116,41.161738,0.020894,-0.012858,0.138059,0.04243,0.02569,-0.027015,...,-0.14717,-1.015625,14.440104,3.63095,-0.012858,4.080741,6.901042,0.028394,4.58115,1
4,0.957844,0.495432,0.141608,47.761126,0.042861,0.05377,0.17772,0.065354,0.046363,0.096945,...,0.451372,7.135348,14.257556,4.01278,0.05377,8.293505,15.808147,0.058065,3.85779,1


In [415]:
data = np.array(data)
np.random.shuffle(data)


X = data[:,0:-1]
X = (X-X.mean(axis=0))/X.std(axis=0)
X = X.T
X.shape

(25, 2029)

In [416]:
Y = data[:,-1]
Y.shape

(2029,)

In [417]:
# training set
Y_train = Y[0:1000]
X_train = X[:,0:1000]

# test set
Y_test = Y[1000:Y.size]
X_test = X[:,1000:Y.size]

print(Y_train.shape)
print(Y_test.shape)
print(X_train.shape)
print(X_test.shape)

(1000,)
(1029,)
(25, 1000)
(25, 1029)


## Building a Neural Network

In [418]:
def softmax(Z):
    A = np.exp(Z)/sum(np.exp(Z))
    return A 

def one_hot(Y):
    one_hot_Y = np.zeros((Y.max()+1, Y.size))
    counter = 0
    for i in Y:
        one_hot_Y[i, counter] = 1
        counter += 1
    return(one_hot_Y)

In [419]:
# layers 1 and 2
l2 = 2
l1 = round((2/3) * 25 + l2)

In [420]:
def gradient_descent(l1, l2, X, Y, alpha, iterations):
    
    _,m = X_train.shape
    
    W1 = np.random.rand(l1, 25) - 0.5
    b1 = np.random.rand(l1, 1) - 0.5
    W2 = np.random.rand(l2, l1) - 0.5
    b2 = np.random.rand(l2, 1) - 0.5 
    
    for i in range(iterations):
        # forward propagation
        Z1 = W1.dot(X) + b1
        A1 = np.maximum(Z1, 0)
        Z2 = W2.dot(A1) + b2
        A2 = softmax(Z2)
        
        # backward propagation
        Y = Y.astype('int64')
        one_hot_Y = one_hot(Y)
        dZ2 = A2 - one_hot_Y
        dW2 = 1 / m * dZ2.dot(A1.T)
        db2 = 1 / m * np.sum(dZ2)
        dZ1 = W2.T.dot(dZ2) * (Z1 > 0)
        dW1 = 1 / m * dZ1.dot(X.T)
        db1 = 1 / m * np.sum(dZ1)
        
        # update params
        W1 = W1 - alpha * dW1
        b1 = b1 - alpha * db1 
        W2 = W2 - alpha * dW2 
        b2 = b2 - alpha * db2
        
        predictions = np.argmax(A2, 0)
        accuracy = np.sum(predictions == Y) / Y.size
        
        if i % 5000 == 0:
            print("Accuracy:", accuracy, "at iteration", i)
    
    return accuracy

In [421]:
accuracy = gradient_descent(l1, l2, X_train, Y_train, 0.3, 200000)

Accuracy: 0.441 at iteration 0
Accuracy: 0.749 at iteration 5000
Accuracy: 0.774 at iteration 10000
Accuracy: 0.782 at iteration 15000
Accuracy: 0.789 at iteration 20000
Accuracy: 0.788 at iteration 25000
Accuracy: 0.798 at iteration 30000
Accuracy: 0.783 at iteration 35000
Accuracy: 0.804 at iteration 40000
Accuracy: 0.803 at iteration 45000
Accuracy: 0.807 at iteration 50000
Accuracy: 0.814 at iteration 55000
Accuracy: 0.808 at iteration 60000
Accuracy: 0.813 at iteration 65000
Accuracy: 0.817 at iteration 70000
Accuracy: 0.807 at iteration 75000
Accuracy: 0.808 at iteration 80000
Accuracy: 0.807 at iteration 85000
Accuracy: 0.823 at iteration 90000
Accuracy: 0.818 at iteration 95000
Accuracy: 0.817 at iteration 100000
Accuracy: 0.83 at iteration 105000
Accuracy: 0.835 at iteration 110000
Accuracy: 0.841 at iteration 115000
Accuracy: 0.836 at iteration 120000
Accuracy: 0.81 at iteration 125000
Accuracy: 0.805 at iteration 130000
Accuracy: 0.836 at iteration 135000
Accuracy: 0.84 at i

In [422]:
accuracy

0.86