<h1>Table of Contents<span class="tocSkip"></span></h1>
<div class="toc"><ul class="toc-item"><li><span><a href="#Load-Data" data-toc-modified-id="Load-Data-1"><span class="toc-item-num">1&nbsp;&nbsp;</span>Load Data</a></span></li><li><span><a href="#Preprocess-Data" data-toc-modified-id="Preprocess-Data-2"><span class="toc-item-num">2&nbsp;&nbsp;</span>Preprocess Data</a></span><ul class="toc-item"><li><span><a href="#Convert-Score-Into-Grade" data-toc-modified-id="Convert-Score-Into-Grade-2.1"><span class="toc-item-num">2.1&nbsp;&nbsp;</span>Convert Score Into Grade</a></span></li><li><span><a href="#Map-Grade-To-Number" data-toc-modified-id="Map-Grade-To-Number-2.2"><span class="toc-item-num">2.2&nbsp;&nbsp;</span>Map Grade To Number</a></span></li><li><span><a href="#Deal-With-The-NaN-Value" data-toc-modified-id="Deal-With-The-NaN-Value-2.3"><span class="toc-item-num">2.3&nbsp;&nbsp;</span>Deal With The NaN Value</a></span></li><li><span><a href="#Deal-With-Non-Numeric-Column" data-toc-modified-id="Deal-With-Non-Numeric-Column-2.4"><span class="toc-item-num">2.4&nbsp;&nbsp;</span>Deal With Non Numeric Column</a></span></li><li><span><a href="#Drop-Unnecessary-Column" data-toc-modified-id="Drop-Unnecessary-Column-2.5"><span class="toc-item-num">2.5&nbsp;&nbsp;</span>Drop Unnecessary Column</a></span></li><li><span><a href="#Split-Training-Set-And-Test-Set" data-toc-modified-id="Split-Training-Set-And-Test-Set-2.6"><span class="toc-item-num">2.6&nbsp;&nbsp;</span>Split Training Set And Test Set</a></span></li></ul></li><li><span><a href="#Create-Dataset-Class" data-toc-modified-id="Create-Dataset-Class-3"><span class="toc-item-num">3&nbsp;&nbsp;</span>Create Dataset Class</a></span><ul class="toc-item"><li><span><a href="#Create-Training-Set-Class" data-toc-modified-id="Create-Training-Set-Class-3.1"><span class="toc-item-num">3.1&nbsp;&nbsp;</span>Create Training Set Class</a></span></li><li><span><a href="#Create-Test-Set-Class" data-toc-modified-id="Create-Test-Set-Class-3.2"><span class="toc-item-num">3.2&nbsp;&nbsp;</span>Create Test Set Class</a></span></li></ul></li><li><span><a href="#Create-Dataloader" data-toc-modified-id="Create-Dataloader-4"><span class="toc-item-num">4&nbsp;&nbsp;</span>Create Dataloader</a></span></li><li><span><a href="#Define-Model" data-toc-modified-id="Define-Model-5"><span class="toc-item-num">5&nbsp;&nbsp;</span>Define Model</a></span></li><li><span><a href="#Test-Performance" data-toc-modified-id="Test-Performance-6"><span class="toc-item-num">6&nbsp;&nbsp;</span>Test Performance</a></span></li></ul></div>

##### Load Package

In [1]:
import pandas as pd
import seaborn as sns
import numpy as np
import random

In [2]:
from sklearn.model_selection import train_test_split

In [3]:
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader

##### Set Up Seed

In [4]:
def setup_seed(seed):
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministric = True

In [5]:
setup_seed(9)

# Load Data

In [6]:
df = pd.read_excel("Financial_G.xlsx")

In [7]:
df.head()

Unnamed: 0,Organization ID,Fiscal Year,Ticker,Special Values Flag,License Flag,Audit Committee Independence,Audit Committee Non-Executive Member,Compensation Committee Independence,Compensation Committee Non-Executive Member,Nomination Committee Independence,...,Board Member Membership Limits,Vision and Strategy Objectives/Integrated Strategy (inactive),CSR Sustainability External Audit Name,Single Biggest Owner Name (Inactive),Advance Notice Period,Poison Pill Adoption Date,Poison Pill Expiration Date,Feed Date,Fiscal Year End Date,Corporate Governance Score
0,18183,2002,AXP,2,1,,,,,,...,,,,"Warren Buffett, Berkshire Hathaway Inc. and su...",,,,2020-08-02,2002-12-31,0.937708
1,18183,2003,AXP,2,1,,,,,,...,,,,"Warren Buffett, Berkshire Hathaway Inc. and su...",,,,2020-08-02,2003-12-31,0.836389
2,18183,2004,AXP,2,1,,,,,,...,,,,Warren Buffett,,,,2020-08-02,2004-12-31,0.914682
3,18183,2005,AXP,2,1,,,,,,...,,,,Warren Buffett,,,,2020-08-02,2005-12-31,0.893765
4,18183,2006,AXP,2,1,,,,,,...,No Limit,,,Warren Buffett,,,,2020-08-02,2006-12-31,0.831624


In [8]:
df.tail()

Unnamed: 0,Organization ID,Fiscal Year,Ticker,Special Values Flag,License Flag,Audit Committee Independence,Audit Committee Non-Executive Member,Compensation Committee Independence,Compensation Committee Non-Executive Member,Nomination Committee Independence,...,Board Member Membership Limits,Vision and Strategy Objectives/Integrated Strategy (inactive),CSR Sustainability External Audit Name,Single Biggest Owner Name (Inactive),Advance Notice Period,Poison Pill Adoption Date,Poison Pill Expiration Date,Feed Date,Fiscal Year End Date,Corporate Governance Score
696,114217052,2016,SYF,2,1,,,,,,...,No Limit,,,,90.0,,,2021-05-30,2016-12-31,0.716048
697,114217052,2017,SYF,2,1,,,,,,...,No Limit,,,,90.0,,,2022-04-10,2017-12-31,0.630861
698,114217052,2018,SYF,2,1,,,,,,...,No Limit,,,,90.0,,,2021-07-11,2018-12-31,0.888813
699,114217052,2019,SYF,2,1,,,,,,...,No Limit,,,,90.0,,,2022-04-10,2019-12-31,0.728224
700,114217052,2020,SYF,2,1,,,,,,...,No Limit,,,,90.0,,,2022-04-24,2020-12-31,0.834329


In [9]:
df.shape

(701, 145)

In [10]:
df.dtypes

Organization ID                         int64
Fiscal Year                             int64
Ticker                                 object
Special Values Flag                     int64
License Flag                            int64
                                    ...      
Poison Pill Adoption Date              object
Poison Pill Expiration Date            object
Feed Date                      datetime64[ns]
Fiscal Year End Date           datetime64[ns]
Corporate Governance Score            float64
Length: 145, dtype: object

# Preprocess Data

## Convert Score Into Grade

In [11]:
def convert_score_t_grade(score):
    if 0 <= score <= 0.083333:
        return "D-"
    
    elif 0.083333 < score <= 0.166666:
        return "D"
    
    elif 0.166666 < score <= 0.250000:
        return "D+"
    
    elif 0.250000 < score <= 0.333333:
        return "C-"
    
    elif 0.333333 < score <= 0.416666:
        return "C"
    
    elif 0.416666 < score <= 0.500000:
        return "C+"
    
    elif 0.500000 < score <= 0.583333:
        return "B-"
    
    elif 0.583333 < score <= 0.666666:
        return "B+"
    
    elif 0.666666 < score <= 0.750000:
        return "B+"
    
    elif 0.750000 < score <= 0.833333:
        return "A-"
    
    elif 0.833333 < score <= 0.916666:
        return "A"
    
    elif 0.916666 < score <= 1:
        return "A+"

In [12]:
df["G_Grade"] = df["Corporate Governance Score"].apply(convert_score_t_grade)

In [13]:
df["G_Grade"].value_counts()

A     197
A-    184
B+    182
A+     66
B-     33
C+     14
C      13
C-      7
D+      4
D       1
Name: G_Grade, dtype: int64

## Map Grade To Number

In [14]:
grade_to_number = {"A+": 0,
                   "A": 1,
                   "A-": 2,
                   "B+": 3,
                   "B": 4,
                   "B-": 5,
                   "C+": 6,
                   "C": 7,
                   "C-": 8,
                   "D+": 9,
                   "D": 10, 
                   "D-": 11}

In [15]:
df["G_num"] = df["G_Grade"].map(grade_to_number)

In [16]:
df

Unnamed: 0,Organization ID,Fiscal Year,Ticker,Special Values Flag,License Flag,Audit Committee Independence,Audit Committee Non-Executive Member,Compensation Committee Independence,Compensation Committee Non-Executive Member,Nomination Committee Independence,...,CSR Sustainability External Audit Name,Single Biggest Owner Name (Inactive),Advance Notice Period,Poison Pill Adoption Date,Poison Pill Expiration Date,Feed Date,Fiscal Year End Date,Corporate Governance Score,G_Grade,G_num
0,18183,2002,AXP,2,1,,,,,,...,,"Warren Buffett, Berkshire Hathaway Inc. and su...",,,,2020-08-02,2002-12-31,0.937708,A+,0
1,18183,2003,AXP,2,1,,,,,,...,,"Warren Buffett, Berkshire Hathaway Inc. and su...",,,,2020-08-02,2003-12-31,0.836389,A,1
2,18183,2004,AXP,2,1,,,,,,...,,Warren Buffett,,,,2020-08-02,2004-12-31,0.914682,A,1
3,18183,2005,AXP,2,1,,,,,,...,,Warren Buffett,,,,2020-08-02,2005-12-31,0.893765,A,1
4,18183,2006,AXP,2,1,,,,,,...,,Warren Buffett,,,,2020-08-02,2006-12-31,0.831624,A-,2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
696,114217052,2016,SYF,2,1,,,,,,...,,,90.0,,,2021-05-30,2016-12-31,0.716048,B+,3
697,114217052,2017,SYF,2,1,,,,,,...,,,90.0,,,2022-04-10,2017-12-31,0.630861,B+,3
698,114217052,2018,SYF,2,1,,,,,,...,,,90.0,,,2021-07-11,2018-12-31,0.888813,A,1
699,114217052,2019,SYF,2,1,,,,,,...,,,90.0,,,2022-04-10,2019-12-31,0.728224,B+,3


## Deal With The NaN Value

In [17]:
df = df.fillna(0)

In [18]:
df

Unnamed: 0,Organization ID,Fiscal Year,Ticker,Special Values Flag,License Flag,Audit Committee Independence,Audit Committee Non-Executive Member,Compensation Committee Independence,Compensation Committee Non-Executive Member,Nomination Committee Independence,...,CSR Sustainability External Audit Name,Single Biggest Owner Name (Inactive),Advance Notice Period,Poison Pill Adoption Date,Poison Pill Expiration Date,Feed Date,Fiscal Year End Date,Corporate Governance Score,G_Grade,G_num
0,18183,2002,AXP,2,1,0.0,0.0,0.0,0.0,0.0,...,0,"Warren Buffett, Berkshire Hathaway Inc. and su...",0.0,0,0,2020-08-02,2002-12-31,0.937708,A+,0
1,18183,2003,AXP,2,1,0.0,0.0,0.0,0.0,0.0,...,0,"Warren Buffett, Berkshire Hathaway Inc. and su...",0.0,0,0,2020-08-02,2003-12-31,0.836389,A,1
2,18183,2004,AXP,2,1,0.0,0.0,0.0,0.0,0.0,...,0,Warren Buffett,0.0,0,0,2020-08-02,2004-12-31,0.914682,A,1
3,18183,2005,AXP,2,1,0.0,0.0,0.0,0.0,0.0,...,0,Warren Buffett,0.0,0,0,2020-08-02,2005-12-31,0.893765,A,1
4,18183,2006,AXP,2,1,0.0,0.0,0.0,0.0,0.0,...,0,Warren Buffett,0.0,0,0,2020-08-02,2006-12-31,0.831624,A-,2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
696,114217052,2016,SYF,2,1,0.0,0.0,0.0,0.0,0.0,...,0,0,90.0,0,0,2021-05-30,2016-12-31,0.716048,B+,3
697,114217052,2017,SYF,2,1,0.0,0.0,0.0,0.0,0.0,...,0,0,90.0,0,0,2022-04-10,2017-12-31,0.630861,B+,3
698,114217052,2018,SYF,2,1,0.0,0.0,0.0,0.0,0.0,...,0,0,90.0,0,0,2021-07-11,2018-12-31,0.888813,A,1
699,114217052,2019,SYF,2,1,0.0,0.0,0.0,0.0,0.0,...,0,0,90.0,0,0,2022-04-10,2019-12-31,0.728224,B+,3


## Deal With Non Numeric Column

In [19]:
df = df.set_index(["Fiscal Year", "Ticker"])

In [20]:
df = df.select_dtypes(["int64", "float64"])

## Drop Unnecessary Column

In [21]:
df = df.drop(columns = ["Organization ID", "Corporate Governance Score"])

In [22]:
df

Unnamed: 0_level_0,Unnamed: 1_level_0,Special Values Flag,License Flag,Audit Committee Independence,Audit Committee Non-Executive Member,Compensation Committee Independence,Compensation Committee Non-Executive Member,Nomination Committee Independence,Nomination Committee Non-Executive Member,Number of Board Meetings,Board Meeting Attendance Average,...,Significant Company Transactions (M&A) Shareholders Approval,Fair Price Provision,Limitations on Removal of Directors,Advance Notice for Shareholder Proposals,Written Consent Requirements,Expanded Constituency Provision,Succession Plan for Executives,External Consultants,Advance Notice Period,G_num
Fiscal Year,Ticker,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
2002,AXP,2,1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0,0,0.0,0
2003,AXP,2,1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0,0,0.0,1
2004,AXP,2,1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0,0,0.0,1
2005,AXP,2,1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0,0,0.0,1
2006,AXP,2,1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0,0,0.0,2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2016,SYF,2,1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,1.0,0.0,0.0,1.0,0.0,0.0,1,1,90.0,3
2017,SYF,2,1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,1.0,0.0,0.0,1.0,0.0,0.0,1,1,90.0,3
2018,SYF,2,1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,1.0,0.0,0.0,1.0,0.0,0.0,1,1,90.0,1
2019,SYF,2,1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,1.0,0.0,0.0,1.0,0.0,0.0,1,1,90.0,3


## Split Training Set And Test Set

In [23]:
x_train, x_test, y_train, y_test = train_test_split(df.iloc[:, :],
                                                    df.iloc[:, -1],
                                                    test_size = 0.3,
                                                    random_state = 42)

print(x_train.shape)
print(x_test.shape)

(490, 133)
(211, 133)


# Create Dataset Class

## Create Training Set Class

In [24]:
class TrainingSet(Dataset):
    
    def __init__(self):
        xy = x_train.values
        
        self.x = xy[:, :-1].astype(np.float32)
        #self.x = scaler.fit_transform(self.x)
        self.y = xy[:, -1].astype(np.int64)
        
        self.x = torch.from_numpy(self.x)
        self.y = torch.from_numpy(self.y)
        
        self.n_samples = xy.shape[0]
        
    def __getitem__(self, index):
        return self.x[index], self.y[index]
    
    def __len__(self):
        return self.n_samples

In [25]:
training_set = TrainingSet()

In [26]:
first_data_training = training_set[0]

feature, label = first_data_training
print(feature.shape)
print(label)

torch.Size([132])
tensor(3)


## Create Test Set Class

In [27]:
class TestSet(Dataset):
    
    def __init__(self):
        xy = x_test.values
        
        self.x = xy[:, : -1].astype(np.float32)
        #self.x = scaler.fit_transform(self.x)
        self.y = xy[:, -1].astype(np.int64)
        
        self.x = torch.from_numpy(self.x)
        self.y = torch.from_numpy(self.y)
        
        self.n_samples = xy.shape[0]
        
    def __getitem__(self, index):
        return self.x[index], self.y[index]
    
    def __len__(self):
        return self.n_samples

In [28]:
test_set = TestSet()

In [29]:
first_data_test = test_set[0]

feature, label = first_data_test
print(feature.shape)
print(label)

torch.Size([132])
tensor(2)


# Create Dataloader

In [30]:
train_loader = DataLoader(dataset = training_set,
                          batch_size = 40,
                          shuffle = True)

test_loader = DataLoader(dataset = test_set,
                         batch_size = 40,
                         shuffle = True)

In [31]:
dataiter = iter(train_loader)

data = dataiter.next()

In [32]:
features, labels = data

print(features.shape)
print(labels.shape)

torch.Size([40, 132])
torch.Size([40])


# Define Model

In [33]:
class NeuralNetwork(nn.Module):
    def __init__(self, input_size, output_size):
        super(NeuralNetwork, self).__init__()
        
        self.lin_start = nn.Linear(input_size, 800)
        #self.lin1 = nn.Linear(800, 400)
        self.lin_end = nn.Linear(800, output_size)
        self.relu = nn.ReLU()
        
    def forward(self, x):
        out = self.lin_start(x)
        out = self.relu(out)
        
        #out = self.lin1(out)
        #out = self.relu(out)
        
        out = self.lin_end(out)
        
        return out

In [34]:
input_size = 132
output_size = 12

model = NeuralNetwork(input_size, output_size)

In [35]:
criterion = nn.CrossEntropyLoss()

In [36]:
learning_rate = 0.001

optimizer = torch.optim.Adam(model.parameters(),
                             lr = learning_rate)

In [37]:
num_epochs = 300

for epoch in range(num_epochs):
    for i, (features, labels) in enumerate(train_loader):
        
        y_pred = model(features)
        loss = criterion(y_pred, labels)
        
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()
        
        if (i+1)%5 == 0:
            print(f'epoch {epoch+1}/{num_epochs}, step{i+1}/{len(train_loader)}, loss = {loss.item():.4f}')

epoch 1/300, step5/13, loss = 2.0599
epoch 1/300, step10/13, loss = 2.5760
epoch 2/300, step5/13, loss = 2.1884
epoch 2/300, step10/13, loss = 2.2442
epoch 3/300, step5/13, loss = 1.6493
epoch 3/300, step10/13, loss = 1.4799
epoch 4/300, step5/13, loss = 1.5430
epoch 4/300, step10/13, loss = 1.7459
epoch 5/300, step5/13, loss = 1.5404
epoch 5/300, step10/13, loss = 1.8324
epoch 6/300, step5/13, loss = 1.4514
epoch 6/300, step10/13, loss = 1.6147
epoch 7/300, step5/13, loss = 1.5597
epoch 7/300, step10/13, loss = 1.3790
epoch 8/300, step5/13, loss = 1.5151
epoch 8/300, step10/13, loss = 1.3280
epoch 9/300, step5/13, loss = 1.5169
epoch 9/300, step10/13, loss = 1.4905
epoch 10/300, step5/13, loss = 1.7124
epoch 10/300, step10/13, loss = 1.2642
epoch 11/300, step5/13, loss = 1.5737
epoch 11/300, step10/13, loss = 1.4937
epoch 12/300, step5/13, loss = 1.4202
epoch 12/300, step10/13, loss = 1.2874
epoch 13/300, step5/13, loss = 1.4860
epoch 13/300, step10/13, loss = 1.2877
epoch 14/300, ste

epoch 108/300, step10/13, loss = 0.4310
epoch 109/300, step5/13, loss = 0.5251
epoch 109/300, step10/13, loss = 0.4349
epoch 110/300, step5/13, loss = 0.5164
epoch 110/300, step10/13, loss = 0.4679
epoch 111/300, step5/13, loss = 0.6394
epoch 111/300, step10/13, loss = 0.3895
epoch 112/300, step5/13, loss = 0.8763
epoch 112/300, step10/13, loss = 0.4624
epoch 113/300, step5/13, loss = 0.6959
epoch 113/300, step10/13, loss = 0.4979
epoch 114/300, step5/13, loss = 0.7498
epoch 114/300, step10/13, loss = 0.9220
epoch 115/300, step5/13, loss = 0.4528
epoch 115/300, step10/13, loss = 0.5550
epoch 116/300, step5/13, loss = 0.5241
epoch 116/300, step10/13, loss = 0.6002
epoch 117/300, step5/13, loss = 0.4950
epoch 117/300, step10/13, loss = 0.5822
epoch 118/300, step5/13, loss = 0.6530
epoch 118/300, step10/13, loss = 0.4023
epoch 119/300, step5/13, loss = 0.5763
epoch 119/300, step10/13, loss = 0.3719
epoch 120/300, step5/13, loss = 0.4803
epoch 120/300, step10/13, loss = 0.4401
epoch 121/30

epoch 212/300, step10/13, loss = 0.3221
epoch 213/300, step5/13, loss = 0.2874
epoch 213/300, step10/13, loss = 0.3550
epoch 214/300, step5/13, loss = 0.1948
epoch 214/300, step10/13, loss = 0.1685
epoch 215/300, step5/13, loss = 0.2902
epoch 215/300, step10/13, loss = 0.3509
epoch 216/300, step5/13, loss = 0.2587
epoch 216/300, step10/13, loss = 0.2251
epoch 217/300, step5/13, loss = 0.3130
epoch 217/300, step10/13, loss = 0.2521
epoch 218/300, step5/13, loss = 0.2348
epoch 218/300, step10/13, loss = 0.2211
epoch 219/300, step5/13, loss = 0.2176
epoch 219/300, step10/13, loss = 0.2049
epoch 220/300, step5/13, loss = 0.2174
epoch 220/300, step10/13, loss = 0.2558
epoch 221/300, step5/13, loss = 0.3374
epoch 221/300, step10/13, loss = 0.4660
epoch 222/300, step5/13, loss = 0.3266
epoch 222/300, step10/13, loss = 0.3652
epoch 223/300, step5/13, loss = 0.4448
epoch 223/300, step10/13, loss = 0.1545
epoch 224/300, step5/13, loss = 0.4094
epoch 224/300, step10/13, loss = 0.2473
epoch 225/30

# Test Performance

In [38]:
with torch.no_grad():
    n_correct = 0
    n_samples = 0
    
    for features_t, labels_t in test_loader:
        
        y_pred_t = model(features_t)
        
        _, predictions = torch.max(y_pred_t, 1)
        n_samples += labels_t.shape[0]
        n_correct += (predictions == labels_t).sum().item()
    
    acc = 100*n_correct/n_samples
    
    print(f'accuracy = {acc}')

accuracy = 41.23222748815166
