<h1>Table of Contents<span class="tocSkip"></span></h1>
<div class="toc"><ul class="toc-item"><li><span><a href="#Load-Data" data-toc-modified-id="Load-Data-1"><span class="toc-item-num">1&nbsp;&nbsp;</span>Load Data</a></span></li><li><span><a href="#Preprocess-Data" data-toc-modified-id="Preprocess-Data-2"><span class="toc-item-num">2&nbsp;&nbsp;</span>Preprocess Data</a></span><ul class="toc-item"><li><span><a href="#Convert-Score-Into-Grade" data-toc-modified-id="Convert-Score-Into-Grade-2.1"><span class="toc-item-num">2.1&nbsp;&nbsp;</span>Convert Score Into Grade</a></span></li><li><span><a href="#Map-Grade-To-Number" data-toc-modified-id="Map-Grade-To-Number-2.2"><span class="toc-item-num">2.2&nbsp;&nbsp;</span>Map Grade To Number</a></span></li><li><span><a href="#Deal-With-The-NaN-Value" data-toc-modified-id="Deal-With-The-NaN-Value-2.3"><span class="toc-item-num">2.3&nbsp;&nbsp;</span>Deal With The NaN Value</a></span></li><li><span><a href="#Deal-With-Non-Numeric-Column" data-toc-modified-id="Deal-With-Non-Numeric-Column-2.4"><span class="toc-item-num">2.4&nbsp;&nbsp;</span>Deal With Non Numeric Column</a></span></li><li><span><a href="#Drop-Unnecessary-Column" data-toc-modified-id="Drop-Unnecessary-Column-2.5"><span class="toc-item-num">2.5&nbsp;&nbsp;</span>Drop Unnecessary Column</a></span></li><li><span><a href="#Drop-Zero-Row" data-toc-modified-id="Drop-Zero-Row-2.6"><span class="toc-item-num">2.6&nbsp;&nbsp;</span>Drop Zero Row</a></span></li><li><span><a href="#Split-Training-Set-And-Test-Set" data-toc-modified-id="Split-Training-Set-And-Test-Set-2.7"><span class="toc-item-num">2.7&nbsp;&nbsp;</span>Split Training Set And Test Set</a></span></li></ul></li><li><span><a href="#Create-Dataset-Class" data-toc-modified-id="Create-Dataset-Class-3"><span class="toc-item-num">3&nbsp;&nbsp;</span>Create Dataset Class</a></span><ul class="toc-item"><li><span><a href="#Create-Training-Set-Class" data-toc-modified-id="Create-Training-Set-Class-3.1"><span class="toc-item-num">3.1&nbsp;&nbsp;</span>Create Training Set Class</a></span></li><li><span><a href="#Create-Test-Set-Class" data-toc-modified-id="Create-Test-Set-Class-3.2"><span class="toc-item-num">3.2&nbsp;&nbsp;</span>Create Test Set Class</a></span></li></ul></li><li><span><a href="#Create-Dataloader" data-toc-modified-id="Create-Dataloader-4"><span class="toc-item-num">4&nbsp;&nbsp;</span>Create Dataloader</a></span></li><li><span><a href="#Define-Model" data-toc-modified-id="Define-Model-5"><span class="toc-item-num">5&nbsp;&nbsp;</span>Define Model</a></span></li><li><span><a href="#Test-Performance" data-toc-modified-id="Test-Performance-6"><span class="toc-item-num">6&nbsp;&nbsp;</span>Test Performance</a></span></li></ul></div>

##### Load Package

In [1]:
import pandas as pd
import seaborn as sns
import numpy as np
import random

In [2]:
from sklearn.model_selection import train_test_split

In [3]:
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader

##### Set Up Seed

In [4]:
def setup_seed(seed):
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministric = True

In [5]:
setup_seed(9)

# Load Data

In [6]:
df = pd.read_excel("Financial_E_Category.xlsx")

In [7]:
df.head()

Unnamed: 0,Organization ID,Fiscal Year,Ticker,Product Innovation (Inactive),Resource Use Score,Emissions Score,Environmental Innovation Score,Environmental Score
0,18183,2002,AXP,0.266936,0.0,0.0,0.0,0.180989
1,18183,2003,AXP,0.248025,0.0,0.0,0.0,0.161219
2,18183,2004,AXP,0.250626,0.0,0.0,0.0,0.180355
3,18183,2005,AXP,0.232798,0.0,0.0,0.0,0.149305
4,18183,2006,AXP,0.230322,0.217105,0.292208,0.0,0.61329


In [8]:
df.tail()

Unnamed: 0,Organization ID,Fiscal Year,Ticker,Product Innovation (Inactive),Resource Use Score,Emissions Score,Environmental Innovation Score,Environmental Score
697,114217052,2016,SYF,0.239906,0.0,0.0,0.0,0.14655
698,114217052,2017,SYF,0.250259,0.0,0.0,0.0,0.156625
699,114217052,2018,SYF,0.209234,0.152235,0.199187,0.0,0.195295
700,114217052,2019,SYF,0.199357,0.146991,0.201525,0.0,0.139922
701,114217052,2020,SYF,0.199043,0.586653,0.778085,0.0,0.623711


In [9]:
df.shape

(702, 8)

In [10]:
df.dtypes

Organization ID                     int64
Fiscal Year                         int64
Ticker                             object
Product Innovation (Inactive)     float64
Resource Use Score                float64
Emissions Score                   float64
Environmental Innovation Score    float64
Environmental Score               float64
dtype: object

# Preprocess Data

## Convert Score Into Grade

In [11]:
def convert_score_t_grade(score):
    if 0 <= score <= 0.083333:
        return "D-"
    
    elif 0.083333 < score <= 0.166666:
        return "D"
    
    elif 0.166666 < score <= 0.250000:
        return "D+"
    
    elif 0.250000 < score <= 0.333333:
        return "C-"
    
    elif 0.333333 < score <= 0.416666:
        return "C"
    
    elif 0.416666 < score <= 0.500000:
        return "C+"
    
    elif 0.500000 < score <= 0.583333:
        return "B-"
    
    elif 0.583333 < score <= 0.666666:
        return "B+"
    
    elif 0.666666 < score <= 0.750000:
        return "B+"
    
    elif 0.750000 < score <= 0.833333:
        return "A-"
    
    elif 0.833333 < score <= 0.916666:
        return "A"
    
    elif 0.916666 < score <= 1:
        return "A+"

In [12]:
df["E_Grade"] = df["Environmental Score"].apply(convert_score_t_grade)

In [13]:
df["E_Grade"].value_counts()

D     188
A+    133
A      89
D+     88
B+     75
A-     52
C      24
B-     23
C-     16
C+     12
Name: E_Grade, dtype: int64

## Map Grade To Number

In [14]:
grade_to_number = {"A+": 0,
                   "A": 1,
                   "A-": 2,
                   "B+": 3,
                   "B": 4,
                   "B-": 5,
                   "C+": 6,
                   "C": 7,
                   "C-": 8,
                   "D+": 9,
                   "D": 10, 
                   "D-": 11}

In [15]:
df["E_num"] = df["E_Grade"].map(grade_to_number)

In [16]:
df

Unnamed: 0,Organization ID,Fiscal Year,Ticker,Product Innovation (Inactive),Resource Use Score,Emissions Score,Environmental Innovation Score,Environmental Score,E_Grade,E_num
0,18183,2002,AXP,0.266936,0.000000,0.000000,0.0,0.180989,D+,9.0
1,18183,2003,AXP,0.248025,0.000000,0.000000,0.0,0.161219,D,10.0
2,18183,2004,AXP,0.250626,0.000000,0.000000,0.0,0.180355,D+,9.0
3,18183,2005,AXP,0.232798,0.000000,0.000000,0.0,0.149305,D,10.0
4,18183,2006,AXP,0.230322,0.217105,0.292208,0.0,0.613290,B+,3.0
...,...,...,...,...,...,...,...,...,...,...
697,114217052,2016,SYF,0.239906,0.000000,0.000000,0.0,0.146550,D,10.0
698,114217052,2017,SYF,0.250259,0.000000,0.000000,0.0,0.156625,D,10.0
699,114217052,2018,SYF,0.209234,0.152235,0.199187,0.0,0.195295,D+,9.0
700,114217052,2019,SYF,0.199357,0.146991,0.201525,0.0,0.139922,D,10.0


## Deal With The NaN Value

In [17]:
df = df.fillna(0)

In [18]:
df

Unnamed: 0,Organization ID,Fiscal Year,Ticker,Product Innovation (Inactive),Resource Use Score,Emissions Score,Environmental Innovation Score,Environmental Score,E_Grade,E_num
0,18183,2002,AXP,0.266936,0.000000,0.000000,0.0,0.180989,D+,9.0
1,18183,2003,AXP,0.248025,0.000000,0.000000,0.0,0.161219,D,10.0
2,18183,2004,AXP,0.250626,0.000000,0.000000,0.0,0.180355,D+,9.0
3,18183,2005,AXP,0.232798,0.000000,0.000000,0.0,0.149305,D,10.0
4,18183,2006,AXP,0.230322,0.217105,0.292208,0.0,0.613290,B+,3.0
...,...,...,...,...,...,...,...,...,...,...
697,114217052,2016,SYF,0.239906,0.000000,0.000000,0.0,0.146550,D,10.0
698,114217052,2017,SYF,0.250259,0.000000,0.000000,0.0,0.156625,D,10.0
699,114217052,2018,SYF,0.209234,0.152235,0.199187,0.0,0.195295,D+,9.0
700,114217052,2019,SYF,0.199357,0.146991,0.201525,0.0,0.139922,D,10.0


## Deal With Non Numeric Column

In [19]:
df = df.set_index(["Fiscal Year", "Ticker"])

In [20]:
df = df.select_dtypes(["int64", "float64"])

## Drop Unnecessary Column

In [21]:
df = df.drop(columns = ["Organization ID", "Environmental Score"])

In [22]:
df

Unnamed: 0_level_0,Unnamed: 1_level_0,Product Innovation (Inactive),Resource Use Score,Emissions Score,Environmental Innovation Score,E_num
Fiscal Year,Ticker,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2002,AXP,0.266936,0.000000,0.000000,0.0,9.0
2003,AXP,0.248025,0.000000,0.000000,0.0,10.0
2004,AXP,0.250626,0.000000,0.000000,0.0,9.0
2005,AXP,0.232798,0.000000,0.000000,0.0,10.0
2006,AXP,0.230322,0.217105,0.292208,0.0,3.0
...,...,...,...,...,...,...
2016,SYF,0.239906,0.000000,0.000000,0.0,10.0
2017,SYF,0.250259,0.000000,0.000000,0.0,10.0
2018,SYF,0.209234,0.152235,0.199187,0.0,9.0
2019,SYF,0.199357,0.146991,0.201525,0.0,10.0


## Drop Zero Row

In [23]:
# condition = (df["Resource Use Score"] == 0) & \
#             (df["Emissions Score"] == 0) & \
#             (df["Environmental Innovation Score"] == 0 )

In [24]:
# df = df[~condition]

## Split Training Set And Test Set

In [25]:
x_train, x_test, y_train, y_test = train_test_split(df.iloc[:, :],
                                                    df.iloc[:, -1],
                                                    test_size = 0.3,
                                                    random_state = 42)

print(x_train.shape)
print(x_test.shape)

(491, 5)
(211, 5)


# Create Dataset Class

## Create Training Set Class

In [26]:
class TrainingSet(Dataset):
    
    def __init__(self):
        xy = x_train.values
        
        self.x = xy[:, :-1].astype(np.float32)
        #self.x = scaler.fit_transform(self.x)
        self.y = xy[:, -1].astype(np.int64)
        
        self.x = torch.from_numpy(self.x)
        self.y = torch.from_numpy(self.y)
        
        self.n_samples = xy.shape[0]
        
    def __getitem__(self, index):
        return self.x[index], self.y[index]
    
    def __len__(self):
        return self.n_samples

In [27]:
training_set = TrainingSet()

In [28]:
first_data_training = training_set[0]

feature, label = first_data_training
print(feature.shape)
print(label)

torch.Size([4])
tensor(1)


## Create Test Set Class

In [29]:
class TestSet(Dataset):
    
    def __init__(self):
        xy = x_test.values
        
        self.x = xy[:, : -1].astype(np.float32)
        #self.x = scaler.fit_transform(self.x)
        self.y = xy[:, -1].astype(np.int64)
        
        self.x = torch.from_numpy(self.x)
        self.y = torch.from_numpy(self.y)
        
        self.n_samples = xy.shape[0]
        
    def __getitem__(self, index):
        return self.x[index], self.y[index]
    
    def __len__(self):
        return self.n_samples

In [30]:
test_set = TestSet()

In [31]:
first_data_test = test_set[0]

feature, label = first_data_test
print(feature.shape)
print(label)

torch.Size([4])
tensor(9)


# Create Dataloader

In [32]:
train_loader = DataLoader(dataset = training_set,
                          batch_size = 25,
                          shuffle = True)

test_loader = DataLoader(dataset = test_set,
                         batch_size = 25,
                         shuffle = True)

In [33]:
dataiter = iter(train_loader)

data = dataiter.next()

In [34]:
features, labels = data

print(features.shape)
print(labels.shape)

torch.Size([25, 4])
torch.Size([25])


# Define Model

In [35]:
class NeuralNetwork(nn.Module):
    def __init__(self, input_size, output_size):
        super(NeuralNetwork, self).__init__()
        
        self.lin_start = nn.Linear(input_size, 800)
        #self.lin1 = nn.Linear(800, 400)
        self.lin_end = nn.Linear(800, output_size)
        self.relu = nn.ReLU()
        
    def forward(self, x):
        out = self.lin_start(x)
        out = self.relu(out)
        
        #out = self.lin1(out)
        #out = self.relu(out)
        
        out = self.lin_end(out)
        
        return out

In [36]:
input_size = df.shape[1] - 1
output_size = 12

model = NeuralNetwork(input_size, output_size)

In [37]:
criterion = nn.CrossEntropyLoss()

In [38]:
learning_rate = 0.001

optimizer = torch.optim.Adam(model.parameters(),
                             lr = learning_rate)

In [39]:
num_epochs = 300

for epoch in range(num_epochs):
    for i, (features, labels) in enumerate(train_loader):
        
        y_pred = model(features)
        loss = criterion(y_pred, labels)
        
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()
        
        if (i+1)%5 == 0:
            print(f'epoch {epoch+1}/{num_epochs}, step{i+1}/{len(train_loader)}, loss = {loss.item():.4f}')

epoch 1/300, step5/20, loss = 2.1381
epoch 1/300, step10/20, loss = 1.9030
epoch 1/300, step15/20, loss = 1.7143
epoch 1/300, step20/20, loss = 1.5983
epoch 2/300, step5/20, loss = 1.4788
epoch 2/300, step10/20, loss = 1.6010
epoch 2/300, step15/20, loss = 1.1101
epoch 2/300, step20/20, loss = 1.9270
epoch 3/300, step5/20, loss = 1.2633
epoch 3/300, step10/20, loss = 1.4508
epoch 3/300, step15/20, loss = 1.2480
epoch 3/300, step20/20, loss = 1.4733
epoch 4/300, step5/20, loss = 1.0354
epoch 4/300, step10/20, loss = 1.0109
epoch 4/300, step15/20, loss = 0.9843
epoch 4/300, step20/20, loss = 1.4798
epoch 5/300, step5/20, loss = 1.1518
epoch 5/300, step10/20, loss = 1.0798
epoch 5/300, step15/20, loss = 1.3501
epoch 5/300, step20/20, loss = 0.8768
epoch 6/300, step5/20, loss = 1.4138
epoch 6/300, step10/20, loss = 1.0110
epoch 6/300, step15/20, loss = 1.1563
epoch 6/300, step20/20, loss = 1.0019
epoch 7/300, step5/20, loss = 0.8662
epoch 7/300, step10/20, loss = 0.9941
epoch 7/300, step15

epoch 55/300, step15/20, loss = 0.6910
epoch 55/300, step20/20, loss = 0.7392
epoch 56/300, step5/20, loss = 0.8432
epoch 56/300, step10/20, loss = 0.8532
epoch 56/300, step15/20, loss = 0.6302
epoch 56/300, step20/20, loss = 0.4598
epoch 57/300, step5/20, loss = 0.9113
epoch 57/300, step10/20, loss = 0.8374
epoch 57/300, step15/20, loss = 0.6999
epoch 57/300, step20/20, loss = 0.4149
epoch 58/300, step5/20, loss = 0.8456
epoch 58/300, step10/20, loss = 0.6314
epoch 58/300, step15/20, loss = 0.6510
epoch 58/300, step20/20, loss = 0.7563
epoch 59/300, step5/20, loss = 0.6094
epoch 59/300, step10/20, loss = 0.5474
epoch 59/300, step15/20, loss = 0.7597
epoch 59/300, step20/20, loss = 0.4806
epoch 60/300, step5/20, loss = 0.8424
epoch 60/300, step10/20, loss = 0.6358
epoch 60/300, step15/20, loss = 0.6540
epoch 60/300, step20/20, loss = 0.6621
epoch 61/300, step5/20, loss = 0.8561
epoch 61/300, step10/20, loss = 0.6098
epoch 61/300, step15/20, loss = 0.7378
epoch 61/300, step20/20, loss =

epoch 108/300, step20/20, loss = 0.5898
epoch 109/300, step5/20, loss = 0.4798
epoch 109/300, step10/20, loss = 0.7227
epoch 109/300, step15/20, loss = 0.7052
epoch 109/300, step20/20, loss = 0.5913
epoch 110/300, step5/20, loss = 0.5568
epoch 110/300, step10/20, loss = 0.4033
epoch 110/300, step15/20, loss = 0.6191
epoch 110/300, step20/20, loss = 0.5018
epoch 111/300, step5/20, loss = 0.4233
epoch 111/300, step10/20, loss = 0.6579
epoch 111/300, step15/20, loss = 1.0502
epoch 111/300, step20/20, loss = 0.8762
epoch 112/300, step5/20, loss = 0.5714
epoch 112/300, step10/20, loss = 0.4924
epoch 112/300, step15/20, loss = 0.9374
epoch 112/300, step20/20, loss = 0.6877
epoch 113/300, step5/20, loss = 0.7079
epoch 113/300, step10/20, loss = 0.4344
epoch 113/300, step15/20, loss = 0.5857
epoch 113/300, step20/20, loss = 0.5479
epoch 114/300, step5/20, loss = 0.6216
epoch 114/300, step10/20, loss = 1.0052
epoch 114/300, step15/20, loss = 0.6244
epoch 114/300, step20/20, loss = 0.5988
epoch 

epoch 160/300, step15/20, loss = 0.5002
epoch 160/300, step20/20, loss = 0.6341
epoch 161/300, step5/20, loss = 0.7125
epoch 161/300, step10/20, loss = 0.6152
epoch 161/300, step15/20, loss = 0.4282
epoch 161/300, step20/20, loss = 0.3753
epoch 162/300, step5/20, loss = 0.6287
epoch 162/300, step10/20, loss = 0.4386
epoch 162/300, step15/20, loss = 0.7611
epoch 162/300, step20/20, loss = 0.7406
epoch 163/300, step5/20, loss = 0.6293
epoch 163/300, step10/20, loss = 0.6340
epoch 163/300, step15/20, loss = 0.5810
epoch 163/300, step20/20, loss = 0.4739
epoch 164/300, step5/20, loss = 0.7571
epoch 164/300, step10/20, loss = 0.6024
epoch 164/300, step15/20, loss = 0.6871
epoch 164/300, step20/20, loss = 0.5644
epoch 165/300, step5/20, loss = 0.7960
epoch 165/300, step10/20, loss = 0.7583
epoch 165/300, step15/20, loss = 0.4289
epoch 165/300, step20/20, loss = 0.9698
epoch 166/300, step5/20, loss = 0.7118
epoch 166/300, step10/20, loss = 0.8255
epoch 166/300, step15/20, loss = 0.6008
epoch 

epoch 217/300, step5/20, loss = 0.5077
epoch 217/300, step10/20, loss = 0.4836
epoch 217/300, step15/20, loss = 0.5187
epoch 217/300, step20/20, loss = 0.5807
epoch 218/300, step5/20, loss = 0.5062
epoch 218/300, step10/20, loss = 0.6721
epoch 218/300, step15/20, loss = 0.6791
epoch 218/300, step20/20, loss = 0.7310
epoch 219/300, step5/20, loss = 0.5043
epoch 219/300, step10/20, loss = 0.3192
epoch 219/300, step15/20, loss = 0.4994
epoch 219/300, step20/20, loss = 0.5213
epoch 220/300, step5/20, loss = 0.6359
epoch 220/300, step10/20, loss = 0.4146
epoch 220/300, step15/20, loss = 0.4796
epoch 220/300, step20/20, loss = 0.5682
epoch 221/300, step5/20, loss = 0.7135
epoch 221/300, step10/20, loss = 0.5328
epoch 221/300, step15/20, loss = 0.5920
epoch 221/300, step20/20, loss = 0.3797
epoch 222/300, step5/20, loss = 0.4126
epoch 222/300, step10/20, loss = 0.6664
epoch 222/300, step15/20, loss = 0.3465
epoch 222/300, step20/20, loss = 0.3226
epoch 223/300, step5/20, loss = 0.4178
epoch 2

epoch 268/300, step20/20, loss = 0.3101
epoch 269/300, step5/20, loss = 0.8061
epoch 269/300, step10/20, loss = 0.3991
epoch 269/300, step15/20, loss = 0.4973
epoch 269/300, step20/20, loss = 0.4149
epoch 270/300, step5/20, loss = 0.6146
epoch 270/300, step10/20, loss = 0.5314
epoch 270/300, step15/20, loss = 0.5955
epoch 270/300, step20/20, loss = 0.5582
epoch 271/300, step5/20, loss = 0.3710
epoch 271/300, step10/20, loss = 0.7649
epoch 271/300, step15/20, loss = 0.5172
epoch 271/300, step20/20, loss = 0.4909
epoch 272/300, step5/20, loss = 0.5893
epoch 272/300, step10/20, loss = 0.5621
epoch 272/300, step15/20, loss = 0.5206
epoch 272/300, step20/20, loss = 0.9362
epoch 273/300, step5/20, loss = 0.5031
epoch 273/300, step10/20, loss = 0.7991
epoch 273/300, step15/20, loss = 0.8105
epoch 273/300, step20/20, loss = 0.7271
epoch 274/300, step5/20, loss = 0.6131
epoch 274/300, step10/20, loss = 0.8504
epoch 274/300, step15/20, loss = 0.5925
epoch 274/300, step20/20, loss = 0.3052
epoch 

# Test Performance

In [40]:
with torch.no_grad():
    n_correct = 0
    n_samples = 0
    
    for features_t, labels_t in test_loader:
        
        y_pred_t = model(features_t)
        
        _, predictions = torch.max(y_pred_t, 1)
        n_samples += labels_t.shape[0]
        n_correct += (predictions == labels_t).sum().item()
    
    acc = 100*n_correct/n_samples
    
    print(f'accuracy = {acc}')

accuracy = 61.611374407582936
