In [1]:
import torch
import pandas as pd
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from torch.utils.data import DataLoader,TensorDataset
from torchvision import transforms
import numpy as np
import data as da

In [2]:
# 1. data processing
# 1.1 load the data

data_pro=da.Data()
df=data_pro.read_data()

# 1.2 scale and sparate the data
from sklearn.preprocessing import StandardScaler

xscaler = StandardScaler()
yscaler = StandardScaler()
X_scaler = xscaler.fit(df.iloc[:,:-1].values)
Y_scaler = yscaler.fit(df.iloc[:,-1].values.reshape(-1,1))

from sklearn.model_selection import train_test_split

X_train,X_test,y_train,y_test=train_test_split(df.iloc[:,:-1].values,df.iloc[:,-1].values.reshape(-1,1),test_size=.2,random_state=0)

# 1.3 transform x and y into pytorch format
from torchvision import transforms

X_train_scaler = xscaler.transform(X_train)
X_test_scaler = xscaler.transform(X_test)
y_train_scaler = yscaler.transform(y_train)
y_test_scaler = yscaler.transform(y_test)


In [3]:
# define and train a Multi-Layer Perceptron 
class MLPRegressor(nn.Module):
    def __init__(self, input_size, output_size):
        super(MLPRegressor, self).__init__()
        self.hidden1 = nn.Linear(input_size, 25)
        self.hidden2 = nn.Linear(25, 15)
        self.hidden3 = nn.Linear(15, output_size)
    # defines the forward pass
    def forward(self, input):
        x = F.relu(self.hidden1(input))
        x = F.relu(self.hidden2(x))
        return self.hidden3(x)
# instantiate this model 
model = MLPRegressor(X_train_scaler.shape[1], y_train_scaler.shape[1])

In [4]:
# generate an iterator for the training & testing datasets
train_dataloader = DataLoader(TensorDataset(torch.tensor(X_train_scaler).float(), torch.tensor(y_train_scaler).float()), batch_size = 2000, shuffle = False)
test_dataloader = DataLoader(TensorDataset(torch.tensor(X_test_scaler).float(), torch.tensor(y_test_scaler).float()), batch_size = 1000, shuffle = False)

In [5]:
# optimize the parameters of the model
optimizer = optim.SGD(model.parameters(), lr=0.1)
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=100, gamma=0.5)
loss_function = nn.MSELoss()

def train(epoch, train_loader):
    model.train()
    for batch_idx, (data, target) in enumerate(train_loader):
        optimizer.zero_grad()
        output = model(data)
        # loss function
        loss=loss_function(output, target)
        # backpropagation
        loss.backward()
        optimizer.step()
        scheduler.step()
        print('train epoch: {} [{}/{} ({:.0f}%)\tLoss: {:.6f}]'.format(
            epoch, batch_idx * len(data), len(train_loader.dataset),
            100.*batch_idx / len(train_loader), loss.item()))
        
def test(test_loader):
    with torch.no_grad():
        model.eval()
        test_loss = 0
        for data, target in test_loader:
            output = model(data)
            test_loss += F.mse_loss(target, output)
        print('test loss: {:.4f}'.format(test_loss))

In [6]:
# loop over the batches to train on the dataset for 100 times
for epoch in range(100):
    train(epoch, train_dataloader)
    test(test_dataloader)

train epoch: 0 [0/3918 (0%)	Loss: 0.972858]
train epoch: 0 [1918/3918 (50%)	Loss: 0.968778]
test loss: 1.0840
train epoch: 1 [0/3918 (0%)	Loss: 0.916492]
train epoch: 1 [1918/3918 (50%)	Loss: 0.936886]
test loss: 1.0640
train epoch: 2 [0/3918 (0%)	Loss: 0.885615]
train epoch: 2 [1918/3918 (50%)	Loss: 0.914433]
test loss: 1.0457
train epoch: 3 [0/3918 (0%)	Loss: 0.861892]
train epoch: 3 [1918/3918 (50%)	Loss: 0.893460]
test loss: 1.0263
train epoch: 4 [0/3918 (0%)	Loss: 0.840063]
train epoch: 4 [1918/3918 (50%)	Loss: 0.872468]
test loss: 1.0062
train epoch: 5 [0/3918 (0%)	Loss: 0.818908]
train epoch: 5 [1918/3918 (50%)	Loss: 0.851601]
test loss: 0.9862
train epoch: 6 [0/3918 (0%)	Loss: 0.798434]
train epoch: 6 [1918/3918 (50%)	Loss: 0.831464]
test loss: 0.9670
train epoch: 7 [0/3918 (0%)	Loss: 0.779247]
train epoch: 7 [1918/3918 (50%)	Loss: 0.812708]
test loss: 0.9493
train epoch: 8 [0/3918 (0%)	Loss: 0.761762]
train epoch: 8 [1918/3918 (50%)	Loss: 0.795654]
test loss: 0.9332
train epoc

train epoch: 77 [0/3918 (0%)	Loss: 0.609996]
train epoch: 77 [1918/3918 (50%)	Loss: 0.656860]
test loss: 0.7894
train epoch: 78 [0/3918 (0%)	Loss: 0.609617]
train epoch: 78 [1918/3918 (50%)	Loss: 0.656511]
test loss: 0.7889
train epoch: 79 [0/3918 (0%)	Loss: 0.609250]
train epoch: 79 [1918/3918 (50%)	Loss: 0.656165]
test loss: 0.7885
train epoch: 80 [0/3918 (0%)	Loss: 0.608880]
train epoch: 80 [1918/3918 (50%)	Loss: 0.655824]
test loss: 0.7880
train epoch: 81 [0/3918 (0%)	Loss: 0.608512]
train epoch: 81 [1918/3918 (50%)	Loss: 0.655475]
test loss: 0.7876
train epoch: 82 [0/3918 (0%)	Loss: 0.608148]
train epoch: 82 [1918/3918 (50%)	Loss: 0.655133]
test loss: 0.7872
train epoch: 83 [0/3918 (0%)	Loss: 0.607800]
train epoch: 83 [1918/3918 (50%)	Loss: 0.654805]
test loss: 0.7868
train epoch: 84 [0/3918 (0%)	Loss: 0.607461]
train epoch: 84 [1918/3918 (50%)	Loss: 0.654470]
test loss: 0.7864
train epoch: 85 [0/3918 (0%)	Loss: 0.607132]
train epoch: 85 [1918/3918 (50%)	Loss: 0.654128]
test loss:

In [7]:
for xtest, ytest in test_dataloader:
    pass
y_test_pred = model(xtest).detach().numpy()
ytest = ytest.detach().numpy()

In [8]:
y_test = yscaler.inverse_transform(ytest)
y_test_pred = yscaler.inverse_transform(y_test_pred)
ytest_summary = np.concatenate([y_test, y_test_pred], axis=1)

In [9]:
# showing the dataframe
pd.DataFrame(ytest_summary, columns=['test', 'prediction'])

Unnamed: 0,test,prediction
0,5.0,5.487374
1,6.0,5.510200
2,7.0,5.662288
3,8.0,6.714055
4,5.0,5.754687
...,...,...
975,5.0,5.686732
976,6.0,5.862689
977,6.0,5.494952
978,6.0,5.321161


In [10]:
#Fit the model and make prediction
ytrain_pred_list = []
ytrain_list = []
for xtrain, ytrain in train_dataloader:
    ytrain_pred_list.append(model(xtrain).detach().numpy())
    ytrain_list.append(ytrain.detach().numpy())


In [11]:
ytrain_pred_list = np.concatenate(ytrain_pred_list)
ytrain_list = np.concatenate(ytrain_list)

ytrain_pred_list = yscaler.inverse_transform(ytrain_pred_list)
ytrain_list = yscaler.inverse_transform(ytrain_list)

In [12]:
import fit_result
fit_result.score('NN', ytrain_list, ytrain_pred_list, y_test, y_test_pred)

Train score of NN trian set 0.35389745235443115
Test score of NN test set 0.30862826108932495 

Train mean error of NN trian set 0.54421335
Test mean error of NN test set 0.605678 

Train R2 of NN trian set 0.353871574559231
Test R2 error of NN test set 0.3055026465948639
