In [19]:
import numpy as np
import pandas as pd
import torch
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from torch.utils.data import DataLoader, Dataset

In [20]:
class DatasetPT(Dataset):
    """Map-style dataset that pairs feature rows with target rows.

    Indexing returns an ``(X, Y)`` tuple of ``float32`` tensors. ``idx`` is
    applied to the first (sample) axis only, so integers, slices
    (e.g. ``dataset[:]`` for the whole split) and 1-D targets all work.

    Args:
        X: Array-like of features, one sample per row.
        Y: Array-like of targets with the same number of samples as ``X``.

    Raises:
        ValueError: If ``X`` and ``Y`` hold a different number of samples.
    """

    def __init__(self, X, Y):
        # __len__ is driven by Y alone, so a mismatch would otherwise surface
        # only later as an IndexError mid-epoch or as silently unused rows.
        if len(X) != len(Y):
            raise ValueError(
                f"X and Y must have the same number of samples, "
                f"got {len(X)} and {len(Y)}"
            )
        self.X = X
        self.Y = Y

    def __len__(self):
        """Return the number of samples."""
        return len(self.Y)

    def __getitem__(self, idx):
        """Return the ``(features, targets)`` pair(s) selected by ``idx``."""
        # Index only the sample axis (instead of ``[idx, :]``) so that 1-D
        # target vectors are accepted as well as 2-D arrays.
        X_out = torch.as_tensor(self.X[idx], dtype=torch.float32)
        Y_out = torch.as_tensor(self.Y[idx], dtype=torch.float32)
        return X_out, Y_out
    
# NOTE(review): this class subclasses ``Dataset`` but defines neither
# ``__getitem__`` nor ``__len__``; the base class is kept only so existing
# ``isinstance`` checks keep working.
class DataHandlerPT(Dataset):
    """Hold raw arrays, split them into train/test, and scale each split.

    Args:
        _X: Raw feature array, one sample per row.
        _Y: Raw target array, one sample per row.
        scalerX: Scaler for the features; must provide ``fit_transform`` and
            ``transform``.
        scalerY: Scaler for the targets; same requirements as ``scalerX``.

    Attributes set by ``split_and_scale`` (``None`` until it has been called):
        X_train, X_test, Y_train, Y_test: the scaled splits.
    """

    def __init__(self, _X, _Y, scalerX, scalerY):
        self._X = _X
        self._Y = _Y
        self.scalerX = scalerX
        self.scalerY = scalerY
        self.X_train = None
        self.X_test = None
        self.Y_train = None
        self.Y_test = None

    def split_and_scale(self, test_size, random_state):
        """Split the raw arrays and scale both splits.

        Args:
            test_size: Forwarded to ``train_test_split``.
            random_state: Forwarded to ``train_test_split``.
        """
        _X_train, _X_test, _Y_train, _Y_test = train_test_split(
            self._X, self._Y, test_size=test_size, random_state=random_state
        )
        # Each scaler is fitted on the training split only and then re-used on
        # the test split, so no test-set statistics enter the fit.
        self.X_train = self.scalerX.fit_transform(_X_train)
        self.X_test = self.scalerX.transform(_X_test)

        self.Y_train = self.scalerY.fit_transform(_Y_train)
        self.Y_test = self.scalerY.transform(_Y_test)

    def _require_split(self):
        """Raise a clear error if ``split_and_scale`` has not populated the splits."""
        splits = (self.X_train, self.X_test, self.Y_train, self.Y_test)
        if any(split is None for split in splits):
            raise RuntimeError(
                "No scaled data available: call split_and_scale() before "
                "get_train() / get_test()."
            )

    # This part is different from SKLearn version
    def get_train(self):
        """Return the scaled training split wrapped in a ``DatasetPT``.

        Raises:
            RuntimeError: If ``split_and_scale`` has not been called yet.
        """
        self._require_split()
        return DatasetPT(X=self.X_train, Y=self.Y_train)

    def get_test(self):
        """Return the scaled test split wrapped in a ``DatasetPT``.

        Raises:
            RuntimeError: If ``split_and_scale`` has not been called yet.
        """
        self._require_split()
        return DatasetPT(X=self.X_test, Y=self.Y_test)

In [21]:
# Read the experiment table; the "exp" column becomes the row index.
df = pd.read_excel("data.xlsx", index_col="exp")

# The trailing three columns are the targets; every other column is a feature.
_Y = df.iloc[:, -3:].values
_X = df.iloc[:, :-3].values
print(_X.shape, _Y.shape, sep="\n")

# Hold out 30% of the rows and scale features and targets with separate scalers.
data_handler = DataHandlerPT(
    _X=_X,
    _Y=_Y,
    scalerX=StandardScaler(),
    scalerY=StandardScaler(),
)
data_handler.split_and_scale(random_state=0, test_size=0.3)

test_dataset = data_handler.get_test()
train_dataset = data_handler.get_train()

# Mini-batches of 16 training samples, reshuffled on every pass.
loader = DataLoader(dataset=train_dataset, shuffle=True, batch_size=16)

(100, 47)
(100, 3)


In [22]:
import torch.nn as nn
import torch.optim as optim

# Seed PyTorch's global RNG before any layer is constructed so that the
# initial weights are the same after every kernel restart.
torch.manual_seed(0)

# Define the model: a fully connected network that narrows
# n_features -> 24 -> 12 -> 6 -> n_targets with ReLU between the layers.
# Input and output widths are read from the raw arrays held by data_handler.
model = nn.Sequential(
    nn.Linear(data_handler._X.shape[1], 24),
    nn.ReLU(),
    nn.Linear(24, 12),
    nn.ReLU(),
    nn.Linear(12, 6),
    nn.ReLU(),
    nn.Linear(6, data_handler._Y.shape[1]),
)

# loss function and optimizer
loss_fn = nn.MSELoss()  # mean square error
optimizer = optim.Adam(model.parameters(), lr=0.0001)

In [23]:
from torch.utils.tensorboard import SummaryWriter
import numpy as np

# TensorBoard writer used by the training loop to record scalar metrics.
# Called with no arguments, so it creates a 'runs' directory by default for logs.
writer = SummaryWriter()

In [24]:
n_epochs = 100000  # number of epochs to run
# NOTE(review): `batch_size` is not read anywhere in this cell; the batch size
# that actually takes effect is the one `loader` was constructed with.
batch_size = 10  # size of each batch

# Pull the whole held-out split out once as a pair of tensors; it is
# re-evaluated after every epoch.
X_test, Y_test = test_dataset[:]

# try/finally guarantees the TensorBoard writer is flushed and closed even if
# the (very long) loop is interrupted from the keyboard or fails part-way.
try:
    for epoch in range(n_epochs):
        model.train()
        running_loss = 0.0
        n_seen = 0
        for X_batch, Y_batch in loader:
            Y_pred = model(X_batch)
            loss = loss_fn(Y_pred, Y_batch)
            # backward pass
            optimizer.zero_grad()
            loss.backward()
            # update weights
            optimizer.step()
            # Weight each batch loss by its size so a smaller final batch does
            # not skew the epoch average.
            n_in_batch = X_batch.shape[0]
            running_loss += loss.item() * n_in_batch
            n_seen += n_in_batch
        # Running average over the epoch, measured while weights were updating.
        writer.add_scalar('Loss/train', running_loss / max(n_seen, 1), epoch)

        model.eval()
        # No gradients are needed for evaluation; skip building the graph.
        with torch.no_grad():
            Y_pred = model(X_test)
            mse = loss_fn(Y_pred, Y_test).item()
        # Held-out loss gets its own tag (it was previously written to 'Loss/train').
        writer.add_scalar('Loss/test', mse, epoch)
finally:
    writer.flush()
    writer.close()

KeyboardInterrupt: 