# Advanced Extra Exercise Project - PyTorch for Linear Regression

https://www.kaggle.com/code/simpleparadox/boston-housing-dataset-pytorch

In [None]:
import numpy as np
import pandas as pd
import torch
import os
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import TensorDataset
from torch.utils.data import DataLoader
from torch.utils.data import random_split
from sklearn.preprocessing import StandardScaler

In [None]:
# Switch into the exercise folder so the relative "data/..." path used when
# loading the CSV resolves.
# NOTE(review): this looks like a mounted Google Drive path in Colab — adjust
# it before running in any other environment.
folder_path = "/content/drive/MyDrive/deeplearning2024_VincenzinaSoos/ex_withoutKeras"
os.chdir(folder_path)

In [None]:
# Load the Boston housing CSV; the path is relative to the current working directory.
dataset = pd.read_csv("data/boston-housing-dataset.csv")

In [None]:
# Prefer the first CUDA GPU when one is available; otherwise run on the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")

In [None]:
# Standardize every column with StandardScaler. fit_transform returns a NumPy
# array, so `dataset` is no longer a DataFrame after this line (later code
# slices it positionally with dataset[:, ...]).
# NOTE(review): the scaler is fitted on all rows before the train/validation
# split and also rescales the last column, which is later used as the target,
# so reported losses are in standardized units — confirm this is intended.
dataset = StandardScaler().fit_transform(dataset)

In [None]:
# Create model and define RMSE loss.
class LinearRegression(nn.Module):
    """Linear regression model: a single affine layer from features to targets.

    Args:
        in_features: Number of input features per sample. Defaults to 13,
            the value this model was previously hard-coded to.
        out_features: Number of predicted targets per sample. Defaults to 1.
    """

    def __init__(self, in_features: int = 13, out_features: int = 1):
        super().__init__()
        # One weight per (feature, target) pair plus a bias per target.
        self.linear = nn.Linear(in_features, out_features)

    def forward(self, x):
        """Return the predictions of the linear layer for the batch ``x``."""
        return self.linear(x)


def RMSELoss(yhat, y, eps=1e-12):
    """Return the root-mean-square error between predictions and targets.

    Args:
        yhat: Predicted values.
        y: Ground-truth values; should have the same shape as ``yhat``.
        eps: Small constant added to the mean squared error before taking the
            square root. Without it, a perfect fit (MSE exactly 0) makes the
            backward pass multiply an infinite sqrt-derivative by a zero MSE
            gradient, which yields NaN gradients. Pass ``eps=0`` for the plain
            ``sqrt(MSE)``.

    Returns:
        A scalar tensor holding ``sqrt(MSE(yhat, y) + eps)``.
    """
    return torch.sqrt(F.mse_loss(yhat, y) + eps)

In [None]:
def fit(num_epochs, model, loss_fn, optimizer, train_dataloader, val_loader, log_every=50):
    """Train ``model`` with mini-batch updates and periodically report losses.

    Args:
        num_epochs: Number of full passes over ``train_dataloader``.
        model: The ``nn.Module`` to optimise; batches are moved to the device
            its parameters live on.
        loss_fn: Callable ``loss_fn(preds, targets)`` returning a scalar tensor.
        optimizer: Optimizer that updates ``model``'s parameters.
        train_dataloader: Iterable of ``(inputs, targets)`` training batches.
        val_loader: Iterable of ``(inputs, targets)`` validation batches.
        log_every: Print a report every ``log_every`` epochs (default 50).

    Raises:
        ValueError: If ``log_every`` is smaller than 1.

    The report shows the sample-weighted mean training loss of the epoch and
    the RMSE over the whole validation set. Either value is NaN when the
    corresponding loader yields no samples.
    """
    if log_every < 1:
        raise ValueError("log_every must be a positive integer")

    # Use the device the model already lives on instead of a module-level global.
    try:
        run_device = next(model.parameters()).device
    except StopIteration:
        run_device = torch.device("cpu")

    for epoch in range(num_epochs):
        loss_total = 0.0
        samples_seen = 0
        for inputs, targets in train_dataloader:
            inputs, targets = inputs.to(run_device), targets.to(run_device)

            # Clear gradients left over from the previous batch before the
            # new backward pass accumulates into them.
            optimizer.zero_grad()

            # Forward pass and loss.
            preds = model(inputs)
            loss = loss_fn(preds, targets)

            # Backpropagate, then update the parameters.
            loss.backward()
            optimizer.step()

            # Accumulate on-device (no per-batch host sync) for the epoch mean.
            batch_len = inputs.size(0)
            loss_total = loss_total + loss.detach() * batch_len
            samples_seen += batch_len

        if (epoch + 1) % log_every != 0:
            continue

        train_loss = float(loss_total / samples_seen) if samples_seen else float("nan")

        # Validation: no gradient tracking, eval mode, and squared errors are
        # pooled over all batches so the RMSE covers the whole validation set
        # rather than only its last batch.
        squared_error = 0.0
        value_count = 0
        was_training = model.training
        model.eval()
        try:
            with torch.no_grad():
                for val_input, val_targets in val_loader:
                    val_input, val_targets = val_input.to(run_device), val_targets.to(run_device)
                    diff = model(val_input) - val_targets
                    squared_error += torch.sum(diff ** 2).item()
                    value_count += diff.numel()
        finally:
            # Leave the model in whatever mode the caller had it in.
            model.train(was_training)
        val_loss = (squared_error / value_count) ** 0.5 if value_count else float("nan")

        print("Epoch [{}/{}], Training loss: {:.4f}, Validation Loss: {:.4f}".format(epoch + 1, num_epochs, train_loss, val_loss))

In [None]:
# Define model training metadata.
# Training configuration: epoch count, model, loss, and optimizer.
num_epochs = 2000
model = LinearRegression()
model.to(device)
loss_fn = RMSELoss
optimizer = torch.optim.SGD(model.parameters(), lr=1e-3)

# Build float32 tensors from the scaled array: every column except the first
# and the last is a feature; the last column is the target, kept 2-D as
# (n_samples, 1).
input_np_array = dataset[:, 1:-1].astype('float32')
inputs = torch.from_numpy(input_np_array)
print(inputs.shape)
targets_np_array = dataset[:, -1].astype('float32')
targets_np_array_rs = targets_np_array.reshape(targets_np_array.shape[0], 1)
targets = torch.from_numpy(targets_np_array_rs)
print(targets.shape)

torch.Size([506, 13])
torch.Size([506, 1])


In [None]:
# Hold out 10% of the samples for validation and train on the remainder.
train_tensor_dataset = TensorDataset(inputs, targets)
total_samples = len(train_tensor_dataset)
val_size = int(0.1 * total_samples)
train_size = total_samples - val_size
batch_size = 50
train_data, val_data = random_split(train_tensor_dataset, [train_size, val_size])

# Only the training batches are reshuffled each epoch.
train_dataloader = DataLoader(train_data, batch_size=batch_size, shuffle=True)
val_dataloader = DataLoader(val_data, batch_size=batch_size)

# Train the model.
fit(num_epochs, model, loss_fn, optimizer, train_dataloader, val_dataloader)

Epoch [50/2000], Training loss: 0.3042, Validation Loss: 0.7481
Epoch [100/2000], Training loss: 1.0054, Validation Loss: 0.6731
Epoch [150/2000], Training loss: 0.3865, Validation Loss: 0.6562
Epoch [200/2000], Training loss: 0.2688, Validation Loss: 0.6508
Epoch [250/2000], Training loss: 0.3153, Validation Loss: 0.6483
Epoch [300/2000], Training loss: 0.4052, Validation Loss: 0.6440
Epoch [350/2000], Training loss: 0.3579, Validation Loss: 0.6413
Epoch [400/2000], Training loss: 0.3586, Validation Loss: 0.6388
Epoch [450/2000], Training loss: 0.4308, Validation Loss: 0.6357
Epoch [500/2000], Training loss: 1.2165, Validation Loss: 0.6356
Epoch [550/2000], Training loss: 0.6808, Validation Loss: 0.6335
Epoch [600/2000], Training loss: 0.3497, Validation Loss: 0.6325
Epoch [650/2000], Training loss: 0.4208, Validation Loss: 0.6309
Epoch [700/2000], Training loss: 0.2391, Validation Loss: 0.6302
Epoch [750/2000], Training loss: 0.7644, Validation Loss: 0.6306
Epoch [800/2000], Training