In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import StandardScaler
from torch.utils.data import DataLoader, TensorDataset
import numpy as np
from math import sqrt


# Load the datasets

In [2]:
train_df = pd.read_csv('../data/train.csv')
test_df = pd.read_csv('../data/test.csv')


In [3]:
train_df.head()


Unnamed: 0,ID,crim,zn,indus,chas,nox,rm,age,dis,rad,tax,ptratio,black,lstat,medv
0,1,0.00632,18.0,2.31,0,0.538,6.575,65.2,4.09,1,296,15.3,396.9,4.98,24.0
1,2,0.02731,0.0,7.07,0,0.469,6.421,78.9,4.9671,2,242,17.8,396.9,9.14,21.6
2,4,0.03237,0.0,2.18,0,0.458,6.998,45.8,6.0622,3,222,18.7,394.63,2.94,33.4
3,5,0.06905,0.0,2.18,0,0.458,7.147,54.2,6.0622,3,222,18.7,396.9,5.33,36.2
4,7,0.08829,12.5,7.87,0,0.524,6.012,66.6,5.5605,5,311,15.2,395.6,12.43,22.9


In [4]:
test_df.head()


Unnamed: 0,ID,crim,zn,indus,chas,nox,rm,age,dis,rad,tax,ptratio,black,lstat
0,3,0.02729,0.0,7.07,0,0.469,7.185,61.1,4.9671,2,242,17.8,392.83,4.03
1,6,0.02985,0.0,2.18,0,0.458,6.43,58.7,6.0622,3,222,18.7,394.12,5.21
2,8,0.14455,12.5,7.87,0,0.524,6.172,96.1,5.9505,5,311,15.2,396.9,19.15
3,9,0.21124,12.5,7.87,0,0.524,5.631,100.0,6.0821,5,311,15.2,386.63,29.93
4,10,0.17004,12.5,7.87,0,0.524,6.004,85.9,6.5921,5,311,15.2,386.71,17.1


# Split the training data into training and validation sets

In [5]:
X = train_df.drop('medv', axis=1).values
y = train_df['medv'].values

scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

X_tensor = torch.tensor(X_scaled, dtype=torch.float32)
y_tensor = torch.tensor(y, dtype=torch.float32).view(-1, 1)

X_train, X_validation, y_train, y_validation = train_test_split(X_tensor, y_tensor, test_size=0.2, random_state=42)


In [6]:
len(X_train)


266

In [7]:
len(X_validation)


67

In [8]:
train_dataset = TensorDataset(X_train, y_train)
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)


# Initialize and train the model

In [9]:
class RegressionModel(nn.Module):
    def __init__(self, input_size):
        super(RegressionModel, self).__init__()
        self.fc1 = nn.Linear(input_size, 50)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(50, 20)
        self.fc3 = nn.Linear(20, 1)
        
    def forward(self, x):
        out = self.fc1(x)
        out = self.relu(out)
        out = self.fc2(out)
        out = self.relu(out)
        out = self.fc3(out)
        return out

model = RegressionModel(X_train.shape[1])


In [10]:
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)


In [11]:
num_epochs = 600
for epoch in range(num_epochs):
    for inputs, targets in train_loader:
        outputs = model(inputs)
        loss = criterion(outputs, targets)
        
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    
    if (epoch+1) % 10 == 0:
        print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')
    

Epoch [10/600], Loss: 544.1342
Epoch [20/600], Loss: 183.4078
Epoch [30/600], Loss: 90.6041
Epoch [40/600], Loss: 41.7870
Epoch [50/600], Loss: 34.5708
Epoch [60/600], Loss: 8.7866
Epoch [70/600], Loss: 14.6397
Epoch [80/600], Loss: 25.5963
Epoch [90/600], Loss: 13.4068
Epoch [100/600], Loss: 7.0479
Epoch [110/600], Loss: 6.0413
Epoch [120/600], Loss: 13.4913
Epoch [130/600], Loss: 9.8116
Epoch [140/600], Loss: 5.9913
Epoch [150/600], Loss: 4.3515
Epoch [160/600], Loss: 16.7197
Epoch [170/600], Loss: 7.9996
Epoch [180/600], Loss: 13.7728
Epoch [190/600], Loss: 10.4465
Epoch [200/600], Loss: 4.9412
Epoch [210/600], Loss: 10.3828
Epoch [220/600], Loss: 7.5316
Epoch [230/600], Loss: 8.8431
Epoch [240/600], Loss: 14.6290
Epoch [250/600], Loss: 19.0724
Epoch [260/600], Loss: 17.4439
Epoch [270/600], Loss: 2.5562
Epoch [280/600], Loss: 3.9531
Epoch [290/600], Loss: 9.9315
Epoch [300/600], Loss: 13.5839
Epoch [310/600], Loss: 19.2068
Epoch [320/600], Loss: 13.6479
Epoch [330/600], Loss: 5.634

# Predict on validation set

In [12]:
model.eval()
with torch.no_grad():
    predictions = model(X_validation)
    rmse = sqrt(mean_squared_error(y_validation.numpy(), predictions.numpy()))
    print(f'Validation RMSE: {rmse}')


Validation RMSE: 3.522476098958915


# Predict on test set

In [13]:
X_test = test_df.values

X_test_scaled = scaler.transform(X_test)

X_test_tensor = torch.tensor(X_test_scaled, dtype=torch.float32)


In [14]:
with torch.no_grad():
    test_predictions = model(X_test_tensor)

test_predictions_np = test_predictions.numpy().flatten()


In [15]:
submission_df = pd.DataFrame()
submission_df['ID'] = test_df['ID']
submission_df['medv'] = test_predictions
submission_df


Unnamed: 0,ID,medv
0,3,29.302475
1,6,25.263079
2,8,17.654057
3,9,16.835131
4,10,17.448463
...,...,...
168,496,19.787426
169,497,14.980715
170,499,19.332132
171,501,17.946054


In [16]:
submission_df.to_csv('submission_my_model.csv', index=False)


# The model got 3.40790 score on Kaggle