In [1]:
import numpy as np
import pandas as pd
import torch
import matplotlib.pyplot as plt
from torch import nn, optim
from torch.utils.data import DataLoader
import torch.nn.functional as F
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
%matplotlib inline

In [2]:
train = pd.read_csv('./train.csv')
test = pd.read_csv('./test.csv')

Id = test["Id"]

train = train.drop(["Id"], axis = 1)
test = test.drop(["Id"], axis = 1)

data = train.append(test, ignore_index = True, sort = False)
data = pd.get_dummies(data, dummy_na = True, drop_first = True)
data.isnull().values.any()

True

In [3]:
data.shape

(2919, 289)

In [4]:
data.fillna(data.median(), inplace = True)
columns = data.columns
sale_price = data['SalePrice']
data.isnull().values.any()

False

In [5]:
scaler = MinMaxScaler()
data = pd.DataFrame(scaler.fit_transform(data), columns = columns)
data['SalePrice'] = sale_price
data.head()

Unnamed: 0,MSSubClass,LotFrontage,LotArea,OverallQual,OverallCond,YearBuilt,YearRemodAdd,MasVnrArea,BsmtFinSF1,BsmtFinSF2,...,SaleType_New,SaleType_Oth,SaleType_WD,SaleType_nan,SaleCondition_AdjLand,SaleCondition_Alloca,SaleCondition_Family,SaleCondition_Normal,SaleCondition_Partial,SaleCondition_nan
0,0.235294,0.150685,0.03342,0.666667,0.5,0.949275,0.883333,0.1225,0.125089,0.0,...,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
1,0.0,0.202055,0.038795,0.555556,0.875,0.753623,0.433333,0.0,0.173281,0.0,...,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
2,0.235294,0.160959,0.046507,0.666667,0.5,0.934783,0.866667,0.10125,0.086109,0.0,...,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
3,0.294118,0.133562,0.038561,0.666667,0.5,0.311594,0.333333,0.0,0.038271,0.0,...,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.235294,0.215753,0.060576,0.777778,0.5,0.927536,0.833333,0.21875,0.116052,0.0,...,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0


In [6]:
train = data.iloc[:1460]
test = data.iloc[1460:]
test.drop('SalePrice', axis = 1, inplace = True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  errors=errors)


In [7]:
target = train['SalePrice']
train = train.drop('SalePrice', axis = 1)
X_train, X_test, y_train, y_test = train_test_split(train, target, test_size = 0.33)

In [8]:
X_train.shape

(978, 288)

In [9]:
class Regressor(nn.Module):
    def __init__(self):
        super().__init__()
        self.fc1 = nn.Linear(288, 144)
        self.fc2 = nn.Linear(144, 72)
        self.fc3 = nn.Linear(72, 18)
        self.fc4 = nn.Linear(18, 1)

    def forward(self, x):
        x = nn.relu(self.fc1(x))
        x = nn.relu(self.fc2(x))
        x = nn.relu(self.fc3(x))
        x = nn.relu(self.fc4(x))
        return x

In [10]:
train_batch = np.array_split(X_train, 50)
label_batch = np.array_split(y_train, 50)

In [11]:
# convert to Tensors from numpy arrays 

for i in range(len(train_batch)):
    train_batch[i] = torch.from_numpy(train_batch[i].values).float()
    
for i in range(len(label_batch)):
    label_batch[i] = torch.from_numpy(label_batch[i].values).float().view(-1, 1)

X_test = torch.from_numpy(X_test.values).float()
y_test = torch.from_numpy(y_test.values).float().view(-1, 1)

In [12]:
model = Regressor()
ps = model(train_batch[0])
ps.shape

torch.Size([20, 1])

In [13]:
model = Regressor()
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

epochs = 400

train_losses, test_losses = [], []
for e in range(epochs):
    model.train()
    train_loss = 0
    for i in range(len(train_batch)):
        optimizer.zero_grad()
        output = model(train_batch[i])
        loss = torch.sqrt(criterion(torch.log(output), torch.log(label_batch[i])))
        loss.backward()
        optimizer.step()
        
        train_loss += loss.item()
        
    else:
        test_loss = 0
        accuracy = 0
        
        with torch.no_grad():
            model.eval()
            predictions = model(X_test)
            test_loss += torch.sqrt(criterion(torch.log(predictions), torch.log(y_test)))
                
        train_losses.append(train_loss/len(train_batch))
        test_losses.append(test_loss)

        print("Epoch: {}/{}.. ".format(e+1, epochs),
              "Training Loss: {:.3f}.. ".format(train_loss/len(train_batch)),
              "Test Loss: {:.3f}.. ".format(test_loss))

Epoch: 1/400..  Training Loss: 11.766..  Test Loss: 9.601.. 
Epoch: 2/400..  Training Loss: 8.495..  Test Loss: 7.595.. 
Epoch: 3/400..  Training Loss: 7.001..  Test Loss: 6.484.. 
Epoch: 4/400..  Training Loss: 6.065..  Test Loss: 5.683.. 
Epoch: 5/400..  Training Loss: 5.346..  Test Loss: 5.025.. 
Epoch: 6/400..  Training Loss: 4.724..  Test Loss: 4.450.. 
Epoch: 7/400..  Training Loss: 4.203..  Test Loss: 3.981.. 
Epoch: 8/400..  Training Loss: 3.772..  Test Loss: 3.586.. 
Epoch: 9/400..  Training Loss: 3.404..  Test Loss: 3.241.. 
Epoch: 10/400..  Training Loss: 3.077..  Test Loss: 2.932.. 
Epoch: 11/400..  Training Loss: 2.782..  Test Loss: 2.652.. 
Epoch: 12/400..  Training Loss: 2.515..  Test Loss: 2.397.. 
Epoch: 13/400..  Training Loss: 2.269..  Test Loss: 2.159.. 
Epoch: 14/400..  Training Loss: 2.038..  Test Loss: 1.932.. 
Epoch: 15/400..  Training Loss: 1.798..  Test Loss: 1.679.. 
Epoch: 16/400..  Training Loss: 1.548..  Test Loss: 1.438.. 
Epoch: 17/400..  Training Loss: 

Epoch: 136/400..  Training Loss: 0.127..  Test Loss: 0.156.. 
Epoch: 137/400..  Training Loss: 0.126..  Test Loss: 0.155.. 
Epoch: 138/400..  Training Loss: 0.125..  Test Loss: 0.155.. 
Epoch: 139/400..  Training Loss: 0.124..  Test Loss: 0.154.. 
Epoch: 140/400..  Training Loss: 0.124..  Test Loss: 0.154.. 
Epoch: 141/400..  Training Loss: 0.123..  Test Loss: 0.153.. 
Epoch: 142/400..  Training Loss: 0.122..  Test Loss: 0.153.. 
Epoch: 143/400..  Training Loss: 0.122..  Test Loss: 0.152.. 
Epoch: 144/400..  Training Loss: 0.121..  Test Loss: 0.152.. 
Epoch: 145/400..  Training Loss: 0.120..  Test Loss: 0.151.. 
Epoch: 146/400..  Training Loss: 0.120..  Test Loss: 0.151.. 
Epoch: 147/400..  Training Loss: 0.119..  Test Loss: 0.150.. 
Epoch: 148/400..  Training Loss: 0.118..  Test Loss: 0.150.. 
Epoch: 149/400..  Training Loss: 0.118..  Test Loss: 0.149.. 
Epoch: 150/400..  Training Loss: 0.117..  Test Loss: 0.149.. 
Epoch: 151/400..  Training Loss: 0.117..  Test Loss: 0.149.. 
Epoch: 1

Epoch: 271/400..  Training Loss: 0.072..  Test Loss: 0.130.. 
Epoch: 272/400..  Training Loss: 0.072..  Test Loss: 0.130.. 
Epoch: 273/400..  Training Loss: 0.072..  Test Loss: 0.130.. 
Epoch: 274/400..  Training Loss: 0.072..  Test Loss: 0.130.. 
Epoch: 275/400..  Training Loss: 0.072..  Test Loss: 0.130.. 
Epoch: 276/400..  Training Loss: 0.071..  Test Loss: 0.130.. 
Epoch: 277/400..  Training Loss: 0.071..  Test Loss: 0.130.. 
Epoch: 278/400..  Training Loss: 0.071..  Test Loss: 0.130.. 
Epoch: 279/400..  Training Loss: 0.071..  Test Loss: 0.130.. 
Epoch: 280/400..  Training Loss: 0.071..  Test Loss: 0.130.. 
Epoch: 281/400..  Training Loss: 0.070..  Test Loss: 0.130.. 
Epoch: 282/400..  Training Loss: 0.070..  Test Loss: 0.130.. 
Epoch: 283/400..  Training Loss: 0.070..  Test Loss: 0.130.. 
Epoch: 284/400..  Training Loss: 0.070..  Test Loss: 0.130.. 
Epoch: 285/400..  Training Loss: 0.070..  Test Loss: 0.130.. 
Epoch: 286/400..  Training Loss: 0.070..  Test Loss: 0.130.. 
Epoch: 2

In [14]:
test = torch.from_numpy(test.values).float()

with torch.no_grad():
    model.eval()
    predictions = model.forward(test)

predictions.shape

torch.Size([1459, 1])

In [15]:
np.ravel(predictions) 

array([116608.484, 153003.27 , 185160.61 , ..., 164172.47 , 107129.53 ,
       203325.1  ], dtype=float32)

In [16]:
submission = pd.DataFrame({"Id": Id, "SalePrice": np.exp(np.ravel(predictions))})
submission.to_csv("nn_prices.csv", index = False)