## Importing Libraries

In [56]:
import pandas as pd
from category_encoders import TargetEncoder
import torch
from sklearn.preprocessing import StandardScaler
from torch.utils import data
from torch.utils.data import DataLoader
import torch.nn as nn
import torch.nn.functional as F

In [2]:
df_train = pd.read_csv('/kaggle/input/house-prices-advanced-regression-techniques/train.csv')
df_test = pd.read_csv('/kaggle/input/house-prices-advanced-regression-techniques/test.csv')

In [3]:
df_train.shape

(1460, 81)

In [4]:
df_test.shape

(1459, 80)

## EDA

In [5]:
pd.options.display.max_rows = 90
df_dtype = pd.DataFrame(df_train.dtypes,columns=['dtype'])
df_dtype

Unnamed: 0,dtype
Id,int64
MSSubClass,int64
MSZoning,object
LotFrontage,float64
LotArea,int64
Street,object
Alley,object
LotShape,object
LandContour,object
Utilities,object


In [6]:
df_train.isnull().sum()

Id                  0
MSSubClass          0
MSZoning            0
LotFrontage       259
LotArea             0
Street              0
Alley            1369
LotShape            0
LandContour         0
Utilities           0
LotConfig           0
LandSlope           0
Neighborhood        0
Condition1          0
Condition2          0
BldgType            0
HouseStyle          0
OverallQual         0
OverallCond         0
YearBuilt           0
YearRemodAdd        0
RoofStyle           0
RoofMatl            0
Exterior1st         0
Exterior2nd         0
MasVnrType        872
MasVnrArea          8
ExterQual           0
ExterCond           0
Foundation          0
BsmtQual           37
BsmtCond           37
BsmtExposure       38
BsmtFinType1       37
BsmtFinSF1          0
BsmtFinType2       38
BsmtFinSF2          0
BsmtUnfSF           0
TotalBsmtSF         0
Heating             0
HeatingQC           0
CentralAir          0
Electrical          1
1stFlrSF            0
2ndFlrSF            0
LowQualFin

In [7]:
df_test.isnull().sum()

Id                  0
MSSubClass          0
MSZoning            4
LotFrontage       227
LotArea             0
Street              0
Alley            1352
LotShape            0
LandContour         0
Utilities           2
LotConfig           0
LandSlope           0
Neighborhood        0
Condition1          0
Condition2          0
BldgType            0
HouseStyle          0
OverallQual         0
OverallCond         0
YearBuilt           0
YearRemodAdd        0
RoofStyle           0
RoofMatl            0
Exterior1st         1
Exterior2nd         1
MasVnrType        894
MasVnrArea         15
ExterQual           0
ExterCond           0
Foundation          0
BsmtQual           44
BsmtCond           45
BsmtExposure       44
BsmtFinType1       42
BsmtFinSF1          1
BsmtFinType2       42
BsmtFinSF2          1
BsmtUnfSF           1
TotalBsmtSF         1
Heating             0
HeatingQC           0
CentralAir          0
Electrical          0
1stFlrSF            0
2ndFlrSF            0
LowQualFin

## Correlation Analysis

In [8]:
df_dtype[df_dtype['dtype'] != 'object'].index

Index(['Id', 'MSSubClass', 'LotFrontage', 'LotArea', 'OverallQual',
       'OverallCond', 'YearBuilt', 'YearRemodAdd', 'MasVnrArea', 'BsmtFinSF1',
       'BsmtFinSF2', 'BsmtUnfSF', 'TotalBsmtSF', '1stFlrSF', '2ndFlrSF',
       'LowQualFinSF', 'GrLivArea', 'BsmtFullBath', 'BsmtHalfBath', 'FullBath',
       'HalfBath', 'BedroomAbvGr', 'KitchenAbvGr', 'TotRmsAbvGrd',
       'Fireplaces', 'GarageYrBlt', 'GarageCars', 'GarageArea', 'WoodDeckSF',
       'OpenPorchSF', 'EnclosedPorch', '3SsnPorch', 'ScreenPorch', 'PoolArea',
       'MiscVal', 'MoSold', 'YrSold', 'SalePrice'],
      dtype='object')

In [9]:
df_train[df_dtype[df_dtype['dtype'] != 'object'].index].corr()[['SalePrice']].sort_values(by='SalePrice',ascending=False)

Unnamed: 0,SalePrice
SalePrice,1.0
OverallQual,0.790982
GrLivArea,0.708624
GarageCars,0.640409
GarageArea,0.623431
TotalBsmtSF,0.613581
1stFlrSF,0.605852
FullBath,0.560664
TotRmsAbvGrd,0.533723
YearBuilt,0.522897


In [10]:
df_dtype[df_dtype['dtype'] == 'object'].index

Index(['MSZoning', 'Street', 'Alley', 'LotShape', 'LandContour', 'Utilities',
       'LotConfig', 'LandSlope', 'Neighborhood', 'Condition1', 'Condition2',
       'BldgType', 'HouseStyle', 'RoofStyle', 'RoofMatl', 'Exterior1st',
       'Exterior2nd', 'MasVnrType', 'ExterQual', 'ExterCond', 'Foundation',
       'BsmtQual', 'BsmtCond', 'BsmtExposure', 'BsmtFinType1', 'BsmtFinType2',
       'Heating', 'HeatingQC', 'CentralAir', 'Electrical', 'KitchenQual',
       'Functional', 'FireplaceQu', 'GarageType', 'GarageFinish', 'GarageQual',
       'GarageCond', 'PavedDrive', 'PoolQC', 'Fence', 'MiscFeature',
       'SaleType', 'SaleCondition'],
      dtype='object')

In [11]:
df_train['SalePrice']

0       208500
1       181500
2       223500
3       140000
4       250000
         ...  
1455    175000
1456    210000
1457    266500
1458    142125
1459    147500
Name: SalePrice, Length: 1460, dtype: int64

## Feature Engineering

In [190]:
train_useful_colms = ['OverallQual','GrLivArea','GarageCars','GarageArea','TotalBsmtSF','1stFlrSF',
                'FullBath','TotRmsAbvGrd','YearBuilt','YearRemodAdd','GarageYrBlt','MasVnrArea','Fireplaces',
                'ExterQual','KitchenQual','BsmtQual','GarageFinish','Neighborhood','SalePrice'] # Categorical

test_useful_colms = ['OverallQual','GrLivArea','GarageCars','GarageArea','TotalBsmtSF','1stFlrSF',
                'FullBath','TotRmsAbvGrd','YearBuilt','YearRemodAdd','GarageYrBlt','MasVnrArea','Fireplaces',
                'ExterQual','KitchenQual','BsmtQual','GarageFinish','Neighborhood'] # Categorical

In [191]:
df_train_pre = df_train[train_useful_colms]
df_test_pre = df_test[test_useful_colms]

In [192]:
df_train_pre.isnull().sum()

OverallQual      0
GrLivArea        0
GarageCars       0
GarageArea       0
TotalBsmtSF      0
1stFlrSF         0
FullBath         0
TotRmsAbvGrd     0
YearBuilt        0
YearRemodAdd     0
GarageYrBlt     81
MasVnrArea       8
Fireplaces       0
ExterQual        0
KitchenQual      0
BsmtQual        37
GarageFinish    81
Neighborhood     0
SalePrice        0
dtype: int64

In [193]:
df_train_pre['BsmtQual'].value_counts()

BsmtQual
TA    649
Gd    618
Ex    121
Fa     35
Name: count, dtype: int64

In [194]:
df_train_pre['GarageFinish'].value_counts()

GarageFinish
Unf    605
RFn    422
Fin    352
Name: count, dtype: int64

In [195]:
df_test_pre.isnull().sum()

OverallQual      0
GrLivArea        0
GarageCars       1
GarageArea       1
TotalBsmtSF      1
1stFlrSF         0
FullBath         0
TotRmsAbvGrd     0
YearBuilt        0
YearRemodAdd     0
GarageYrBlt     78
MasVnrArea      15
Fireplaces       0
ExterQual        0
KitchenQual      1
BsmtQual        44
GarageFinish    78
Neighborhood     0
dtype: int64

In [196]:
df_test_pre['KitchenQual'].value_counts()

KitchenQual
TA    757
Gd    565
Ex    105
Fa     31
Name: count, dtype: int64

In [197]:
df_test_pre['BsmtQual'].value_counts()

BsmtQual
TA    634
Gd    591
Ex    137
Fa     53
Name: count, dtype: int64

In [198]:
df_test_pre['GarageFinish'].value_counts()

GarageFinish
Unf    625
RFn    389
Fin    367
Name: count, dtype: int64

In [199]:
df_test_pre['BsmtQual'].value_counts()

BsmtQual
TA    634
Gd    591
Ex    137
Fa     53
Name: count, dtype: int64

In [203]:
df_test_pre['GarageCars'] = df_test_pre['GarageCars'].fillna(df_test_pre['GarageCars'].mean())
df_test_pre['GarageArea'] = df_test_pre['GarageArea'].fillna(df_test_pre['GarageArea'].mean())
df_test_pre['TotalBsmtSF'] = df_test_pre['TotalBsmtSF'].fillna(df_test_pre['TotalBsmtSF'].mean())
df_test_pre['GarageYrBlt'] = df_test_pre['GarageYrBlt'].fillna(df_test_pre['GarageYrBlt'].mean())
df_test_pre['MasVnrArea'] = df_test_pre['MasVnrArea'].fillna(df_test_pre['MasVnrArea'].mean())

df_train_pre['GarageYrBlt'] = df_train_pre['GarageYrBlt'].fillna(df_train_pre['GarageYrBlt'].mean())
df_train_pre['MasVnrArea'] = df_train_pre['MasVnrArea'].fillna(df_train_pre['MasVnrArea'].mean())

df_train_pre['BsmtQual'] = df_train_pre['BsmtQual'].fillna('TA')
df_train_pre['GarageFinish'] = df_train_pre['GarageFinish'].fillna('Unf')

df_test_pre['KitchenQual'] = df_test_pre['KitchenQual'].fillna('TA')
df_test_pre['BsmtQual'] = df_test_pre['BsmtQual'].fillna('TA')
df_test_pre['GarageFinish'] = df_test_pre['GarageFinish'].fillna('Unf')

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_test_pre['GarageCars'] = df_test_pre['GarageCars'].fillna(df_test_pre['GarageCars'].mean())
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_test_pre['GarageArea'] = df_test_pre['GarageArea'].fillna(df_test_pre['GarageArea'].mean())
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_test_pre['To

In [204]:
df_train_pre.isnull().sum()

OverallQual     0
GrLivArea       0
GarageCars      0
GarageArea      0
TotalBsmtSF     0
1stFlrSF        0
FullBath        0
TotRmsAbvGrd    0
YearBuilt       0
YearRemodAdd    0
GarageYrBlt     0
MasVnrArea      0
Fireplaces      0
ExterQual       0
KitchenQual     0
BsmtQual        0
GarageFinish    0
Neighborhood    0
SalePrice       0
dtype: int64

## categorial encoding

'''
Male - 27 - (27+24+22/3)
Male - 24
Male = 22
Female - 21
Female = 26
Female - 30
'''

In [205]:
df_train_pre

Unnamed: 0,OverallQual,GrLivArea,GarageCars,GarageArea,TotalBsmtSF,1stFlrSF,FullBath,TotRmsAbvGrd,YearBuilt,YearRemodAdd,GarageYrBlt,MasVnrArea,Fireplaces,ExterQual,KitchenQual,BsmtQual,GarageFinish,Neighborhood,SalePrice
0,7,1710,2,548,856,856,2,8,2003,2003,2003.0,196.0,0,Gd,Gd,Gd,RFn,CollgCr,208500
1,6,1262,2,460,1262,1262,2,6,1976,1976,1976.0,0.0,1,TA,TA,Gd,RFn,Veenker,181500
2,7,1786,2,608,920,920,2,6,2001,2002,2001.0,162.0,1,Gd,Gd,Gd,RFn,CollgCr,223500
3,7,1717,3,642,756,961,1,7,1915,1970,1998.0,0.0,1,TA,Gd,TA,Unf,Crawfor,140000
4,8,2198,3,836,1145,1145,2,9,2000,2000,2000.0,350.0,1,Gd,Gd,Gd,RFn,NoRidge,250000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1455,6,1647,2,460,953,953,2,7,1999,2000,1999.0,0.0,1,TA,TA,Gd,RFn,Gilbert,175000
1456,6,2073,2,500,1542,2073,2,7,1978,1988,1978.0,119.0,2,TA,TA,Gd,Unf,NWAmes,210000
1457,7,2340,1,252,1152,1188,2,9,1941,2006,1941.0,0.0,2,Ex,Gd,TA,RFn,Crawfor,266500
1458,5,1078,1,240,1078,1078,1,5,1950,1996,1950.0,0.0,0,TA,Gd,TA,Unf,NAmes,142125


In [206]:
catcols = ['ExterQual','KitchenQual','BsmtQual','GarageFinish','Neighborhood']
for c in catcols:
    encoder = TargetEncoder()
    df_train_pre[c] = encoder.fit_transform(df_train_pre[c],df_train_pre['SalePrice'])
    df_test_pre[c] = encoder.transform(df_test_pre[c])
    #df_train_pre[c] = encoder.inverse_transform(df_train_pre[c])

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_train_pre[c] = encoder.fit_transform(df_train_pre[c],df_train_pre['SalePrice'])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_test_pre[c] = encoder.transform(df_test_pre[c])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_train_pre[c] = encoder.fit_transform(df_train_pre[c],df_train_pre['S

In [207]:
df_test_pre.head()

Unnamed: 0,OverallQual,GrLivArea,GarageCars,GarageArea,TotalBsmtSF,1stFlrSF,FullBath,TotRmsAbvGrd,YearBuilt,YearRemodAdd,GarageYrBlt,MasVnrArea,Fireplaces,ExterQual,KitchenQual,BsmtQual,GarageFinish,Neighborhood
0,5,896,1.0,730.0,882.0,896,1,5,1961,1961,1961.0,0.0,0,144341.313466,139962.511565,138866.295918,137570.460641,145847.080044
1,6,1329,1.0,312.0,1329.0,1329,1,6,1958,1958,1958.0,108.0,0,144341.313466,212116.023891,138866.295918,137570.460641,145847.080044
2,5,1629,2.0,482.0,928.0,928,2,6,1997,1998,1997.0,0.0,1,144341.313466,139962.511565,202688.478964,240052.690341,192821.904993
3,6,1604,2.0,470.0,926.0,926,2,7,1998,1998,1998.0,20.0,1,144341.313466,212116.023891,138866.295918,240052.690341,192821.904993
4,8,1280,2.0,506.0,1280.0,1280,2,5,1992,1992,1992.0,0.0,0,231633.510246,212116.023891,202688.478964,202068.869668,261578.109175


## Noramlization

In [208]:
cols = df_train_pre.columns

In [209]:
for c in cols:
    if c != 'SalePrice':
        scaler = StandardScaler()
        df_train_pre[c] = scaler.fit_transform(df_train_pre[[c]].values)
        df_test_pre[c] = scaler.transform(df_test_pre[[c]].values)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_train_pre[c] = scaler.fit_transform(df_train_pre[[c]].values)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_test_pre[c] = scaler.transform(df_test_pre[[c]].values)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_train_pre[c] = scaler.fit_transform(df_train_pre[[c]].values)
A value is tryin

In [210]:
df_train_pre

Unnamed: 0,OverallQual,GrLivArea,GarageCars,GarageArea,TotalBsmtSF,1stFlrSF,FullBath,TotRmsAbvGrd,YearBuilt,YearRemodAdd,GarageYrBlt,MasVnrArea,Fireplaces,ExterQual,KitchenQual,BsmtQual,GarageFinish,Neighborhood,SalePrice
0,0.651479,0.370333,0.311725,0.351000,-0.459303,-0.793434,0.789741,0.912210,1.050994,0.878668,1.021157,0.511418,-0.951226,0.945153,0.580336,0.401068,0.490974,0.323832,208500
1,-0.071836,-0.482512,0.311725,-0.060731,0.466465,0.257140,0.789741,-0.318683,0.156734,-0.429577,-0.104483,-0.574410,0.600495,-0.691938,-0.773196,0.401068,0.490974,0.317797,181500
2,0.651479,0.515013,0.311725,0.631726,-0.313369,-0.627826,0.789741,-0.318683,0.984752,0.830215,0.937776,0.323060,0.600495,0.945153,0.580336,0.401068,0.490974,0.323832,223500
3,0.651479,0.383659,1.650307,0.790804,-0.687324,-0.521734,-1.026041,0.296763,-1.863632,-0.720298,0.812705,-0.574410,0.600495,-0.691938,0.580336,-0.790461,-1.006451,0.536754,140000
4,1.374795,1.299326,1.650307,1.698485,0.199680,-0.045611,0.789741,1.527656,0.951632,0.733308,0.896086,1.364570,0.600495,0.945153,0.580336,0.401068,0.490974,2.578465,250000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1455,-0.071836,0.250402,0.311725,-0.060731,-0.238122,-0.542435,0.789741,0.296763,0.918511,0.733308,0.854395,-0.574410,0.600495,-0.691938,-0.773196,0.401068,0.490974,0.227578,175000
1456,-0.071836,1.061367,0.311725,0.126420,1.104925,2.355701,0.789741,0.296763,0.222975,0.151865,-0.021102,0.084843,2.152216,-0.691938,-0.773196,0.401068,-1.006451,0.156242,210000
1457,0.651479,1.569647,-1.026858,-1.033914,0.215641,0.065656,0.789741,1.527656,-1.002492,1.024029,-1.563645,-0.574410,2.152216,3.353663,0.580336,-0.790461,0.490974,0.536754,266500
1458,-0.795151,-0.832788,-1.026858,-1.090059,0.046905,-0.218982,-1.026041,-0.934130,-0.704406,0.539493,-1.188432,-0.574410,-0.951226,-0.691938,0.580336,-0.790461,-1.006451,-0.651439,142125


## Dataframes to Torch dataloaders

In [211]:
import numpy as np
y_train = np.log(df_train_pre[['SalePrice']].values)
df_train_pre.drop('SalePrice',axis=1,inplace=True)
x_train = df_train_pre.values
x_test = df_test_pre.values

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_train_pre.drop('SalePrice',axis=1,inplace=True)


In [212]:
x_train_tensor = torch.tensor(x_train).float()
y_train_tensor = torch.tensor(y_train).float()
x_test_tensor = torch.tensor(x_test).float()

In [213]:
train_dataset = data.TensorDataset(x_train_tensor,y_train_tensor)
batch_size = 32
train_dataloader = DataLoader(train_dataset,batch_size = batch_size,shuffle = True)

## Architecture

In [214]:
class NeuralNetwork(nn.Module):
    
    def __init__(self,n_input,n_hidden,n_output):
        super(NeuralNetwork,self).__init__()
        
        self.layer1 = nn.Linear(n_input,n_hidden) 
        self.relu = nn.ReLU(inplace=True)
        self.layer2 = nn.Linear(n_hidden,n_output)
    
    def forward(self,x):
        x = self.layer1(x)
        x = self.relu(x)
        x = self.layer2(x)
        return x

In [215]:
n_input = x_train_tensor.shape[1]
n_hidden = 32
n_output = 1

In [216]:
n_input,n_hidden,n_output

(18, 32, 1)

In [217]:
model = NeuralNetwork(n_input,n_hidden,n_output)
print (model)

NeuralNetwork(
  (layer1): Linear(in_features=18, out_features=32, bias=True)
  (relu): ReLU(inplace=True)
  (layer2): Linear(in_features=32, out_features=1, bias=True)
)


In [218]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device)

In [219]:
loss_fn = nn.MSELoss()
optimizer = torch.optim.SGD(model.parameters(),lr=0.01)

In [223]:
epochs = 10

for i in range(epochs):
    
    for batch,(x,y) in enumerate(train_dataloader):
        x = x.to(device)
        y = y.to(device)
        y_pred = model(x)
        loss = loss_fn(y_pred,y)
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()
    print (f'Epoch {i} with loss as {loss}')

Epoch 0 with loss as 0.07123006880283356
Epoch 1 with loss as 0.07438560575246811
Epoch 2 with loss as 0.02382153645157814
Epoch 3 with loss as 0.03919427469372749
Epoch 4 with loss as 0.05264553427696228
Epoch 5 with loss as 0.04144373536109924
Epoch 6 with loss as 0.06826800107955933
Epoch 7 with loss as 0.02474384941160679
Epoch 8 with loss as 0.03085692599415779
Epoch 9 with loss as 0.04788314923644066
