In [1]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline
import warnings; warnings.simplefilter('ignore')

pd.set_option('display.max_rows',5000)
pd.set_option('display.max_columns',5000)

In [61]:
train = pd.read_csv('train.csv')
test = pd.read_csv('test.csv')


In [3]:
df = train.append(test, ignore_index=True)

# List of variables that contain year information
year_feature = [feature for feature in df if 'Yr' in feature or 'Year' in feature]
print(year_feature)

continous_feature = [feature for feature in df if len(df[feature].unique()) > 80 and feature not in year_feature + ['Id']]
print('Number of train continous feature : ',len(continous_feature))  
print(continous_feature)

train_continous_feature = df[continous_feature]

train_categorical_features = df.select_dtypes(exclude=[np.number])

train_year_feature = df[year_feature]

df_numerical_features = df.select_dtypes(include=[np.number])
# Numerical variables are usually of 2 type
# 1. Continous variable and Discrete Variables

discrete_feature = [feature for feature in df_numerical_features if len(df_numerical_features[feature].unique()) < 80 
                                                                     and feature not in year_feature + ['Id']]

print('Number of discrete features : ',len(discrete_feature))  
print(discrete_feature)

train_discrete_feature = df[discrete_feature]



['YearBuilt', 'YearRemodAdd', 'GarageYrBlt', 'YrSold']
Number of train continous feature :  16
['LotFrontage', 'LotArea', 'MasVnrArea', 'BsmtFinSF1', 'BsmtFinSF2', 'BsmtUnfSF', 'TotalBsmtSF', '1stFlrSF', '2ndFlrSF', 'GrLivArea', 'GarageArea', 'WoodDeckSF', 'OpenPorchSF', 'EnclosedPorch', 'ScreenPorch', 'SalePrice']
Number of discrete features :  17
['MSSubClass', 'OverallQual', 'OverallCond', 'LowQualFinSF', 'BsmtFullBath', 'BsmtHalfBath', 'FullBath', 'HalfBath', 'BedroomAbvGr', 'KitchenAbvGr', 'TotRmsAbvGrd', 'Fireplaces', 'GarageCars', '3SsnPorch', 'PoolArea', 'MiscVal', 'MoSold']


In [4]:
train_continous_feature_nan = train_continous_feature.isnull().sum()
train_continous_feature_nan=train_continous_feature_nan[train_continous_feature_nan>0]

train_discrete_feature_nan = train_discrete_feature.isnull().sum()
train_discrete_feature_nan=train_discrete_feature_nan[train_discrete_feature_nan>0]

train_categorical_features_nan = train_categorical_features.isnull().sum()
train_categorical_features_nan=train_categorical_features_nan[train_categorical_features_nan>0]

train_year_feature_nan = train_year_feature.isnull().sum()
train_year_feature_nan=train_year_feature_nan[train_year_feature_nan>0]

print('train_continous_feature_nan:',
      train_continous_feature_nan.sort_values(ascending = False))

print('train_discrete_feature_nan:',
      train_discrete_feature_nan.sort_values(ascending = False))

print('train_categorical_features_nan:',
      train_categorical_features_nan.sort_values(ascending = False))

print('train_year_feature_nan:',
      train_year_feature_nan.sort_values(ascending = False))

train_continous_feature_nan: SalePrice      1459
LotFrontage     486
MasVnrArea       23
GarageArea        1
TotalBsmtSF       1
BsmtUnfSF         1
BsmtFinSF2        1
BsmtFinSF1        1
dtype: int64
train_discrete_feature_nan: BsmtHalfBath    2
BsmtFullBath    2
GarageCars      1
dtype: int64
train_categorical_features_nan: PoolQC          2909
MiscFeature     2814
Alley           2721
Fence           2348
FireplaceQu     1420
GarageCond       159
GarageQual       159
GarageFinish     159
GarageType       157
BsmtExposure      82
BsmtCond          82
BsmtQual          81
BsmtFinType2      80
BsmtFinType1      79
MasVnrType        24
MSZoning           4
Functional         2
Utilities          2
Exterior1st        1
SaleType           1
Exterior2nd        1
KitchenQual        1
Electrical         1
dtype: int64
train_year_feature_nan: GarageYrBlt    159
dtype: int64


In [5]:
#fill train_continous_feature NAN values with mean
train_continous_feature['LotFrontage']=train_continous_feature['LotFrontage'].fillna(train_continous_feature['LotFrontage'].mean())
train_continous_feature['MasVnrArea']=train_continous_feature['MasVnrArea'].fillna(train_continous_feature['MasVnrArea'].mean())

In [6]:
#categorical fetures with more NAN values
train_categorical_features.drop(['PoolQC','MiscFeature','Alley','Fence','FireplaceQu'],axis=1,inplace=True)

In [7]:
train_categorical_features['GarageCond']=train_categorical_features['GarageCond'].fillna(train_categorical_features['GarageCond'].mode()[0])
train_categorical_features['GarageQual']=train_categorical_features['GarageQual'].fillna(train_categorical_features['GarageQual'].mode()[0])
train_categorical_features['GarageFinish']=train_categorical_features['GarageFinish'].fillna(train_categorical_features['GarageFinish'].mode()[0])
train_categorical_features['GarageType']=train_categorical_features['GarageType'].fillna(train_categorical_features['GarageType'].mode()[0])
train_categorical_features['BsmtFinType2']=train_categorical_features['BsmtFinType2'].fillna(train_categorical_features['BsmtFinType2'].mode()[0])
train_categorical_features['BsmtExposure']=train_categorical_features['BsmtExposure'].fillna(train_categorical_features['BsmtExposure'].mode()[0])
train_categorical_features['BsmtFinType1']=train_categorical_features['BsmtFinType1'].fillna(train_categorical_features['BsmtFinType1'].mode()[0])
train_categorical_features['BsmtCond']=train_categorical_features['BsmtCond'].fillna(train_categorical_features['BsmtCond'].mode()[0])
train_categorical_features['BsmtQual']=train_categorical_features['BsmtQual'].fillna(train_categorical_features['BsmtQual'].mode()[0])
train_categorical_features['MasVnrType']=train_categorical_features['MasVnrType'].fillna(train_categorical_features['MasVnrType'].mode()[0])
train_categorical_features['Electrical']=train_categorical_features['Electrical'].fillna(train_categorical_features['Electrical'].mode()[0])

In [8]:
#by using manual check of year data
train_year_feature['GarageYrBlt']=train_year_feature['GarageYrBlt'].fillna(1980)

In [9]:
train_continous_feature_nan = train_continous_feature.isnull().sum()
train_continous_feature_nan=train_continous_feature_nan[train_continous_feature_nan>0]

train_discrete_feature_nan = train_discrete_feature.isnull().sum()
train_discrete_feature_nan=train_discrete_feature_nan[train_discrete_feature_nan>0]

train_categorical_features_nan = train_categorical_features.isnull().sum()
train_categorical_features_nan=train_categorical_features_nan[train_categorical_features_nan>0]

train_year_feature_nan = train_year_feature.isnull().sum()
train_year_feature_nan=train_year_feature_nan[train_year_feature_nan>0]

print('train_continous_feature_nan:',
      train_continous_feature_nan.sort_values(ascending = False))

print('train_discrete_feature_nan:',
      train_discrete_feature_nan.sort_values(ascending = False))

print('train_categorical_features_nan:',
      train_categorical_features_nan.sort_values(ascending = False))

print('train_year_feature_nan:',
      train_year_feature_nan.sort_values(ascending = False))


train_continous_feature_nan: SalePrice      1459
GarageArea        1
TotalBsmtSF       1
BsmtUnfSF         1
BsmtFinSF2        1
BsmtFinSF1        1
dtype: int64
train_discrete_feature_nan: BsmtHalfBath    2
BsmtFullBath    2
GarageCars      1
dtype: int64
train_categorical_features_nan: MSZoning       4
Functional     2
Utilities      2
SaleType       1
KitchenQual    1
Exterior2nd    1
Exterior1st    1
dtype: int64
train_year_feature_nan: Series([], dtype: int64)


In [10]:
# Temporal Variables (Date Time Variables)
# Basically we are capturing the difference of years here

for feature in ['YearBuilt','YearRemodAdd','GarageYrBlt']:
       
    train_year_feature[feature] = train_year_feature['YrSold'] - train_year_feature[feature]

In [11]:
categorical_features = [feature for feature in train_categorical_features.columns if train_categorical_features[feature].dtype == 'O']
categorical_features

['MSZoning',
 'Street',
 'LotShape',
 'LandContour',
 'Utilities',
 'LotConfig',
 'LandSlope',
 'Neighborhood',
 'Condition1',
 'Condition2',
 'BldgType',
 'HouseStyle',
 'RoofStyle',
 'RoofMatl',
 'Exterior1st',
 'Exterior2nd',
 'MasVnrType',
 'ExterQual',
 'ExterCond',
 'Foundation',
 'BsmtQual',
 'BsmtCond',
 'BsmtExposure',
 'BsmtFinType1',
 'BsmtFinType2',
 'Heating',
 'HeatingQC',
 'CentralAir',
 'Electrical',
 'KitchenQual',
 'Functional',
 'GarageType',
 'GarageFinish',
 'GarageQual',
 'GarageCond',
 'PavedDrive',
 'SaleType',
 'SaleCondition']

In [12]:
train_categorical_features1 = pd.concat([train_categorical_features,df[['SalePrice']]], axis=1)

In [13]:
for feature in categorical_features:
    temp = train_categorical_features1.groupby(feature)['SalePrice'].count()/len(train_categorical_features1)
    train_categorical_features2 = temp[temp > 0.01].index
    train_categorical_features1[feature] = np.where(train_categorical_features1[feature].isin(train_categorical_features2), train_categorical_features1[feature], 'Rare_Var')

In [14]:
# Let's map the categories to some specific values 
for feature in categorical_features:
    labels_ordered = train_categorical_features1.groupby([feature])['SalePrice'].mean().sort_values().index
    labels_ordered = {k:i for i,k in enumerate(labels_ordered)}
    train_categorical_features1[feature] = train_categorical_features1[feature].map(labels_ordered)

In [18]:
train_categorical_features1.drop(['SalePrice'],axis=1,inplace=True)

In [19]:
final_df = pd.concat([train_year_feature,train_categorical_features1,train_categorical_features1,train_continous_feature], axis=1)
final_df.head()

Unnamed: 0,YearBuilt,YearRemodAdd,GarageYrBlt,YrSold,MSZoning,Street,LotShape,LandContour,Utilities,LotConfig,LandSlope,Neighborhood,Condition1,Condition2,BldgType,HouseStyle,RoofStyle,RoofMatl,Exterior1st,Exterior2nd,MasVnrType,ExterQual,ExterCond,Foundation,BsmtQual,BsmtCond,BsmtExposure,BsmtFinType1,BsmtFinType2,Heating,HeatingQC,CentralAir,Electrical,KitchenQual,Functional,GarageType,GarageFinish,GarageQual,GarageCond,PavedDrive,SaleType,SaleCondition,MSZoning.1,Street.1,LotShape.1,LandContour.1,Utilities.1,LotConfig.1,LandSlope.1,Neighborhood.1,Condition1.1,Condition2.1,BldgType.1,HouseStyle.1,RoofStyle.1,RoofMatl.1,Exterior1st.1,Exterior2nd.1,MasVnrType.1,ExterQual.1,ExterCond.1,Foundation.1,BsmtQual.1,BsmtCond.1,BsmtExposure.1,BsmtFinType1.1,BsmtFinType2.1,Heating.1,HeatingQC.1,CentralAir.1,Electrical.1,KitchenQual.1,Functional.1,GarageType.1,GarageFinish.1,GarageQual.1,GarageCond.1,PavedDrive.1,SaleType.1,SaleCondition.1,LotFrontage,LotArea,MasVnrArea,BsmtFinSF1,BsmtFinSF2,BsmtUnfSF,TotalBsmtSF,1stFlrSF,2ndFlrSF,GrLivArea,GarageArea,WoodDeckSF,OpenPorchSF,EnclosedPorch,ScreenPorch,SalePrice
0,5,5,5.0,2008,2,1,0,1,1,0,0,11,2,1,4,5,0,0,6,6,2,2,2,3,2,2,0,5,3,1,4,1,2,2,3,2,1,1,2,2,2,2,2,1,0,1,1,0,0,11,2,1,4,5,0,0,6,6,2,2,2,3,2,2,0,5,3,1,4,1,2,2,3,2,1,1,2,2,2,2,65.0,8450,196.0,706.0,0.0,150.0,856.0,856,854,1710,548.0,0,61,0,0,208500.0
1,31,31,31.0,2007,2,1,0,1,1,1,0,8,1,1,4,4,0,0,1,1,1,1,2,2,2,2,3,3,3,1,4,1,2,1,3,2,1,1,2,2,2,2,2,1,0,1,1,1,0,8,1,1,4,4,0,0,1,1,1,1,2,2,2,2,3,3,3,1,4,1,2,1,3,2,1,1,2,2,2,2,80.0,9600,0.0,978.0,0.0,284.0,1262.0,1262,0,1262,460.0,298,0,0,0,181500.0
2,7,6,7.0,2008,2,1,1,1,1,0,0,11,2,1,4,5,0,0,6,6,2,2,2,3,2,2,1,5,3,1,4,1,2,2,3,2,1,1,2,2,2,2,2,1,1,1,1,0,0,11,2,1,4,5,0,0,6,6,2,2,2,3,2,2,1,5,3,1,4,1,2,2,3,2,1,1,2,2,2,2,68.0,11250,162.0,486.0,0.0,434.0,920.0,920,866,1786,608.0,0,42,0,0,223500.0
3,91,36,8.0,2006,2,1,1,1,1,2,0,12,2,1,4,5,0,0,2,2,1,1,2,1,1,3,0,3,3,1,3,1,2,2,3,0,0,1,2,2,2,0,2,1,1,1,1,2,0,12,2,1,4,5,0,0,2,2,1,1,2,1,1,3,0,3,3,1,3,1,2,2,3,0,0,1,2,2,2,0,60.0,9550,0.0,216.0,0.0,540.0,756.0,961,756,1717,642.0,0,35,272,0,140000.0
4,8,8,8.0,2008,2,1,1,1,1,1,0,16,2,1,4,5,0,0,6,6,2,2,2,3,2,2,2,5,3,1,4,1,2,2,3,2,1,1,2,2,2,2,2,1,1,1,1,1,0,16,2,1,4,5,0,0,6,6,2,2,2,3,2,2,2,5,3,1,4,1,2,2,3,2,1,1,2,2,2,2,84.0,14260,350.0,655.0,0.0,490.0,1145.0,1145,1053,2198,836.0,192,84,0,0,250000.0


In [20]:
final_df.shape

(2919, 96)

In [21]:
df_Train=final_df.iloc[:1460,:]
df_Test=final_df.iloc[1460:,:]

In [23]:
df_Train.tail()

Unnamed: 0,YearBuilt,YearRemodAdd,GarageYrBlt,YrSold,MSZoning,Street,LotShape,LandContour,Utilities,LotConfig,LandSlope,Neighborhood,Condition1,Condition2,BldgType,HouseStyle,RoofStyle,RoofMatl,Exterior1st,Exterior2nd,MasVnrType,ExterQual,ExterCond,Foundation,BsmtQual,BsmtCond,BsmtExposure,BsmtFinType1,BsmtFinType2,Heating,HeatingQC,CentralAir,Electrical,KitchenQual,Functional,GarageType,GarageFinish,GarageQual,GarageCond,PavedDrive,SaleType,SaleCondition,MSZoning.1,Street.1,LotShape.1,LandContour.1,Utilities.1,LotConfig.1,LandSlope.1,Neighborhood.1,Condition1.1,Condition2.1,BldgType.1,HouseStyle.1,RoofStyle.1,RoofMatl.1,Exterior1st.1,Exterior2nd.1,MasVnrType.1,ExterQual.1,ExterCond.1,Foundation.1,BsmtQual.1,BsmtCond.1,BsmtExposure.1,BsmtFinType1.1,BsmtFinType2.1,Heating.1,HeatingQC.1,CentralAir.1,Electrical.1,KitchenQual.1,Functional.1,GarageType.1,GarageFinish.1,GarageQual.1,GarageCond.1,PavedDrive.1,SaleType.1,SaleCondition.1,LotFrontage,LotArea,MasVnrArea,BsmtFinSF1,BsmtFinSF2,BsmtUnfSF,TotalBsmtSF,1stFlrSF,2ndFlrSF,GrLivArea,GarageArea,WoodDeckSF,OpenPorchSF,EnclosedPorch,ScreenPorch,SalePrice
1455,8,7,8.0,2007,2,1,0,1,1,0,0,10,2,1,4,5,0,0,6,6,1,1,2,3,2,2,0,4,3,1,4,1,2,1,3,2,1,1,2,2,2,2,2,1,0,1,1,0,0,10,2,1,4,5,0,0,6,6,1,1,2,3,2,2,0,4,3,1,4,1,2,1,3,2,1,1,2,2,2,2,62.0,7917,0.0,0.0,0.0,953.0,953.0,953,694,1647,460.0,0,40,0,0,175000.0
1456,32,22,32.0,2010,2,1,0,1,1,0,0,9,2,1,4,4,0,0,4,5,3,1,2,2,2,2,0,3,2,1,2,1,2,1,1,2,0,1,2,2,2,2,2,1,0,1,1,0,0,9,2,1,4,4,0,0,4,5,3,1,2,2,2,2,0,3,2,1,2,1,2,1,1,2,0,1,2,2,2,2,85.0,13175,119.0,790.0,163.0,589.0,1542.0,2073,0,2073,500.0,349,0,0,0,210000.0
1457,69,4,69.0,2010,2,1,0,1,1,0,0,12,2,1,4,5,0,0,7,7,1,3,1,0,1,3,0,5,3,1,4,1,2,2,3,2,1,1,2,2,2,2,2,1,0,1,1,0,0,12,2,1,4,5,0,0,7,7,1,3,1,0,1,3,0,5,3,1,4,1,2,2,3,2,1,1,2,2,2,2,66.0,9042,0.0,275.0,0.0,877.0,1152.0,1188,1152,2340,252.0,0,60,0,0,266500.0
1458,60,14,60.0,2010,2,1,0,1,1,0,0,5,2,1,4,4,2,0,1,1,1,1,2,2,1,2,1,5,2,1,3,1,1,2,3,2,0,1,2,2,2,2,2,1,0,1,1,0,0,5,2,1,4,4,2,0,1,1,1,1,2,2,1,2,1,5,2,1,3,1,1,2,3,2,0,1,2,2,2,2,68.0,9717,0.0,49.0,1029.0,0.0,1078.0,1078,0,1078,240.0,366,0,112,0,142125.0
1459,43,43,43.0,2008,2,1,0,1,1,0,0,2,2,1,4,4,0,0,3,4,1,2,2,2,1,2,0,1,1,1,3,1,2,1,3,2,2,1,2,2,2,2,2,1,0,1,1,0,0,2,2,1,4,4,0,0,3,4,1,2,2,2,1,2,0,1,1,1,3,1,2,1,3,2,2,1,2,2,2,2,75.0,9937,0.0,830.0,290.0,136.0,1256.0,1256,0,1256,276.0,736,68,0,0,147500.0


In [24]:
df_Test.head()

Unnamed: 0,YearBuilt,YearRemodAdd,GarageYrBlt,YrSold,MSZoning,Street,LotShape,LandContour,Utilities,LotConfig,LandSlope,Neighborhood,Condition1,Condition2,BldgType,HouseStyle,RoofStyle,RoofMatl,Exterior1st,Exterior2nd,MasVnrType,ExterQual,ExterCond,Foundation,BsmtQual,BsmtCond,BsmtExposure,BsmtFinType1,BsmtFinType2,Heating,HeatingQC,CentralAir,Electrical,KitchenQual,Functional,GarageType,GarageFinish,GarageQual,GarageCond,PavedDrive,SaleType,SaleCondition,MSZoning.1,Street.1,LotShape.1,LandContour.1,Utilities.1,LotConfig.1,LandSlope.1,Neighborhood.1,Condition1.1,Condition2.1,BldgType.1,HouseStyle.1,RoofStyle.1,RoofMatl.1,Exterior1st.1,Exterior2nd.1,MasVnrType.1,ExterQual.1,ExterCond.1,Foundation.1,BsmtQual.1,BsmtCond.1,BsmtExposure.1,BsmtFinType1.1,BsmtFinType2.1,Heating.1,HeatingQC.1,CentralAir.1,Electrical.1,KitchenQual.1,Functional.1,GarageType.1,GarageFinish.1,GarageQual.1,GarageCond.1,PavedDrive.1,SaleType.1,SaleCondition.1,LotFrontage,LotArea,MasVnrArea,BsmtFinSF1,BsmtFinSF2,BsmtUnfSF,TotalBsmtSF,1stFlrSF,2ndFlrSF,GrLivArea,GarageArea,WoodDeckSF,OpenPorchSF,EnclosedPorch,ScreenPorch,SalePrice
1460,49,49,49.0,2010,0,1,0,1,1,0,0,5,1,1,4,4,0,0,6,6,1,1,2,2,1,2,0,0,1,1,2,1,2,1,3,2,0,1,2,2,2,2,0,1,0,1,1,0,0,5,1,1,4,4,0,0,6,6,1,1,2,2,1,2,0,0,1,1,2,1,2,1,3,2,0,1,2,2,2,2,80.0,11622,0.0,468.0,144.0,270.0,882.0,896,0,896,730.0,140,0,0,120,
1461,52,52,52.0,2010,2,1,1,1,1,2,0,5,2,1,4,4,2,0,2,0,2,1,2,2,1,2,0,3,3,1,2,1,2,2,3,2,0,1,2,2,2,2,2,1,1,1,1,2,0,5,2,1,4,4,2,0,2,0,2,1,2,2,1,2,0,3,3,1,2,1,2,2,3,2,0,1,2,2,2,2,81.0,14267,108.0,923.0,0.0,406.0,1329.0,1329,0,1329,312.0,393,36,0,0,
1462,13,12,13.0,2010,2,1,1,1,1,0,0,10,2,1,4,5,0,0,6,6,1,1,2,3,2,2,0,5,3,1,3,1,2,1,3,2,2,1,2,2,2,2,2,1,1,1,1,0,0,10,2,1,4,5,0,0,6,6,1,1,2,3,2,2,0,5,3,1,3,1,2,1,3,2,2,1,2,2,2,2,74.0,13830,0.0,791.0,0.0,137.0,928.0,928,701,1629,482.0,212,34,0,0,
1463,12,12,12.0,2010,2,1,1,1,1,0,0,10,2,1,4,5,0,0,6,6,2,1,2,3,1,2,0,5,3,1,4,1,2,2,3,2,2,1,2,2,2,2,2,1,1,1,1,0,0,10,2,1,4,5,0,0,6,6,2,1,2,3,1,2,0,5,3,1,4,1,2,2,3,2,2,1,2,2,2,2,78.0,9978,20.0,602.0,0.0,324.0,926.0,926,678,1604,470.0,360,36,0,0,
1464,18,18,18.0,2010,2,1,1,3,1,0,0,8,2,1,3,4,0,0,3,4,1,2,2,3,2,2,0,3,3,1,4,1,2,2,3,2,1,1,2,2,2,2,2,1,1,3,1,0,0,8,2,1,3,4,0,0,3,4,1,2,2,3,2,2,0,3,3,1,4,1,2,2,3,2,1,1,2,2,2,2,43.0,5005,0.0,263.0,0.0,1017.0,1280.0,1280,0,1280,506.0,0,82,0,144,


In [26]:
df_Test.drop(['SalePrice'],axis=1,inplace=True)

In [27]:
df_Train.shape,df_Test.shape

((1460, 96), (1459, 95))

In [30]:
# Use the random grid to search for best hyperparameters
# First create the base model to tune
rf = RandomForestRegressor()

In [33]:
X_train=df_Train.drop(['SalePrice'],axis=1)
y_train=df_Train['SalePrice']

In [52]:
#Fit
rf.fit(X_train, y_train)

RandomForestRegressor()

In [41]:
#missing value counts in each of these columns
Isnull = df_Test.isnull().sum()
Isnull = Isnull[Isnull>0]
Isnull.sort_values(inplace=True, ascending=False)
Isnull

GarageArea     1
TotalBsmtSF    1
BsmtUnfSF      1
BsmtFinSF2     1
BsmtFinSF1     1
dtype: int64

In [50]:
df_Test['GarageArea']=df_Test['GarageArea'].fillna(df_Test['GarageArea'].mean())
df_Test['TotalBsmtSF']=df_Test['TotalBsmtSF'].fillna(df_Test['TotalBsmtSF'].mean())
df_Test['BsmtUnfSF']=df_Test['BsmtUnfSF'].fillna(df_Test['BsmtUnfSF'].mean())
df_Test['BsmtFinSF1']=df_Test['BsmtFinSF1'].fillna(df_Test['BsmtFinSF1'].mean())
df_Test['BsmtFinSF2']=df_Test['BsmtFinSF2'].fillna(df_Test['BsmtFinSF2'].mean())


In [51]:
#missing value counts in each of these columns
Isnull = df_Test.isnull().sum()
Isnull = Isnull[Isnull>0]
Isnull.sort_values(inplace=True, ascending=False)
Isnull

Series([], dtype: int64)

In [54]:
y_pred=rf.predict(df_Test)
y_pred

array([126274.5 , 154425.  , 186114.  , ..., 145629.23, 109812.58,
       187245.26])

In [65]:
sub_df['Id']=test['Id']
sub_df.head()

0    1461
1    1462
2    1463
3    1464
4    1465
Name: Id, dtype: object

In [67]:
#Train the model
from sklearn.ensemble import RandomForestRegressor
model1 = RandomForestRegressor(n_estimators=1000)

In [68]:

#Fit
model1.fit(X_train, y_train)

RandomForestRegressor(n_estimators=1000)

In [69]:
y_pred2=model1.predict(df_Test)

In [70]:
y_pred2

array([122517.306, 154748.823, 187704.477, ..., 145482.323, 112398.285,
       187886.132])

In [72]:
sub_df=pd.read_csv('sample_submission.csv')

In [73]:
##Create Sample Submission file and Submit
pred=pd.DataFrame(y_pred2)
datasets=pd.concat([sub_df['Id'],pred],axis=1)
datasets.columns=['Id','SalePrice']
datasets.to_csv('submission_RandomForestRegressor.csv',index=False)


0.15560 - Kaggle score
