In [30]:
import numpy as np
import pandas as pd

from sklearn.feature_selection import VarianceThreshold
from sklearn import linear_model
from sklearn import model_selection
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

import xgboost as xgb
from sklearn.model_selection import KFold
from sklearn.model_selection import cross_val_score

from time import gmtime, strftime

In [12]:
datapath_2016 = "./zillow data/merged_2016 v2.csv"
datapath_2017 = "./zillow data/merged_2017 v2.csv"

test_datapath_2016 = "./zillow data/properties_2016 v2.csv"
test_datapath_2017 = "./zillow data/properties_2017 v2.csv"

In [18]:
# modify 'transactiondate' of df to keep only the month
def parse_transactiondate(df):
    # modify transactiondate col to keep only the month
    new_transactiondate = []
    for data in df['transactiondate']:
        temp = data.split("-")
        new_transactiondate.append(int(temp[1])) # keep only month

    new_transactiondate = np.array(new_transactiondate)
    new_transactiondate = new_transactiondate.reshape(new_transactiondate.shape[0], 1)

    df['transactiondate'] = new_transactiondate

# return label index, feature index list
# assumes that df label_is_first
def split_on_label(df):
    return df.columns[0], df.columns[1:]

def gen_testdata(df, new_transactiondate):
    df_copy = df.copy()
    # df_copy['transactiondate'] = new_transactiondate
    df_copy.insert(0, 'transactiondate', new_transactiondate)
    return df_copy

def get_low_var_feature(support_list):
    low_var_feature_index = []
    support_feature_index = []
    for i in range(len(support_list)):
        if support_list[i]:
            support_feature_index.append(i)
        else:
            low_var_feature_index.append(i)
    return low_var_feature_index, support_feature_index

def handle_low_var(sel, train_data, feature_list):
    temp_sel = sel.fit(train_data)
    low_var_list, support_list = get_low_var_feature(sel.get_support())
    exclude_low_var_name = []
    for i in low_var_list:
        exclude_low_var_name.append(train_data.columns.values[i])

    return low_var_list,exclude_low_var_name

def removing_missing(X_train, missing_threshold = 0.95):
    exclude_missing = []
    num_rows = X_train.shape[0]
    for col in X_train.columns:
        num_missing = X_train[col].isna().sum()
        if num_missing == 0:
            continue
        missing_fraction = num_missing/float(num_rows)
        if missing_fraction > missing_threshold:
            exclude_missing.append(col)
            
    return exclude_missing

# removing features with unique value
def removing_unique(X_train):
    exclude_unique = []
    for col in X_train.columns:
        num_unique = len(X_train[col].unique())
        if X_train[col].isna().sum()!=0 and num_unique == 1:
            exclude_unique.append(col)
            
    return exclude_unique

def print_list(li,list_name = ''):
    print('%s (%d) :' %(list_name,len(li)))
    for i in li:
        print(i)
        
def get_train_features(X_train,el1 = [], el2 = [], el3 = [],el4 = []):
    train_feature = []
    for col in X_train:
        if col not in el1 and col not in el2 and col not in el3 and col not in el4:
            train_feature.append(col)
    return train_feature

# geting categorical features
def get_cat_feature(X_train, cat_threshold):
    cat_feature_inds = []
    cat_feature = []
    for i,col in enumerate(X_train):
        num_uniques = len(X_train[col].unique())
        if num_uniques < cat_threshold and not 'sqft' in col and not 'cnt' in col \
        and not 'nbr' in col and not 'number' in col:
            cat_feature_inds.append(i)
            cat_feature.append(col)
    return cat_feature_inds,cat_feature

In [13]:
train_data2016 = pd.read_csv(datapath_2016, index_col=0)
train_data2017 = pd.read_csv(datapath_2017, index_col=0)

# --- TEMP Fixes for train_data2016 (not well formatted) ---
# fix Unamed: 0 at col index 3
fix_col = train_data2016.columns.str.contains('unnamed',case = False)
train_data2016 = train_data2016.drop(train_data2016.columns[fix_col], axis=1)
# --- END ---

parcelid16 = train_data2016['parcelid']
parcelid17 = train_data2017['parcelid']

# drop parcelid (index of properties features)
train_data2016 = train_data2016.drop('parcelid', axis=1)
train_data2017 = train_data2017.drop('parcelid', axis=1)

label16, feature16 = split_on_label(train_data2016)
label17, feature17 = split_on_label(train_data2017)

print("[2016] num of features:", len(feature16))
print("[2016] num of instances:", train_data2016.shape[0])

print("[2017] num of features:", len(feature17))
print("[2017] num of instances:", train_data2017.shape[0])

# modify 'transactiondate' to keep only the month
parse_transactiondate(train_data2016)
parse_transactiondate(train_data2017)

# training data and label for model 2016
X_train16 = train_data2016[feature16]
y_train16 = train_data2016[label16]

# training data and label for model 2017
X_train17 = train_data2017[feature17]
y_train17 = train_data2017[label17]

[2016] num of features: 58
[2016] num of instances: 90275
[2017] num of features: 58
[2017] num of instances: 77613


In [111]:
import gc
del test_data2016,test_data2017
gc.collect()

3411

In [21]:
# Removing Freatures with too many missing data
exclude_missing16 = removing_missing(X_train16)
exclude_missing17 = removing_missing(X_train17)

# Removing Features with unique values
exclude_unique16 = removing_unique(X_train16)
exclude_unique17 = removing_unique(X_train17)

# Removing Features with low variance
# it should remove features with many missing values that were filled by imputation
sel = VarianceThreshold(threshold=(.8 * (1 - .8)))

_, exclude_low_var16 = handle_low_var(sel, X_train16, feature16)
_, exclude_low_var17 = handle_low_var(sel, X_train17, feature17)

In [22]:
print_list(exclude_missing16,'exclude_missing16')
print_list(exclude_low_var16,'exclude_low_var16')
print_list(exclude_unique16,'exclude_unique16')
print_list(exclude_missing17,'exclude_missing17')
print_list(exclude_low_var17,'exclude_low_var17')
print_list(exclude_unique17,'exclude_unique17')

exclude_missing16 (0) :
exclude_low_var16 (16) :
architecturalstyletypeid
buildingclasstypeid
decktypeid
fireplacecnt
hashottuborspa
poolcnt
pooltypeid10
pooltypeid2
pooltypeid7
storytypeid
threequarterbathnbr
typeconstructiontypeid
numberofstories
fireplaceflag
assessmentyear
taxdelinquencyflag
exclude_unique16 (0) :
exclude_missing17 (0) :
exclude_low_var17 (16) :
architecturalstyletypeid
buildingclasstypeid
decktypeid
fireplacecnt
hashottuborspa
poolcnt
pooltypeid10
pooltypeid2
pooltypeid7
storytypeid
threequarterbathnbr
typeconstructiontypeid
numberofstories
fireplaceflag
assessmentyear
taxdelinquencyflag
exclude_unique17 (0) :


In [27]:
train_feature16 = get_train_features(X_train16, el1 = exclude_missing16, 
                                                el2 = exclude_low_var16,
                                                el3 = exclude_unique16)

new_X_train16 = X_train16[train_feature16]

print("\n[2016] num of features:", new_X_train16.shape[1])
print_list(train_feature16,"train_feature16")


[2016] num of features: 42
train_feature16 (42) :
transactiondate
airconditioningtypeid
basementsqft
bathroomcnt
bedroomcnt
buildingqualitytypeid
calculatedbathnbr
finishedfloor1squarefeet
calculatedfinishedsquarefeet
finishedsquarefeet12
finishedsquarefeet13
finishedsquarefeet15
finishedsquarefeet50
finishedsquarefeet6
fips
fullbathcnt
garagecarcnt
garagetotalsqft
heatingorsystemtypeid
latitude
longitude
lotsizesquarefeet
poolsizesum
propertycountylandusecode
propertylandusetypeid
propertyzoningdesc
rawcensustractandblock
regionidcity
regionidcounty
regionidneighborhood
regionidzip
roomcnt
unitcnt
yardbuildingsqft17
yardbuildingsqft26
yearbuilt
structuretaxvaluedollarcnt
taxvaluedollarcnt
landtaxvaluedollarcnt
taxamount
taxdelinquencyyear
censustractandblock


In [28]:
train_feature17 = get_train_features(X_train16, el1 = exclude_missing17, 
                                                el2 = exclude_low_var17,
                                                el3 = exclude_unique17)

new_X_train17 = X_train17[train_feature17]

print("\n[2017] num of features:", new_X_train17.shape[1])
print_list(train_feature16,"train_feature17")


[2017] num of features: 42
train_feature17 (42) :
transactiondate
airconditioningtypeid
basementsqft
bathroomcnt
bedroomcnt
buildingqualitytypeid
calculatedbathnbr
finishedfloor1squarefeet
calculatedfinishedsquarefeet
finishedsquarefeet12
finishedsquarefeet13
finishedsquarefeet15
finishedsquarefeet50
finishedsquarefeet6
fips
fullbathcnt
garagecarcnt
garagetotalsqft
heatingorsystemtypeid
latitude
longitude
lotsizesquarefeet
poolsizesum
propertycountylandusecode
propertylandusetypeid
propertyzoningdesc
rawcensustractandblock
regionidcity
regionidcounty
regionidneighborhood
regionidzip
roomcnt
unitcnt
yardbuildingsqft17
yardbuildingsqft26
yearbuilt
structuretaxvaluedollarcnt
taxvaluedollarcnt
landtaxvaluedollarcnt
taxamount
taxdelinquencyyear
censustractandblock


### Regression Model

### Linear Regression

In [107]:
# Linear Regression

def handle_linearRidge(X_train, y_train):
    lr = linear_model.LinearRegression()
    score = -1*model_selection.cross_val_score(lr, X_train, y_train, cv=10, scoring='neg_mean_absolute_error').mean()
    lr.fit(X_train,y_train)
    y_pred = lr.predict(X_train)
    
    print("CV score:{:.6f}".format(score))
    print("Training MAE: {:.6f}".format(mean_absolute_error(y_train, y_pred)))
    print("Training R^2: {:.6f}".format(r2_score(y_train, y_pred)))
    
    return lr
    

print("Linear Regression for 2016 train data")
lr16 = handle_linearRidge(X_train16, y_train16)
print("\n")

print("Linear Regression for 2017 train data")
lr17 = handle_linearRidge(X_train17, y_train17)
print("\n")

Linear Regression for 2016 train data
CV score:0.068565
Training MAE: 0.068438
Training R^2: 0.005605


Linear Regression for 2017 train data
CV score:0.071193
Training MAE: 0.070902
Training R^2: 0.006266




### Ridge Regression

In [125]:
# Ridge Regression

def handle_regrRidge(X_train, y_train):
    regrRidge = linear_model.RidgeCV( normalize=True, gcv_mode = 'auto', 
                                 scoring='neg_mean_absolute_error',store_cv_values = True)
    regrRidge.fit(X_train, y_train)
    y_pred = regrRidge.predict(X_train)
    
    #best_score = np.min(regrRidge.cv_values_)

    print("RidgeCV alpha(lambda):{:.2f}".format(regrRidge.alpha_))
    #print("best CV score:%f"%best_score)
    print("Training MAE: {:.6f}".format(mean_absolute_error(y_train, y_pred)))
    print("Training R^2: {:.6f}".format(r2_score(y_train, y_pred)))
    
    return regrRidge
    

print("Ridge Regression for 2016 train data")
regrRidge16 = handle_regrRidge(X_train16, y_train16)
print("\n")

print("Ridge Regression for 2017 train data")
regrRidge17 = handle_regrRidge(X_train17, y_train17)
print("\n")

Ridge Regression for 2016 train data
RidgeCV alpha(lambda):1.00
Training MAE: 0.068308
Training R^2: 0.002671


Ridge Regression for 2017 train data
RidgeCV alpha(lambda):1.00
Training MAE: 0.070545
Training R^2: 0.004104




### Lasso Regression

In [90]:
# LASSO

def handle_Lasso(X_train, y_train):
    regrLasso = linear_model.LassoCV(eps=0.001, n_alphas=100, normalize=True, cv=10, max_iter=50000)
    regrLasso.fit(X_train, y_train)
    y_pred = regrLasso.predict(X_train)
    
    print("LassoCV alpha(lambda): {:.6f}".format(regrLasso.alpha_))
    print("MAE: {:.6f}".format(mean_absolute_error(y_train, y_pred)))
    print("R^2: {:.6f}".format(r2_score(y_train, y_pred)))
    
    return regrLasso

# return feature given the coef values from model
def get_reduced_features(coef,features):
    sel_features = []
    for i in range(len(coef)):
        if coef[i] != 0:
            sel_features.append(features[i])
    return sel_features

print("Lasso Regression for 2016 train data")
regrLasso16 = handle_Lasso(new_X_train16, y_train16)
print("\n")

print("Lasso Regression for 2017 train data")
regrLasso17 = handle_Lasso(new_X_train17, y_train17)
print("\n")

# selected features from Lasso
sel_features16 = get_reduced_features(regrLasso16.coef_,X_train16.columns)
sel_features17 = get_reduced_features(regrLasso17.coef_,X_train17.columns)

print("[2016] Selected Features[{}]:".format(len(sel_features16)))
for i in sel_features16:
    print(i)
print("\n")

print("[2017] Selected Features[{}]:".format(len(sel_features17)))
for i in sel_features17:
    print(i)
print("\n")

Lasso Regression for 2016 train data
LassoCV alpha(lambda): 0.000001
MAE: 0.068305
R^2: 0.004544


Lasso Regression for 2017 train data
LassoCV alpha(lambda): 0.000002
MAE: 0.070681
R^2: 0.005414


[2016] Selected Features[23]:
transactiondate
architecturalstyletypeid
calculatedbathnbr
decktypeid
finishedfloor1squarefeet
calculatedfinishedsquarefeet
finishedsquarefeet13
fips
fireplacecnt
fullbathcnt
garagetotalsqft
heatingorsystemtypeid
latitude
longitude
pooltypeid2
propertycountylandusecode
propertylandusetypeid
rawcensustractandblock
regionidcity
regionidcounty
regionidzip
roomcnt
storytypeid


[2017] Selected Features[24]:
transactiondate
airconditioningtypeid
architecturalstyletypeid
bathroomcnt
bedroomcnt
decktypeid
calculatedfinishedsquarefeet
finishedsquarefeet12
finishedsquarefeet13
fips
fireplacecnt
fullbathcnt
garagecarcnt
garagetotalsqft
latitude
longitude
pooltypeid10
pooltypeid2
pooltypeid7
propertylandusetypeid
regionidcity
regionidzip
roomcnt
storytypeid




### ElasticNet

In [96]:
def handle_Elastic(X_train, y_train):
    regrElastic = linear_model.ElasticNetCV(l1_ratio = [0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9],cv=10, random_state=0,
                                            max_iter = 50000,normalize = True)
    regrElastic.fit(X_train, y_train)
    y_pred = regrElastic.predict(X_train)
    
    print("ElasticCV alpha(lambda): {:.6f}".format(regrElastic.alpha_))
    print("ElasticCV l1_ratio: {:.6f}".format(regrElastic.l1_ratio_))
    print("MAE: {:.6f}".format(mean_absolute_error(y_train, y_pred)))
    print("R^2: {:.6f}".format(r2_score(y_train, y_pred)))
    
    return regrElastic

print("Elastic Regression for 2016 train data")
regrElastic16 = handle_Elastic(new_X_train16, y_train16)
print("\n")

print("Elastic Regression for 2017 train data")
regrElastic17 = handle_Elastic(new_X_train17, y_train17)
print("\n")

# return feature given the coef values from model
def get_reduced_features(coef, features):
    sel_features = []
    for i in range(len(coef)):
        if coef[i] != 0:
            sel_features.append(features[i])
    return sel_features

# selected features from Lasso
sel_features16 = get_reduced_features(regrElastic16.coef_, X_train16.columns)
sel_features17 = get_reduced_features(regrElastic17.coef_, X_train17.columns)

print("[2016] Selected Features[{}]:".format(len(sel_features16)))
for i in sel_features16:
    print(i)
print("\n")

print("[2017] Selected Features[{}]:".format(len(sel_features17)))
for i in sel_features17:
    print(i)
print("\n")

Elastic Regression for 2016 train data
ElasticCV alpha(lambda): 0.000001
ElasticCV l1_ratio: 0.900000
MAE: 0.068322
R^2: 0.004813


Elastic Regression for 2017 train data
ElasticCV alpha(lambda): 0.000002
ElasticCV l1_ratio: 0.900000
MAE: 0.070675
R^2: 0.005404


[2016] Selected Features[32]:
transactiondate
airconditioningtypeid
architecturalstyletypeid
basementsqft
bathroomcnt
bedroomcnt
calculatedbathnbr
decktypeid
finishedfloor1squarefeet
calculatedfinishedsquarefeet
finishedsquarefeet13
finishedsquarefeet6
fips
fireplacecnt
fullbathcnt
garagecarcnt
garagetotalsqft
heatingorsystemtypeid
latitude
longitude
poolcnt
pooltypeid10
pooltypeid2
propertycountylandusecode
propertylandusetypeid
propertyzoningdesc
rawcensustractandblock
regionidcity
regionidcounty
regionidzip
roomcnt
storytypeid


[2017] Selected Features[24]:
transactiondate
airconditioningtypeid
architecturalstyletypeid
bathroomcnt
bedroomcnt
decktypeid
calculatedfinishedsquarefeet
finishedsquarefeet12
finishedsquarefeet13


### Eliminate Outlier Using RANSAC Regression

In [126]:
from sklearn.model_selection import GridSearchCV
import time
def handle_ransac_GridCV(X, y, param_dict,estimator):
    
    # cross-validate on alpha (regularization strenght) from alphalist
    
    ransac = linear_model.RANSACRegressor(estimator, random_state= 21)
       
    # cross-validate on random search CV
    grid_search = GridSearchCV(ransac,param_grid=param_dict,
                               cv=5, scoring='neg_mean_absolute_error')
    
    start = time.time()
    grid_search.fit(X, y)
    print("GridSearchCV took %.2f seconds"
          " parameter settings." % (time.time() - start))

    return grid_search

param_dict = {
    'min_samples':[0.2,0.5,0.8],
    'random_state':[42]
}
grid_result = handle_ransac_GridCV(X_train16, y_train16, param_dict,regrRidge16)
print(grid_result.grid_scores_)

GridSearchCV took 698.33 seconds parameter settings.
[mean: -0.06818, std: 0.00322, params: {'min_samples': 0.2, 'random_state': 42}, mean: -0.06814, std: 0.00309, params: {'min_samples': 0.5, 'random_state': 42}, mean: -0.06814, std: 0.00306, params: {'min_samples': 0.8, 'random_state': 42}]


In [148]:
import warnings
warnings.filterwarnings('ignore')
def handle_Ransac(X_train, y_train,estimator):
    ransac = linear_model.RANSACRegressor(estimator, max_trials= 2,min_samples = 0.8, random_state= 21)
    ransac.fit(X_train, y_train)
    y_pred = ransac.predict(X_train)
    
    print("MAE: {:.6f}".format(mean_absolute_error(y_train, y_pred)))
    print("R^2: {:.6f}".format(r2_score(y_train, y_pred)))
    
    return ransac


print("ransac Regression based on Lasso for 2016 train data")
ransac16 = handle_Ransac(new_X_train16, y_train16, regrLasso16)
print("\n")

print("ransac Regression based on Lasso for 2017 train data")
ransac17 = handle_Ransac(new_X_train17, y_train17,regrLasso17)
print("\n")

ransac Regression based on Lasso for 2016 train data


KeyboardInterrupt: 

In [147]:
inlier = ransac17.inlier_mask_
X_train17[inlier].shape

(37038, 58)

### Training Data

In [76]:
# read in test
test_data2016 = pd.read_csv(test_datapath_2016, index_col=0)
test_data2017 = pd.read_csv(test_datapath_2017, index_col=0)

print("[2016] num of instances: ", test_data2016.shape[0])
print("[2017] num of instances: ", test_data2017.shape[0])

# save parcelid for merge 
test_parcelid16 = test_data2016['parcelid']
test_parcelid17 = test_data2017['parcelid']

# drop parcelid col
test_data2016 = test_data2016.drop('parcelid', axis=1)
test_data2017 = test_data2017.drop('parcelid', axis=1)

# generate transaction date
test10_16 = np.repeat(10, test_data2016.shape[0])
test11_16 = np.repeat(11, test_data2016.shape[0])
test12_16 = np.repeat(12, test_data2016.shape[0])

test10_17 = np.repeat(10, test_data2017.shape[0])
test11_17 = np.repeat(11, test_data2017.shape[0])
test12_17 = np.repeat(12, test_data2017.shape[0])

# get new test data with transaction date
X_test10_16 = gen_testdata(test_data2016, test10_16)
X_test11_16 = gen_testdata(test_data2016, test11_16)
X_test12_16 = gen_testdata(test_data2016, test12_16)

X_test10_17 = gen_testdata(test_data2017, test10_17)
X_test11_17 = gen_testdata(test_data2017, test11_17)
X_test12_17 = gen_testdata(test_data2017, test12_17)

[2016] num of instances:  2985217
[2017] num of instances:  2985217


In [103]:
new_X_test10_16 = X_test10_16[train_feature16]
new_X_test11_16 = X_test11_16[train_feature16]
new_X_test12_16 = X_test12_16[train_feature16]
new_X_test10_17 = X_test10_17[train_feature17]
new_X_test11_17 = X_test11_17[train_feature17]
new_X_test12_17 = X_test12_17[train_feature17]

In [129]:
# predict on test data
estimator16 = ransac16
estimator17 = ransac17


y_pred10_16 = estimator16.predict(X_test10_16)
y_pred11_16 = estimator16.predict(X_test11_16)
y_pred12_16 = estimator16.predict(X_test12_16)

y_pred10_17 = estimator17.predict(X_test10_17)
y_pred11_17 = estimator17.predict(X_test11_17)
y_pred12_17 = estimator17.predict(X_test12_17)

In [130]:
test_dict_16 = {'Parcelid': test_parcelid16, '201610': y_pred10_16, '201611': y_pred11_16, '201612': y_pred12_16}
test_dict_17 = {'Parcelid': test_parcelid17, '201710': y_pred10_17, '201711': y_pred11_17, '201712': y_pred12_17}

df_test_16 = pd.DataFrame(data=test_dict_16)
df_test_17 = pd.DataFrame(data=test_dict_17)

df_merged = df_test_16.merge(df_test_17, left_on='Parcelid', right_on='Parcelid', how='outer')
print(df_merged)

# handle submission file
submitfile = "./zillow data/sample_submission.csv"

submit_df = pd.read_csv(submitfile)
print(submit_df.shape[0], submit_df.shape[1])

          Parcelid    201610    201611    201612    201710    201711    201712
0         10754147  0.009440  0.009755  0.010070  0.018298  0.018513  0.018729
1         10759547  0.006874  0.007189  0.007504 -0.055544 -0.055329 -0.055113
2         10843547  0.371188  0.371503  0.371817 -0.030462 -0.030246 -0.030031
3         10859147 -0.043541 -0.043226 -0.042911 -0.077121 -0.076906 -0.076691
4         10879947 -0.064411 -0.064096 -0.063781 -0.079790 -0.079575 -0.079359
5         10898347 -0.058624 -0.058310 -0.057995 -0.073544 -0.073329 -0.073114
6         10933547  0.012210  0.012525  0.012840  0.011037  0.011252  0.011468
7         10940747 -0.052904 -0.052589 -0.052274 -0.076614 -0.076399 -0.076183
8         10954547  0.009852  0.010167  0.010482  0.021300  0.021516  0.021731
9         10976347 -0.054011 -0.053696 -0.053381 -0.075587 -0.075372 -0.075157
10        11073947 -0.055288 -0.054974 -0.054659 -0.074072 -0.073856 -0.073641
11        11114347  0.006660  0.006975  0.007290  0.

In [131]:
df_merged.to_csv("./zillow data/reg_submission4.csv", index=False)

In [149]:
from sklearn.externals import joblib

joblib.dump(ransac16, "./model/ransac16 58.m")
joblib.dump(ransac17, "./model/ransac17 58.m")
joblib.dump(lr16, "./model/lr16 58.m")
joblib.dump(lr17, "./model/lr17 58.m")
joblib.dump(regrLasso16, "./model/Lasso16 42.m")
joblib.dump(regrLasso17, "./model/Lasso17 42.m")
joblib.dump(regrRidge16, "./model/Ridge16 42.m")
joblib.dump(regrRidge17, "./model/Ridge17 42.m")
joblib.dump(regrElastic16, "./model/Lasso16 42.m")
joblib.dump(regrElastic17, "./model/Lasso17 42.m")

['./model/Lasso17 42.m']