# COMPREHENSIVE GUIDE TO HYPERPARAMETER TUNING
[Vikum Wijesinghe](https://www.linkedin.com/in/vikumwijesinghe/) - September 2019

Other Kernels: https://www.kaggle.com/vikumsw/kernels

---

ICE Breaker

# Table Of Contents

1. Library import & Data Loading
1. [Quick look at the data: head and tail](#view)
1. [Univariate Analysis](#UnivariateAnalysis)
    1. [Analysis of a numerical feature](#AnalysisofaNumericalFeature)
    1. [Analysis of a categorical feature](#Analysisofacategoricalfeature)

In [1]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

import os
import sys

In [2]:
# Load the train and test CSV files, using the 'Id' column as the row index.
train = pd.read_csv(
    '../input/house-prices-advanced-regression-techniques/train.csv',
    index_col='Id',
)
test = pd.read_csv(
    '../input/house-prices-advanced-regression-techniques/test.csv',
    index_col='Id',
)

In [3]:
# Display the (rows, columns) dimensions of the training frame.
train.shape

(1460, 80)

In [4]:
# Preview the first rows of the training frame.
train.head()

Unnamed: 0_level_0,MSSubClass,MSZoning,LotFrontage,LotArea,Street,Alley,LotShape,LandContour,Utilities,LotConfig,...,PoolArea,PoolQC,Fence,MiscFeature,MiscVal,MoSold,YrSold,SaleType,SaleCondition,SalePrice
Id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,60,RL,65.0,8450,Pave,,Reg,Lvl,AllPub,Inside,...,0,,,,0,2,2008,WD,Normal,208500
2,20,RL,80.0,9600,Pave,,Reg,Lvl,AllPub,FR2,...,0,,,,0,5,2007,WD,Normal,181500
3,60,RL,68.0,11250,Pave,,IR1,Lvl,AllPub,Inside,...,0,,,,0,9,2008,WD,Normal,223500
4,70,RL,60.0,9550,Pave,,IR1,Lvl,AllPub,Corner,...,0,,,,0,2,2006,WD,Abnorml,140000
5,60,RL,84.0,14260,Pave,,IR1,Lvl,AllPub,FR2,...,0,,,,0,12,2008,WD,Normal,250000


## Quick Data Cleaning & Missing-Value Handling

In [5]:
def dropTargetMissingRows(df, target):
    """Remove, in place, every row of ``df`` whose ``target`` value is missing.

    The frame passed in is modified directly (``inplace=True``); nothing is
    returned.
    """
    required_columns = [target]
    df.dropna(subset=required_columns, axis=0, inplace=True)

def handleMissingValues(df):
    """Fill missing values in ``df`` in place.

    * Object-dtype columns are filled with the placeholder string 'UNKOWN'.
    * NumPy integer and floating-point columns of any width (not only
      int64/float64) are filled with the column median.

    Columns of any other dtype are left untouched. Nothing is returned; the
    frame passed in is modified directly.
    """
    fill_values = {}
    for cname in df.columns:
        column = df[cname]
        dtype = column.dtype
        if dtype == "object":
            # The placeholder spelling is kept as-is: it becomes part of the
            # one-hot column names generated later.
            fill_values[cname] = 'UNKOWN'
        elif isinstance(dtype, np.dtype) and dtype.kind in "iuf":
            # kind 'i'/'u'/'f' covers signed/unsigned ints and floats of every
            # width, so e.g. float32 columns are no longer skipped.
            fill_values[cname] = column.median()

    df.fillna(value=fill_values, inplace=True)


def performOneHotEncoding(df, columnsToEncode):
    """Return a new frame in which ``columnsToEncode`` are one-hot encoded.

    Thin wrapper around ``pd.get_dummies``; the result is returned as a new
    object.
    """
    encoded = pd.get_dummies(df, columns=columnsToEncode)
    return encoded


def getObjectColumnsList(df):
    """Return the names of all columns in ``df`` whose dtype is ``object``.

    Names are returned in the frame's column order.
    """
    object_columns = []
    for cname in df.columns:
        if df[cname].dtype == "object":
            object_columns.append(cname)
    return object_columns


def encodeCatFeatures(df, catColsToEncode):
    """Return ``df`` with the columns in ``catColsToEncode`` one-hot encoded.

    Delegates to ``performOneHotEncoding``.
    """
    return performOneHotEncoding(df, catColsToEncode)
    
def quickPreprocessData(df, target, catColsToEncode):
    """Run the quick cleaning pipeline and return the encoded frame.

    Steps, in order:
      1. drop rows whose ``target`` value is missing,
      2. fill the remaining missing values,
      3. one-hot encode ``catColsToEncode``.

    Steps 1 and 2 modify ``df`` in place, so the caller's frame is changed;
    the returned frame is the output of the encoding step.
    """
    # In-place cleaning of the caller's frame.
    dropTargetMissingRows(df, target)
    handleMissingValues(df)

    # Encoding produces the frame that is handed back.
    encoded = encodeCatFeatures(df, catColsToEncode)
    return encoded

    
def checkDataBeforeTraining(df):
    """Report whether ``df`` is ready for model training.

    Prints a message and returns ``False`` if the frame contains any null
    value or any object-dtype column; otherwise prints a confirmation and
    returns ``True``.
    """
    has_nulls = df.isnull().to_numpy().any()
    if has_nulls:
        print("Error : Null Values Exist in Data")
        return False

    object_columns = [cname for cname in df.columns if df[cname].dtype == "object"]
    if object_columns:
        print("Error : Object Columns Exist in Data")
        return False

    print("Data is Ready for Training")
    return True
    

# Every object-dtype column of the training frame gets one-hot encoded.
catColsToEncode = getObjectColumnsList(train)

# Note: the cleaning steps inside quickPreprocessData use inplace=True,
# so `train` itself is modified by this call.
data = quickPreprocessData(train, 'SalePrice', catColsToEncode)

# Stop here when the processed frame fails validation.
if not checkDataBeforeTraining(data):
    sys.exit()

Data is Ready for Training
