# Boilerplate

The custom objective and evaluation metric below follow the pattern described in the XGBoost tutorial at https://xgboost.readthedocs.io/en/latest/tutorials/custom_metric_obj.html

In [1]:
import numpy as np
import xgboost as xgb
from typing import Tuple
import pandas as pd
from sklearn.model_selection import train_test_split

In [2]:
def msle(predt: np.ndarray, dtrain: xgb.DMatrix) -> Tuple[str, float]:
    '''Mean squared log error metric.

    Parameters
    ----------
    predt : np.ndarray
        Predicted values. The array is left untouched; flooring happens on
        a copy.
    dtrain : xgb.DMatrix
        Data container whose ``get_label()`` supplies the true targets.

    Returns
    -------
    Tuple[str, float]
        The metric name ``'MSLE'`` and the mean of
        ``(log1p(y) - log1p(predt)) ** 2``.
    '''
    y = dtrain.get_label()
    # log1p is -inf at exactly -1 and NaN below it, so floor the predictions
    # just above -1.  np.maximum builds a new array (the caller's predictions
    # are not overwritten) and, unlike a strict `< -1` mask, also catches
    # predictions equal to -1.
    floored = np.maximum(predt, -1 + 1e-6)
    elements = np.power(np.log1p(y) - np.log1p(floored), 2)
    return 'MSLE', float(np.mean(elements))

def gradient_exact(predt: np.ndarray, dtrain: xgb.DMatrix) -> np.ndarray:
    '''Exact gradient of the squared log error.

    Element-wise derivative with respect to ``predt`` of
    ``0.5 * (log1p(predt) - log1p(y)) ** 2``, where ``y`` comes from
    ``dtrain.get_label()``.
    '''
    labels = dtrain.get_label()
    log_residual = np.log1p(predt) - np.log1p(labels)
    return log_residual / (predt + 1)


def evaluate(gradient, hessian):
    '''Train a booster with a custom objective and return its metric history.

    Parameters
    ----------
    gradient : callable
        ``gradient(predt, dtrain) -> np.ndarray``; first-order term of the
        objective.
    hessian : callable
        ``hessian(predt, dtrain) -> np.ndarray``; second-order term of the
        objective.

    Returns
    -------
    dict
        The dictionary handed to ``xgb.train`` as ``evals_result``; it holds
        the ``msle`` values recorded for the ``'dtrain'`` and ``'dtest'``
        evaluation sets.

    Notes
    -----
    Reads the module-level ``dtrain`` and ``dtest`` DMatrix objects, so the
    data-loading cell must have been run before this function is called.
    '''
    def squared_log(predt: np.ndarray,
                    dtrain: xgb.DMatrix) -> Tuple[np.ndarray, np.ndarray]:
        '''Squared Log Error objective. A simplified version for RMSLE used as
        objective function.
        '''
        # Floor predictions just above -1 so log1p stays finite and
        # (predt + 1) is never zero.  np.maximum returns a new array rather
        # than overwriting the one passed in, and also covers predt == -1,
        # which a strict `< -1` mask would miss.
        predt = np.maximum(predt, -1 + 1e-6)
        grad = gradient(predt, dtrain)
        hess = hessian(predt, dtrain)
        return grad, hess

    results = {}

    # NOTE(review): `feval` is kept for compatibility with the xgboost
    # version this notebook was run on; newer releases appear to prefer
    # `custom_metric` -- confirm before upgrading.
    xgb.train({'tree_method': 'hist', 'seed': 1994,
               'disable_default_eval_metric': 1, 'eta': 0.3},
              dtrain=dtrain,
              num_boost_round=20,
              obj=squared_log,
              feval=msle,
              evals=[(dtrain, 'dtrain'), (dtest, 'dtest')],
              evals_result=results)
    return results

In [3]:
# Read the house-sales table; it is narrowed to the feature columns below.
X = pd.read_csv('data/housesalesprediction/kc_house_data.csv')

# Regression target: copy the price column out before it is dropped.
y = np.array(X['price'])

# Columns excluded from the features; the last ones are dropped only to
# keep training fast.
columns_to_drop = ['id', 'date', 'price', 'zipcode', 'lat', 'long',
                   'sqft_living15', 'sqft_lot15']
X = X.drop(columns=columns_to_drop)

# Hold out 20% of the rows for evaluation, with a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42)

# DMatrix containers read by evaluate().
dtrain = xgb.DMatrix(X_train, label=y_train)
dtest = xgb.DMatrix(X_test, label=y_test)

# Approximation using Taylor expansion

In [4]:
def hessian_taylor(predt: np.ndarray, dtrain: xgb.DMatrix) -> np.ndarray:
    '''Exact Hessian of the squared log error.

    Element-wise second derivative with respect to ``predt`` of
    ``0.5 * (log1p(predt) - log1p(y)) ** 2``, where ``y`` comes from
    ``dtrain.get_label()``.
    '''
    labels = dtrain.get_label()
    numerator = -np.log1p(predt) + np.log1p(labels) + 1
    denominator = np.power(predt + 1, 2)
    return numerator / denominator

In [5]:
# Exact gradient paired with the exact (Taylor-expansion) Hessian.
taylor = evaluate(gradient=gradient_exact, hessian=hessian_taylor)

[0]	dtrain-MSLE:153.897	dtest-MSLE:154.235
[1]	dtrain-MSLE:147.884	dtest-MSLE:148.216
[2]	dtrain-MSLE:141.999	dtest-MSLE:142.324
[3]	dtrain-MSLE:136.242	dtest-MSLE:136.561
[4]	dtrain-MSLE:130.612	dtest-MSLE:130.925
[5]	dtrain-MSLE:125.11	dtest-MSLE:125.416
[6]	dtrain-MSLE:119.735	dtest-MSLE:120.034
[7]	dtrain-MSLE:114.487	dtest-MSLE:114.78
[8]	dtrain-MSLE:109.367	dtest-MSLE:109.654
[9]	dtrain-MSLE:104.374	dtest-MSLE:104.655
[10]	dtrain-MSLE:99.5102	dtest-MSLE:99.7841
[11]	dtrain-MSLE:94.7755	dtest-MSLE:95.043
[12]	dtrain-MSLE:90.1723	dtest-MSLE:90.4335
[13]	dtrain-MSLE:85.7039	dtest-MSLE:85.9588
[14]	dtrain-MSLE:81.3757	dtest-MSLE:81.6242
[15]	dtrain-MSLE:77.196	dtest-MSLE:77.4384
[16]	dtrain-MSLE:73.1778	dtest-MSLE:73.414
[17]	dtrain-MSLE:69.3401	dtest-MSLE:69.5702
[18]	dtrain-MSLE:65.7091	dtest-MSLE:65.9333
[19]	dtrain-MSLE:62.3182	dtest-MSLE:62.5369


# Quadratic Approximation 1

In [6]:
def hessian_approx1(predt: np.ndarray, dtrain: xgb.DMatrix) -> np.ndarray:
    '''Approximate Hessian for squared log error (quadratic approximation 1).

    Computes, element-wise,
    ``(log1p(predt) - log1p(y)) / ((predt + 1) * (predt - y))``
    with ``y`` taken from ``dtrain.get_label()``.

    Where a prediction equals its label the expression is 0/0; those entries
    are set to the analytic limit ``1 / (predt + 1) ** 2`` instead of NaN.

    Parameters
    ----------
    predt : np.ndarray
        Predicted values.
    dtrain : xgb.DMatrix
        Data container whose ``get_label()`` supplies the true targets.

    Returns
    -------
    np.ndarray
        One Hessian value per prediction.
    '''
    y = dtrain.get_label()
    diff = predt - y
    exact_hit = diff == 0
    # Swap zero differences for 1 so the division below never sees 0/0;
    # those entries are overwritten with the limit value afterwards.
    safe_diff = np.where(exact_hit, 1.0, diff)
    quotient = ((np.log1p(predt) - np.log1p(y)) /
                ((predt + 1) * safe_diff))
    # As predt -> y, (log1p(predt) - log1p(y)) / (predt - y) -> 1 / (predt + 1).
    return np.where(exact_hit, 1.0 / np.power(predt + 1, 2), quotient)

In [7]:
# Exact gradient paired with the first quadratic-approximation Hessian.
approx1 = evaluate(gradient=gradient_exact, hessian=hessian_approx1)

[0]	dtrain-MSLE:7.35528	dtest-MSLE:7.4356
[1]	dtrain-MSLE:7.35521	dtest-MSLE:7.43553
[2]	dtrain-MSLE:7.35514	dtest-MSLE:7.43546
[3]	dtrain-MSLE:7.35507	dtest-MSLE:7.43539
[4]	dtrain-MSLE:7.355	dtest-MSLE:7.43532
[5]	dtrain-MSLE:7.35493	dtest-MSLE:7.43525
[6]	dtrain-MSLE:7.35486	dtest-MSLE:7.43518
[7]	dtrain-MSLE:7.35479	dtest-MSLE:7.43511
[8]	dtrain-MSLE:7.35472	dtest-MSLE:7.43504
[9]	dtrain-MSLE:7.35465	dtest-MSLE:7.43497
[10]	dtrain-MSLE:7.35458	dtest-MSLE:7.4349
[11]	dtrain-MSLE:7.35451	dtest-MSLE:7.43483
[12]	dtrain-MSLE:7.35444	dtest-MSLE:7.43476
[13]	dtrain-MSLE:7.35437	dtest-MSLE:7.43469
[14]	dtrain-MSLE:7.3543	dtest-MSLE:7.43462
[15]	dtrain-MSLE:7.35423	dtest-MSLE:7.43454
[16]	dtrain-MSLE:7.35416	dtest-MSLE:7.43447
[17]	dtrain-MSLE:7.35409	dtest-MSLE:7.43441
[18]	dtrain-MSLE:7.35402	dtest-MSLE:7.43434
[19]	dtrain-MSLE:7.35395	dtest-MSLE:7.43426


# Quadratic Approximation 2

In [8]:
def gradient_approx2(predt: np.ndarray, dtrain: xgb.DMatrix) -> np.ndarray:
    '''Approximate gradient for squared log error (quadratic approximation 2).

    Computes, element-wise,
    ``(log1p(predt) - log1p(y)) ** 2 / (predt - y)``
    with ``y`` taken from ``dtrain.get_label()``.

    Where a prediction equals its label the expression is 0/0; its limit
    there is 0, which is what is returned instead of NaN.

    Parameters
    ----------
    predt : np.ndarray
        Predicted values.
    dtrain : xgb.DMatrix
        Data container whose ``get_label()`` supplies the true targets.

    Returns
    -------
    np.ndarray
        One gradient value per prediction.
    '''
    y = dtrain.get_label()
    diff = predt - y
    # Where diff == 0 the numerator is exactly 0 as well, so dividing by 1
    # there yields 0 -- the limit of the expression -- rather than NaN.
    safe_diff = np.where(diff == 0, 1.0, diff)
    return np.square(np.log1p(predt) - np.log1p(y)) / safe_diff

def hessian_approx2(predt: np.ndarray, dtrain: xgb.DMatrix) -> np.ndarray:
    '''Approximate Hessian for squared log error (quadratic approximation 2).

    Computes, element-wise,
    ``(log1p(predt) - log1p(y)) ** 2 / (predt - y) ** 2``
    with ``y`` taken from ``dtrain.get_label()``.

    Where a prediction equals its label the expression is 0/0; those entries
    are set to the analytic limit ``1 / (predt + 1) ** 2`` instead of NaN.

    Parameters
    ----------
    predt : np.ndarray
        Predicted values.
    dtrain : xgb.DMatrix
        Data container whose ``get_label()`` supplies the true targets.

    Returns
    -------
    np.ndarray
        One Hessian value per prediction.
    '''
    y = dtrain.get_label()
    diff = predt - y
    exact_hit = diff == 0
    # Swap zero differences for 1 so the division below never sees 0/0;
    # those entries are overwritten with the limit value afterwards.
    safe_diff = np.where(exact_hit, 1.0, diff)
    quotient = (np.power(np.log1p(predt) - np.log1p(y), 2) /
                np.power(safe_diff, 2))
    # As predt -> y, (log1p(predt) - log1p(y)) / (predt - y) -> 1 / (predt + 1).
    return np.where(exact_hit, 1.0 / np.power(predt + 1, 2), quotient)

In [10]:
# Gradient and Hessian both taken from the second quadratic approximation.
approx2 = evaluate(gradient=gradient_approx2, hessian=hessian_approx2)

[0]	dtrain-MSLE:139.605	dtest-MSLE:139.927
[1]	dtrain-MSLE:130.307	dtest-MSLE:130.619
[2]	dtrain-MSLE:124.262	dtest-MSLE:124.567
[3]	dtrain-MSLE:119.8	dtest-MSLE:120.099
[4]	dtrain-MSLE:116.275	dtest-MSLE:116.57
[5]	dtrain-MSLE:113.369	dtest-MSLE:113.661
[6]	dtrain-MSLE:110.902	dtest-MSLE:111.191
[7]	dtrain-MSLE:108.762	dtest-MSLE:109.048
[8]	dtrain-MSLE:106.876	dtest-MSLE:107.159
[9]	dtrain-MSLE:105.19	dtest-MSLE:105.471
[10]	dtrain-MSLE:103.668	dtest-MSLE:103.947
[11]	dtrain-MSLE:102.281	dtest-MSLE:102.559
[12]	dtrain-MSLE:101.009	dtest-MSLE:101.285
[13]	dtrain-MSLE:99.8349	dtest-MSLE:100.109
[14]	dtrain-MSLE:98.7447	dtest-MSLE:99.0176
[15]	dtrain-MSLE:97.7278	dtest-MSLE:97.9994
[16]	dtrain-MSLE:96.7755	dtest-MSLE:97.0457
[17]	dtrain-MSLE:95.8802	dtest-MSLE:96.1493
[18]	dtrain-MSLE:95.0359	dtest-MSLE:95.3038
[19]	dtrain-MSLE:94.2372	dtest-MSLE:94.504
