# Boilerplate

Following the schema at https://xgboost.readthedocs.io/en/latest/tutorials/custom_metric_obj.html

In [1]:
import numpy as np
import xgboost as xgb
from typing import Tuple
import pandas as pd
from sklearn.model_selection import train_test_split

In [2]:
def msle(predt: np.ndarray, dtrain: xgb.DMatrix) -> Tuple[str, float]:
    '''Mean squared log error metric.

    Parameters
    ----------
    predt : raw predictions, one value per row of ``dtrain``.
    dtrain : matrix whose ``get_label()`` supplies the true targets.

    Returns
    -------
    The pair ``('MSLE', value)`` where ``value`` is the mean of
    ``(log1p(y) - log1p(predt)) ** 2``.

    Predictions at or below -1 are replaced by ``-1 + 1e-6`` before taking the
    logarithm, because ``log1p`` is ``-inf`` at -1 and undefined below it. The
    clamp is applied to a copy, so the caller's ``predt`` array is left
    untouched.
    '''
    y = dtrain.get_label()
    # `<=` (not `<`) so that a prediction of exactly -1 is clamped as well.
    safe_predt = np.where(predt <= -1, -1 + 1e-6, predt)
    elements = np.power(np.log1p(y) - np.log1p(safe_predt), 2)
    return 'MSLE', float(np.mean(elements))

def gradient_exact(predt: np.ndarray, dtrain: xgb.DMatrix) -> np.ndarray:
    '''First derivative of the squared log error with respect to ``predt``.

    Evaluates ``(log1p(predt) - log1p(y)) / (predt + 1)`` element-wise, where
    ``y`` are the labels stored in ``dtrain``.
    '''
    labels = dtrain.get_label()
    log_residual = np.log1p(predt) - np.log1p(labels)
    return log_residual / (predt + 1)


def evaluate(gradient, hessian, num_boost_round: int = 20) -> dict:
    '''Train a booster with a custom squared-log-error objective.

    Parameters
    ----------
    gradient : callable ``(predt, dtrain) -> np.ndarray`` giving the first-order
        term of the objective.
    hessian : callable ``(predt, dtrain) -> np.ndarray`` giving the second-order
        term of the objective.
    num_boost_round : number of boosting rounds passed to ``xgb.train``
        (defaults to 20).

    Returns
    -------
    The ``evals_result`` dictionary filled in by ``xgb.train``; it records the
    ``msle`` metric on the ``'dtrain'`` and ``'dtest'`` evaluation sets.

    Notes
    -----
    This function reads the notebook-level ``dtrain`` and ``dtest`` matrices
    and the ``msle`` metric, so the cells defining them must run first.
    '''
    def squared_log(predt: np.ndarray,
                    dtrain: xgb.DMatrix) -> Tuple[np.ndarray, np.ndarray]:
        '''Squared Log Error objective. A simplified version for RMSLE used as
        objective function.
        '''
        # Keep predictions strictly above -1: log1p is -inf at -1 and the
        # gradient/hessian formulas divide by (predt + 1). `<=` also catches
        # a prediction of exactly -1.
        predt[predt <= -1] = -1 + 1e-6
        grad = gradient(predt, dtrain)
        hess = hessian(predt, dtrain)
        # Per-round diagnostic of the magnitude of both terms.
        print("Grad norm: {:.2E} Hess norm: {:.2E}".format(np.linalg.norm(grad), np.linalg.norm(hess)))
        return grad, hess

    results = {}

    # NOTE(review): newer XGBoost releases deprecate `feval` in favour of
    # `custom_metric` - confirm against the installed version before upgrading.
    xgb.train({'tree_method': 'hist', 'seed': 1994,
               'disable_default_eval_metric': 1, 'eta': 0.3, 'lambda': 0.0},
              dtrain=dtrain,
              num_boost_round=num_boost_round,
              obj=squared_log,
              feval=msle,
              evals=[(dtrain, 'dtrain'), (dtest, 'dtest')],
              evals_result=results)
    return results

In [3]:
# Raw table exactly as read from disk; kept under its own name so re-running
# later steps never operates on an already-modified frame.
houses_raw = pd.read_csv('data/housesalesprediction/kc_house_data.csv')

# Regression target: the `price` column.
y = np.array(houses_raw['price'])

# Feature matrix: a non-mutating drop of the target and the unused columns
# (the last ones are dropped just to keep training faster).
X = houses_raw.drop(columns=['id', 'date', 'price', 'zipcode', 'lat', 'long',
                             'sqft_living15', 'sqft_lot15'])

# 80/20 split with a fixed seed so the run is reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

dtrain = xgb.DMatrix(X_train, label=y_train)
dtest = xgb.DMatrix(X_test, label=y_test)

# Approximation using Taylor expansion

In [4]:
def hessian_taylor(predt: np.ndarray, dtrain: xgb.DMatrix) -> np.ndarray:
    '''Second derivative of the squared log error with respect to ``predt``.

    Evaluates ``(1 - log1p(predt) + log1p(y)) / (predt + 1) ** 2``
    element-wise, where ``y`` are the labels stored in ``dtrain``.
    '''
    labels = dtrain.get_label()
    numerator = -np.log1p(predt) + np.log1p(labels) + 1
    denominator = np.power(predt + 1, 2)
    return numerator / denominator

In [5]:
# Train with the exact gradient and the Taylor-expansion hessian; `taylor`
# holds the metric history dictionary returned by `evaluate`.
taylor = evaluate(gradient_exact, hessian_taylor)

Grad norm: 1.11E+03 Hess norm: 7.98E+02
[0]	dtrain-MSLE:153.897	dtest-MSLE:154.235
Grad norm: 8.51E+02 Hess norm: 4.80E+02
[1]	dtrain-MSLE:147.884	dtest-MSLE:148.215
Grad norm: 6.53E+02 Hess norm: 2.88E+02
[2]	dtrain-MSLE:141.999	dtest-MSLE:142.323
Grad norm: 5.01E+02 Hess norm: 1.74E+02
[3]	dtrain-MSLE:136.241	dtest-MSLE:136.559
Grad norm: 3.84E+02 Hess norm: 1.04E+02
[4]	dtrain-MSLE:130.611	dtest-MSLE:130.922
Grad norm: 2.95E+02 Hess norm: 6.29E+01
[5]	dtrain-MSLE:125.108	dtest-MSLE:125.413
Grad norm: 2.26E+02 Hess norm: 3.79E+01
[6]	dtrain-MSLE:119.732	dtest-MSLE:120.03
Grad norm: 1.73E+02 Hess norm: 2.28E+01
[7]	dtrain-MSLE:114.483	dtest-MSLE:114.774
Grad norm: 1.33E+02 Hess norm: 1.38E+01
[8]	dtrain-MSLE:109.36	dtest-MSLE:109.645
Grad norm: 1.02E+02 Hess norm: 8.29E+00
[9]	dtrain-MSLE:104.363	dtest-MSLE:104.642
Grad norm: 7.82E+01 Hess norm: 5.00E+00
[10]	dtrain-MSLE:99.4928	dtest-MSLE:99.7644
Grad norm: 6.00E+01 Hess norm: 3.02E+00
[11]	dtrain-MSLE:94.7479	dtest-MSLE:95.013
Grad 

Grad norm: 1.47E-04 Hess norm: 6.51E-10
[97]	dtrain-MSLE:0.268004	dtest-MSLE:0.285105
Grad norm: 1.47E-04 Hess norm: 6.51E-10
[98]	dtrain-MSLE:0.268004	dtest-MSLE:0.285105
Grad norm: 1.47E-04 Hess norm: 6.51E-10
[99]	dtrain-MSLE:0.268004	dtest-MSLE:0.285105


# Quadratic Approximation 1

In [6]:
def hessian_approx1(predt: np.ndarray, dtrain: xgb.DMatrix) -> np.ndarray:
    '''Approximate curvature for squared log error (quadratic approximation 1).

    Evaluates ``2 * (log1p(predt) - log1p(y)) / ((predt + 1) * (predt - y))``
    element-wise, i.e. twice the exact gradient divided by ``predt - y``.

    Where ``predt == y`` the expression is 0/0; those entries are replaced by
    its analytic limit ``2 / (predt + 1) ** 2`` instead of returning NaN.
    '''
    y = dtrain.get_label()
    diff = predt - y
    # The 0/0 entries are overwritten below, so silence their warnings here.
    with np.errstate(divide='ignore', invalid='ignore'):
        hess = 2*((np.log1p(predt) - np.log1p(y)) /
                  ((predt + 1) * diff))
        # (log1p(p) - log1p(y)) / (p - y) -> 1 / (1 + p) as p -> y.
        limit = 2 / np.power(predt + 1, 2)
    return np.where(diff == 0, limit, hess)

In [7]:
# Train with the exact gradient and the first approximate hessian; `approx1`
# holds the metric history dictionary returned by `evaluate`.
approx1 = evaluate(gradient_exact, hessian_approx1)

Grad norm: 1.11E+03 Hess norm: 5.91E-03
[0]	dtrain-MSLE:4.30146	dtest-MSLE:4.36443
Grad norm: 4.38E-03 Hess norm: 2.34E-08
[1]	dtrain-MSLE:2.10803	dtest-MSLE:2.15369
Grad norm: 1.60E-03 Hess norm: 9.42E-09
[2]	dtrain-MSLE:1.28958	dtest-MSLE:1.32605
Grad norm: 8.83E-04 Hess norm: 5.75E-09
[3]	dtrain-MSLE:0.887646	dtest-MSLE:0.918163
Grad norm: 5.85E-04 Hess norm: 4.15E-09
[4]	dtrain-MSLE:0.663936	dtest-MSLE:0.690234
Grad norm: 4.32E-04 Hess norm: 3.29E-09
[5]	dtrain-MSLE:0.530095	dtest-MSLE:0.553247
Grad norm: 3.43E-04 Hess norm: 2.76E-09
[6]	dtrain-MSLE:0.446183	dtest-MSLE:0.466911
Grad norm: 2.87E-04 Hess norm: 2.41E-09
[7]	dtrain-MSLE:0.391831	dtest-MSLE:0.410649
Grad norm: 2.50E-04 Hess norm: 2.16E-09
[8]	dtrain-MSLE:0.355781	dtest-MSLE:0.373068
Grad norm: 2.25E-04 Hess norm: 1.98E-09
[9]	dtrain-MSLE:0.331437	dtest-MSLE:0.347483
Grad norm: 2.07E-04 Hess norm: 1.85E-09
[10]	dtrain-MSLE:0.314769	dtest-MSLE:0.329799
Grad norm: 1.94E-04 Hess norm: 1.75E-09
[11]	dtrain-MSLE:0.303232	dtes

Grad norm: 1.49E-04 Hess norm: 1.29E-09
[96]	dtrain-MSLE:0.275435	dtest-MSLE:0.285207
Grad norm: 1.49E-04 Hess norm: 1.29E-09
[97]	dtrain-MSLE:0.275435	dtest-MSLE:0.285207
Grad norm: 1.49E-04 Hess norm: 1.29E-09
[98]	dtrain-MSLE:0.275435	dtest-MSLE:0.285207
Grad norm: 1.49E-04 Hess norm: 1.29E-09
[99]	dtrain-MSLE:0.275435	dtest-MSLE:0.285207


# Quadratic Approximation 2

In [8]:
def gradient_approx2(predt: np.ndarray, dtrain: xgb.DMatrix) -> np.ndarray:
    '''Approximate gradient for squared log error (quadratic approximation 2).

    Evaluates ``(log1p(predt) - log1p(y)) ** 2 / (predt - y)`` element-wise,
    where ``y`` are the labels stored in ``dtrain``.

    Where ``predt == y`` the expression is 0/0; those entries are replaced by
    its analytic limit, 0, instead of returning NaN.
    '''
    y = dtrain.get_label()
    diff = predt - y
    # The 0/0 entries are overwritten below, so silence their warnings here.
    with np.errstate(divide='ignore', invalid='ignore'):
        grad = np.square(np.log1p(predt) - np.log1p(y)) / diff
    # The numerator vanishes quadratically in (predt - y), so the limit is 0.
    return np.where(diff == 0, 0.0, grad)

def hessian_approx2(predt: np.ndarray, dtrain: xgb.DMatrix) -> np.ndarray:
    '''Approximate curvature for squared log error (quadratic approximation 2).

    Evaluates ``2 * (log1p(predt) - log1p(y)) ** 2 / (predt - y) ** 2``
    element-wise, where ``y`` are the labels stored in ``dtrain``.

    Where ``predt == y`` the expression is 0/0; those entries are replaced by
    its analytic limit ``2 / (predt + 1) ** 2`` instead of returning NaN.
    '''
    y = dtrain.get_label()
    diff = predt - y
    # The 0/0 entries are overwritten below, so silence their warnings here.
    with np.errstate(divide='ignore', invalid='ignore'):
        hess = 2*(np.power(np.log1p(predt) - np.log1p(y), 2) /
                  np.power(diff, 2))
        # (log1p(p) - log1p(y)) / (p - y) -> 1 / (1 + p) as p -> y.
        limit = 2 / np.power(predt + 1, 2)
    return np.where(diff == 0, limit, hess)

In [9]:
# Train with the second approximate gradient/hessian pair; `approx2` holds the
# metric history dictionary returned by `evaluate`.
approx2 = evaluate(gradient_approx2, hessian_approx2)

Grad norm: 5.41E-02 Hess norm: 4.39E-07
[0]	dtrain-MSLE:5.23673	dtest-MSLE:5.30556
Grad norm: 1.54E-03 Hess norm: 1.14E-08
[1]	dtrain-MSLE:2.66142	dtest-MSLE:2.71214
Grad norm: 8.10E-04 Hess norm: 6.65E-09
[2]	dtrain-MSLE:1.67828	dtest-MSLE:1.71946
Grad norm: 5.31E-04 Hess norm: 4.87E-09
[3]	dtrain-MSLE:1.18058	dtest-MSLE:1.21558
Grad norm: 3.89E-04 Hess norm: 3.94E-09
[4]	dtrain-MSLE:0.893675	dtest-MSLE:0.924294
Grad norm: 3.07E-04 Hess norm: 3.37E-09
[5]	dtrain-MSLE:0.715103	dtest-MSLE:0.742455
Grad norm: 2.56E-04 Hess norm: 2.99E-09
[6]	dtrain-MSLE:0.598095	dtest-MSLE:0.622928
Grad norm: 2.23E-04 Hess norm: 2.73E-09
[7]	dtrain-MSLE:0.518501	dtest-MSLE:0.541344
Grad norm: 2.02E-04 Hess norm: 2.53E-09
[8]	dtrain-MSLE:0.46277	dtest-MSLE:0.484017
Grad norm: 1.88E-04 Hess norm: 2.38E-09
[9]	dtrain-MSLE:0.422824	dtest-MSLE:0.442775
Grad norm: 1.78E-04 Hess norm: 2.27E-09
[10]	dtrain-MSLE:0.393628	dtest-MSLE:0.412515
Grad norm: 1.72E-04 Hess norm: 2.18E-09
[11]	dtrain-MSLE:0.371927	dtest-M

Grad norm: 1.57E-04 Hess norm: 1.75E-09
[96]	dtrain-MSLE:0.293056	dtest-MSLE:0.306347
Grad norm: 1.57E-04 Hess norm: 1.75E-09
[97]	dtrain-MSLE:0.293056	dtest-MSLE:0.306347
Grad norm: 1.57E-04 Hess norm: 1.75E-09
[98]	dtrain-MSLE:0.293056	dtest-MSLE:0.306347
Grad norm: 1.57E-04 Hess norm: 1.75E-09
[99]	dtrain-MSLE:0.293056	dtest-MSLE:0.306347
