In [13]:
from sklearn.tree import DecisionTreeRegressor
from sklearn.metrics import mean_absolute_error,mean_squared_error
import pandas as pd
from sklearn.model_selection import train_test_split
import numpy as np

In [14]:
class GradientBoost:
    """Gradient-boosted regression trees for squared-error loss.

    The model starts from the mean of the training targets and then, for
    ``num_iter`` rounds, fits a ``DecisionTreeRegressor`` to the current
    residuals and adds that tree's prediction scaled by ``learning_rate``.

    Parameters
    ----------
    max_depth, min_samples_split, min_samples_leaf, max_features
        Forwarded unchanged to every ``DecisionTreeRegressor``.
    learning_rate : float
        Shrinkage factor applied to each tree's contribution.
    num_iter : int
        Number of boosting rounds (trees) built by ``fit``.
    random_state : int, numpy.random.RandomState or None
        Seed for the randomness inside the trees. ``None`` (the default)
        leaves the trees unseeded, exactly as before this parameter existed.
    """

    def __init__(self, max_depth=8, min_samples_split=5, min_samples_leaf=5, max_features=3, learning_rate=0.1, num_iter=50, random_state=None):
        self.max_depth = max_depth
        self.min_samples_split = min_samples_split
        self.min_samples_leaf = min_samples_leaf
        self.max_features = max_features
        self.learning_rate = learning_rate
        self.num_iter = num_iter
        self.random_state = random_state
        # Shared random stream for one fit() call; created in fit() when a seed is given.
        self._rng = None
        # Initial constant prediction (mean of the training targets); None until fit() runs.
        self.F0 = None
        # Fitted trees, in the order they were added to the ensemble.
        self.trees = []

    @staticmethod
    def loss(y, y_pred):
        """Return half the mean squared error between ``y`` and ``y_pred``.

        Declared as a static method so that both ``GradientBoost.loss(y, p)``
        and ``model.loss(y, p)`` work. Both inputs are flattened first so that a
        1-D target and the ``(n, 1)`` output of ``predict`` are compared
        element-wise instead of being broadcast against each other.
        """
        y = np.asarray(y, dtype=float).ravel()
        y_pred = np.asarray(y_pred, dtype=float).ravel()
        return 0.5 * np.sum(np.square(y - y_pred)) / y.size

    def _gradient(self, y, y_pred):
        """Return the residuals ``y - y_pred``.

        For the squared-error loss above, the residual is the negative gradient
        with respect to the prediction (up to the constant ``1/n`` factor), which
        is the target each new tree is fitted to.
        """
        return np.asarray(y) - y_pred

    def _create_decision_tree(self, X, y):
        """Fit and return one regression tree on ``(X, y)`` using the stored hyper-parameters."""
        # Prefer the per-fit random stream so that successive trees differ; fall
        # back to the raw seed when the helper is called outside fit().
        rng = getattr(self, "_rng", None)
        seed = rng if rng is not None else getattr(self, "random_state", None)
        tree_regressor = DecisionTreeRegressor(max_depth=self.max_depth,
                                               min_samples_split=self.min_samples_split,
                                               min_samples_leaf=self.min_samples_leaf,
                                               max_features=self.max_features,
                                               random_state=seed)
        tree_regressor.fit(X, y)
        return tree_regressor

    def fit(self, X, y):
        """Fit the ensemble on ``(X, y)`` and return ``self``.

        Any trees from a previous ``fit`` call are discarded, so refitting never
        mixes stale trees into the new model.

        Raises
        ------
        ValueError
            If ``y`` is empty or ``X`` and ``y`` have different lengths.
        """
        y = np.asarray(y, dtype=float).ravel()
        if y.size == 0:
            raise ValueError("Cannot fit GradientBoost on an empty training set.")
        if len(X) != y.size:
            raise ValueError(
                f"X and y have inconsistent lengths: {len(X)} != {y.size}"
            )

        if self.random_state is None:
            self._rng = None
        elif isinstance(self.random_state, np.random.RandomState):
            self._rng = self.random_state
        else:
            self._rng = np.random.RandomState(self.random_state)

        # Start from scratch: without this reset a second fit() would append to
        # the old trees while predict() kept using the first ones.
        self.trees = []
        self.F0 = np.mean(y)
        pred = np.full(y.shape, self.F0, dtype=float)
        for _ in range(self.num_iter):
            residuals = self._gradient(y, pred)
            tree = self._create_decision_tree(X, residuals)
            pred += self.learning_rate * np.asarray(tree.predict(X), dtype=float).reshape(-1)
            self.trees.append(tree)
        return self

    def predict(self, X):
        """Return predictions for ``X`` as a column vector of shape ``(len(X), 1)``.

        Raises
        ------
        RuntimeError
            If the model has not been fitted yet.
        """
        if self.F0 is None:
            raise RuntimeError(
                "This GradientBoost instance is not fitted yet; call fit() before predict()."
            )
        n_rows = len(X)
        pred = np.full((n_rows, 1), self.F0, dtype=float)
        # Iterate over the trees actually stored rather than range(num_iter), so
        # changing num_iter after fitting cannot desynchronise prediction.
        for tree in self.trees:
            pred += self.learning_rate * np.asarray(tree.predict(X), dtype=float).reshape(n_rows, 1)
        return pred

In [15]:
# Load the advertising data: 'sales' is the target, every other column is a feature.
# NOTE(review): some copies of Advertising.csv carry an unnamed row-index column;
# confirm it is not being picked up as a feature by the drop() below.
df = pd.read_csv('../datasets/Advertising.csv')
y = df['sales']
X = df.drop(columns='sales')

# Hold out 30% of the rows for evaluation; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)

# Using the user-defined model

In [16]:
# Seed NumPy's global RNG before fitting: GradientBoost() is built here without a
# seed, so scikit-learn draws the trees' feature permutations from this global
# generator. Seeding it makes the metrics below repeatable across runs.
np.random.seed(42)

gg_mdl = GradientBoost()
gg_mdl.fit(X_train, y_train)
# predict() returns a column vector of shape (n_samples, 1).
y_pred = gg_mdl.predict(X_test)

mae = mean_absolute_error(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)
print(f'Mean squared error: {round(mse,3)}')
print(f'Mean absolute error: {round(mae,3)}')

Mean squared error: 0.586
Mean absolute error: 0.599


# Using the scikit-learn library

In [17]:
from sklearn.ensemble import GradientBoostingRegressor

# Same hyper-parameters as the hand-written GradientBoost defaults, so the two
# models can be compared. random_state is fixed because the estimator's trees
# permute the candidate features at every split; without a seed the metrics
# below change from run to run.
sk_mdl = GradientBoostingRegressor(max_depth=8,
                                   min_samples_split=5, min_samples_leaf=5, max_features=3,
                                   learning_rate=0.1, n_estimators=50,
                                   random_state=42)
sk_mdl.fit(X_train, y_train)
y_pred = sk_mdl.predict(X_test)

mae = mean_absolute_error(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)
print(f'Mean squared error: {round(mse,3)}')
print(f'Mean absolute error: {round(mae,3)}')

Mean squared error: 0.51
Mean absolute error: 0.573
