# XGBoost (Xabier Etxezarreta Argarate)

## 1. Implementation using XGBClassifier

#### Import the necessary libraries and set the seed

In [1]:
from sklearn import datasets
from xgboost import XGBClassifier
from sklearn import metrics
from sklearn.model_selection import train_test_split

# Fixed seed, passed as random_state to train_test_split so the split is reproducible
seed = 0

#### Load the iris dataset from sklearn, where X holds the features and y the class to predict

In [2]:
# Fetch the iris dataset bundled with scikit-learn
iris = datasets.load_iris()

# Features go into X, the class labels to predict go into y
X, y = iris.data, iris.target

#### Split the data into train (75%) and test (25%) sets

In [3]:
# Hold out 25% of the samples for testing; the seed keeps the split reproducible
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.25,
    random_state=seed,
)

#### Create an XGB classifier and fit it on the training data

In [4]:
# Pass the notebook-level seed explicitly so the classifier is pinned to the
# same value as the train/test split instead of relying on the library default.
model = XGBClassifier(random_state=seed)

# fit() trains in place and returns the estimator itself
model.fit(X_train, y_train)

# Display the fitted estimator and its hyper-parameters
model

XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
              colsample_bynode=1, colsample_bytree=1, gamma=0,
              learning_rate=0.1, max_delta_step=0, max_depth=3,
              min_child_weight=1, missing=None, n_estimators=100, n_jobs=1,
              nthread=None, objective='multi:softprob', random_state=0,
              reg_alpha=0, reg_lambda=1, scale_pos_weight=1, seed=None,
              silent=None, subsample=1, verbosity=1)

#### After fitting the model, let's predict the class of each test sample from its features (X_test). After predicting, we check the accuracy against the true labels.

In [5]:
# Predicted class label for every sample in the held-out test set
y_pred = model.predict(X_test)

# Fraction of test samples whose predicted label matches the true label
metrics.accuracy_score(y_true=y_test, y_pred=y_pred)

0.9736842105263158

## 2. Implementation using the xgb library

#### Import the necessary libraries and set the seed

In [6]:
import numpy as np
from sklearn import datasets
import xgboost as xgb
from sklearn import metrics
from sklearn.model_selection import train_test_split

# Fixed seed, passed as random_state to train_test_split so the split is reproducible
seed = 0

#### Load the iris dataset from sklearn, where X holds the features and y the class to predict

In [7]:
# Fetch the iris dataset bundled with scikit-learn
iris = datasets.load_iris()

# Features go into X, the class labels to predict go into y
X, y = iris.data, iris.target

#### Split the data into train (75%) and test (25%) sets

In [8]:
# Hold out 25% of the samples for testing; the seed keeps the split reproducible
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.25,
    random_state=seed,
)

#### Wrap the train and test data in the DMatrix data structure

In [9]:
# xgb.train() and Booster.predict() consume DMatrix objects, so wrap each
# feature array together with its labels.
dtrain = xgb.DMatrix(data=X_train, label=y_train)
dtest = xgb.DMatrix(data=X_test, label=y_test)

#### Define the parameters to build the model using the training data

In [10]:
param = {
    'max_depth': 3,  # maximum depth of each tree
    'eta': 0.3,  # learning rate (step-size shrinkage applied at each boosting round)
    'verbosity': 0,  # logging mode - quiet (replaces the deprecated 'silent' flag)
    'objective': 'multi:softprob',  # multiclass objective; predictions are per-class probabilities
    'num_class': 3,  # the number of classes that exist in this dataset
    'seed': seed,  # reuse the notebook seed so training is pinned like the data split
}

num_round = 5  # the number of boosting rounds (training iterations)

#### Build the model with the training data and parameters defined before

In [11]:
# Train a Booster on the training DMatrix for `num_round` boosting rounds
model = xgb.train(
    params=param,
    dtrain=dtrain,
    num_boost_round=num_round,
)

# Display the trained Booster object
model

<xgboost.core.Booster at 0x1cb10dcaa48>

#### After fitting the model, let's predict on the test features. The model returns one probability per class for each sample, so we take the most probable class as the prediction and then check the accuracy.

In [12]:
# With the 'multi:softprob' objective, predict() yields one row of class
# probabilities per test sample.
y_pred = model.predict(dtest)

# Pick the most probable class of every row in a single vectorized call
# rather than looping over the rows in Python.
y_best_preds = np.argmax(y_pred, axis=1)

# Fraction of test samples whose predicted label matches the true label
metrics.accuracy_score(y_test, y_best_preds)

0.9736842105263158