# Module 

In [9]:
import xgboost as xgb
import lightgbm as lgb
import numpy as np
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.metrics import precision_score, recall_score, accuracy_score

# Config 

# Data 

In [10]:
iris = datasets.load_iris()
X = iris.data
y = iris.target
X_train, X_test, Y_train, Y_test = train_test_split(X, y, test_size=0.2)

In [11]:
X_train.shape

(120, 4)

# xgboost 

## Data  

In [12]:
D_train = xgb.DMatrix(X_train, Y_train)
D_test = xgb.DMatrix(X_test, Y_test)

## Train 

In [13]:
param = {
    'eta': 0.3, 
    'max_depth': 3,  
    'objective': 'multi:softprob',  
    'num_class': 3} 

steps = 20  # The number of training iterations

In [14]:
model = xgb.train(param, D_train, steps)

## Eval 

In [22]:

preds = model.predict(D_test)
best_preds = np.asarray([np.argmax(line) for line in preds])

print("Precision = {}".format(precision_score(Y_test, best_preds, average='macro')))
print("Recall = {}".format(recall_score(Y_test, best_preds, average='macro')))
print("Accuracy = {}".format(accuracy_score(Y_test, best_preds)))

Precision = 0.9666666666666667
Recall = 0.9696969696969697
Accuracy = 0.9666666666666667


In [25]:
len(preds)

30

In [24]:
best_preds

array([2, 1, 2, 2, 0, 2, 0, 0, 0, 1, 1, 1, 2, 0, 1, 0, 1, 1, 0, 1, 1, 2,
       2, 0, 2, 2, 1, 2, 0, 0])

# lighgbm 

## Train 

In [28]:
Y_train

array([1, 1, 2, 0, 1, 1, 2, 2, 2, 2, 1, 2, 1, 1, 1, 2, 1, 1, 2, 0, 1, 0,
       0, 0, 2, 1, 0, 2, 0, 1, 0, 0, 2, 1, 2, 0, 2, 0, 2, 0, 0, 1, 2, 0,
       1, 2, 1, 0, 2, 2, 1, 1, 2, 2, 0, 0, 0, 0, 1, 2, 1, 0, 2, 2, 1, 0,
       2, 0, 1, 0, 2, 1, 2, 2, 2, 2, 1, 0, 1, 1, 2, 2, 0, 0, 0, 2, 1, 0,
       0, 1, 0, 0, 1, 0, 2, 2, 0, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 2, 1, 2,
       0, 2, 0, 1, 0, 0, 1, 2, 2, 0])

In [33]:
# lgb params
params = {
    'objective':'multiclass',
          'metric':'precision',
    "num_class":3
         }
  
# create dataset for lightgbm
lgb_train = lgb.Dataset(X_train, Y_train)
lgb_test = lgb.Dataset(X_test, Y_test, reference=lgb_train)

lgb_model = lgb.train(params, lgb_train, valid_sets=[lgb_train, lgb_test], verbose_eval=10)
# predictions = lgb_model.predict(X_test)

[10]	training's multi_logloss: 0.415082	valid_1's multi_logloss: 0.4164
[20]	training's multi_logloss: 0.208811	valid_1's multi_logloss: 0.206257
[30]	training's multi_logloss: 0.123706	valid_1's multi_logloss: 0.145837
[40]	training's multi_logloss: 0.0838637	valid_1's multi_logloss: 0.141399
[50]	training's multi_logloss: 0.0616752	valid_1's multi_logloss: 0.157091
[60]	training's multi_logloss: 0.049105	valid_1's multi_logloss: 0.175548
[70]	training's multi_logloss: 0.0399579	valid_1's multi_logloss: 0.191304
[80]	training's multi_logloss: 0.032127	valid_1's multi_logloss: 0.205027
[90]	training's multi_logloss: 0.0265547	valid_1's multi_logloss: 0.215229
[100]	training's multi_logloss: 0.0223113	valid_1's multi_logloss: 0.228125


## Eval 

In [34]:
preds = lgb_model.predict(X_test)
best_preds = np.asarray([np.argmax(line) for line in preds])

print("Precision = {}".format(precision_score(Y_test, best_preds, average='macro')))
print("Recall = {}".format(recall_score(Y_test, best_preds, average='macro')))
print("Accuracy = {}".format(accuracy_score(Y_test, best_preds)))

Precision = 0.9166666666666666
Recall = 0.9090909090909092
Accuracy = 0.9
