# 1. load iris dataset

In [2]:
from sklearn import datasets

# Load the iris dataset through scikit-learn, then unpack its feature
# matrix and target labels in one step.
iris = datasets.load_iris()
x_data, y_data = iris.data, iris.target

In [6]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the samples for testing; the fixed random_state keeps
# the split the same on every run.
x_train, x_test, y_train, y_test = train_test_split(
    x_data,
    y_data,
    test_size=0.2,
    random_state=0,
)

# 2. convert the data to the XGBoost DMatrix format

In [8]:
import xgboost as xgb

# Wrap each split, together with its labels, in an xgboost DMatrix.
dtrain = xgb.DMatrix(data=x_train, label=y_train)
dtest = xgb.DMatrix(data=x_test, label=y_test)

# 3. train xgb

In [9]:
# Booster configuration handed to xgb.train.
param = {
    'max_depth': 3,  # the maximum depth of each tree
    'eta': 0.3,  # the learning rate (step-size shrinkage) for each boosting step
    # 'silent' was deprecated and later removed from XGBoost; 'verbosity': 0
    # is its replacement and keeps logging quiet.
    'verbosity': 0,
    'objective': 'multi:softprob',  # multiclass objective that outputs per-class probabilities
    'num_class': 3,  # the number of classes that exist in this dataset
}
num_round = 20  # the number of boosting rounds (training iterations)

In [10]:
# Fit the booster on the training matrix for num_round boosting rounds.
bst = xgb.train(params=param, dtrain=dtrain, num_boost_round=num_round)

# 4. model evaluation

In [11]:
# Score the held-out matrix with the trained booster.
preds = bst.predict(data=dtest)

In [12]:
# Display the raw output of bst.predict: one row of three values per test sample.
preds

array([[0.00580842, 0.03016039, 0.96403116],
       [0.00688668, 0.9835607 , 0.00955265],
       [0.979875  , 0.01659266, 0.00353238],
       [0.00370642, 0.00654464, 0.9897489 ],
       [0.9905529 , 0.0058763 , 0.00357088],
       [0.00448035, 0.00673794, 0.98878163],
       [0.9905529 , 0.0058763 , 0.00357088],
       [0.00441976, 0.9853099 , 0.01027041],
       [0.00444062, 0.9899602 , 0.00559916],
       [0.0062554 , 0.9858571 , 0.00788741],
       [0.0587752 , 0.5120683 , 0.42915654],
       [0.00442203, 0.985816  , 0.00976201],
       [0.0062554 , 0.9858571 , 0.00788741],
       [0.0043267 , 0.9645649 , 0.03110838],
       [0.0062554 , 0.9858571 , 0.00788741],
       [0.99096584, 0.00546184, 0.00357237],
       [0.006142  , 0.9679853 , 0.0258727 ],
       [0.00518121, 0.98828584, 0.00653297],
       [0.99045813, 0.00545904, 0.00408281],
       [0.9844855 , 0.01196551, 0.003549  ],
       [0.00501135, 0.06578495, 0.92920375],
       [0.00510317, 0.97340006, 0.02149671],
       [0.

In [13]:
import numpy as np
# Pick the highest-scoring class index for every row. A single argmax along
# axis 1 replaces the per-row Python loop and yields an integer array even
# when preds has no rows.
best_preds = np.argmax(preds, axis=1)

In [14]:
# Display the predicted class index for each test sample.
best_preds

array([2, 1, 0, 2, 0, 2, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 0, 2, 1,
       0, 0, 2, 0, 0, 1, 1, 0])

In [16]:
from sklearn.metrics import precision_score

# Report the macro-averaged precision of the predicted classes against
# the test labels.
print(precision_score(y_true=y_test, y_pred=best_preds, average='macro'))

0.9761904761904763
