# Split

In [1]:
import pandas as pd
import os
from sklearn.metrics import classification_report, confusion_matrix 
from sklearn.linear_model import LinearRegression

os.sys.path.append('./..')

from hyperopt.pyll import scope
from utils.loadgenius import DataLoader
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import GradientBoostingClassifier
import xgboost as xgb
from sklearn.metrics import precision_score
import mlflow
from utils.pretty import pprint
from functools import wraps
from hyperopt import fmin, tpe, hp, STATUS_OK, Trials


In [2]:
numericals = ["n_months", "n_origins", "total_teu",
             "avg_teu", "n_goods", "n_ports", "n_states",
             "biggest_gap", "smallest_gap", "n_zipcodes", "n_istates"]

categoricals = ["major_origin", "major_goods", "major_port_state"]

target = 'importer_rank_cate'

df = DataLoader("../catalog.yml").load('ml_dataset')
pprint(f"Retrieved dataset: {df.shape}")


Retrieved dataset: (20674, 16)

In [3]:
# Split data into train and validation sets (80-20, stratified)
def split_data(X, y):
    X_train, X_val, y_train, y_val = train_test_split(
        X, 
        y,
        test_size=0.2,
        stratify=y,
        random_state=42
    )
    return X_train, X_val, y_train, y_val


In [4]:
# # OneHot encoding
# X = pd.get_dummies(
#     df.drop(target, axis=1),
#     columns=categoricals
# )
# y = df[target]


# X_train, X_val, y_train, y_val = split_data(X, y)
# dtrain = xgb.DMatrix(X_train, label=y_train)
# dval = xgb.DMatrix(X_val, label=y_val)

In [6]:
# Objective function decorator
def objective(func):
    @wraps(func)
    def wrapper(params):
        
        with mlflow.start_run():
            y_val_pred = func(params)
            # Calculate precision
            precision = precision_score(y_val, y_val_pred, average='weighted')

        return {'loss': 1 - precision, 'status': STATUS_OK}



    return wrapper

# Define the objective function for Gradient Boosting

@objective
def gradient_boosting_objective(params):
    
    mlflow.set_tag("model", "gb")
    mlflow.log_params(params)

    # Initialize and train the Gradient Boosting classifier
    clf = GradientBoostingClassifier(**params, random_state=0)
    clf.fit(X_train, y_train)
    
    return clf.predict(X_val)        


# Define the objective function for XGBoost


@objective
def xgb_objective(params):
    
    mlflow.set_tag("model", "xgb")
    mlflow.log_params(params)
    
    booster = xgb.train(
        params=params,
        dtrain=dtrain,
        num_boost_round=100,
        evals=[(dval, 'validation')],
        early_stopping_rounds=50
    )

    return booster.predict(dval)        

@objective
def lr_objective(params):
    mlflow.set_tag("model", "lr")
    mlflow.log_params(params)

    # Initialize and train the Softmax (Logistic Regression) classifier
    lr = LogisticRegression(
        multi_class='multinomial',
        **params
    )
    lr.fit(X_train, y_train)

    return lr.predict(X_val)

@objective
def dt_objective(params):
    mlflow.set_tag("model", "dt")
    mlflow.log_params(params)

    # Initialize and train the Decision Tree classifier
    clf = DecisionTreeClassifier(**params, random_state=0)
    clf.fit(X_train, y_train)

    return clf.predict(X_val)


In [25]:
# Define hyperparameter space for hyperopt
xgb_space = {
    'objective': 'multi:softmax',
    'learning_rate': hp.loguniform('learning_rate', -3, -1),
    'max_depth': scope.int(hp.quniform('max_depth', 2, 5, 1)),
    'min_child_weight': hp.loguniform('min_child_weight', -1, 3),
    'subsample': hp.uniform('subsample', 0.5, .9),
    'colsample_bytree': hp.uniform('colsample_bytree', 0.5, 1),
    'num_class': 4
    }

gb_space = {
    'learning_rate': hp.loguniform('learning_rate', -4, -1),
    'max_depth': scope.int(hp.quniform('max_depth', 1, 2, 1)),
    'subsample': hp.uniform('subsample', 0.5, 1),
    'n_estimators': scope.int(hp.quniform('n_estimators', 10, 30, 1)),
    }

# Define hyperparameter space for hyperopt
lr_space = {
    'n_jobs': -1,
    'penalty': "l2",
    'max_iter': int(1e8)
}


# Define the search space for Decision Tree
dt_space = {
    'criterion': hp.choice('criterion', ['gini', 'entropy']),
    'max_depth': scope.int(hp.uniform('max_depth', 7, 15)),
    'min_samples_split': hp.loguniform('min_samples_split', -4, -1),
    'class_weight': 'balanced'
    }

In [8]:
mlflow.set_tracking_uri("sqlite:///mlflow.db")
mlflow.set_experiment("importer-class-experiment")



2023/11/22 23:19:48 INFO mlflow.tracking.fluent: Experiment with name 'importer-class-experiment' does not exist. Creating a new experiment.


<Experiment: artifact_location='/home/tunglinux/units/python_practice/impgenius/notebooks/mlruns/1', creation_time=1700669988140, experiment_id='1', last_update_time=1700669988140, lifecycle_stage='active', name='importer-class-experiment', tags={}>

# With numeric features only

In [18]:
X = df[numericals]
y = df[target]


X_train, X_val, y_train, y_val = split_data(X, y)
dtrain = xgb.DMatrix(X_train, label=y_train)
dval = xgb.DMatrix(X_val, label=y_val)

y_train = y_train.ravel()
y_val = y_val.ravel()

In [10]:
# Model 1: xgb
xgb_best_result = fmin(
    fn=xgb_objective,
    space=xgb_space,
    algo=tpe.suggest,
    max_evals=50,
    trials=Trials()
)


[0]	validation-mlogloss:1.36032                                                  
[1]	validation-mlogloss:1.34125                                                  
[2]	validation-mlogloss:1.32857                                                  
[3]	validation-mlogloss:1.31787                                                  
[4]	validation-mlogloss:1.30969                                                  
[5]	validation-mlogloss:1.30357                                                  
[6]	validation-mlogloss:1.29932                                                  
[7]	validation-mlogloss:1.29559                                                  
[8]	validation-mlogloss:1.29312                                                  
[9]	validation-mlogloss:1.29098                                                  
[10]	validation-mlogloss:1.28915                                                 
[11]	validation-mlogloss:1.28767                                                 
[12]	validation-

[22]	validation-mlogloss:1.28476                                                 
[23]	validation-mlogloss:1.28477                                                 
[24]	validation-mlogloss:1.28482                                                 
[25]	validation-mlogloss:1.28470                                                 
[26]	validation-mlogloss:1.28468                                                 
[27]	validation-mlogloss:1.28475                                                 
[28]	validation-mlogloss:1.28479                                                 
[29]	validation-mlogloss:1.28425                                                 
[30]	validation-mlogloss:1.28442                                                 
[31]	validation-mlogloss:1.28446                                                 
[32]	validation-mlogloss:1.28433                                                 
[33]	validation-mlogloss:1.28411                                                 
[34]	validation-

[27]	validation-mlogloss:1.28837                                                 
[28]	validation-mlogloss:1.28818                                                 
[29]	validation-mlogloss:1.28801                                                 
[30]	validation-mlogloss:1.28781                                                 
[31]	validation-mlogloss:1.28758                                                 
[32]	validation-mlogloss:1.28730                                                 
[33]	validation-mlogloss:1.28715                                                 
[34]	validation-mlogloss:1.28704                                                 
[35]	validation-mlogloss:1.28688                                                 
[36]	validation-mlogloss:1.28683                                                 
[37]	validation-mlogloss:1.28655                                                 
[38]	validation-mlogloss:1.28640                                                 
[39]	validation-

[26]	validation-mlogloss:1.28366                                                 
[27]	validation-mlogloss:1.28393                                                 
[28]	validation-mlogloss:1.28412                                                 
[29]	validation-mlogloss:1.28412                                                 
[30]	validation-mlogloss:1.28409                                                 
[31]	validation-mlogloss:1.28373                                                 
[32]	validation-mlogloss:1.28389                                                 
[33]	validation-mlogloss:1.28361                                                 
[34]	validation-mlogloss:1.28375                                                 
[35]	validation-mlogloss:1.28394                                                 
[36]	validation-mlogloss:1.28383                                                 
[37]	validation-mlogloss:1.28380                                                 
[38]	validation-

[34]	validation-mlogloss:1.28349                                                 
[35]	validation-mlogloss:1.28359                                                 
[36]	validation-mlogloss:1.28331                                                 
[37]	validation-mlogloss:1.28321                                                 
[38]	validation-mlogloss:1.28335                                                 
[39]	validation-mlogloss:1.28312                                                 
[40]	validation-mlogloss:1.28284                                                 
[41]	validation-mlogloss:1.28280                                                 
[42]	validation-mlogloss:1.28266                                                 
[43]	validation-mlogloss:1.28266                                                 
[44]	validation-mlogloss:1.28245                                                 
[45]	validation-mlogloss:1.28247                                                 
[46]	validation-

[39]	validation-mlogloss:1.28450                                                 
[40]	validation-mlogloss:1.28453                                                 
[41]	validation-mlogloss:1.28458                                                 
[42]	validation-mlogloss:1.28466                                                 
[43]	validation-mlogloss:1.28472                                                 
[44]	validation-mlogloss:1.28490                                                 
[45]	validation-mlogloss:1.28527                                                 
[46]	validation-mlogloss:1.28486                                                 
[47]	validation-mlogloss:1.28490                                                 
[48]	validation-mlogloss:1.28505                                                 
[49]	validation-mlogloss:1.28492                                                 
[50]	validation-mlogloss:1.28464                                                 
[51]	validation-

[49]	validation-mlogloss:1.28536                                                 
[50]	validation-mlogloss:1.28533                                                 
[51]	validation-mlogloss:1.28519                                                 
[52]	validation-mlogloss:1.28494                                                 
[53]	validation-mlogloss:1.28477                                                 
[54]	validation-mlogloss:1.28449                                                 
[55]	validation-mlogloss:1.28427                                                 
[56]	validation-mlogloss:1.28421                                                 
[57]	validation-mlogloss:1.28402                                                 
[58]	validation-mlogloss:1.28389                                                 
[59]	validation-mlogloss:1.28377                                                 
[60]	validation-mlogloss:1.28368                                                 
[61]	validation-

[48]	validation-mlogloss:1.28409                                                 
[49]	validation-mlogloss:1.28396                                                 
[50]	validation-mlogloss:1.28388                                                 
[51]	validation-mlogloss:1.28381                                                 
[52]	validation-mlogloss:1.28394                                                 
[53]	validation-mlogloss:1.28390                                                 
[54]	validation-mlogloss:1.28391                                                 
[55]	validation-mlogloss:1.28394                                                 
[56]	validation-mlogloss:1.28399                                                 
[57]	validation-mlogloss:1.28388                                                 
[58]	validation-mlogloss:1.28406                                                 
[59]	validation-mlogloss:1.28416                                                 
[60]	validation-

[60]	validation-mlogloss:1.29361                                                 
[61]	validation-mlogloss:1.29348                                                 
[62]	validation-mlogloss:1.29416                                                 
[0]	validation-mlogloss:1.37985                                                  
[1]	validation-mlogloss:1.37335                                                  
[2]	validation-mlogloss:1.36760                                                  
[3]	validation-mlogloss:1.36181                                                  
[4]	validation-mlogloss:1.35693                                                  
[5]	validation-mlogloss:1.35198                                                  
[6]	validation-mlogloss:1.34790                                                  
[7]	validation-mlogloss:1.34397                                                  
[8]	validation-mlogloss:1.34030                                                  
[9]	validation-m

[96]	validation-mlogloss:1.28455                                                 
[97]	validation-mlogloss:1.28450                                                 
[98]	validation-mlogloss:1.28449                                                 
[99]	validation-mlogloss:1.28450                                                 
[0]	validation-mlogloss:1.37241                                                  
[1]	validation-mlogloss:1.35985                                                  
[2]	validation-mlogloss:1.34919                                                  
[3]	validation-mlogloss:1.33959                                                  
[4]	validation-mlogloss:1.33182                                                  
[5]	validation-mlogloss:1.32484                                                  
[6]	validation-mlogloss:1.31990                                                  
[7]	validation-mlogloss:1.31525                                                  
[8]	validation-m

[95]	validation-mlogloss:1.28278                                                 
[96]	validation-mlogloss:1.28262                                                 
[97]	validation-mlogloss:1.28251                                                 
[98]	validation-mlogloss:1.28228                                                 
[99]	validation-mlogloss:1.28242                                                 
[0]	validation-mlogloss:1.35441                                                  
[1]	validation-mlogloss:1.33415                                                  
[2]	validation-mlogloss:1.32068                                                  
[3]	validation-mlogloss:1.31049                                                  
[4]	validation-mlogloss:1.30305                                                  
[5]	validation-mlogloss:1.29828                                                  
[6]	validation-mlogloss:1.29484                                                  
[7]	validation-m

[18]	validation-mlogloss:1.28622                                                 
[19]	validation-mlogloss:1.28606                                                 
[20]	validation-mlogloss:1.28588                                                 
[21]	validation-mlogloss:1.28549                                                 
[22]	validation-mlogloss:1.28531                                                 
[23]	validation-mlogloss:1.28529                                                 
[24]	validation-mlogloss:1.28512                                                 
[25]	validation-mlogloss:1.28490                                                 
[26]	validation-mlogloss:1.28476                                                 
[27]	validation-mlogloss:1.28465                                                 
[28]	validation-mlogloss:1.28457                                                 
[29]	validation-mlogloss:1.28436                                                 
[30]	validation-

[26]	validation-mlogloss:1.28539                                                 
[27]	validation-mlogloss:1.28521                                                 
[28]	validation-mlogloss:1.28497                                                 
[29]	validation-mlogloss:1.28467                                                 
[30]	validation-mlogloss:1.28440                                                 
[31]	validation-mlogloss:1.28458                                                 
[32]	validation-mlogloss:1.28467                                                 
[33]	validation-mlogloss:1.28461                                                 
[34]	validation-mlogloss:1.28441                                                 
[35]	validation-mlogloss:1.28462                                                 
[36]	validation-mlogloss:1.28450                                                 
[37]	validation-mlogloss:1.28444                                                 
[38]	validation-

[44]	validation-mlogloss:1.28594                                                 
[45]	validation-mlogloss:1.28583                                                 
[46]	validation-mlogloss:1.28563                                                 
[47]	validation-mlogloss:1.28541                                                 
[48]	validation-mlogloss:1.28533                                                 
[49]	validation-mlogloss:1.28511                                                 
[50]	validation-mlogloss:1.28509                                                 
[51]	validation-mlogloss:1.28491                                                 
[52]	validation-mlogloss:1.28475                                                 
[53]	validation-mlogloss:1.28480                                                 
[54]	validation-mlogloss:1.28462                                                 
[55]	validation-mlogloss:1.28450                                                 
[56]	validation-

[43]	validation-mlogloss:1.28450                                                 
[44]	validation-mlogloss:1.28435                                                 
[45]	validation-mlogloss:1.28462                                                 
[46]	validation-mlogloss:1.28491                                                 
[47]	validation-mlogloss:1.28500                                                 
[48]	validation-mlogloss:1.28529                                                 
[49]	validation-mlogloss:1.28569                                                 
[50]	validation-mlogloss:1.28563                                                 
[51]	validation-mlogloss:1.28581                                                 
[52]	validation-mlogloss:1.28542                                                 
[53]	validation-mlogloss:1.28566                                                 
[54]	validation-mlogloss:1.28583                                                 
[55]	validation-

[66]	validation-mlogloss:1.28519                                                 
[67]	validation-mlogloss:1.28529                                                 
[68]	validation-mlogloss:1.28541                                                 
[69]	validation-mlogloss:1.28543                                                 
[70]	validation-mlogloss:1.28526                                                 
[71]	validation-mlogloss:1.28525                                                 
[72]	validation-mlogloss:1.28532                                                 
[73]	validation-mlogloss:1.28531                                                 
[74]	validation-mlogloss:1.28542                                                 
[75]	validation-mlogloss:1.28543                                                 
[76]	validation-mlogloss:1.28521                                                 
[77]	validation-mlogloss:1.28535                                                 
[78]	validation-

[65]	validation-mlogloss:1.28812                                                 
[66]	validation-mlogloss:1.28788                                                 
[67]	validation-mlogloss:1.28762                                                 
[68]	validation-mlogloss:1.28754                                                 
[69]	validation-mlogloss:1.28740                                                 
[70]	validation-mlogloss:1.28723                                                 
[71]	validation-mlogloss:1.28702                                                 
[72]	validation-mlogloss:1.28692                                                 
[73]	validation-mlogloss:1.28688                                                 
[74]	validation-mlogloss:1.28676                                                 
[75]	validation-mlogloss:1.28664                                                 
[76]	validation-mlogloss:1.28647                                                 
[77]	validation-

[64]	validation-mlogloss:1.28633                                                 
[65]	validation-mlogloss:1.28702                                                 
[66]	validation-mlogloss:1.28737                                                 
[67]	validation-mlogloss:1.28750                                                 
[68]	validation-mlogloss:1.28797                                                 
[69]	validation-mlogloss:1.28795                                                 
[70]	validation-mlogloss:1.28785                                                 
[71]	validation-mlogloss:1.28789                                                 
[72]	validation-mlogloss:1.28784                                                 
[73]	validation-mlogloss:1.28835                                                 
[74]	validation-mlogloss:1.28819                                                 
[75]	validation-mlogloss:1.28816                                                 
[76]	validation-

[77]	validation-mlogloss:1.28418                                                 
[78]	validation-mlogloss:1.28411                                                 
[79]	validation-mlogloss:1.28411                                                 
[80]	validation-mlogloss:1.28416                                                 
[81]	validation-mlogloss:1.28409                                                 
[82]	validation-mlogloss:1.28402                                                 
[83]	validation-mlogloss:1.28393                                                 
[84]	validation-mlogloss:1.28393                                                 
[85]	validation-mlogloss:1.28390                                                 
[86]	validation-mlogloss:1.28383                                                 
[87]	validation-mlogloss:1.28403                                                 
[88]	validation-mlogloss:1.28408                                                 
[89]	validation-

[76]	validation-mlogloss:1.28587                                                 
[77]	validation-mlogloss:1.28587                                                 
[78]	validation-mlogloss:1.28577                                                 
[79]	validation-mlogloss:1.28574                                                 
[80]	validation-mlogloss:1.28568                                                 
[81]	validation-mlogloss:1.28557                                                 
[82]	validation-mlogloss:1.28552                                                 
[83]	validation-mlogloss:1.28542                                                 
[84]	validation-mlogloss:1.28544                                                 
[85]	validation-mlogloss:1.28533                                                 
[86]	validation-mlogloss:1.28537                                                 
[87]	validation-mlogloss:1.28537                                                 
[88]	validation-

[75]	validation-mlogloss:1.29059                                                 
[76]	validation-mlogloss:1.29050                                                 
[77]	validation-mlogloss:1.29031                                                 
[78]	validation-mlogloss:1.29020                                                 
[79]	validation-mlogloss:1.29005                                                 
[80]	validation-mlogloss:1.28990                                                 
[81]	validation-mlogloss:1.28977                                                 
[82]	validation-mlogloss:1.28968                                                 
[83]	validation-mlogloss:1.28961                                                 
[84]	validation-mlogloss:1.28952                                                 
[85]	validation-mlogloss:1.28942                                                 
[86]	validation-mlogloss:1.28937                                                 
[87]	validation-

[74]	validation-mlogloss:1.29059                                                 
[75]	validation-mlogloss:1.29044                                                 
[76]	validation-mlogloss:1.29034                                                 
[77]	validation-mlogloss:1.29017                                                 
[78]	validation-mlogloss:1.29004                                                 
[79]	validation-mlogloss:1.28989                                                 
[80]	validation-mlogloss:1.28976                                                 
[81]	validation-mlogloss:1.28961                                                 
[82]	validation-mlogloss:1.28953                                                 
[83]	validation-mlogloss:1.28943                                                 
[84]	validation-mlogloss:1.28934                                                 
[85]	validation-mlogloss:1.28926                                                 
[86]	validation-

[73]	validation-mlogloss:1.28768                                                 
[74]	validation-mlogloss:1.28753                                                 
[75]	validation-mlogloss:1.28749                                                 
[76]	validation-mlogloss:1.28747                                                 
[77]	validation-mlogloss:1.28744                                                 
[78]	validation-mlogloss:1.28742                                                 
[79]	validation-mlogloss:1.28735                                                 
[80]	validation-mlogloss:1.28722                                                 
[81]	validation-mlogloss:1.28707                                                 
[82]	validation-mlogloss:1.28700                                                 
[83]	validation-mlogloss:1.28691                                                 
[84]	validation-mlogloss:1.28686                                                 
[85]	validation-

[72]	validation-mlogloss:1.28837                                                 
[73]	validation-mlogloss:1.28831                                                 
[74]	validation-mlogloss:1.28817                                                 
[75]	validation-mlogloss:1.28813                                                 
[76]	validation-mlogloss:1.28808                                                 
[77]	validation-mlogloss:1.28803                                                 
[78]	validation-mlogloss:1.28800                                                 
[79]	validation-mlogloss:1.28788                                                 
[80]	validation-mlogloss:1.28778                                                 
[81]	validation-mlogloss:1.28761                                                 
[82]	validation-mlogloss:1.28755                                                 
[83]	validation-mlogloss:1.28744                                                 
[84]	validation-

[71]	validation-mlogloss:1.28830                                                 
[72]	validation-mlogloss:1.28830                                                 
[73]	validation-mlogloss:1.28823                                                 
[74]	validation-mlogloss:1.28811                                                 
[75]	validation-mlogloss:1.28804                                                 
[76]	validation-mlogloss:1.28796                                                 
[77]	validation-mlogloss:1.28779                                                 
[78]	validation-mlogloss:1.28773                                                 
[79]	validation-mlogloss:1.28767                                                 
[80]	validation-mlogloss:1.28759                                                 
[81]	validation-mlogloss:1.28743                                                 
[82]	validation-mlogloss:1.28733                                                 
[83]	validation-

[70]	validation-mlogloss:1.28789                                                 
[71]	validation-mlogloss:1.28788                                                 
[72]	validation-mlogloss:1.28788                                                 
[73]	validation-mlogloss:1.28786                                                 
[74]	validation-mlogloss:1.28773                                                 
[75]	validation-mlogloss:1.28763                                                 
[76]	validation-mlogloss:1.28753                                                 
[77]	validation-mlogloss:1.28740                                                 
[78]	validation-mlogloss:1.28742                                                 
[79]	validation-mlogloss:1.28731                                                 
[80]	validation-mlogloss:1.28722                                                 
[81]	validation-mlogloss:1.28713                                                 
[82]	validation-

[69]	validation-mlogloss:1.28671                                                 
[70]	validation-mlogloss:1.28660                                                 
[71]	validation-mlogloss:1.28663                                                 
[72]	validation-mlogloss:1.28661                                                 
[73]	validation-mlogloss:1.28656                                                 
[74]	validation-mlogloss:1.28650                                                 
[75]	validation-mlogloss:1.28641                                                 
[76]	validation-mlogloss:1.28640                                                 
[77]	validation-mlogloss:1.28645                                                 
[78]	validation-mlogloss:1.28644                                                 
[79]	validation-mlogloss:1.28645                                                 
[80]	validation-mlogloss:1.28643                                                 
[81]	validation-

[68]	validation-mlogloss:1.28935                                                 
[69]	validation-mlogloss:1.28924                                                 
[70]	validation-mlogloss:1.28908                                                 
[71]	validation-mlogloss:1.28908                                                 
[72]	validation-mlogloss:1.28898                                                 
[73]	validation-mlogloss:1.28891                                                 
[74]	validation-mlogloss:1.28880                                                 
[75]	validation-mlogloss:1.28873                                                 
[76]	validation-mlogloss:1.28866                                                 
[77]	validation-mlogloss:1.28857                                                 
[78]	validation-mlogloss:1.28852                                                 
[79]	validation-mlogloss:1.28849                                                 
[80]	validation-

[67]	validation-mlogloss:1.28655                                                 
[68]	validation-mlogloss:1.28649                                                 
[69]	validation-mlogloss:1.28646                                                 
[70]	validation-mlogloss:1.28626                                                 
[71]	validation-mlogloss:1.28618                                                 
[72]	validation-mlogloss:1.28620                                                 
[73]	validation-mlogloss:1.28615                                                 
[74]	validation-mlogloss:1.28606                                                 
[75]	validation-mlogloss:1.28592                                                 
[76]	validation-mlogloss:1.28585                                                 
[77]	validation-mlogloss:1.28585                                                 
[78]	validation-mlogloss:1.28579                                                 
[79]	validation-

[66]	validation-mlogloss:1.28566                                                 
[67]	validation-mlogloss:1.28559                                                 
[68]	validation-mlogloss:1.28546                                                 
[69]	validation-mlogloss:1.28535                                                 
[70]	validation-mlogloss:1.28525                                                 
[71]	validation-mlogloss:1.28523                                                 
[72]	validation-mlogloss:1.28525                                                 
[73]	validation-mlogloss:1.28523                                                 
[74]	validation-mlogloss:1.28514                                                 
[75]	validation-mlogloss:1.28507                                                 
[76]	validation-mlogloss:1.28512                                                 
[77]	validation-mlogloss:1.28513                                                 
[78]	validation-

[65]	validation-mlogloss:1.28724                                                 
[66]	validation-mlogloss:1.28717                                                 
[67]	validation-mlogloss:1.28707                                                 
[68]	validation-mlogloss:1.28701                                                 
[69]	validation-mlogloss:1.28694                                                 
[70]	validation-mlogloss:1.28679                                                 
[71]	validation-mlogloss:1.28672                                                 
[72]	validation-mlogloss:1.28669                                                 
[73]	validation-mlogloss:1.28664                                                 
[74]	validation-mlogloss:1.28659                                                 
[75]	validation-mlogloss:1.28653                                                 
[76]	validation-mlogloss:1.28644                                                 
[77]	validation-

[64]	validation-mlogloss:1.28624                                                 
[65]	validation-mlogloss:1.28612                                                 
[66]	validation-mlogloss:1.28610                                                 
[67]	validation-mlogloss:1.28602                                                 
[68]	validation-mlogloss:1.28593                                                 
[69]	validation-mlogloss:1.28586                                                 
[70]	validation-mlogloss:1.28574                                                 
[71]	validation-mlogloss:1.28553                                                 
[72]	validation-mlogloss:1.28557                                                 
[73]	validation-mlogloss:1.28553                                                 
[74]	validation-mlogloss:1.28537                                                 
[75]	validation-mlogloss:1.28527                                                 
[76]	validation-

[63]	validation-mlogloss:1.29102                                                 
[64]	validation-mlogloss:1.29084                                                 
[65]	validation-mlogloss:1.29068                                                 
[66]	validation-mlogloss:1.29054                                                 
[67]	validation-mlogloss:1.29039                                                 
[68]	validation-mlogloss:1.29026                                                 
[69]	validation-mlogloss:1.29013                                                 
[70]	validation-mlogloss:1.28996                                                 
[71]	validation-mlogloss:1.28982                                                 
[72]	validation-mlogloss:1.28978                                                 
[73]	validation-mlogloss:1.28968                                                 
[74]	validation-mlogloss:1.28957                                                 
[75]	validation-

[62]	validation-mlogloss:1.28367                                                 
[63]	validation-mlogloss:1.28368                                                 
[64]	validation-mlogloss:1.28384                                                 
[65]	validation-mlogloss:1.28393                                                 
[66]	validation-mlogloss:1.28392                                                 
[67]	validation-mlogloss:1.28385                                                 
[68]	validation-mlogloss:1.28378                                                 
[69]	validation-mlogloss:1.28381                                                 
[70]	validation-mlogloss:1.28365                                                 
[71]	validation-mlogloss:1.28372                                                 
[72]	validation-mlogloss:1.28379                                                 
[73]	validation-mlogloss:1.28381                                                 
[74]	validation-

[61]	validation-mlogloss:1.28949                                                 
[62]	validation-mlogloss:1.28935                                                 
[63]	validation-mlogloss:1.28925                                                 
[64]	validation-mlogloss:1.28913                                                 
[65]	validation-mlogloss:1.28905                                                 
[66]	validation-mlogloss:1.28898                                                 
[67]	validation-mlogloss:1.28892                                                 
[68]	validation-mlogloss:1.28875                                                 
[69]	validation-mlogloss:1.28865                                                 
[70]	validation-mlogloss:1.28849                                                 
[71]	validation-mlogloss:1.28838                                                 
[72]	validation-mlogloss:1.28838                                                 
[73]	validation-

[60]	validation-mlogloss:1.28372                                                 
[61]	validation-mlogloss:1.28372                                                 
[62]	validation-mlogloss:1.28361                                                 
[63]	validation-mlogloss:1.28363                                                 
[64]	validation-mlogloss:1.28367                                                 
[65]	validation-mlogloss:1.28356                                                 
[66]	validation-mlogloss:1.28358                                                 
[67]	validation-mlogloss:1.28351                                                 
[68]	validation-mlogloss:1.28335                                                 
[69]	validation-mlogloss:1.28339                                                 
[70]	validation-mlogloss:1.28346                                                 
[71]	validation-mlogloss:1.28343                                                 
[72]	validation-

[59]	validation-mlogloss:1.28569                                                 
[60]	validation-mlogloss:1.28580                                                 
[61]	validation-mlogloss:1.28578                                                 
[62]	validation-mlogloss:1.28565                                                 
[63]	validation-mlogloss:1.28564                                                 
[64]	validation-mlogloss:1.28557                                                 
[65]	validation-mlogloss:1.28558                                                 
[66]	validation-mlogloss:1.28545                                                 
[67]	validation-mlogloss:1.28543                                                 
[68]	validation-mlogloss:1.28544                                                 
[69]	validation-mlogloss:1.28555                                                 
[70]	validation-mlogloss:1.28543                                                 
[71]	validation-

[58]	validation-mlogloss:1.28341                                                 
[59]	validation-mlogloss:1.28332                                                 
[60]	validation-mlogloss:1.28329                                                 
[61]	validation-mlogloss:1.28333                                                 
[62]	validation-mlogloss:1.28318                                                 
[63]	validation-mlogloss:1.28314                                                 
[64]	validation-mlogloss:1.28310                                                 
[65]	validation-mlogloss:1.28310                                                 
[66]	validation-mlogloss:1.28306                                                 
[67]	validation-mlogloss:1.28304                                                 
[68]	validation-mlogloss:1.28290                                                 
[69]	validation-mlogloss:1.28284                                                 
[70]	validation-

[57]	validation-mlogloss:1.28776                                                 
[58]	validation-mlogloss:1.28763                                                 
[59]	validation-mlogloss:1.28747                                                 
[60]	validation-mlogloss:1.28727                                                 
[61]	validation-mlogloss:1.28717                                                 
[62]	validation-mlogloss:1.28698                                                 
[63]	validation-mlogloss:1.28690                                                 
[64]	validation-mlogloss:1.28690                                                 
[65]	validation-mlogloss:1.28677                                                 
[66]	validation-mlogloss:1.28664                                                 
[67]	validation-mlogloss:1.28653                                                 
[68]	validation-mlogloss:1.28644                                                 
[69]	validation-

[56]	validation-mlogloss:1.28410                                                 
[57]	validation-mlogloss:1.28394                                                 
[58]	validation-mlogloss:1.28380                                                 
[59]	validation-mlogloss:1.28376                                                 
[60]	validation-mlogloss:1.28390                                                 
[61]	validation-mlogloss:1.28392                                                 
[62]	validation-mlogloss:1.28405                                                 
[63]	validation-mlogloss:1.28401                                                 
[64]	validation-mlogloss:1.28404                                                 
[65]	validation-mlogloss:1.28410                                                 
[66]	validation-mlogloss:1.28414                                                 
[67]	validation-mlogloss:1.28410                                                 
[68]	validation-

[55]	validation-mlogloss:1.29016                                                 
[56]	validation-mlogloss:1.29007                                                 
[57]	validation-mlogloss:1.28990                                                 
[58]	validation-mlogloss:1.28966                                                 
[59]	validation-mlogloss:1.28945                                                 
[60]	validation-mlogloss:1.28930                                                 
[61]	validation-mlogloss:1.28924                                                 
[62]	validation-mlogloss:1.28912                                                 
[63]	validation-mlogloss:1.28897                                                 
[64]	validation-mlogloss:1.28878                                                 
[65]	validation-mlogloss:1.28870                                                 
[66]	validation-mlogloss:1.28853                                                 
[67]	validation-

[54]	validation-mlogloss:1.28479                                                 
[55]	validation-mlogloss:1.28458                                                 
[56]	validation-mlogloss:1.28456                                                 
[57]	validation-mlogloss:1.28443                                                 
[58]	validation-mlogloss:1.28424                                                 
[59]	validation-mlogloss:1.28403                                                 
[60]	validation-mlogloss:1.28384                                                 
[61]	validation-mlogloss:1.28367                                                 
[62]	validation-mlogloss:1.28368                                                 
[63]	validation-mlogloss:1.28361                                                 
[64]	validation-mlogloss:1.28349                                                 
[65]	validation-mlogloss:1.28341                                                 
[66]	validation-

[53]	validation-mlogloss:1.28714                                                 
[54]	validation-mlogloss:1.28699                                                 
[55]	validation-mlogloss:1.28686                                                 
[56]	validation-mlogloss:1.28682                                                 
[57]	validation-mlogloss:1.28676                                                 
[58]	validation-mlogloss:1.28677                                                 
[59]	validation-mlogloss:1.28665                                                 
[60]	validation-mlogloss:1.28671                                                 
[61]	validation-mlogloss:1.28657                                                 
[62]	validation-mlogloss:1.28654                                                 
[63]	validation-mlogloss:1.28645                                                 
[64]	validation-mlogloss:1.28646                                                 
[65]	validation-

[52]	validation-mlogloss:1.28336                                                 
[53]	validation-mlogloss:1.28338                                                 
[54]	validation-mlogloss:1.28332                                                 
[55]	validation-mlogloss:1.28336                                                 
[56]	validation-mlogloss:1.28339                                                 
[57]	validation-mlogloss:1.28344                                                 
[58]	validation-mlogloss:1.28348                                                 
[59]	validation-mlogloss:1.28342                                                 
[60]	validation-mlogloss:1.28359                                                 
[61]	validation-mlogloss:1.28352                                                 
[62]	validation-mlogloss:1.28360                                                 
[63]	validation-mlogloss:1.28357                                                 
[64]	validation-

[51]	validation-mlogloss:1.28924                                                 
[52]	validation-mlogloss:1.28887                                                 
[53]	validation-mlogloss:1.28868                                                 
[54]	validation-mlogloss:1.28837                                                 
[55]	validation-mlogloss:1.28812                                                 
[56]	validation-mlogloss:1.28801                                                 
[57]	validation-mlogloss:1.28771                                                 
[58]	validation-mlogloss:1.28744                                                 
[59]	validation-mlogloss:1.28708                                                 
[60]	validation-mlogloss:1.28681                                                 
[61]	validation-mlogloss:1.28656                                                 
[62]	validation-mlogloss:1.28636                                                 
[63]	validation-

[50]	validation-mlogloss:1.28514                                                 
[51]	validation-mlogloss:1.28513                                                 
[52]	validation-mlogloss:1.28502                                                 
[53]	validation-mlogloss:1.28490                                                 
[54]	validation-mlogloss:1.28499                                                 
[55]	validation-mlogloss:1.28486                                                 
[56]	validation-mlogloss:1.28507                                                 
[57]	validation-mlogloss:1.28517                                                 
[58]	validation-mlogloss:1.28511                                                 
[59]	validation-mlogloss:1.28512                                                 
[60]	validation-mlogloss:1.28513                                                 
[61]	validation-mlogloss:1.28511                                                 
[62]	validation-

[49]	validation-mlogloss:1.28717                                                 
[50]	validation-mlogloss:1.28705                                                 
[51]	validation-mlogloss:1.28750                                                 
[52]	validation-mlogloss:1.28787                                                 
[53]	validation-mlogloss:1.28820                                                 
[54]	validation-mlogloss:1.28835                                                 
[55]	validation-mlogloss:1.28846                                                 
[56]	validation-mlogloss:1.28833                                                 
[57]	validation-mlogloss:1.28806                                                 
[58]	validation-mlogloss:1.28788                                                 
[59]	validation-mlogloss:1.28807                                                 
[60]	validation-mlogloss:1.28812                                                 
[61]	validation-

[81]	validation-mlogloss:1.28607                                                 
[82]	validation-mlogloss:1.28595                                                 
[83]	validation-mlogloss:1.28601                                                 
[84]	validation-mlogloss:1.28624                                                 
[85]	validation-mlogloss:1.28604                                                 
[86]	validation-mlogloss:1.28590                                                 
[87]	validation-mlogloss:1.28615                                                 
[88]	validation-mlogloss:1.28614                                                 
[89]	validation-mlogloss:1.28610                                                 
[90]	validation-mlogloss:1.28622                                                 
[91]	validation-mlogloss:1.28631                                                 
[92]	validation-mlogloss:1.28644                                                 
[0]	validation-m

[87]	validation-mlogloss:1.28518                                                 
[88]	validation-mlogloss:1.28521                                                 
[89]	validation-mlogloss:1.28531                                                 
[90]	validation-mlogloss:1.28526                                                 
[91]	validation-mlogloss:1.28524                                                 
[92]	validation-mlogloss:1.28512                                                 
[93]	validation-mlogloss:1.28507                                                 
[94]	validation-mlogloss:1.28504                                                 
[95]	validation-mlogloss:1.28499                                                 
[96]	validation-mlogloss:1.28486                                                 
[97]	validation-mlogloss:1.28476                                                 
[98]	validation-mlogloss:1.28471                                                 
[99]	validation-

In [11]:
import hyperopt
# Train the final model with the best hyperparameters
xgb_best = xgb.train(
    params=hyperopt.space_eval(xgb_space, xgb_best_result),
    dtrain=dtrain, 
    num_boost_round=100,
    evals=[(dval, 'validation')])

# Predict on the validation set with the best model
y_val_pred_xgb = xgb_best.predict(dval).astype(int)

# Generate the classification report
xgb_report = classification_report(y_val, y_val_pred_xgb)
print(xgb_report)


[0]	validation-mlogloss:1.37878
[1]	validation-mlogloss:1.37199
[2]	validation-mlogloss:1.36593
[3]	validation-mlogloss:1.36013
[4]	validation-mlogloss:1.35479
[5]	validation-mlogloss:1.34971
[6]	validation-mlogloss:1.34546
[7]	validation-mlogloss:1.34129
[8]	validation-mlogloss:1.33758
[9]	validation-mlogloss:1.33396
[10]	validation-mlogloss:1.33081
[11]	validation-mlogloss:1.32799
[12]	validation-mlogloss:1.32523
[13]	validation-mlogloss:1.32272
[14]	validation-mlogloss:1.32075
[15]	validation-mlogloss:1.31863
[16]	validation-mlogloss:1.31660
[17]	validation-mlogloss:1.31474
[18]	validation-mlogloss:1.31311
[19]	validation-mlogloss:1.31150
[20]	validation-mlogloss:1.31006
[21]	validation-mlogloss:1.30869
[22]	validation-mlogloss:1.30747
[23]	validation-mlogloss:1.30633
[24]	validation-mlogloss:1.30520
[25]	validation-mlogloss:1.30419
[26]	validation-mlogloss:1.30316
[27]	validation-mlogloss:1.30223
[28]	validation-mlogloss:1.30139
[29]	validation-mlogloss:1.30059
[30]	validation-mlog

In [12]:
# Model 2: gb
gb_best_result = fmin(
    fn=gradient_boosting_objective,
    space=gb_space,
    algo=tpe.suggest,
    max_evals=50,
    trials=Trials()
)


 22%|██▏       | 11/50 [00:09<00:30,  1.27trial/s, best loss: 0.5879142941954514]

  _warn_prf(average, modifier, msg_start, len(result))



 34%|███▍      | 17/50 [00:15<00:30,  1.10trial/s, best loss: 0.5879142941954514]

  _warn_prf(average, modifier, msg_start, len(result))



 42%|████▏     | 21/50 [00:19<00:27,  1.06trial/s, best loss: 0.5870592975319289]

  _warn_prf(average, modifier, msg_start, len(result))



 54%|█████▍    | 27/50 [00:25<00:25,  1.09s/trial, best loss: 0.5870592975319289]

  _warn_prf(average, modifier, msg_start, len(result))



 60%|██████    | 30/50 [00:28<00:18,  1.09trial/s, best loss: 0.5870592975319289]

  _warn_prf(average, modifier, msg_start, len(result))



 62%|██████▏   | 31/50 [00:28<00:15,  1.24trial/s, best loss: 0.5870592975319289]

  _warn_prf(average, modifier, msg_start, len(result))



 82%|████████▏ | 41/50 [00:36<00:06,  1.29trial/s, best loss: 0.5870592975319289]

  _warn_prf(average, modifier, msg_start, len(result))



 92%|█████████▏| 46/50 [00:41<00:03,  1.22trial/s, best loss: 0.5870592975319289]

  _warn_prf(average, modifier, msg_start, len(result))



100%|██████████| 50/50 [00:45<00:00,  1.10trial/s, best loss: 0.5870592975319289]


In [13]:
# Retrieve the best hyperparameters from the Trials object
gb_best_params = hyperopt.space_eval(gb_space, gb_best_result)

# Train the final model with the best hyperparameters
gb_best = GradientBoostingClassifier(random_state=0, **gb_best_params)
gb_best.fit(X_train, y_train)

# Predict on the validation set with the best model
y_val_pred_gb = gb_best.predict(X_val).astype(int)

# Generate the classification report
gb_report = classification_report(y_val, y_val_pred_gb)
print(gb_report)



              precision    recall  f1-score   support

           0       0.68      0.08      0.14       559
           1       0.41      0.01      0.02       880
           2       0.33      0.05      0.08      1185
           3       0.38      0.98      0.55      1511

    accuracy                           0.39      4135
   macro avg       0.45      0.28      0.20      4135
weighted avg       0.41      0.39      0.25      4135



In [14]:
# Model 3: lr
lr_best_result = fmin(
    fn=lr_objective,
    space=lr_space,
    algo=tpe.suggest,
    max_evals=50,
    trials=Trials()
)


100%|██████████| 50/50 [04:53<00:00,  5.87s/trial, best loss: 0.6177271752360818]


In [15]:
# Retrieve the best hyperparameters from the Trials object
lr_best_params = hyperopt.space_eval(lr_space, lr_best_result)

# Train the final model with the best hyperparameters
lr_best = LogisticRegression(random_state=0, **lr_best_params)
lr_best.fit(X_train, y_train)

# Predict on the validation set with the best model
y_val_pred_lr = lr_best.predict(X_val).astype(int)

# Generate the classification report
lr_report = classification_report(y_val, y_val_pred_lr)
print(lr_report)



              precision    recall  f1-score   support

           0       0.60      0.10      0.17       559
           1       0.30      0.08      0.12       880
           2       0.32      0.08      0.13      1185
           3       0.40      0.93      0.56      1511

    accuracy                           0.39      4135
   macro avg       0.40      0.30      0.24      4135
weighted avg       0.38      0.39      0.29      4135



In [26]:
# Model 4: dt
dt_best_result = fmin(
    fn=dt_objective,
    space=dt_space,
    algo=tpe.suggest,
    max_evals=50,
    trials=Trials()
)


 12%|█▎         | 6/50 [00:00<00:04, 10.42trial/s, best loss: 0.6597434073048168]

  _warn_prf(average, modifier, msg_start, len(result))



 20%|██        | 10/50 [00:00<00:03, 10.39trial/s, best loss: 0.6594417211074189]

  _warn_prf(average, modifier, msg_start, len(result))



 28%|██▊       | 14/50 [00:01<00:03, 10.07trial/s, best loss: 0.6594417211074189]

  _warn_prf(average, modifier, msg_start, len(result))



 88%|████████▊ | 44/50 [00:04<00:00,  9.27trial/s, best loss: 0.6563790232148397]

  _warn_prf(average, modifier, msg_start, len(result))



100%|██████████| 50/50 [00:05<00:00,  9.63trial/s, best loss: 0.6558752135704268]


In [27]:
dt_best_params = hyperopt.space_eval(dt_space, dt_best_result)

# Train the final model with the best hyperparameters
dt_best = DecisionTreeClassifier(random_state=0, **dt_best_params)
dt_best.fit(X_train, y_train)

# Predict on the validation set with the best model
y_val_pred_dt = dt_best.predict(X_val).astype(int)

# Generate the classification report
dt_report = classification_report(y_val, y_val_pred_dt)
print(dt_report)



              precision    recall  f1-score   support

           0       0.24      0.41      0.31       559
           1       0.27      0.10      0.14       880
           2       0.35      0.02      0.04      1185
           3       0.42      0.77      0.54      1511

    accuracy                           0.37      4135
   macro avg       0.32      0.33      0.26      4135
weighted avg       0.34      0.37      0.28      4135



# With categorical variables

In [36]:
# OneHot encoding
X = pd.get_dummies(
    df.drop([target, 'importer_rank'], axis=1),
    columns=categoricals
)
y = df[target]

X_train, X_val, y_train, y_val = split_data(X, y)
dtrain = xgb.DMatrix(X_train, label=y_train)
dval = xgb.DMatrix(X_val, label=y_val)

y_train = y_train.ravel()
y_val = y_val.ravel()

In [39]:
# Model 1: xgb
xgb_best_result = fmin(
    fn=xgb_objective,
    space=xgb_space,
    algo=tpe.suggest,
    max_evals=50,
    trials=Trials()
)


[0]	validation-mlogloss:1.37290                                                  
[1]	validation-mlogloss:1.36078                                                  
[2]	validation-mlogloss:1.35023                                                  
[3]	validation-mlogloss:1.34072                                                  
[4]	validation-mlogloss:1.33296                                                  
[5]	validation-mlogloss:1.32589                                                  
[6]	validation-mlogloss:1.32000                                                  
[7]	validation-mlogloss:1.31427                                                  
[8]	validation-mlogloss:1.30951                                                  
[9]	validation-mlogloss:1.30505                                                  
[10]	validation-mlogloss:1.30153                                                 
[11]	validation-mlogloss:1.29818                                                 
[12]	validation-

[99]	validation-mlogloss:1.25106                                                 
[0]	validation-mlogloss:1.35883                                                  
[1]	validation-mlogloss:1.33990                                                  
[2]	validation-mlogloss:1.32490                                                  
[3]	validation-mlogloss:1.31457                                                  
[4]	validation-mlogloss:1.30633                                                  
[5]	validation-mlogloss:1.29973                                                  
[6]	validation-mlogloss:1.29453                                                  
[7]	validation-mlogloss:1.28973                                                  
[8]	validation-mlogloss:1.28640                                                  
[9]	validation-mlogloss:1.28278                                                  
[10]	validation-mlogloss:1.28045                                                 
[11]	validation-

[98]	validation-mlogloss:1.24183                                                 
[99]	validation-mlogloss:1.24166                                                 
[0]	validation-mlogloss:1.36304                                                  
[1]	validation-mlogloss:1.34574                                                  
[2]	validation-mlogloss:1.33141                                                  
[3]	validation-mlogloss:1.32023                                                  
[4]	validation-mlogloss:1.31164                                                  
[5]	validation-mlogloss:1.30458                                                  
[6]	validation-mlogloss:1.29943                                                  
[7]	validation-mlogloss:1.29449                                                  
[8]	validation-mlogloss:1.29036                                                  
[9]	validation-mlogloss:1.28743                                                  
[10]	validation-

[97]	validation-mlogloss:1.24876                                                 
[98]	validation-mlogloss:1.24887                                                 
[99]	validation-mlogloss:1.24899                                                 
[0]	validation-mlogloss:1.37750                                                  
[1]	validation-mlogloss:1.36989                                                  
[2]	validation-mlogloss:1.36259                                                  
[3]	validation-mlogloss:1.35586                                                  
[4]	validation-mlogloss:1.35017                                                  
[5]	validation-mlogloss:1.34471                                                  
[6]	validation-mlogloss:1.33976                                                  
[7]	validation-mlogloss:1.33482                                                  
[8]	validation-mlogloss:1.33043                                                  
[9]	validation-m

[96]	validation-mlogloss:1.26364                                                 
[97]	validation-mlogloss:1.26330                                                 
[98]	validation-mlogloss:1.26309                                                 
[99]	validation-mlogloss:1.26293                                                 
[0]	validation-mlogloss:1.35095                                                  
[1]	validation-mlogloss:1.32818                                                  
[2]	validation-mlogloss:1.31298                                                  
[3]	validation-mlogloss:1.30128                                                  
[4]	validation-mlogloss:1.29405                                                  
[5]	validation-mlogloss:1.28854                                                  
[6]	validation-mlogloss:1.28532                                                  
[7]	validation-mlogloss:1.28239                                                  
[8]	validation-m

[95]	validation-mlogloss:1.24061                                                 
[96]	validation-mlogloss:1.24043                                                 
[97]	validation-mlogloss:1.24019                                                 
[98]	validation-mlogloss:1.24023                                                 
[99]	validation-mlogloss:1.24043                                                 
[0]	validation-mlogloss:1.36174                                                  
[1]	validation-mlogloss:1.34299                                                  
[2]	validation-mlogloss:1.32836                                                  
[3]	validation-mlogloss:1.31714                                                  
[4]	validation-mlogloss:1.30938                                                  
[5]	validation-mlogloss:1.30265                                                  
[6]	validation-mlogloss:1.29807                                                  
[7]	validation-m

[94]	validation-mlogloss:1.24738                                                 
[95]	validation-mlogloss:1.24697                                                 
[96]	validation-mlogloss:1.24683                                                 
[97]	validation-mlogloss:1.24675                                                 
[98]	validation-mlogloss:1.24672                                                 
[99]	validation-mlogloss:1.24636                                                 
[0]	validation-mlogloss:1.35886                                                  
[1]	validation-mlogloss:1.33910                                                  
[2]	validation-mlogloss:1.32404                                                  
[3]	validation-mlogloss:1.31247                                                  
[4]	validation-mlogloss:1.30386                                                  
[5]	validation-mlogloss:1.29660                                                  
[6]	validation-m

[93]	validation-mlogloss:1.24020                                                 
[94]	validation-mlogloss:1.23992                                                 
[95]	validation-mlogloss:1.24003                                                 
[96]	validation-mlogloss:1.23968                                                 
[97]	validation-mlogloss:1.23968                                                 
[98]	validation-mlogloss:1.23964                                                 
[99]	validation-mlogloss:1.23944                                                 
[0]	validation-mlogloss:1.38009                                                  
[1]	validation-mlogloss:1.37426                                                  
[2]	validation-mlogloss:1.36864                                                  
[3]	validation-mlogloss:1.36317                                                  
[4]	validation-mlogloss:1.35835                                                  
[5]	validation-m

[92]	validation-mlogloss:1.27088                                                 
[93]	validation-mlogloss:1.27057                                                 
[94]	validation-mlogloss:1.27031                                                 
[95]	validation-mlogloss:1.27016                                                 
[96]	validation-mlogloss:1.26995                                                 
[97]	validation-mlogloss:1.26980                                                 
[98]	validation-mlogloss:1.26960                                                 
[99]	validation-mlogloss:1.26944                                                 
[0]	validation-mlogloss:1.37761                                                  
[1]	validation-mlogloss:1.36934                                                  
[2]	validation-mlogloss:1.36150                                                  
[3]	validation-mlogloss:1.35425                                                  
[4]	validation-m

[91]	validation-mlogloss:1.25821                                                 
[92]	validation-mlogloss:1.25796                                                 
[93]	validation-mlogloss:1.25768                                                 
[94]	validation-mlogloss:1.25748                                                 
[95]	validation-mlogloss:1.25732                                                 
[96]	validation-mlogloss:1.25728                                                 
[97]	validation-mlogloss:1.25717                                                 
[98]	validation-mlogloss:1.25697                                                 
[99]	validation-mlogloss:1.25679                                                 
[0]	validation-mlogloss:1.37512                                                  
[1]	validation-mlogloss:1.36469                                                  
[2]	validation-mlogloss:1.35543                                                  
[3]	validation-m

[90]	validation-mlogloss:1.26125                                                 
[91]	validation-mlogloss:1.26116                                                 
[92]	validation-mlogloss:1.26106                                                 
[93]	validation-mlogloss:1.26084                                                 
[94]	validation-mlogloss:1.26058                                                 
[95]	validation-mlogloss:1.26031                                                 
[96]	validation-mlogloss:1.26014                                                 
[97]	validation-mlogloss:1.25994                                                 
[98]	validation-mlogloss:1.25975                                                 
[99]	validation-mlogloss:1.25972                                                 
[0]	validation-mlogloss:1.37943                                                  
[1]	validation-mlogloss:1.37293                                                  
[2]	validation-m

[89]	validation-mlogloss:1.26813                                                 
[90]	validation-mlogloss:1.26801                                                 
[91]	validation-mlogloss:1.26791                                                 
[92]	validation-mlogloss:1.26764                                                 
[93]	validation-mlogloss:1.26737                                                 
[94]	validation-mlogloss:1.26699                                                 
[95]	validation-mlogloss:1.26679                                                 
[96]	validation-mlogloss:1.26651                                                 
[97]	validation-mlogloss:1.26628                                                 
[98]	validation-mlogloss:1.26608                                                 
[99]	validation-mlogloss:1.26585                                                 
[0]	validation-mlogloss:1.37038                                                  
[1]	validation-m

[88]	validation-mlogloss:1.26033                                                 
[89]	validation-mlogloss:1.26028                                                 
[90]	validation-mlogloss:1.26012                                                 
[91]	validation-mlogloss:1.26012                                                 
[92]	validation-mlogloss:1.25978                                                 
[93]	validation-mlogloss:1.25956                                                 
[94]	validation-mlogloss:1.25933                                                 
[95]	validation-mlogloss:1.25906                                                 
[96]	validation-mlogloss:1.25891                                                 
[97]	validation-mlogloss:1.25871                                                 
[98]	validation-mlogloss:1.25855                                                 
[99]	validation-mlogloss:1.25811                                                 
[0]	validation-m

[87]	validation-mlogloss:1.27028                                                 
[88]	validation-mlogloss:1.27012                                                 
[89]	validation-mlogloss:1.27001                                                 
[90]	validation-mlogloss:1.26984                                                 
[91]	validation-mlogloss:1.26971                                                 
[92]	validation-mlogloss:1.26942                                                 
[93]	validation-mlogloss:1.26928                                                 
[94]	validation-mlogloss:1.26908                                                 
[95]	validation-mlogloss:1.26885                                                 
[96]	validation-mlogloss:1.26868                                                 
[97]	validation-mlogloss:1.26847                                                 
[98]	validation-mlogloss:1.26832                                                 
[99]	validation-

[86]	validation-mlogloss:1.26078                                                 
[87]	validation-mlogloss:1.26062                                                 
[88]	validation-mlogloss:1.26039                                                 
[89]	validation-mlogloss:1.26034                                                 
[90]	validation-mlogloss:1.26017                                                 
[91]	validation-mlogloss:1.26001                                                 
[92]	validation-mlogloss:1.25989                                                 
[93]	validation-mlogloss:1.25983                                                 
[94]	validation-mlogloss:1.25972                                                 
[95]	validation-mlogloss:1.25949                                                 
[96]	validation-mlogloss:1.25935                                                 
[97]	validation-mlogloss:1.25919                                                 
[98]	validation-

[85]	validation-mlogloss:1.26925                                                 
[86]	validation-mlogloss:1.26911                                                 
[87]	validation-mlogloss:1.26898                                                 
[88]	validation-mlogloss:1.26884                                                 
[89]	validation-mlogloss:1.26875                                                 
[90]	validation-mlogloss:1.26859                                                 
[91]	validation-mlogloss:1.26855                                                 
[92]	validation-mlogloss:1.26839                                                 
[93]	validation-mlogloss:1.26809                                                 
[94]	validation-mlogloss:1.26786                                                 
[95]	validation-mlogloss:1.26773                                                 
[96]	validation-mlogloss:1.26747                                                 
[97]	validation-

[84]	validation-mlogloss:1.25472                                                 
[85]	validation-mlogloss:1.25460                                                 
[86]	validation-mlogloss:1.25447                                                 
[87]	validation-mlogloss:1.25431                                                 
[88]	validation-mlogloss:1.25405                                                 
[89]	validation-mlogloss:1.25385                                                 
[90]	validation-mlogloss:1.25385                                                 
[91]	validation-mlogloss:1.25370                                                 
[92]	validation-mlogloss:1.25360                                                 
[93]	validation-mlogloss:1.25341                                                 
[94]	validation-mlogloss:1.25331                                                 
[95]	validation-mlogloss:1.25315                                                 
[96]	validation-

[83]	validation-mlogloss:1.26151                                                 
[84]	validation-mlogloss:1.26123                                                 
[85]	validation-mlogloss:1.26103                                                 
[86]	validation-mlogloss:1.26095                                                 
[87]	validation-mlogloss:1.26081                                                 
[88]	validation-mlogloss:1.26064                                                 
[89]	validation-mlogloss:1.26051                                                 
[90]	validation-mlogloss:1.26034                                                 
[91]	validation-mlogloss:1.26021                                                 
[92]	validation-mlogloss:1.25990                                                 
[93]	validation-mlogloss:1.25972                                                 
[94]	validation-mlogloss:1.25942                                                 
[95]	validation-

[82]	validation-mlogloss:1.25133                                                 
[83]	validation-mlogloss:1.25115                                                 
[84]	validation-mlogloss:1.25113                                                 
[85]	validation-mlogloss:1.25119                                                 
[86]	validation-mlogloss:1.25129                                                 
[87]	validation-mlogloss:1.25114                                                 
[88]	validation-mlogloss:1.25092                                                 
[89]	validation-mlogloss:1.25094                                                 
[90]	validation-mlogloss:1.25080                                                 
[91]	validation-mlogloss:1.25104                                                 
[92]	validation-mlogloss:1.25073                                                 
[93]	validation-mlogloss:1.25054                                                 
[94]	validation-

[81]	validation-mlogloss:1.26364                                                 
[82]	validation-mlogloss:1.26351                                                 
[83]	validation-mlogloss:1.26353                                                 
[84]	validation-mlogloss:1.26356                                                 
[85]	validation-mlogloss:1.26360                                                 
[86]	validation-mlogloss:1.26356                                                 
[87]	validation-mlogloss:1.26368                                                 
[88]	validation-mlogloss:1.26337                                                 
[89]	validation-mlogloss:1.26330                                                 
[90]	validation-mlogloss:1.26323                                                 
[91]	validation-mlogloss:1.26307                                                 
[92]	validation-mlogloss:1.26301                                                 
[93]	validation-

[80]	validation-mlogloss:1.25295                                                 
[81]	validation-mlogloss:1.25257                                                 
[82]	validation-mlogloss:1.25219                                                 
[83]	validation-mlogloss:1.25194                                                 
[84]	validation-mlogloss:1.25178                                                 
[85]	validation-mlogloss:1.25171                                                 
[86]	validation-mlogloss:1.25163                                                 
[87]	validation-mlogloss:1.25154                                                 
[88]	validation-mlogloss:1.25132                                                 
[89]	validation-mlogloss:1.25119                                                 
[90]	validation-mlogloss:1.25100                                                 
[91]	validation-mlogloss:1.25105                                                 
[92]	validation-

[79]	validation-mlogloss:1.25466                                                 
[80]	validation-mlogloss:1.25447                                                 
[81]	validation-mlogloss:1.25436                                                 
[82]	validation-mlogloss:1.25421                                                 
[83]	validation-mlogloss:1.25402                                                 
[84]	validation-mlogloss:1.25395                                                 
[85]	validation-mlogloss:1.25371                                                 
[86]	validation-mlogloss:1.25351                                                 
[87]	validation-mlogloss:1.25327                                                 
[88]	validation-mlogloss:1.25295                                                 
[89]	validation-mlogloss:1.25243                                                 
[90]	validation-mlogloss:1.25231                                                 
[91]	validation-

[78]	validation-mlogloss:1.24908                                                 
[79]	validation-mlogloss:1.24892                                                 
[80]	validation-mlogloss:1.24863                                                 
[81]	validation-mlogloss:1.24864                                                 
[82]	validation-mlogloss:1.24824                                                 
[83]	validation-mlogloss:1.24785                                                 
[84]	validation-mlogloss:1.24762                                                 
[85]	validation-mlogloss:1.24761                                                 
[86]	validation-mlogloss:1.24736                                                 
[87]	validation-mlogloss:1.24704                                                 
[88]	validation-mlogloss:1.24679                                                 
[89]	validation-mlogloss:1.24659                                                 
[90]	validation-

[77]	validation-mlogloss:1.26217                                                 
[78]	validation-mlogloss:1.26178                                                 
[79]	validation-mlogloss:1.26167                                                 
[80]	validation-mlogloss:1.26164                                                 
[81]	validation-mlogloss:1.26174                                                 
[82]	validation-mlogloss:1.26158                                                 
[83]	validation-mlogloss:1.26140                                                 
[84]	validation-mlogloss:1.26127                                                 
[85]	validation-mlogloss:1.26132                                                 
[86]	validation-mlogloss:1.26114                                                 
[87]	validation-mlogloss:1.26105                                                 
[88]	validation-mlogloss:1.26094                                                 
[89]	validation-

[76]	validation-mlogloss:1.25194                                                 
[77]	validation-mlogloss:1.25184                                                 
[78]	validation-mlogloss:1.25150                                                 
[79]	validation-mlogloss:1.25137                                                 
[80]	validation-mlogloss:1.25120                                                 
[81]	validation-mlogloss:1.25117                                                 
[82]	validation-mlogloss:1.25105                                                 
[83]	validation-mlogloss:1.25078                                                 
[84]	validation-mlogloss:1.25050                                                 
[85]	validation-mlogloss:1.25024                                                 
[86]	validation-mlogloss:1.24981                                                 
[87]	validation-mlogloss:1.24950                                                 
[88]	validation-

[75]	validation-mlogloss:1.24660                                                 
[76]	validation-mlogloss:1.24640                                                 
[77]	validation-mlogloss:1.24619                                                 
[78]	validation-mlogloss:1.24596                                                 
[79]	validation-mlogloss:1.24603                                                 
[80]	validation-mlogloss:1.24574                                                 
[81]	validation-mlogloss:1.24589                                                 
[82]	validation-mlogloss:1.24586                                                 
[83]	validation-mlogloss:1.24578                                                 
[84]	validation-mlogloss:1.24568                                                 
[85]	validation-mlogloss:1.24575                                                 
[86]	validation-mlogloss:1.24547                                                 
[87]	validation-

[74]	validation-mlogloss:1.25236                                                 
[75]	validation-mlogloss:1.25234                                                 
[76]	validation-mlogloss:1.25194                                                 
[77]	validation-mlogloss:1.25192                                                 
[78]	validation-mlogloss:1.25210                                                 
[79]	validation-mlogloss:1.25183                                                 
[80]	validation-mlogloss:1.25158                                                 
[81]	validation-mlogloss:1.25175                                                 
[82]	validation-mlogloss:1.25150                                                 
[83]	validation-mlogloss:1.25127                                                 
[84]	validation-mlogloss:1.25103                                                 
[85]	validation-mlogloss:1.25113                                                 
[86]	validation-

[73]	validation-mlogloss:1.25125                                                 
[74]	validation-mlogloss:1.25104                                                 
[75]	validation-mlogloss:1.25071                                                 
[76]	validation-mlogloss:1.25037                                                 
[77]	validation-mlogloss:1.25034                                                 
[78]	validation-mlogloss:1.25006                                                 
[79]	validation-mlogloss:1.25010                                                 
[80]	validation-mlogloss:1.24996                                                 
[81]	validation-mlogloss:1.24984                                                 
[82]	validation-mlogloss:1.24986                                                 
[83]	validation-mlogloss:1.24969                                                 
[84]	validation-mlogloss:1.24958                                                 
[85]	validation-

[72]	validation-mlogloss:1.25930                                                 
[73]	validation-mlogloss:1.25918                                                 
[74]	validation-mlogloss:1.25891                                                 
[75]	validation-mlogloss:1.25867                                                 
[76]	validation-mlogloss:1.25836                                                 
[77]	validation-mlogloss:1.25816                                                 
[78]	validation-mlogloss:1.25789                                                 
[79]	validation-mlogloss:1.25769                                                 
[80]	validation-mlogloss:1.25742                                                 
[81]	validation-mlogloss:1.25721                                                 
[82]	validation-mlogloss:1.25694                                                 
[83]	validation-mlogloss:1.25668                                                 
[84]	validation-

[71]	validation-mlogloss:1.25665                                                 
[72]	validation-mlogloss:1.25633                                                 
[73]	validation-mlogloss:1.25620                                                 
[74]	validation-mlogloss:1.25598                                                 
[75]	validation-mlogloss:1.25584                                                 
[76]	validation-mlogloss:1.25558                                                 
[77]	validation-mlogloss:1.25528                                                 
[78]	validation-mlogloss:1.25502                                                 
[79]	validation-mlogloss:1.25482                                                 
[80]	validation-mlogloss:1.25473                                                 
[81]	validation-mlogloss:1.25465                                                 
[82]	validation-mlogloss:1.25449                                                 
[83]	validation-

[70]	validation-mlogloss:1.24396                                                 
[71]	validation-mlogloss:1.24395                                                 
[72]	validation-mlogloss:1.24362                                                 
[73]	validation-mlogloss:1.24352                                                 
[74]	validation-mlogloss:1.24307                                                 
[75]	validation-mlogloss:1.24257                                                 
[76]	validation-mlogloss:1.24250                                                 
[77]	validation-mlogloss:1.24267                                                 
[78]	validation-mlogloss:1.24224                                                 
[79]	validation-mlogloss:1.24237                                                 
[80]	validation-mlogloss:1.24221                                                 
[81]	validation-mlogloss:1.24214                                                 
[82]	validation-

[69]	validation-mlogloss:1.25813                                                 
[70]	validation-mlogloss:1.25771                                                 
[71]	validation-mlogloss:1.25747                                                 
[72]	validation-mlogloss:1.25708                                                 
[73]	validation-mlogloss:1.25676                                                 
[74]	validation-mlogloss:1.25631                                                 
[75]	validation-mlogloss:1.25613                                                 
[76]	validation-mlogloss:1.25585                                                 
[77]	validation-mlogloss:1.25554                                                 
[78]	validation-mlogloss:1.25537                                                 
[79]	validation-mlogloss:1.25509                                                 
[80]	validation-mlogloss:1.25479                                                 
[81]	validation-

[68]	validation-mlogloss:1.27072                                                 
[69]	validation-mlogloss:1.27044                                                 
[70]	validation-mlogloss:1.27012                                                 
[71]	validation-mlogloss:1.26984                                                 
[72]	validation-mlogloss:1.26958                                                 
[73]	validation-mlogloss:1.26925                                                 
[74]	validation-mlogloss:1.26905                                                 
[75]	validation-mlogloss:1.26870                                                 
[76]	validation-mlogloss:1.26844                                                 
[77]	validation-mlogloss:1.26826                                                 
[78]	validation-mlogloss:1.26809                                                 
[79]	validation-mlogloss:1.26782                                                 
[80]	validation-

[67]	validation-mlogloss:1.26269                                                 
[68]	validation-mlogloss:1.26256                                                 
[69]	validation-mlogloss:1.26222                                                 
[70]	validation-mlogloss:1.26171                                                 
[71]	validation-mlogloss:1.26143                                                 
[72]	validation-mlogloss:1.26114                                                 
[73]	validation-mlogloss:1.26094                                                 
[74]	validation-mlogloss:1.26060                                                 
[75]	validation-mlogloss:1.26045                                                 
[76]	validation-mlogloss:1.26013                                                 
[77]	validation-mlogloss:1.25985                                                 
[78]	validation-mlogloss:1.25963                                                 
[79]	validation-

[66]	validation-mlogloss:1.26641                                                 
[67]	validation-mlogloss:1.26603                                                 
[68]	validation-mlogloss:1.26581                                                 
[69]	validation-mlogloss:1.26548                                                 
[70]	validation-mlogloss:1.26508                                                 
[71]	validation-mlogloss:1.26483                                                 
[72]	validation-mlogloss:1.26443                                                 
[73]	validation-mlogloss:1.26420                                                 
[74]	validation-mlogloss:1.26391                                                 
[75]	validation-mlogloss:1.26363                                                 
[76]	validation-mlogloss:1.26347                                                 
[77]	validation-mlogloss:1.26318                                                 
[78]	validation-

[65]	validation-mlogloss:1.27461                                                 
[66]	validation-mlogloss:1.27436                                                 
[67]	validation-mlogloss:1.27404                                                 
[68]	validation-mlogloss:1.27393                                                 
[69]	validation-mlogloss:1.27371                                                 
[70]	validation-mlogloss:1.27336                                                 
[71]	validation-mlogloss:1.27298                                                 
[72]	validation-mlogloss:1.27285                                                 
[73]	validation-mlogloss:1.27261                                                 
[74]	validation-mlogloss:1.27238                                                 
[75]	validation-mlogloss:1.27229                                                 
[76]	validation-mlogloss:1.27202                                                 
[77]	validation-

[64]	validation-mlogloss:1.25323                                                 
[65]	validation-mlogloss:1.25299                                                 
[66]	validation-mlogloss:1.25266                                                 
[67]	validation-mlogloss:1.25217                                                 
[68]	validation-mlogloss:1.25202                                                 
[69]	validation-mlogloss:1.25172                                                 
[70]	validation-mlogloss:1.25129                                                 
[71]	validation-mlogloss:1.25071                                                 
[72]	validation-mlogloss:1.25033                                                 
[73]	validation-mlogloss:1.24987                                                 
[74]	validation-mlogloss:1.24972                                                 
[75]	validation-mlogloss:1.24942                                                 
[76]	validation-

[63]	validation-mlogloss:1.24981                                                 
[64]	validation-mlogloss:1.24948                                                 
[65]	validation-mlogloss:1.24946                                                 
[66]	validation-mlogloss:1.24926                                                 
[67]	validation-mlogloss:1.24887                                                 
[68]	validation-mlogloss:1.24882                                                 
[69]	validation-mlogloss:1.24858                                                 
[70]	validation-mlogloss:1.24829                                                 
[71]	validation-mlogloss:1.24807                                                 
[72]	validation-mlogloss:1.24783                                                 
[73]	validation-mlogloss:1.24793                                                 
[74]	validation-mlogloss:1.24774                                                 
[75]	validation-

[62]	validation-mlogloss:1.26418                                                 
[63]	validation-mlogloss:1.26384                                                 
[64]	validation-mlogloss:1.26364                                                 
[65]	validation-mlogloss:1.26342                                                 
[66]	validation-mlogloss:1.26309                                                 
[67]	validation-mlogloss:1.26269                                                 
[68]	validation-mlogloss:1.26231                                                 
[69]	validation-mlogloss:1.26203                                                 
[70]	validation-mlogloss:1.26156                                                 
[71]	validation-mlogloss:1.26103                                                 
[72]	validation-mlogloss:1.26065                                                 
[73]	validation-mlogloss:1.26025                                                 
[74]	validation-

[61]	validation-mlogloss:1.25510                                                 
[62]	validation-mlogloss:1.25467                                                 
[63]	validation-mlogloss:1.25446                                                 
[64]	validation-mlogloss:1.25426                                                 
[65]	validation-mlogloss:1.25416                                                 
[66]	validation-mlogloss:1.25388                                                 
[67]	validation-mlogloss:1.25356                                                 
[68]	validation-mlogloss:1.25337                                                 
[69]	validation-mlogloss:1.25301                                                 
[70]	validation-mlogloss:1.25251                                                 
[71]	validation-mlogloss:1.25236                                                 
[72]	validation-mlogloss:1.25193                                                 
[73]	validation-

[60]	validation-mlogloss:1.24535                                                 
[61]	validation-mlogloss:1.24534                                                 
[62]	validation-mlogloss:1.24487                                                 
[63]	validation-mlogloss:1.24465                                                 
[64]	validation-mlogloss:1.24452                                                 
[65]	validation-mlogloss:1.24444                                                 
[66]	validation-mlogloss:1.24432                                                 
[67]	validation-mlogloss:1.24428                                                 
[68]	validation-mlogloss:1.24412                                                 
[69]	validation-mlogloss:1.24354                                                 
[70]	validation-mlogloss:1.24282                                                 
[71]	validation-mlogloss:1.24266                                                 
[72]	validation-

[59]	validation-mlogloss:1.27086                                                 
[60]	validation-mlogloss:1.27045                                                 
[61]	validation-mlogloss:1.27012                                                 
[62]	validation-mlogloss:1.26980                                                 
[63]	validation-mlogloss:1.26949                                                 
[64]	validation-mlogloss:1.26921                                                 
[65]	validation-mlogloss:1.26877                                                 
[66]	validation-mlogloss:1.26847                                                 
[67]	validation-mlogloss:1.26807                                                 
[68]	validation-mlogloss:1.26782                                                 
[69]	validation-mlogloss:1.26740                                                 
[70]	validation-mlogloss:1.26698                                                 
[71]	validation-

[58]	validation-mlogloss:1.26303                                                 
[59]	validation-mlogloss:1.26237                                                 
[60]	validation-mlogloss:1.26210                                                 
[61]	validation-mlogloss:1.26174                                                 
[62]	validation-mlogloss:1.26149                                                 
[63]	validation-mlogloss:1.26114                                                 
[64]	validation-mlogloss:1.26089                                                 
[65]	validation-mlogloss:1.26081                                                 
[66]	validation-mlogloss:1.26072                                                 
[67]	validation-mlogloss:1.26034                                                 
[68]	validation-mlogloss:1.26003                                                 
[69]	validation-mlogloss:1.25975                                                 
[70]	validation-

[57]	validation-mlogloss:1.26718                                                 
[58]	validation-mlogloss:1.26683                                                 
[59]	validation-mlogloss:1.26657                                                 
[60]	validation-mlogloss:1.26624                                                 
[61]	validation-mlogloss:1.26597                                                 
[62]	validation-mlogloss:1.26597                                                 
[63]	validation-mlogloss:1.26578                                                 
[64]	validation-mlogloss:1.26552                                                 
[65]	validation-mlogloss:1.26532                                                 
[66]	validation-mlogloss:1.26510                                                 
[67]	validation-mlogloss:1.26478                                                 
[68]	validation-mlogloss:1.26475                                                 
[69]	validation-

[56]	validation-mlogloss:1.26780                                                 
[57]	validation-mlogloss:1.26753                                                 
[58]	validation-mlogloss:1.26724                                                 
[59]	validation-mlogloss:1.26656                                                 
[60]	validation-mlogloss:1.26617                                                 
[61]	validation-mlogloss:1.26588                                                 
[62]	validation-mlogloss:1.26550                                                 
[63]	validation-mlogloss:1.26502                                                 
[64]	validation-mlogloss:1.26471                                                 
[65]	validation-mlogloss:1.26450                                                 
[66]	validation-mlogloss:1.26428                                                 
[67]	validation-mlogloss:1.26383                                                 
[68]	validation-

[55]	validation-mlogloss:1.26365                                                 
[56]	validation-mlogloss:1.26327                                                 
[57]	validation-mlogloss:1.26278                                                 
[58]	validation-mlogloss:1.26255                                                 
[59]	validation-mlogloss:1.26209                                                 
[60]	validation-mlogloss:1.26166                                                 
[61]	validation-mlogloss:1.26133                                                 
[62]	validation-mlogloss:1.26104                                                 
[63]	validation-mlogloss:1.26073                                                 
[64]	validation-mlogloss:1.26055                                                 
[65]	validation-mlogloss:1.26034                                                 
[66]	validation-mlogloss:1.25994                                                 
[67]	validation-

[54]	validation-mlogloss:1.26140                                                 
[55]	validation-mlogloss:1.26114                                                 
[56]	validation-mlogloss:1.26074                                                 
[57]	validation-mlogloss:1.26061                                                 
[58]	validation-mlogloss:1.26038                                                 
[59]	validation-mlogloss:1.26000                                                 
[60]	validation-mlogloss:1.25989                                                 
[61]	validation-mlogloss:1.25951                                                 
[62]	validation-mlogloss:1.25931                                                 
[63]	validation-mlogloss:1.25899                                                 
[64]	validation-mlogloss:1.25882                                                 
[65]	validation-mlogloss:1.25853                                                 
[66]	validation-

[53]	validation-mlogloss:1.27536                                                 
[54]	validation-mlogloss:1.27505                                                 
[55]	validation-mlogloss:1.27467                                                 
[56]	validation-mlogloss:1.27433                                                 
[57]	validation-mlogloss:1.27378                                                 
[58]	validation-mlogloss:1.27359                                                 
[59]	validation-mlogloss:1.27333                                                 
[60]	validation-mlogloss:1.27301                                                 
[61]	validation-mlogloss:1.27263                                                 
[62]	validation-mlogloss:1.27228                                                 
[63]	validation-mlogloss:1.27206                                                 
[64]	validation-mlogloss:1.27187                                                 
[65]	validation-

[52]	validation-mlogloss:1.27404                                                 
[53]	validation-mlogloss:1.27352                                                 
[54]	validation-mlogloss:1.27312                                                 
[55]	validation-mlogloss:1.27272                                                 
[56]	validation-mlogloss:1.27236                                                 
[57]	validation-mlogloss:1.27199                                                 
[58]	validation-mlogloss:1.27165                                                 
[59]	validation-mlogloss:1.27127                                                 
[60]	validation-mlogloss:1.27090                                                 
[61]	validation-mlogloss:1.27053                                                 
[62]	validation-mlogloss:1.27016                                                 
[63]	validation-mlogloss:1.26986                                                 
[64]	validation-

[51]	validation-mlogloss:1.26841                                                 
[52]	validation-mlogloss:1.26803                                                 
[53]	validation-mlogloss:1.26778                                                 
[54]	validation-mlogloss:1.26735                                                 
[55]	validation-mlogloss:1.26696                                                 
[56]	validation-mlogloss:1.26681                                                 
[57]	validation-mlogloss:1.26653                                                 
[58]	validation-mlogloss:1.26628                                                 
[59]	validation-mlogloss:1.26597                                                 
[60]	validation-mlogloss:1.26552                                                 
[61]	validation-mlogloss:1.26526                                                 
[62]	validation-mlogloss:1.26503                                                 
[63]	validation-

[50]	validation-mlogloss:1.26776                                                 
[51]	validation-mlogloss:1.26713                                                 
[52]	validation-mlogloss:1.26693                                                 
[53]	validation-mlogloss:1.26665                                                 
[54]	validation-mlogloss:1.26636                                                 
[55]	validation-mlogloss:1.26619                                                 
[56]	validation-mlogloss:1.26607                                                 
[57]	validation-mlogloss:1.26592                                                 
[58]	validation-mlogloss:1.26581                                                 
[59]	validation-mlogloss:1.26564                                                 
[60]	validation-mlogloss:1.26546                                                 
[61]	validation-mlogloss:1.26517                                                 
[62]	validation-

In [40]:
import hyperopt
# Train the final model with the best hyperparameters
xgb_best = xgb.train(
    params=hyperopt.space_eval(xgb_space, xgb_best_result),
    dtrain=dtrain, 
    num_boost_round=100,
    evals=[(dval, 'validation')])

# Predict on the validation set with the best model
y_val_pred_xgb = xgb_best.predict(dval).astype(int)

# Generate the classification report
xgb_report = classification_report(y_val, y_val_pred_xgb)
print(xgb_report)


[0]	validation-mlogloss:1.37608
[1]	validation-mlogloss:1.36615
[2]	validation-mlogloss:1.35714
[3]	validation-mlogloss:1.34903
[4]	validation-mlogloss:1.34227
[5]	validation-mlogloss:1.33587
[6]	validation-mlogloss:1.33047
[7]	validation-mlogloss:1.32504
[8]	validation-mlogloss:1.31982
[9]	validation-mlogloss:1.31508
[10]	validation-mlogloss:1.31111
[11]	validation-mlogloss:1.30777
[12]	validation-mlogloss:1.30439
[13]	validation-mlogloss:1.30137
[14]	validation-mlogloss:1.29860
[15]	validation-mlogloss:1.29626
[16]	validation-mlogloss:1.29372
[17]	validation-mlogloss:1.29179
[18]	validation-mlogloss:1.28973
[19]	validation-mlogloss:1.28790
[20]	validation-mlogloss:1.28616
[21]	validation-mlogloss:1.28457
[22]	validation-mlogloss:1.28334
[23]	validation-mlogloss:1.28185
[24]	validation-mlogloss:1.28052
[25]	validation-mlogloss:1.27913
[26]	validation-mlogloss:1.27818
[27]	validation-mlogloss:1.27714
[28]	validation-mlogloss:1.27585
[29]	validation-mlogloss:1.27465
[30]	validation-mlog

In [41]:
# Model 2: gb
gb_best_result = fmin(
    fn=gradient_boosting_objective,
    space=gb_space,
    algo=tpe.suggest,
    max_evals=50,
    trials=Trials()
)


 28%|██▊       | 14/50 [01:32<03:12,  5.35s/trial, best loss: 0.5883857145300394]

  _warn_prf(average, modifier, msg_start, len(result))



 30%|███       | 15/50 [01:35<02:39,  4.57s/trial, best loss: 0.5883857145300394]

  _warn_prf(average, modifier, msg_start, len(result))



100%|██████████| 50/50 [07:43<00:00,  9.27s/trial, best loss: 0.5798865170756197]


In [42]:
# Retrieve the best hyperparameters from the Trials object
gb_best_params = hyperopt.space_eval(gb_space, gb_best_result)

# Train the final model with the best hyperparameters
gb_best = GradientBoostingClassifier(random_state=0, **gb_best_params)
gb_best.fit(X_train, y_train)

# Predict on the validation set with the best model
y_val_pred_gb = gb_best.predict(X_val).astype(int)

# Generate the classification report
gb_report = classification_report(y_val, y_val_pred_gb)
print(gb_report)



              precision    recall  f1-score   support

           0       0.65      0.19      0.29       559
           1       0.36      0.11      0.17       880
           2       0.37      0.12      0.18      1185
           3       0.41      0.90      0.57      1511

    accuracy                           0.41      4135
   macro avg       0.45      0.33      0.30      4135
weighted avg       0.42      0.41      0.33      4135



In [None]:
# Model 3: lr
lr_best_result = fmin(
    fn=lr_objective,
    space=lr_space,
    algo=tpe.suggest,
    max_evals=50,
    trials=Trials()
)


  6%|▍      | 3/50 [50:51<13:15:05, 1015.01s/trial, best loss: 0.589629335659444]

In [None]:
# Retrieve the best hyperparameters from the Trials object
lr_best_params = hyperopt.space_eval(lr_space, lr_best_result)

# Train the final model with the best hyperparameters
lr_best = LogisticRegression(random_state=0, **lr_best_params)
lr_best.fit(X_train, y_train)

# Predict on the validation set with the best model
y_val_pred_lr = lr_best.predict(X_val).astype(int)

# Generate the classification report
lr_report = classification_report(y_val, y_val_pred_lr)
print(lr_report)



In [37]:
# Model 4: dt
dt_best_result = fmin(
    fn=dt_objective,
    space=dt_space,
    algo=tpe.suggest,
    max_evals=50,
    trials=Trials()
)


  2%|▏           | 1/50 [00:00<00:36,  1.36trial/s, best loss: 0.765896675211543]

  _warn_prf(average, modifier, msg_start, len(result))



 34%|███▍      | 17/50 [00:14<00:25,  1.30trial/s, best loss: 0.6659208122046166]

  _warn_prf(average, modifier, msg_start, len(result))



 48%|████▊     | 24/50 [00:19<00:18,  1.41trial/s, best loss: 0.6535511346624179]

  _warn_prf(average, modifier, msg_start, len(result))



 56%|█████▌    | 28/50 [00:21<00:15,  1.44trial/s, best loss: 0.6535511346624179]

  _warn_prf(average, modifier, msg_start, len(result))



 60%|██████    | 30/50 [00:23<00:13,  1.45trial/s, best loss: 0.6535511346624179]

  _warn_prf(average, modifier, msg_start, len(result))



 74%|███████▍  | 37/50 [00:29<00:10,  1.26trial/s, best loss: 0.6535511346624179]

  _warn_prf(average, modifier, msg_start, len(result))



 88%|████████▊ | 44/50 [00:34<00:04,  1.42trial/s, best loss: 0.6224703396395346]

  _warn_prf(average, modifier, msg_start, len(result))



 98%|█████████▊| 49/50 [00:38<00:00,  1.34trial/s, best loss: 0.6224703396395346]

  _warn_prf(average, modifier, msg_start, len(result))



100%|██████████| 50/50 [00:38<00:00,  1.29trial/s, best loss: 0.6224703396395346]


In [38]:
dt_best_params = hyperopt.space_eval(dt_space, dt_best_result)

# Train the final model with the best hyperparameters
dt_best = DecisionTreeClassifier(random_state=0, **dt_best_params)
dt_best.fit(X_train, y_train)

# Predict on the validation set with the best model
y_val_pred_dt = dt_best.predict(X_val).astype(int)

# Generate the classification report
dt_report = classification_report(y_val, y_val_pred_dt)
print(dt_report)



              precision    recall  f1-score   support

           0       0.20      0.62      0.30       559
           1       0.26      0.04      0.07       880
           2       0.50      0.00      0.00      1185
           3       0.42      0.62      0.50      1511

    accuracy                           0.32      4135
   macro avg       0.34      0.32      0.22      4135
weighted avg       0.38      0.32      0.24      4135



Observations:
 - The model with high precision but not too low-overall accuracy: GB
- The model with highest recall rates (for higher classes): DT
- Most model do better on the extreme classes (0 and 3) than they do on middle classes (1 and 2)

# Conclusion
- We followed a very logical EDA which led to a GB model with 0.68 and 0.41 precision rate for higher classes (class 0 and class 1), of course at the cost of low recall.
- Recall is low but we must consider that we just used only the delivery data. I expect better performance if we have more finance related information about each importer
 


# What's next

I got to finish the assignment in 3 days, most of which spent cleaning datasets. There were more I wanted to do:

- feature selection: I used too many features, and did not visualize the importance of each.

- the shortened hs code contains 4 digits, but I can also try just 2 first (Chapter)

- graph clustering/community detection: We can think of the data as a graph where nodes are importers than hs codes, and the relationships can be the total calculated teu (may be scaled/normalized), etc.. This can reveal what kinds of importer they are: car producers, fruits retailers, etc... However, that might not be super helpful since such an information can be easily found somewhere else.

- hypergraph mining: Each line in `main.csv` can be think of a hyperedge of multiple entities: hs_code, importer, port, etc.. This requires advanced techniques that I'm currently not familiar with. 

- hs_code can also implies the tax rate, which can be useful as a feature