In [1]:
import pandas as pd

from sklearn import metrics
from sklearn.model_selection import GridSearchCV
from xgboost import XGBClassifier

# Load the tab-separated feature table and replace every missing value
# (in numeric and text columns alike) with 0.
X_data = pd.read_table('data/orange_small_train.data', sep='\t').fillna(0)

# Label-encode the text (object-dtype) columns: each column is converted to a
# pandas categorical on its own and then replaced by its integer category codes.
obj_columns = X_data.select_dtypes(include=['object']).columns
X_data[obj_columns] = X_data[obj_columns].apply(
    lambda col: col.astype('category').cat.codes
)

#X_train, X_test, y_train, y_test = train_test_split(X_data, y_data, test_size=0.3, random_state=1)

In [2]:
def gsearch(X_data, y_data, param_grid=None, cv=3, n_jobs=4):
    """Grid-search an XGBClassifier by cross-validated ROC AUC and print the winner.

    Parameters
    ----------
    X_data : feature table passed unchanged to ``GridSearchCV.fit``.
    y_data : object exposing ``.values`` (e.g. a pandas DataFrame or Series)
        holding the target; it is flattened to a 1-D array before fitting.
    param_grid : dict, optional
        Hyper-parameter grid to search. When omitted, the grid defined below
        is used (8 candidates: 2 ``subsample`` values x 4 ``gamma`` values).
    cv : int or cross-validation splitter, default 3
        Fold specification forwarded to ``GridSearchCV``. It is pinned
        explicitly because the library default changed from 3 to 5 folds in
        scikit-learn 0.22; 3 matches the "Fitting 3 folds" runs recorded in
        this notebook.
    n_jobs : int, default 4
        Number of parallel workers used by ``GridSearchCV``.

    Returns
    -------
    GridSearchCV
        The fitted search object. Because ``refit=True``, it also holds the
        best parameter combination refitted on all of ``X_data``.
    """
    if param_grid is None:
        param_grid = {
            'max_depth': [3],
            'n_estimators': [100],
            # Single-valued entry: forwarded to every XGBClassifier candidate.
            # NOTE(review): with n_jobs parallel fits each requesting 8
            # threads, up to n_jobs * 8 threads may run at once -- confirm
            # this suits the machine.
            'nthread': [8],
            'subsample': [0.9, 1.0],
            'gamma': [0, 0.001, 0.01, 0.1]
        }

    grid = GridSearchCV(
        XGBClassifier(),
        param_grid,
        scoring='roc_auc',
        cv=cv,
        n_jobs=n_jobs,
        refit=True,
        verbose=3,
    )
    # The estimator expects a 1-D target, so flatten the label container.
    grid.fit(X_data, y_data.values.ravel())

    print('Params', grid.best_params_)
    print('Score', grid.best_score_)

    return grid

In [3]:
# Churn target: the label file is tab-separated and has no header row.
y_data = pd.read_table(
    'data/labels/orange_small_train_churn.labels',
    sep='\t',
    header=None,
)
# Run the hyper-parameter search for this target; the fitted search is kept as grid1.
grid1 = gsearch(X_data, y_data)

[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.


Fitting 3 folds for each of 8 candidates, totalling 24 fits


[Parallel(n_jobs=4)]: Done  24 out of  24 | elapsed:  3.3min finished


Params {'gamma': 0, 'max_depth': 3, 'n_estimators': 100, 'nthread': 8, 'subsample': 1.0}
Score 0.7336236977971333


In [4]:
# Appetency target: the label file is tab-separated and has no header row.
y_data = pd.read_table(
    'data/labels/orange_small_train_appetency.labels',
    sep='\t',
    header=None,
)
# Run the hyper-parameter search for this target; the fitted search is kept as grid2.
grid2 = gsearch(X_data, y_data)

Fitting 3 folds for each of 8 candidates, totalling 24 fits


[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  24 out of  24 | elapsed:  3.2min finished


Params {'gamma': 0.1, 'max_depth': 3, 'n_estimators': 100, 'nthread': 8, 'subsample': 0.9}
Score 0.8242510937607236


In [5]:
# Upselling target: the label file is tab-separated and has no header row.
y_data = pd.read_table(
    'data/labels/orange_small_train_upselling.labels',
    sep='\t',
    header=None,
)
# Run the hyper-parameter search for this target; the fitted search is kept as grid3.
grid3 = gsearch(X_data, y_data)

Fitting 3 folds for each of 8 candidates, totalling 24 fits


[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  24 out of  24 | elapsed:  3.2min finished


Params {'gamma': 0.01, 'max_depth': 3, 'n_estimators': 100, 'nthread': 8, 'subsample': 1.0}
Score 0.8652135577813339
