<a href="https://colab.research.google.com/github/unm63/combination/blob/master/scikitlearnlab.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:
import pandas as pd

In [0]:
df = pd.read_csv('https://archive.ics.uci.edu/ml/machine-learning-databases/wine/wine.data', header=None,
                     names=['Wine','Alcohol','Malic acid','Ash','Alcalinity of ash','Magnesium',
                            'Total phenols','Flavanoids','Nonflavanoid phenols','Proanthocyanins',
                            'Color intensity','Hue','OD280/OD315 of diluted wines','Proline'])

In [0]:
print(df.shape)
print(df)

(178, 14)
     Wine  Alcohol  Malic acid  ...   Hue  OD280/OD315 of diluted wines  Proline
0       1    14.23        1.71  ...  1.04                          3.92     1065
1       1    13.20        1.78  ...  1.05                          3.40     1050
2       1    13.16        2.36  ...  1.03                          3.17     1185
3       1    14.37        1.95  ...  0.86                          3.45     1480
4       1    13.24        2.59  ...  1.04                          2.93      735
..    ...      ...         ...  ...   ...                           ...      ...
173     3    13.71        5.65  ...  0.64                          1.74      740
174     3    13.40        3.91  ...  0.70                          1.56      750
175     3    13.27        4.28  ...  0.59                          1.56      835
176     3    13.17        2.59  ...  0.60                          1.62      840
177     3    14.13        4.10  ...  0.61                          1.60      560

[178 rows x 14 co

In [0]:
print(df.columns)

Index(['Wine', 'Alcohol', 'Malic acid', 'Ash', 'Alcalinity of ash',
       'Magnesium', 'Total phenols', 'Flavanoids', 'Nonflavanoid phenols',
       'Proanthocyanins', 'Color intensity', 'Hue',
       'OD280/OD315 of diluted wines', 'Proline'],
      dtype='object')


In [0]:
########################################################################
#	Decision Tree
########################################################################

from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import classification_report
from sklearn.tree import DecisionTreeClassifier

# To apply an classifier on this data, we need to flatten the image, to
# turn the data in a (samples, feature) matrix:
n_samples = len(df)
X = df.loc[:, df.columns != 'Wine']
y = df.loc[:, df.columns == 'Wine']
print(X)
print(y)

# Split the dataset in two equal parts
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.5, random_state=0)

# Set the parameters by cross-validation
tuned_parameters = [{'max_features': ['log2','sqrt',None],
                     'max_depth': [5,10,None],
                     'min_samples_leaf': [1,2,3],
                     'min_weight_fraction_leaf': [0,0.2,0.3]}]

scores = ['precision', 'recall']

for score in scores:
    print("# Tuning hyper-parameters for %s" % score)
    print()

    clf = GridSearchCV(DecisionTreeClassifier(random_state=0), tuned_parameters, cv=5,
                       scoring='%s_macro' % score)
    clf.fit(X_train, y_train)

    print("Best parameters set found on development set:")
    print()
    print(clf.best_params_)
    print()
    print("Grid scores on development set:")
    print()
    means = clf.cv_results_['mean_test_score']
    stds = clf.cv_results_['std_test_score']
    for mean, std, params in zip(means, stds, clf.cv_results_['params']):
        print("%0.3f (+/-%0.03f) for %r"
              % (mean, std * 2, params))
    print()

    print("Detailed classification report:")
    print()
    print("The model is trained on the full development set.")
    print("The scores are computed on the full evaluation set.")
    print()
    y_true, y_pred = y_test, clf.predict(X_test)
    print(classification_report(y_true, y_pred))
    print()

     Alcohol  Malic acid   Ash  ...   Hue  OD280/OD315 of diluted wines  Proline
0      14.23        1.71  2.43  ...  1.04                          3.92     1065
1      13.20        1.78  2.14  ...  1.05                          3.40     1050
2      13.16        2.36  2.67  ...  1.03                          3.17     1185
3      14.37        1.95  2.50  ...  0.86                          3.45     1480
4      13.24        2.59  2.87  ...  1.04                          2.93      735
..       ...         ...   ...  ...   ...                           ...      ...
173    13.71        5.65  2.45  ...  0.64                          1.74      740
174    13.40        3.91  2.48  ...  0.70                          1.56      750
175    13.27        4.28  2.26  ...  0.59                          1.56      835
176    13.17        2.59  2.37  ...  0.60                          1.62      840
177    14.13        4.10  2.74  ...  0.61                          1.60      560

[178 rows x 13 columns]
   

  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision'

Best parameters set found on development set:

{'max_depth': 5, 'max_features': None, 'min_samples_leaf': 3, 'min_weight_fraction_leaf': 0}

Grid scores on development set:

0.862 (+/-0.210) for {'max_depth': 5, 'max_features': 'log2', 'min_samples_leaf': 1, 'min_weight_fraction_leaf': 0}
0.775 (+/-0.297) for {'max_depth': 5, 'max_features': 'log2', 'min_samples_leaf': 1, 'min_weight_fraction_leaf': 0.2}
0.413 (+/-0.062) for {'max_depth': 5, 'max_features': 'log2', 'min_samples_leaf': 1, 'min_weight_fraction_leaf': 0.3}
0.850 (+/-0.184) for {'max_depth': 5, 'max_features': 'log2', 'min_samples_leaf': 2, 'min_weight_fraction_leaf': 0}
0.775 (+/-0.297) for {'max_depth': 5, 'max_features': 'log2', 'min_samples_leaf': 2, 'min_weight_fraction_leaf': 0.2}
0.413 (+/-0.062) for {'max_depth': 5, 'max_features': 'log2', 'min_samples_leaf': 2, 'min_weight_fraction_leaf': 0.3}
0.868 (+/-0.199) for {'max_depth': 5, 'max_features': 'log2', 'min_samples_leaf': 3, 'min_weight_fraction_leaf': 0}
0.775 



In [0]:
########################################################################
#	Neural Net
########################################################################

from sklearn.neural_network import MLPClassifier

# Set the parameters by cross-validation
tuned_parameters = [{'activation': ['relu','identity'],
                     'learning_rate_init': [0.1,0.001],
                     'tol': [1e-4,1e-3,1e-2],
                     'momentum': [0.9,0.8,0.7]}]

scores = ['precision', 'recall']

for score in scores:
    print("# Tuning hyper-parameters for %s" % score)
    print()

    clf = GridSearchCV(MLPClassifier(), tuned_parameters, cv=5,
                       scoring='%s_macro' % score)
    clf.fit(X_train, y_train)

    print("Best parameters set found on development set:")
    print()
    print(clf.best_params_)
    print()
    print("Grid scores on development set:")
    print()
    means = clf.cv_results_['mean_test_score']
    stds = clf.cv_results_['std_test_score']
    for mean, std, params in zip(means, stds, clf.cv_results_['params']):
        print("%0.3f (+/-%0.03f) for %r"
              % (mean, std * 2, params))
    print()

    print("Detailed classification report:")
    print()
    print("The model is trained on the full development set.")
    print("The scores are computed on the full evaluation set.")
    print()
    y_true, y_pred = y_test, clf.predict(X_test)
    print(classification_report(y_true, y_pred))
    print()

# Tuning hyper-parameters for precision



  y = column_or_1d(y, warn=True)
  'precision', 'predicted', average, warn_for)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  'precision', 'predicted', average, warn_for)
  y = column_or_1d(y, warn=True)
  'precision', 'predicted', average, warn_for)
  y = column_or_1d(y, warn=True)
  'precision', 'predicted', average, warn_for)
  y = column_or_1d(y, warn=True)
  'precision', 'predicted', average, warn_for)
  y = column_or_1d(y, warn=True)
  'precision', 'predicted', average, warn_for)
  y = column_or_1d(y, warn=True)
  'precision', 'predicted', average, warn_for)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  'precision', 'predicted', average, warn_for)
  y = column_or_1d(y, warn=True)
  'precision', 'predicted', average, warn_for)
  y = column_or_1d(y, warn=True)
  'precision', 'predicted', average, warn_for)
  y = column_or_1d(y, warn=True)
  'precision', 'predicted', average, warn_for)
  y = column_or_1d(y, warn=True)
  'precision', 'predi

Best parameters set found on development set:

{'activation': 'identity', 'learning_rate_init': 0.001, 'momentum': 0.7, 'tol': 0.01}

Grid scores on development set:

0.245 (+/-0.329) for {'activation': 'relu', 'learning_rate_init': 0.1, 'momentum': 0.9, 'tol': 0.0001}
0.324 (+/-0.608) for {'activation': 'relu', 'learning_rate_init': 0.1, 'momentum': 0.9, 'tol': 0.001}
0.314 (+/-0.341) for {'activation': 'relu', 'learning_rate_init': 0.1, 'momentum': 0.9, 'tol': 0.01}
0.193 (+/-0.282) for {'activation': 'relu', 'learning_rate_init': 0.1, 'momentum': 0.8, 'tol': 0.0001}
0.337 (+/-0.610) for {'activation': 'relu', 'learning_rate_init': 0.1, 'momentum': 0.8, 'tol': 0.001}
0.319 (+/-0.584) for {'activation': 'relu', 'learning_rate_init': 0.1, 'momentum': 0.8, 'tol': 0.01}
0.169 (+/-0.234) for {'activation': 'relu', 'learning_rate_init': 0.1, 'momentum': 0.7, 'tol': 0.0001}
0.336 (+/-0.360) for {'activation': 'relu', 'learning_rate_init': 0.1, 'momentum': 0.7, 'tol': 0.001}
0.357 (+/-0.337)

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = colu

Best parameters set found on development set:

{'activation': 'identity', 'learning_rate_init': 0.001, 'momentum': 0.7, 'tol': 0.0001}

Grid scores on development set:

0.591 (+/-0.244) for {'activation': 'relu', 'learning_rate_init': 0.1, 'momentum': 0.9, 'tol': 0.0001}
0.459 (+/-0.305) for {'activation': 'relu', 'learning_rate_init': 0.1, 'momentum': 0.9, 'tol': 0.001}
0.333 (+/-0.000) for {'activation': 'relu', 'learning_rate_init': 0.1, 'momentum': 0.9, 'tol': 0.01}
0.394 (+/-0.234) for {'activation': 'relu', 'learning_rate_init': 0.1, 'momentum': 0.8, 'tol': 0.0001}
0.368 (+/-0.149) for {'activation': 'relu', 'learning_rate_init': 0.1, 'momentum': 0.8, 'tol': 0.001}
0.391 (+/-0.230) for {'activation': 'relu', 'learning_rate_init': 0.1, 'momentum': 0.8, 'tol': 0.01}
0.441 (+/-0.414) for {'activation': 'relu', 'learning_rate_init': 0.1, 'momentum': 0.7, 'tol': 0.0001}
0.483 (+/-0.387) for {'activation': 'relu', 'learning_rate_init': 0.1, 'momentum': 0.7, 'tol': 0.001}
0.439 (+/-0.27

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  'precision', 'predicted', average, warn_for)


In [0]:
########################################################################
#	Support Vector Machine
########################################################################

from sklearn.svm import SVC

# Set the parameters by cross-validation
tuned_parameters = [{'kernel': ['rbf','poly'],
                     'degree': [2,3,4],
                     'C': [1, 10, 100, 1000],
                     'max_iter': [10,100,200]}]

scores = ['precision', 'recall']

for score in scores:
    print("# Tuning hyper-parameters for %s" % score)
    print()

    clf = GridSearchCV(SVC(), tuned_parameters, cv=5,
                       scoring='%s_macro' % score)
    clf.fit(X_train, y_train)

    print("Best parameters set found on development set:")
    print()
    print(clf.best_params_)
    print()
    print("Grid scores on development set:")
    print()
    means = clf.cv_results_['mean_test_score']
    stds = clf.cv_results_['std_test_score']
    for mean, std, params in zip(means, stds, clf.cv_results_['params']):
        print("%0.3f (+/-%0.03f) for %r"
              % (mean, std * 2, params))
    print()

    print("Detailed classification report:")
    print()
    print("The model is trained on the full development set.")
    print("The scores are computed on the full evaluation set.")
    print()
    y_true, y_pred = y_test, clf.predict(X_test)
    print(classification_report(y_true, y_pred))
    print()

# Tuning hyper-parameters for precision



  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  'precision', 'predicted', average, warn_for)
  y = column_or_1d(y, warn=True)
  'precision', 'predicted', average, warn_for)
  y = column_or_1d(y, warn=True)
  'precision', 'predicted', average, warn_for)
  y = column_or_1d(y, warn=True)
  'precision', 'predicted', average, warn_for)
  y = column_or_1d(y, warn=True)
  'precision', 'predicted', average, warn_for)
  y = column_or_1d(y, warn=True)
  'precision', 'predicted', average, warn_for)
  y = column_or_1d(y, warn=True)
  'precision', 'predicted', average, warn_for)
  y = column_or_1d(y, warn=True)
  'precision', 'predicted', average, warn_for)
  y = column_or_1d(y, warn=True)
  'precision', 'predicted', average, warn_for)
  y = column_or_1d(y, warn=True)
  'precision', 'predicted', average, warn_for)
  y = column_or_1d(y, warn=True)
  

Best parameters set found on development set:

{'C': 1, 'degree': 2, 'kernel': 'rbf', 'max_iter': 10}

Grid scores on development set:

0.784 (+/-0.161) for {'C': 1, 'degree': 2, 'kernel': 'rbf', 'max_iter': 10}
0.265 (+/-0.338) for {'C': 1, 'degree': 2, 'kernel': 'rbf', 'max_iter': 100}
0.265 (+/-0.338) for {'C': 1, 'degree': 2, 'kernel': 'rbf', 'max_iter': 200}
0.539 (+/-0.141) for {'C': 1, 'degree': 2, 'kernel': 'poly', 'max_iter': 10}
0.622 (+/-0.364) for {'C': 1, 'degree': 2, 'kernel': 'poly', 'max_iter': 100}
0.713 (+/-0.258) for {'C': 1, 'degree': 2, 'kernel': 'poly', 'max_iter': 200}
0.784 (+/-0.161) for {'C': 1, 'degree': 3, 'kernel': 'rbf', 'max_iter': 10}
0.265 (+/-0.338) for {'C': 1, 'degree': 3, 'kernel': 'rbf', 'max_iter': 100}
0.265 (+/-0.338) for {'C': 1, 'degree': 3, 'kernel': 'rbf', 'max_iter': 200}
0.675 (+/-0.340) for {'C': 1, 'degree': 3, 'kernel': 'poly', 'max_iter': 10}
0.536 (+/-0.405) for {'C': 1, 'degree': 3, 'kernel': 'poly', 'max_iter': 100}
0.637 (+/-0.325)

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = colu

Best parameters set found on development set:

{'C': 1, 'degree': 2, 'kernel': 'rbf', 'max_iter': 10}

Grid scores on development set:

0.722 (+/-0.174) for {'C': 1, 'degree': 2, 'kernel': 'rbf', 'max_iter': 10}
0.356 (+/-0.055) for {'C': 1, 'degree': 2, 'kernel': 'rbf', 'max_iter': 100}
0.356 (+/-0.055) for {'C': 1, 'degree': 2, 'kernel': 'rbf', 'max_iter': 200}
0.600 (+/-0.084) for {'C': 1, 'degree': 2, 'kernel': 'poly', 'max_iter': 10}
0.641 (+/-0.288) for {'C': 1, 'degree': 2, 'kernel': 'poly', 'max_iter': 100}
0.705 (+/-0.236) for {'C': 1, 'degree': 2, 'kernel': 'poly', 'max_iter': 200}
0.722 (+/-0.174) for {'C': 1, 'degree': 3, 'kernel': 'rbf', 'max_iter': 10}
0.356 (+/-0.055) for {'C': 1, 'degree': 3, 'kernel': 'rbf', 'max_iter': 100}
0.356 (+/-0.055) for {'C': 1, 'degree': 3, 'kernel': 'rbf', 'max_iter': 200}
0.653 (+/-0.197) for {'C': 1, 'degree': 3, 'kernel': 'poly', 'max_iter': 10}
0.518 (+/-0.324) for {'C': 1, 'degree': 3, 'kernel': 'poly', 'max_iter': 100}
0.650 (+/-0.220)

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


In [0]:
########################################################################
# Gaussian Naive Bayes
########################################################################

from sklearn.naive_bayes import GaussianNB

# Set the parameters by cross-validation
tuned_parameters = [{'priors': [None]}]

scores = ['precision', 'recall']

for score in scores:
    print("# Tuning hyper-parameters for %s" % score)
    print()

    clf = GridSearchCV(GaussianNB(), tuned_parameters, cv=5,
                       scoring='%s_macro' % score)
    clf.fit(X_train, y_train)

    print("Best parameters set found on development set:")
    print()
    print(clf.best_params_)
    print()
    print("Grid scores on development set:")
    print()
    means = clf.cv_results_['mean_test_score']
    stds = clf.cv_results_['std_test_score']
    for mean, std, params in zip(means, stds, clf.cv_results_['params']):
        print("%0.3f (+/-%0.03f) for %r"
              % (mean, std * 2, params))
    print()

    print("Detailed classification report:")
    print()
    print("The model is trained on the full development set.")
    print("The scores are computed on the full evaluation set.")
    print()
    y_true, y_pred = y_test, clf.predict(X_test)
    print(classification_report(y_true, y_pred))
    print()

# Tuning hyper-parameters for precision

Best parameters set found on development set:

{'priors': None}

Grid scores on development set:

0.978 (+/-0.056) for {'priors': None}

Detailed classification report:

The model is trained on the full development set.
The scores are computed on the full evaluation set.

              precision    recall  f1-score   support

           1       0.89      1.00      0.94        25
           2       1.00      0.88      0.93        40
           3       0.92      1.00      0.96        24

    accuracy                           0.94        89
   macro avg       0.94      0.96      0.95        89
weighted avg       0.95      0.94      0.94        89


# Tuning hyper-parameters for recall

Best parameters set found on development set:

{'priors': None}

Grid scores on development set:

0.980 (+/-0.050) for {'priors': None}

Detailed classification report:

The model is trained on the full development set.
The scores are computed on the full evaluation

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


In [0]:
########################################################################
# Logistic Regression
########################################################################

from sklearn.linear_model import LogisticRegression

# Set the parameters by cross-validation
tuned_parameters = [{'penalty': ['l1','l2'],
                     'tol': [0.01,0.0001],
                     'C': [1,10,100],
                     'fit_intercept': [True,False]}]

scores = ['precision', 'recall']

for score in scores:
    print("# Tuning hyper-parameters for %s" % score)
    print()

    clf = GridSearchCV(LogisticRegression(), tuned_parameters, cv=5,
                       scoring='%s_macro' % score)
    clf.fit(X_train, y_train)

    print("Best parameters set found on development set:")
    print()
    print(clf.best_params_)
    y_true, y_pred = y_test, clf.predict(X_test)
    print(classification_report(y_true, y_pred))
    print()

# Tuning hyper-parameters for precision



  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = colu

Best parameters set found on development set:

{'C': 100, 'fit_intercept': True, 'penalty': 'l1', 'tol': 0.01}
              precision    recall  f1-score   support

           1       0.89      1.00      0.94        25
           2       1.00      0.85      0.92        40
           3       0.89      1.00      0.94        24

    accuracy                           0.93        89
   macro avg       0.93      0.95      0.93        89
weighted avg       0.94      0.93      0.93        89


# Tuning hyper-parameters for recall



  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = colu

Best parameters set found on development set:

{'C': 100, 'fit_intercept': True, 'penalty': 'l1', 'tol': 0.01}
              precision    recall  f1-score   support

           1       0.89      1.00      0.94        25
           2       1.00      0.85      0.92        40
           3       0.89      1.00      0.94        24

    accuracy                           0.93        89
   macro avg       0.93      0.95      0.93        89
weighted avg       0.94      0.93      0.93        89




  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


In [0]:
########################################################################
# KNN
########################################################################

from sklearn.neighbors import KNeighborsClassifier

# Set the parameters by cross-validation
tuned_parameters = [{'n_neighbors': [2,4,8],
                     'weights': ['distance','uniform'],
                     'algorithm': ['auto','kd_tree'],
                     'p': [1,2]}]

scores = ['precision', 'recall']

for score in scores:
    print("# Tuning hyper-parameters for %s" % score)
    print()

    clf = GridSearchCV(KNeighborsClassifier(), tuned_parameters, cv=5,
                       scoring='%s_macro' % score)
    clf.fit(X_train, y_train)

    print("Best parameters set found on development set:")
    print()
    print(clf.best_params_)
    y_true, y_pred = y_test, clf.predict(X_test)
    print(classification_report(y_true, y_pred))
    print()

# Tuning hyper-parameters for precision



  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_

Best parameters set found on development set:

{'algorithm': 'auto', 'n_neighbors': 2, 'p': 1, 'weights': 'distance'}
              precision    recall  f1-score   support

           1       0.84      0.84      0.84        25
           2       0.77      0.82      0.80        40
           3       0.71      0.62      0.67        24

    accuracy                           0.78        89
   macro avg       0.77      0.76      0.77        89
weighted avg       0.77      0.78      0.77        89


# Tuning hyper-parameters for recall



  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_

Best parameters set found on development set:

{'algorithm': 'auto', 'n_neighbors': 2, 'p': 1, 'weights': 'distance'}
              precision    recall  f1-score   support

           1       0.84      0.84      0.84        25
           2       0.77      0.82      0.80        40
           3       0.71      0.62      0.67        24

    accuracy                           0.78        89
   macro avg       0.77      0.76      0.77        89
weighted avg       0.77      0.78      0.77        89




  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  self.best_estimator_.fit(X, y, **fit_params)


In [0]:
########################################################################
# Bagging
########################################################################

from sklearn.ensemble import BaggingClassifier

# Set the parameters by cross-validation
tuned_parameters = [{'n_estimators': [5,10,20],
                     'max_samples': [1,2],
                     'max_features': [1,2],
                     'random_state': [1,None]}]

scores = ['precision', 'recall']

for score in scores:
    print("# Tuning hyper-parameters for %s" % score)
    print()

    clf = GridSearchCV(BaggingClassifier(), tuned_parameters, cv=5,
                       scoring='%s_macro' % score)
    clf.fit(X_train, y_train)

    print("Best parameters set found on development set:")
    print()
    print(clf.best_params_)
    y_true, y_pred = y_test, clf.predict(X_test)
    print(classification_report(y_true, y_pred))
    print()

# Tuning hyper-parameters for precision



  y = column_or_1d(y, warn=True)
  'precision', 'predicted', average, warn_for)
  y = column_or_1d(y, warn=True)
  'precision', 'predicted', average, warn_for)
  y = column_or_1d(y, warn=True)
  'precision', 'predicted', average, warn_for)
  y = column_or_1d(y, warn=True)
  'precision', 'predicted', average, warn_for)
  y = column_or_1d(y, warn=True)
  'precision', 'predicted', average, warn_for)
  y = column_or_1d(y, warn=True)
  'precision', 'predicted', average, warn_for)
  y = column_or_1d(y, warn=True)
  'precision', 'predicted', average, warn_for)
  y = column_or_1d(y, warn=True)
  'precision', 'predicted', average, warn_for)
  y = column_or_1d(y, warn=True)
  'precision', 'predicted', average, warn_for)
  y = column_or_1d(y, warn=True)
  'precision', 'predicted', average, warn_for)
  y = column_or_1d(y, warn=True)
  'precision', 'predicted', average, warn_for)
  y = column_or_1d(y, warn=True)
  'precision', 'predicted', average, warn_for)
  y = column_or_1d(y, warn=True)
  'prec

Best parameters set found on development set:

{'max_features': 2, 'max_samples': 2, 'n_estimators': 10, 'random_state': None}
              precision    recall  f1-score   support

           1       0.40      0.88      0.55        25
           2       0.67      0.30      0.41        40
           3       0.06      0.04      0.05        24

    accuracy                           0.39        89
   macro avg       0.38      0.41      0.34        89
weighted avg       0.43      0.39      0.35        89


# Tuning hyper-parameters for recall



  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = colu

Best parameters set found on development set:

{'max_features': 1, 'max_samples': 2, 'n_estimators': 20, 'random_state': 1}
              precision    recall  f1-score   support

           1       0.60      0.96      0.74        25
           2       0.94      0.42      0.59        40
           3       0.71      0.92      0.80        24

    accuracy                           0.71        89
   macro avg       0.75      0.77      0.71        89
weighted avg       0.78      0.71      0.69        89




  y = column_or_1d(y, warn=True)


In [0]:
########################################################################
# Random Forest
########################################################################

from sklearn.ensemble import RandomForestClassifier

# Set the parameters by cross-validation
tuned_parameters = [{'n_estimators': [5,10,15],
                     'max_depth': [1,5,None],
                     'max_features': ['sqrt',None],
                     'criterion': ['gini','entropy']}]

scores = ['precision', 'recall']

for score in scores:
    print("# Tuning hyper-parameters for %s" % score)
    print()

    clf = GridSearchCV(RandomForestClassifier(), tuned_parameters, cv=5,
                       scoring='%s_macro' % score)
    clf.fit(X_train, y_train)

    print("Best parameters set found on development set:")
    print()
    print(clf.best_params_)
    y_true, y_pred = y_test, clf.predict(X_test)
    print(classification_report(y_true, y_pred))
    print()

# Tuning hyper-parameters for precision



  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  'precision', 'predicted', average, warn_for)
  estimator.fit(X_train, y_train, **fit_params)
  'precision', 'predicted', average, warn_for)
  estimator.fit(X_train, y_train, **fit_pa

Best parameters set found on development set:

{'criterion': 'gini', 'max_depth': None, 'max_features': 'sqrt', 'n_estimators': 15}
              precision    recall  f1-score   support

           1       0.93      1.00      0.96        25
           2       1.00      0.90      0.95        40
           3       0.92      1.00      0.96        24

    accuracy                           0.96        89
   macro avg       0.95      0.97      0.96        89
weighted avg       0.96      0.96      0.95        89


# Tuning hyper-parameters for recall



  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_

Best parameters set found on development set:

{'criterion': 'gini', 'max_depth': None, 'max_features': 'sqrt', 'n_estimators': 10}
              precision    recall  f1-score   support

           1       0.86      1.00      0.93        25
           2       1.00      0.88      0.93        40
           3       0.96      1.00      0.98        24

    accuracy                           0.94        89
   macro avg       0.94      0.96      0.95        89
weighted avg       0.95      0.94      0.94        89




  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  self.best_estimator_.fit(X, y, **fit_params)


In [0]:
########################################################################
# AdaBoost
########################################################################

from sklearn.ensemble import AdaBoostClassifier

# Set the parameters by cross-validation
tuned_parameters = [{'n_estimators': [25,50,100],
                     'learning_rate': [.5,1,2],
                     'algorithm': ['SAMME','SAMME.R'],
                     'random_state': [1,None]}]

scores = ['precision', 'recall']

for score in scores:
    print("# Tuning hyper-parameters for %s" % score)
    print()

    clf = GridSearchCV(AdaBoostClassifier(), tuned_parameters, cv=5,
                       scoring='%s_macro' % score)
    clf.fit(X_train, y_train)

    print("Best parameters set found on development set:")
    print()
    print(clf.best_params_)
    y_true, y_pred = y_test, clf.predict(X_test)
    print(classification_report(y_true, y_pred))
    print()

# Tuning hyper-parameters for precision



  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = colu

Best parameters set found on development set:

{'algorithm': 'SAMME', 'learning_rate': 1, 'n_estimators': 100, 'random_state': 1}
              precision    recall  f1-score   support

           1       0.96      1.00      0.98        25
           2       1.00      0.95      0.97        40
           3       0.96      1.00      0.98        24

    accuracy                           0.98        89
   macro avg       0.97      0.98      0.98        89
weighted avg       0.98      0.98      0.98        89


# Tuning hyper-parameters for recall



  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = colu

Best parameters set found on development set:

{'algorithm': 'SAMME', 'learning_rate': 1, 'n_estimators': 100, 'random_state': 1}
              precision    recall  f1-score   support

           1       0.96      1.00      0.98        25
           2       1.00      0.95      0.97        40
           3       0.96      1.00      0.98        24

    accuracy                           0.98        89
   macro avg       0.97      0.98      0.98        89
weighted avg       0.98      0.98      0.98        89




  y = column_or_1d(y, warn=True)


In [0]:
########################################################################
# Gradient Boost
########################################################################

from sklearn.ensemble import GradientBoostingClassifier

# Set the parameters by cross-validation
tuned_parameters = [{'n_estimators': [25,50,100],
                     'learning_rate': [.1,.01],
                     'loss': ['deviance'],
                     'max_depth': [3,4]}]

scores = ['precision', 'recall']

for score in scores:
    print("# Tuning hyper-parameters for %s" % score)
    print()

    clf = GridSearchCV(GradientBoostingClassifier(), tuned_parameters, cv=5,
                       scoring='%s_macro' % score)
    clf.fit(X_train, y_train)

    print("Best parameters set found on development set:")
    print()
    print(clf.best_params_)
    y_true, y_pred = y_test, clf.predict(X_test)
    print(classification_report(y_true, y_pred))
    print()

# Tuning hyper-parameters for precision



  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = colu

Best parameters set found on development set:

{'learning_rate': 0.01, 'loss': 'deviance', 'max_depth': 3, 'n_estimators': 25}
              precision    recall  f1-score   support

           1       0.86      1.00      0.93        25
           2       0.97      0.85      0.91        40
           3       0.92      0.96      0.94        24

    accuracy                           0.92        89
   macro avg       0.92      0.94      0.92        89
weighted avg       0.93      0.92      0.92        89


# Tuning hyper-parameters for recall



  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = colu

Best parameters set found on development set:

{'learning_rate': 0.1, 'loss': 'deviance', 'max_depth': 3, 'n_estimators': 100}
              precision    recall  f1-score   support

           1       0.88      0.92      0.90        25
           2       0.94      0.85      0.89        40
           3       0.89      1.00      0.94        24

    accuracy                           0.91        89
   macro avg       0.91      0.92      0.91        89
weighted avg       0.91      0.91      0.91        89




  y = column_or_1d(y, warn=True)


In [0]:
########################################################################
# XGBoost 
########################################################################

from xgboost import XGBClassifier

# Set the parameters by cross-validation
tuned_parameters = [{'n_estimators': [50,100,150],
                     'learning_rate': [.1,.01],
                     'min_child_weight': [1,2],
                     'max_delta_step': [0,1]}]

scores = ['precision', 'recall']

for score in scores:
    print("# Tuning hyper-parameters for %s" % score)
    print()

    clf = GridSearchCV(XGBClassifier(), tuned_parameters, cv=5,
                       scoring='%s_macro' % score)
    clf.fit(X_train, y_train)

    print("Best parameters set found on development set:")
    print()
    print(clf.best_params_)
    y_true, y_pred = y_test, clf.predict(X_test)
    print(classification_report(y_true, y_pred))
    print()

# Tuning hyper-parameters for precision



  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Best parameters set found on development set:

{'learning_rate': 0.1, 'max_delta_step': 1, 'min_child_weight': 2, 'n_estimators': 100}
              precision    recall  f1-score   support

           1       0.96      1.00      0.98        25
           2       1.00      0.93      0.96        40
           3       0.92      1.00      0.96        24

    accuracy                           0.97        89
   macro avg       0.96      0.97      0.97        89
weighted avg       0.97      0.97      0.97        89


# Tuning hyper-parameters for recall

Best parameters set found on development set:

{'learning_rate': 0.1, 'max_delta_step': 1, 'min_child_weight': 2, 'n_estimators': 100}
              precision    recall  f1-score   support

           1       0.96      1.00      0.98        25
           2       1.00      0.93      0.96        40
           3       0.92      1.00      0.96        24

    accuracy                           0.97        89
   macro avg       0.96      0.97     

