SIB - P7
A Jupyter Notebook with examples of how to use cross validation and grid search (with logistic regression). 7/11/2022

In [1]:
# imports
from sklearn.preprocessing import StandardScaler

from si.io.csv import read_csv
from si.linear_model.logistic_regression import LogisticRegression
from si.model_selection.cross_validate import cross_validate
from si.model_selection.grid_search import grid_search_cv

In [2]:
# datasets
# Load the breast-bin CSV into a dataset object used by every cell below.
# NOTE(review): presumably features=False means the file has no header row of
# feature names and label=True means a label column is present — confirm
# against si.io.csv.read_csv.
breast_bin_dataset = read_csv(
    '../datasets/breast-bin.csv',
    features=False,
    label=True,
)

In [3]:
# standardization
# Rescale the feature matrix with scikit-learn's StandardScaler (imported in
# the imports cell at the top of the notebook).
# Note: this overwrites breast_bin_dataset.X, so every later cell works on the
# standardized features rather than the raw values read from the CSV.
breast_bin_dataset.X = StandardScaler().fit_transform(breast_bin_dataset.X)
breast_bin_dataset.X

array([[ 0.20885295, -0.69912815, -0.74242297, ..., -1.0000359 ,
        -0.61132565, -0.34418721],
       [-0.8578253 , -0.69912815, -0.74242297, ..., -0.58991542,
        -0.61132565, -0.34418721],
       [-0.8578253 , -0.69912815, -0.74242297, ..., -0.17979494,
        -0.61132565, -0.34418721],
       ...,
       [ 0.20885295, -0.37139715, -0.40592217, ..., -1.0000359 ,
        -0.61132565,  0.23892607],
       [-0.8578253 , -0.04366616, -0.40592217, ..., -0.17979494,
        -0.61132565, -0.34418721],
       [ 0.91997179,  0.93952681,  0.94008103, ...,  1.46068699,
        -0.61132565, -0.34418721]])

In [4]:
# cross validation
# Evaluate a LogisticRegression built with its default arguments over cv=5
# rounds. The displayed result holds, per round, the seed used and the
# train and test scores.
lg = LogisticRegression()
scores = cross_validate(
    lg,
    breast_bin_dataset,
    cv=5,
)
scores

{'seeds': [668, 155, 473, 195, 111],
 'train': [0.962432915921288,
  0.9660107334525939,
  0.9677996422182469,
  0.9660107334525939,
  0.9660107334525939],
 'test': [0.9928057553956835,
  0.9712230215827338,
  0.9640287769784173,
  0.9712230215827338,
  0.9712230215827338]}

In [5]:
# grid search cv

# candidate values to try for each LogisticRegression hyperparameter
parameter_grid = {
    'l2_penalty': (1, 10),
    'alpha': (0.001, 0.0001),
    'max_iter': (1000, 2000),
}

# fresh model instance for the search
lg = LogisticRegression()

# run the grid search with cv=3 rounds per parameter combination; each entry
# of the result records the seeds, the train/test scores and the parameters used
# (note: this rebinds `scores`, replacing the cross-validation result above)
scores = grid_search_cv(lg, breast_bin_dataset, parameter_grid=parameter_grid, cv=3)

scores

[{'seeds': [897, 756, 808],
  'train': [0.9785330948121646, 0.9660107334525939, 0.9660107334525939],
  'test': [0.920863309352518, 0.9712230215827338, 0.9712230215827338],
  'parameters': {'l2_penalty': 1, 'alpha': 0.001, 'max_iter': 1000}},
 {'seeds': [968, 155, 473],
  'train': [0.9660107334525939, 0.9660107334525939, 0.9695885509838998],
  'test': [0.9712230215827338, 0.9712230215827338, 0.9640287769784173],
  'parameters': {'l2_penalty': 1, 'alpha': 0.001, 'max_iter': 2000}},
 {'seeds': [195, 111, 897],
  'train': [0.9660107334525939, 0.9660107334525939, 0.9785330948121646],
  'test': [0.9712230215827338, 0.9712230215827338, 0.920863309352518],
  'parameters': {'l2_penalty': 1, 'alpha': 0.0001, 'max_iter': 1000}},
 {'seeds': [756, 808, 968],
  'train': [0.9660107334525939, 0.9660107334525939, 0.9660107334525939],
  'test': [0.9712230215827338, 0.9712230215827338, 0.9712230215827338],
  'parameters': {'l2_penalty': 1, 'alpha': 0.0001, 'max_iter': 2000}},
 {'seeds': [155, 473, 195],
