In [27]:
from si.data.dataset import Dataset
from si.io.CSV import read_csv
from si.model_selection.cross_validate import cross_validate
from si.model_selection.grid_search import grid_search_cv
from si.model_selection.randomized_search import randomized_search_cv
from sklearn.preprocessing import StandardScaler
import numpy as np
import pandas as pd

#Models
from si.linear_model.logistic_regression import LogisticRegression

# Cross validation

In [28]:
# Load the breast dataset from CSV, then standardize the feature matrix.
# NOTE(review): this is an absolute, machine-specific path; consider resolving
# it from a configurable data directory so the notebook runs elsewhere.
# The positional arguments after the path are the separator and two boolean
# flags (presumably "features" / "label" switches -- confirm against read_csv).
data = read_csv(
    "D:/Mestrado/2ano/1semestre/SIB/si/datasets/breast/breast-bin.data",
    ",",
    False,
    True,
)

# NOTE(review): the scaler is fitted on the whole dataset before the
# train/test splits performed by later cells, so held-out samples contribute
# to the scaling statistics -- confirm this is acceptable for the exercise.
scaler = StandardScaler()
data.X = scaler.fit_transform(data.X)

In [29]:
# Cross-validate a default LogisticRegression with cv=5 and test_size=0.3.
# NOTE(review): the saved output of this cell reports identical "train" and
# "test" scores for every seed, which suggests cross_validate may be scoring
# the same split twice -- verify the implementation.
model = LogisticRegression()
scores = cross_validate(
    model,
    data,
    cv=5,
    test_size=0.3,
)
print(scores)

{'seeds': [132, 390, 152, 325, 200], 'train': [0.9754601226993865, 0.9631901840490797, 0.9713701431492843, 0.9611451942740287, 0.967280163599182], 'test': [0.9754601226993865, 0.9631901840490797, 0.9713701431492843, 0.9611451942740287, 0.967280163599182]}


# Grid search

In [30]:
# Grid search over the combinations of the hyperparameter values listed below.
model = LogisticRegression()

# Candidate values per LogisticRegression hyperparameter.
parameters = dict(
    l2_penalty=[1, 10],
    alpha=[0.001, 0.0001],
    max_iter=[1000, 2000],
)

scores = grid_search_cv(
    model,
    data,
    parameters,
    cv=3,
    test_size=0.3,
)

# One report per entry in scores: a "|" separator, a header, then the entry.
for result in scores:
    print("\n|\n", "\nScores:\n-------", result, sep="\n")



|


Scores:
-------
{'seeds': [928, 21, 11], 'train': [0.967280163599182, 0.9693251533742331, 0.9631901840490797], 'test': [0.967280163599182, 0.9693251533742331, 0.9631901840490797], 'parameters': {'l2_penalty': 1, 'alpha': 0.001, 'max_iter': 1000}}

|


Scores:
-------
{'seeds': [653, 805, 353], 'train': [0.9693251533742331, 0.9591002044989775, 0.967280163599182], 'test': [0.9693251533742331, 0.9591002044989775, 0.967280163599182], 'parameters': {'l2_penalty': 1, 'alpha': 0.001, 'max_iter': 2000}}

|


Scores:
-------
{'seeds': [980, 292, 701], 'train': [0.9570552147239264, 0.9713701431492843, 0.9591002044989775], 'test': [0.9570552147239264, 0.9713701431492843, 0.9591002044989775], 'parameters': {'l2_penalty': 1, 'alpha': 0.0001, 'max_iter': 1000}}

|


Scores:
-------
{'seeds': [651, 961, 45], 'train': [0.9754601226993865, 0.9631901840490797, 0.9693251533742331], 'test': [0.9754601226993865, 0.9631901840490797, 0.9693251533742331], 'parameters': {'l2_penalty': 1, 'alpha': 0.0001, 

# Randomized search

In [31]:
# Randomized search: n_iter=10 is passed together with the value
# distributions defined below.
model = LogisticRegression()

# Candidate value ranges; the integer-valued ones are truncated with astype(int).
parameters = dict(
    l2_penalty=np.linspace(1, 10, num=10).astype(int),
    alpha=np.linspace(0.001, 0.0001, num=100),
    max_iter=np.linspace(1000, 2000, num=200).astype(int),
)

scores = randomized_search_cv(
    model,
    data,
    parameters,
    cv=3,
    n_iter=10,
    test_size=0.3,
)

# NOTE(review): the saved output of this cell contains a "Parameters:" section
# that this loop does not print, so the output appears to come from an earlier
# version of the code -- re-run the cell to refresh it.
for result in scores:
    print("\n|\n", "\nScores:\n-------", result, sep="\n")


|

Parameters:
-----------
{'l2_penalty': 9, 'alpha': 0.00016363636363636363, 'max_iter': 1135}

Scores:
-------
{'seeds': [677, 985, 652], 'train': [0.9652351738241309, 0.9734151329243353, 0.9754601226993865], 'test': [0.9652351738241309, 0.9734151329243353, 0.9754601226993865], 'parameters': {'l2_penalty': 9, 'alpha': 0.00016363636363636363, 'max_iter': 1135}}

|

Parameters:
-----------
{'l2_penalty': 5, 'alpha': 0.0006090909090909092, 'max_iter': 1984}

Scores:
-------
{'seeds': [831, 851, 433], 'train': [0.967280163599182, 0.9631901840490797, 0.9591002044989775], 'test': [0.967280163599182, 0.9631901840490797, 0.9591002044989775], 'parameters': {'l2_penalty': 5, 'alpha': 0.0006090909090909092, 'max_iter': 1984}}

|

Parameters:
-----------
{'l2_penalty': 10, 'alpha': 0.0008818181818181819, 'max_iter': 1321}

Scores:
-------
{'seeds': [524, 1, 468], 'train': [0.9754601226993865, 0.9775051124744376, 0.9652351738241309], 'test': [0.9754601226993865, 0.9775051124744376, 0.96523517382