In [1]:
import os

import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler

from si.data.dataset import Dataset
from si.io.CSV import read_csv
from si.model_selection.cross_validate import cross_validate
from si.model_selection.grid_search import grid_search_cv
from si.model_selection.randomized_search import randomized_search_cv

#Models
from si.linear_model.logistic_regression import LogisticRegression

[0.73105858 0.88079708 0.95257413]


# Cross validation

In [2]:
#Importing data
# The dataset root defaults to the original local checkout, but it can be
# overridden with the SI_DATASETS_DIR environment variable so the notebook
# runs on other machines without editing this cell.
DATASETS_DIR = os.environ.get("SI_DATASETS_DIR", "D:/Mestrado/2ano/1semestre/SIB/si/datasets")

# The positional arguments are passed exactly as before (path, ",", False, True).
# NOTE(review): presumably separator / has-feature-names / has-label flags --
# confirm against read_csv's signature.
data = read_csv(f"{DATASETS_DIR}/breast/breast-bin.data", ",", False, True)

# Standardize the feature matrix and store it back on the dataset.
# NOTE(review): the scaler is fit on the whole dataset before any train/test
# split is made, so held-out samples influence the scaling statistics.
data.X = StandardScaler().fit_transform(data.X)

In [3]:
# Baseline run: a LogisticRegression with default hyperparameters, scored by
# cross_validate with cv=5 and test_size=0.3.
# NOTE(review): in the recorded output the train and test columns are identical
# for every seed, which is suspicious -- verify that cross_validate scores the
# held-out split separately from the training split.
model = LogisticRegression()
scores = cross_validate(
    model,
    data,
    cv=5,
    test_size=0.3,
)

# Tabulate the returned scores (the recorded output shows the columns
# seeds / train / test, one row per repetition) and print the table.
scores_dataframe = pd.DataFrame(scores)
print(scores_dataframe)

   seeds     train      test
0    114  0.969325  0.969325
1    372  0.969325  0.969325
2    496  0.969325  0.969325
3    846  0.967280  0.967280
4    937  0.969325  0.969325


# Grid search

In [4]:
# Grid search: every hyperparameter below has two candidate values; each
# candidate configuration is evaluated by grid_search_cv with cv=3 and
# test_size=0.3.
model = LogisticRegression()

parameters = {
    "l2_penalty": [1, 10],
    "alpha": [0.001, 0.0001],
    "max_iter": [1000, 2000],
}

scores = grid_search_cv(model, data, parameters, cv=3, test_size=0.3)

# Report each result: first the hyperparameters that were used, then a table
# built from every other entry of the result dict.
for elem in scores:
    # Separator and section title, emitted exactly as two consecutive prints would.
    print("\n|\n", "Parameters:\n-----------", sep="\n")
    print(elem["parameters"])
    print("\nScores:\n-------")
    split_scores = {key: value for key, value in elem.items() if key != "parameters"}
    print(pd.DataFrame(split_scores))


|

Parameters:
-----------
{'l2_penalty': 1, 'alpha': 0.001, 'max_iter': 1000}

Scores:
-------
   seeds     train      test
0    632  0.961145  0.961145
1    813  0.955010  0.955010
2     27  0.965235  0.965235

|

Parameters:
-----------
{'l2_penalty': 1, 'alpha': 0.001, 'max_iter': 2000}

Scores:
-------
   seeds     train      test
0    779  0.967280  0.967280
1    898  0.963190  0.963190
2    672  0.969325  0.969325

|

Parameters:
-----------
{'l2_penalty': 1, 'alpha': 0.0001, 'max_iter': 1000}

Scores:
-------
   seeds     train      test
0    986  0.967280  0.967280
1    358  0.963190  0.963190
2    662  0.969325  0.969325

|

Parameters:
-----------
{'l2_penalty': 1, 'alpha': 0.0001, 'max_iter': 2000}

Scores:
-------
   seeds     train      test
0    509  0.973415  0.973415
1    441  0.965235  0.965235
2    471  0.973415  0.973415

|

Parameters:
-----------
{'l2_penalty': 10, 'alpha': 0.001, 'max_iter': 1000}

Scores:
-------
   seeds     train      test
0    325  0.961145 

# Randomized search

In [5]:
# Randomized search: n_iter=10 configurations are evaluated (cv=3,
# test_size=0.3), with values taken from the candidate arrays below.
model = LogisticRegression()

# Candidate values per hyperparameter.
l2_penalty_values = np.linspace(1, 10, 10).astype(int)       # 10 points from 1 to 10, cast to int
alpha_values = np.linspace(0.001, 0.0001, 100)               # 100 points from 1e-3 down to 1e-4
max_iter_values = np.linspace(1000, 2000, 200).astype(int)   # 200 points, truncated to int

parameters = {
    "l2_penalty": l2_penalty_values,
    "alpha": alpha_values,
    "max_iter": max_iter_values,
}

scores = randomized_search_cv(model, data, parameters, cv=3, n_iter=10, test_size=0.3)

# Report each result: the hyperparameters used, then a table built from every
# other entry of the result dict.
# NOTE(review): this reporting loop repeats the one in the grid-search cell;
# consider extracting a shared helper.
for elem in scores:
    # Separator and section title, emitted exactly as two consecutive prints would.
    print("\n|\n", "Parameters:\n-----------", sep="\n")
    print(elem["parameters"])
    print("\nScores:\n-------")
    split_scores = {}
    for key, value in elem.items():
        if key != "parameters":
            split_scores[key] = value
    print(pd.DataFrame(split_scores))


|

Parameters:
-----------
{'l2_penalty': 2, 'alpha': 0.0002818181818181818, 'max_iter': 1804}

Scores:
-------
   seeds     train      test
0    866  0.965235  0.965235
1    913  0.973415  0.973415
2    833  0.963190  0.963190

|

Parameters:
-----------
{'l2_penalty': 8, 'alpha': 0.00030909090909090914, 'max_iter': 1603}

Scores:
-------
   seeds     train      test
0    881  0.963190  0.963190
1    334  0.967280  0.967280
2    430  0.969325  0.969325

|

Parameters:
-----------
{'l2_penalty': 2, 'alpha': 0.0005363636363636364, 'max_iter': 1613}

Scores:
-------
   seeds     train      test
0    606  0.961145  0.961145
1    251  0.965235  0.965235
2    364  0.955010  0.955010

|

Parameters:
-----------
{'l2_penalty': 6, 'alpha': 0.00016363636363636363, 'max_iter': 1587}

Scores:
-------
   seeds     train      test
0    215  0.967280  0.967280
1    438  0.965235  0.965235
2    345  0.963190  0.963190

|

Parameters:
-----------
{'l2_penalty': 8, 'alpha': 0.00025454545454545456, 'ma