In [1]:
from sklearn import datasets
from sklearn import metrics
from sklearn.linear_model import LogisticRegression

In [5]:
cancer = datasets.load_breast_cancer()
logistic_regression = LogisticRegression(max_iter=100000)
model = logistic_regression.fit(cancer.data, cancer.target)

print(f'Accuracy: {model.score(cancer.data, cancer.target):.2f}')

Accuracy: 0.96


In [7]:
predictions = model.predict(cancer.data)

print(f'Accuracy: {metrics.accuracy_score(cancer.target, predictions):.2f}')
print(f'ROC AUC: {metrics.roc_auc_score(cancer.target, predictions):.2f}')
print(f'F1: {metrics.f1_score(cancer.target, predictions):.2f}')

Accuracy: 0.96
ROC AUC: 0.95
F1: 0.97


In [8]:
from sklearn.model_selection import train_test_split

In [10]:
X_train, X_test, y_train, y_test = train_test_split(
    cancer.data, cancer.target,
    test_size=0.2, random_state=12)
model = logistic_regression.fit(X_train, y_train)
print(f'Train accuracy: {model.score(X_train, y_train):.2f}')
print(f'Test accuracy: {model.score(X_test, y_test):.2f}')

Train accuracy: 0.96
Test accuracy: 0.94


In [11]:
from sklearn.linear_model import Lasso, Ridge, ElasticNet

In [12]:
boston = datasets.load_boston()

lasso = Lasso()
ridge = Ridge()
elastic = ElasticNet()

In [13]:
for model in [lasso, ridge, elastic]:
    x_train, x_test, y_train, y_test = train_test_split(boston.data, boston.target, test_size=0.2)
    model.fit(x_train, y_train)
    pred = model.predict(x_test)
    print(model.__class__)
    print(f'MSE: {metrics.mean_squared_error(y_test, pred)}')

<class 'sklearn.linear_model._coordinate_descent.Lasso'>
MSE: 19.524304342851448
<class 'sklearn.linear_model._ridge.Ridge'>
MSE: 21.221554762515407
<class 'sklearn.linear_model._coordinate_descent.ElasticNet'>
MSE: 32.023817752664975


In [16]:
print(f'R2: {model.score(x_test, y_test):.2f}')
print(f'R2: {metrics.r2_score(y_test, pred):.2f}')

R2: 0.68
R2: 0.68


## Cross-validation

In [17]:
from sklearn.model_selection import KFold, cross_val_score

In [18]:
iris = datasets.load_iris()
iris.keys()

dict_keys(['data', 'target', 'frame', 'target_names', 'DESCR', 'feature_names', 'filename'])

In [20]:
print(iris.DESCR[:475])

.. _iris_dataset:

Iris plants dataset
--------------------

**Data Set Characteristics:**

    :Number of Instances: 150 (50 in each of three classes)
    :Number of Attributes: 4 numeric, predictive attributes and the class
    :Attribute Information:
        - sepal length in cm
        - sepal width in cm
        - petal length in cm
        - petal width in cm
        - class:
                - Iris-Setosa
                - Iris-Versicolour
                - Iris-Vi


In [22]:
logistic_regression = LogisticRegression(max_iter=100000)
cv = KFold(n_splits=5)

for split_idx, (train_idx, test_idx) in enumerate(cv.split(iris.data)):
    x_train, x_test = iris.data[train_idx], iris.data[test_idx]
    y_train, y_test = iris.target[train_idx], iris.target[test_idx]
    
    logistic_regression.fit(x_train, y_train)
    score = logistic_regression.score(x_test, y_test)
    print(f'Split {split_idx} Score: {score:.2f}')

Split 0 Score: 1.00
Split 1 Score: 1.00
Split 2 Score: 0.87
Split 3 Score: 0.93
Split 4 Score: 0.83


In [24]:
cv_score = cross_val_score(
    logistic_regression, iris.data, iris.target, scoring='accuracy', cv=cv)

print(f'Cross val score: {cv_score}')
print(f'Mean cross val score: {cv_score.mean():.2f}')

Cross val score: [1.         1.         0.86666667 0.93333333 0.83333333]
Mean cross val score: 0.93
