## Regularization 實作

## Load data

In [None]:
import pandas as pd
from sklearn import datasets
import seaborn as sns

In [None]:
# Load scikit-learn's bundled breast-cancer dataset. The returned object is
# indexed later by the keys "data", "feature_names" and "target".
data = datasets.load_breast_cancer()
# Bare expression: shows the loaded object as the notebook cell output.
data

## Format data

In [None]:
# Assemble a single DataFrame: one column per feature name, with the class
# label appended as a trailing "target" column.
cancer = pd.DataFrame(data=data["data"], columns=data["feature_names"])
cancer = cancer.assign(target=data["target"])
# Display the assembled frame.
cancer

## Training

In [None]:
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split

In [None]:
# Labels: the "target" column as a NumPy array.
y = cancer["target"].to_numpy()
# Feature matrix: the first 29 columns of the frame as a NumPy array.
# NOTE(review): the scikit-learn breast-cancer dataset is documented as having
# 30 features, so this slice appears to leave the last feature column out of
# the model — confirm whether that is intentional.
X = cancer.iloc[:, :29].to_numpy()

In [None]:
# Hold out 20% of the rows for evaluation. The fixed seed makes the shuffled
# split (and every score, coefficient table and plot derived from it) repeat
# exactly across runs; without it each execution drew a different split, so
# results could not be compared between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, shuffle=True, random_state=42
)

## L2 regularization

In [None]:
# Three models that differ only in C, the inverse regularisation strength
# (smaller C = stronger penalty); the penalty type is left at the estimator's
# default.
# max_iter is raised well above the default of 100: on unscaled features the
# default solver commonly stops at the iteration cap with a ConvergenceWarning,
# in which case the coefficients reflect where the optimiser was interrupted
# rather than the effect of C that this notebook is comparing.
# NOTE(review): standardising the features before fitting is the alternative
# remedy — confirm which one is preferred here.
model1 = LogisticRegression(C=0.001, max_iter=10000)
model2 = LogisticRegression(C=1.0, max_iter=10000)
model3 = LogisticRegression(C=1000.0, max_iter=10000)

In [None]:
# Fit every model on the same training split so that the regularisation
# setting is the only thing that differs between them.
model1.fit(X_train, y_train)
model2.fit(X_train, y_train)
model3.fit(X_train, y_train)

## Mean accuracy

In [None]:
# Score model1 on the held-out test split.
model1.score(X_test, y_test)

In [None]:
# Score model2 on the held-out test split.
model2.score(X_test, y_test)

In [None]:
# Score model3 on the held-out test split.
model3.score(X_test, y_test)

## Coefficients

In [None]:
# Coefficient table: one row per fitted feature, one column per model.
# The feature-name column takes as many leading frame columns as the model has
# coefficients instead of repeating the literal 29 used when X was sliced, so
# the labels cannot drift out of step with the fitted models if that slice is
# ever changed (a length mismatch here would make the DataFrame constructor
# raise). This still assumes X was built from the leading columns of `cancer`.
coefs = pd.DataFrame(
    {
        "column": cancer.columns[: model1.coef_.shape[1]],
        # coef_[0] is the first (here: only used) row of per-feature coefficients.
        "model1": model1.coef_[0],
        "model2": model2.coef_[0],
        "model3": model3.coef_[0],
    }
)

In [None]:
# Bare expression: shows the coefficient table as the notebook cell output.
coefs

In [None]:
# Distribution of model1's coefficients, with a KDE curve overlaid.
sns.histplot(data=coefs, x="model1", kde=True)

In [None]:
# Distribution of model2's coefficients, with a KDE curve overlaid.
sns.histplot(data=coefs, x="model2", kde=True)

In [None]:
# Distribution of model3's coefficients, with a KDE curve overlaid.
sns.histplot(data=coefs, x="model3", kde=True)

In [None]:
# Overlay the three coefficient distributions as unfilled step outlines on the
# same axes so they can be compared directly.
# Uses sns.histplot, like the single-model cells above, instead of the
# deprecated sns.distplot. element="step" with fill=False is histplot's
# equivalent of the matplotlib histtype="step" outline that was previously
# passed through hist_kws.
settings = {"element": "step", "fill": False, "linewidth": 3, "alpha": 0.8}
sns.histplot(coefs["model1"], **settings)
sns.histplot(coefs["model2"], **settings)
sns.histplot(coefs["model3"], **settings)

In [None]:
# Move the feature names into the index so that only the numeric model
# columns remain as data (the heatmap needs an all-numeric frame).
# Not re-runnable on its own: a second run finds no "column" column left.
coefs = coefs.set_index(keys="column")

In [None]:
# Heatmap of coefficients: features as rows, models as columns. The colour
# scale is clamped to [-1, 1.2]; values outside that range saturate.
sns.heatmap(data=coefs, cmap="RdBu_r", vmin=-1, vmax=1.2)

## Prediction

In [None]:
# Class predictions of each model for the held-out test rows; reused by the
# metric cells that follow.
predict1 = model1.predict(X_test)
predict2 = model2.predict(X_test)
predict3 = model3.predict(X_test)

## Accuracy, precision and recall

In [None]:
from sklearn import metrics

In [None]:
# Accuracy of model1's predictions against the true test labels.
metrics.accuracy_score(y_true=y_test, y_pred=predict1)

In [None]:
# Accuracy of model2's predictions against the true test labels.
metrics.accuracy_score(y_true=y_test, y_pred=predict2)

In [None]:
# Accuracy of model3's predictions against the true test labels.
metrics.accuracy_score(y_true=y_test, y_pred=predict3)

In [None]:
# Precision of model1's test predictions (scorer defaults left unchanged).
metrics.precision_score(y_true=y_test, y_pred=predict1)

In [None]:
# Precision of model2's test predictions (scorer defaults left unchanged).
metrics.precision_score(y_true=y_test, y_pred=predict2)

In [None]:
# Precision of model3's test predictions (scorer defaults left unchanged).
metrics.precision_score(y_true=y_test, y_pred=predict3)

In [None]:
# Recall of model1's test predictions (scorer defaults left unchanged).
metrics.recall_score(y_true=y_test, y_pred=predict1)

In [None]:
# Recall of model2's test predictions (scorer defaults left unchanged).
metrics.recall_score(y_true=y_test, y_pred=predict2)

In [None]:
# Recall of model3's test predictions (scorer defaults left unchanged).
metrics.recall_score(y_true=y_test, y_pred=predict3)

## L1 regularization

In [None]:
# Rebind model1..model3 to L1-penalised models, again differing only in C.
# The solver is set to liblinear explicitly alongside the L1 penalty.
model1 = LogisticRegression(solver="liblinear", penalty="l1", C=0.01)
model2 = LogisticRegression(solver="liblinear", penalty="l1", C=1.0)
model3 = LogisticRegression(solver="liblinear", penalty="l1", C=100.0)

In [None]:
# Fit the L1 models on the same training split used for the L2 models.
model1.fit(X_train, y_train)
model2.fit(X_train, y_train)
model3.fit(X_train, y_train)

## Coefficients

In [None]:
# Coefficient table for the L1 models: one row per fitted feature, one column
# per model. This rebinds `coefs`, replacing the L2 table.
# The feature-name column takes as many leading frame columns as the model has
# coefficients instead of repeating the literal 29 used when X was sliced, so
# the labels cannot drift out of step with the fitted models if that slice is
# ever changed (a length mismatch here would make the DataFrame constructor
# raise). This still assumes X was built from the leading columns of `cancer`.
coefs = pd.DataFrame(
    {
        "column": cancer.columns[: model1.coef_.shape[1]],
        # coef_[0] is the first (here: only used) row of per-feature coefficients.
        "model1": model1.coef_[0],
        "model2": model2.coef_[0],
        "model3": model3.coef_[0],
    }
)

In [None]:
# Bare expression: shows the L1 coefficient table as the notebook cell output.
coefs

In [None]:
# Histogram of model1's L1 coefficients, without a KDE curve.
# sns.histplot replaces the deprecated sns.distplot, matching the histogram
# cells in the L2 section of this notebook.
sns.histplot(coefs["model1"], kde=False)

In [None]:
# Histogram of model2's L1 coefficients, without a KDE curve.
# sns.histplot replaces the deprecated sns.distplot, matching the histogram
# cells in the L2 section of this notebook.
sns.histplot(coefs["model2"], kde=False)

In [None]:
# Histogram of model3's L1 coefficients, without a KDE curve.
# sns.histplot replaces the deprecated sns.distplot, matching the histogram
# cells in the L2 section of this notebook.
sns.histplot(coefs["model3"], kde=False)

In [None]:
# Overlay the three L1 coefficient distributions as unfilled step outlines on
# the same axes so they can be compared directly.
# "element" is an argument of sns.histplot, not of the matplotlib histogram
# that sns.distplot forwards hist_kws to, so the previous distplot call
# rejected it. The settings are now passed to histplot directly; fill=False
# keeps the outlines unfilled so the overlaid distributions stay readable.
settings = {"element": "step", "fill": False, "linewidth": 3, "alpha": 0.8}
sns.histplot(coefs["model1"], **settings)
sns.histplot(coefs["model2"], **settings)
sns.histplot(coefs["model3"], **settings)

In [None]:
# Move the feature names into the index so that only the numeric model
# columns remain as data (the heatmap needs an all-numeric frame).
# Not re-runnable on its own: a second run finds no "column" column left.
coefs = coefs.set_index(keys="column")

In [None]:
# Heatmap of the L1 coefficients: features as rows, models as columns. The
# colour scale is clamped to [-1, 1.2]; values outside that range saturate.
sns.heatmap(data=coefs, cmap="RdBu_r", vmin=-1, vmax=1.2)