## Regularization 實作

## Load data

In [None]:
import pandas as pd
from sklearn import datasets
import seaborn as sns

In [None]:
# Fetch scikit-learn's bundled breast-cancer dataset; later cells read its
# "data", "feature_names" and "target" entries.
data = datasets.load_breast_cancer()
# Echo the loaded object as the cell's output for inspection.
data

## Format data

In [None]:
# Build one table with a column per feature, then append the class label as a
# trailing `target` column so features and labels live in a single frame.
cancer = pd.DataFrame(data["data"], columns=data["feature_names"]).assign(
    target=data["target"]
)
# Show the assembled frame as the cell's output.
cancer

## Training

In [None]:
from sklearn.linear_model import LogisticRegression

In [None]:
# Feature matrix: the first 29 columns of `cancer`, as a NumPy array.
# NOTE(review): `cancer` holds every dataset feature followed by `target`.
# scikit-learn documents 30 features for this dataset, so this slice appears
# to leave the last feature out -- confirm whether that is intentional. Any
# change to the width here must be mirrored wherever feature names are paired
# with the fitted coefficients.
X = cancer.iloc[:, :29].to_numpy()
# Label vector taken from the `target` column.
y = cancer["target"].to_numpy()

## L2 regularization

In [None]:
# Three logistic regressions with the default (L2) penalty that differ only in
# C, the inverse regularisation strength: a smaller C shrinks weights harder.
#
# max_iter is raised above the default of 100 because the features are passed
# in unscaled; with the default budget the solver typically stops early and
# emits a ConvergenceWarning, so the coefficients compared later would reflect
# solver truncation rather than the effect of C.
model1 = LogisticRegression(C=0.1, max_iter=10000)
model2 = LogisticRegression(C=1.0, max_iter=10000)
model3 = LogisticRegression(C=10.0, max_iter=10000)

In [None]:
# Train every classifier on the complete X / y (no hold-out split is made).
model1.fit(X, y)
model2.fit(X, y)
model3.fit(X, y)

## Mean accuracy

In [None]:
# Mean accuracy of model1, measured on the same data it was fitted on.
model1.score(X, y)

In [None]:
# Mean accuracy of model2, measured on the same data it was fitted on.
model2.score(X, y)

In [None]:
# Mean accuracy of model3, measured on the same data it was fitted on.
model3.score(X, y)

## Coefficients

In [None]:
# Tabulate the fitted weights of the three models side by side, one row per
# feature. X is built from the leading columns of `cancer`, so the matching
# names are the first `n_features` column labels. The count is read from the
# fitted model rather than repeated as a literal, which keeps the table aligned
# with X if the feature slice is ever widened or narrowed.
n_features = model1.coef_.shape[1]
coefs = pd.DataFrame({
    "column": cancer.columns[:n_features],
    "model1": model1.coef_[0],
    "model2": model2.coef_[0],
    "model3": model3.coef_[0],
})

In [None]:
# Display the coefficient table as the cell's output.
coefs

In [None]:
# Distribution of model1's coefficients: density histogram with a KDE curve.
# `sns.distplot` is deprecated (seaborn >= 0.11); `histplot` with kde=True and
# density normalisation is the documented replacement for its default output.
sns.histplot(coefs["model1"], kde=True, stat="density")

In [None]:
# Distribution of model2's coefficients: density histogram with a KDE curve.
# `sns.distplot` is deprecated (seaborn >= 0.11); `histplot` with kde=True and
# density normalisation is the documented replacement for its default output.
sns.histplot(coefs["model2"], kde=True, stat="density")

In [None]:
# Distribution of model3's coefficients: density histogram with a KDE curve.
# `sns.distplot` is deprecated (seaborn >= 0.11); `histplot` with kde=True and
# density normalisation is the documented replacement for its default output.
sns.histplot(coefs["model3"], kde=True, stat="density")

In [None]:
# Overlay the coefficient histograms of all three models as unfilled step
# outlines so they can be compared directly.
# A single wide-form `histplot` call replaces three deprecated `distplot`
# calls: it draws one outline per column, bins them on a common grid and adds
# a legend naming each model (the separate calls left the curves unlabelled).
settings = {"element": "step", "fill": False, "linewidth": 3, "alpha": 0.8}
sns.histplot(coefs[["model1", "model2", "model3"]], **settings)

In [None]:
# Use the feature name as the row label so only the numeric model columns
# remain as data. Re-running this cell without rebuilding `coefs` first will
# fail, because "column" is no longer a regular column afterwards.
coefs = coefs.set_index(keys="column")

In [None]:
# Heatmap of coefficients (rows: features, columns: models) on a red/blue
# diverging palette, with the colour scale clipped to [-1, 1.2].
sns.heatmap(data=coefs, cmap="RdBu_r", vmin=-1, vmax=1.2)

## Prediction

In [None]:
# Predicted class labels from each model for the rows it was trained on.
predict1 = model1.predict(X)
predict2 = model2.predict(X)
predict3 = model3.predict(X)

## Accuracy, precision and recall

In [None]:
from sklearn import metrics

In [None]:
# Accuracy of model1's predictions against the true labels.
metrics.accuracy_score(y_true=y, y_pred=predict1)

In [None]:
# Accuracy of model2's predictions against the true labels.
metrics.accuracy_score(y_true=y, y_pred=predict2)

In [None]:
# Accuracy of model3's predictions against the true labels.
metrics.accuracy_score(y_true=y, y_pred=predict3)

In [None]:
# Precision of model1's predictions against the true labels.
metrics.precision_score(y_true=y, y_pred=predict1)

In [None]:
# Precision of model2's predictions against the true labels.
metrics.precision_score(y_true=y, y_pred=predict2)

In [None]:
# Precision of model3's predictions against the true labels.
metrics.precision_score(y_true=y, y_pred=predict3)

In [None]:
# Recall of model1's predictions against the true labels.
metrics.recall_score(y_true=y, y_pred=predict1)

In [None]:
# Recall of model2's predictions against the true labels.
metrics.recall_score(y_true=y, y_pred=predict2)

In [None]:
# Recall of model3's predictions against the true labels.
metrics.recall_score(y_true=y, y_pred=predict3)

## L1 regularization

In [None]:
# L1-penalised counterparts of the earlier models, again varying only C.
# These rebind model1..model3, so the previously fitted models are no longer
# reachable through those names from here on.
l1_settings = {"penalty": "l1", "solver": "liblinear"}
model1 = LogisticRegression(C=0.1, **l1_settings)
model2 = LogisticRegression(C=1.0, **l1_settings)
model3 = LogisticRegression(C=10.0, **l1_settings)

In [None]:
# Train the L1-penalised classifiers on the complete X / y.
model1.fit(X, y)
model2.fit(X, y)
model3.fit(X, y)

## Coefficients

In [None]:
# Tabulate the weights of the three L1-penalised models side by side, one row
# per feature. X is built from the leading columns of `cancer`, so the
# matching names are the first `n_features` column labels. The count is read
# from the fitted model rather than repeated as a literal, which keeps the
# table aligned with X if the feature slice is ever widened or narrowed.
n_features = model1.coef_.shape[1]
coefs = pd.DataFrame({
    "column": cancer.columns[:n_features],
    "model1": model1.coef_[0],
    "model2": model2.coef_[0],
    "model3": model3.coef_[0],
})

In [None]:
# Display the L1 coefficient table as the cell's output.
coefs

In [None]:
# Count histogram of model1's L1 coefficients (no KDE curve).
# `sns.distplot` is deprecated (seaborn >= 0.11); a plain `histplot` is the
# documented replacement for `distplot(..., kde=False)`.
sns.histplot(coefs["model1"])

In [None]:
# Count histogram of model2's L1 coefficients (no KDE curve).
# `sns.distplot` is deprecated (seaborn >= 0.11); a plain `histplot` is the
# documented replacement for `distplot(..., kde=False)`.
sns.histplot(coefs["model2"])

In [None]:
# Count histogram of model3's L1 coefficients (no KDE curve).
# `sns.distplot` is deprecated (seaborn >= 0.11); a plain `histplot` is the
# documented replacement for `distplot(..., kde=False)`.
sns.histplot(coefs["model3"])

In [None]:
# Overlay the L1 coefficient histograms of all three models as unfilled step
# outlines so they can be compared directly.
# A single wide-form `histplot` call replaces three deprecated `distplot`
# calls: it draws one outline per column, bins them on a common grid and adds
# a legend naming each model (the separate calls left the curves unlabelled).
settings = {"element": "step", "fill": False, "linewidth": 3, "alpha": 0.8}
sns.histplot(coefs[["model1", "model2", "model3"]], **settings)

In [None]:
# Use the feature name as the row label so only the numeric model columns
# remain as data. Re-running this cell without rebuilding `coefs` first will
# fail, because "column" is no longer a regular column afterwards.
coefs = coefs.set_index(keys="column")

In [None]:
# Heatmap of the L1 coefficients (rows: features, columns: models) on a
# red/blue diverging palette, with the colour scale clipped to [-1, 1.2].
sns.heatmap(data=coefs, cmap="RdBu_r", vmin=-1, vmax=1.2)