In [None]:
import numpy as np

from sklearn.datasets import load_breast_cancer

# Load the breast-cancer dataset that ships with scikit-learn. The full bundle
# is kept in `data` because later cells read other attributes from it (DESCR).
data = load_breast_cancer()

# Feature matrix and the corresponding class labels.
X, y = data.data, data.target

In [None]:
# Print the description text that is bundled with the loaded dataset.
print(data.DESCR)

In [None]:
from sklearn.model_selection import ShuffleSplit
# One random 80/20 train/test partition. random_state pins the shuffle so that
# re-running the cell reproduces exactly the same split.
ss = ShuffleSplit(n_splits=1, train_size=0.8, test_size=0.2, random_state=0)

# split() yields (train indices, test indices) pairs; with n_splits=1 the
# first pair is the only one.
train_index, test_index = next(ss.split(X, y))

# Materialise the four arrays used by the following cells.
X_train = X[train_index]
X_test = X[test_index]
y_train = y[train_index]
y_test = y[test_index]

In [None]:
from sklearn import linear_model
# Logistic-regression classifier with scikit-learn's default hyperparameters.
# This single `clf` instance is re-fitted by several later cells.
clf = linear_model.LogisticRegression()

In [None]:
# Fit the classifier on the training split.
clf.fit(X_train, y_train)

In [None]:
# Mean accuracy of the fitted classifier on the held-out test split.
clf.score(X_test, y_test)

In [None]:
# Display the ground-truth labels of the test split.
y_test

In [None]:
# Predicted labels for the test split; compared against y_test below.
y_pred = clf.predict(X_test)

In [None]:
# Build the 2x2 confusion matrix by hand: the row index is the true label and
# the column index is the predicted label.
# Use an integer dtype because the cells are counts; np.zeros defaults to
# float64, which would store and display the counts as floats and disagree
# with the integer matrix returned by sklearn's confusion_matrix.
conf_mat = np.zeros((2, 2), dtype=int)

for true_label, est_label in zip(y_test, y_pred):
    conf_mat[true_label, est_label] += 1

In [None]:
# Show the hand-built confusion matrix (rows: true label, columns: predicted label).
print(conf_mat)

In [None]:
import pandas as pd
# Wrap the raw matrix in a DataFrame so that the rows (true label) and the
# columns (predicted label) carry readable names when displayed.
df = pd.DataFrame(
    data=conf_mat,
    index=["true 0", "true 1"],
    columns=["pred 0", "pred 1"],
)

In [None]:
# Plain-text rendering of the labelled confusion matrix.
print(df)

In [None]:
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix


In [None]:
# Fraction of test samples whose predicted label equals the true label
# (the same quantity that clf.score(X_test, y_test) reports).
accuracy_score(y_test, y_pred)

In [None]:
# scikit-learn's confusion matrix: entry [i, j] counts the samples whose true
# label is i and whose predicted label is j.
cmat = confusion_matrix(y_test, y_pred)
cmat

In [None]:
# True positives: true label 0 and predicted label 0 (row = true label,
# column = predicted label). This indexing treats label 0 as the "positive"
# class.
# NOTE(review): scikit-learn's usual binary convention is the opposite
# (label 1 = positive, cmat.ravel() -> tn, fp, fn, tp). Presumably label 0 is
# taken as positive on purpose for this dataset -- confirm against
# data.target_names.
TP = cmat[0,0] # true positive
TP

In [None]:
# True negatives: true label 1 and predicted label 1 (row = true label,
# column = predicted label). This indexing treats label 1 as the "negative"
# class, i.e. label 0 is the positive class in this notebook.
TN = cmat[1,1] # true negative
TN

In [None]:
# False positives: true label 1 (negative) but predicted label 0 (positive).
# Row = true label, column = predicted label; label 0 is the positive class
# in this notebook.
FP = cmat[1,0] # false positive
FP

In [None]:
# False negatives: true label 0 (positive) but predicted label 1 (negative).
# Row = true label, column = predicted label; label 0 is the positive class
# in this notebook.
FN = cmat[0,1] # false negative
FN

10-class problem (handwritten digits)

In [None]:
from sklearn.datasets import load_digits
# Replace the previous dataset with the handwritten-digits dataset.
# From here on `data`, `X` and `y` refer to the digits data.
data = load_digits()

# Feature matrix and digit labels.
X, y = data.data, data.target

In [None]:
# The same samples in image form, used below for plotting.
img = data.images

In [None]:
# Compare the shape of one feature row with the shape of the matching image.
# Later cells reshape 64-element rows to (8, 8), so X[0] is presumably the
# flattened form of img[0].
X[0].shape, img[0].shape

In [None]:
import matplotlib.pyplot as plt
%matplotlib inline
# Switch matplotlib's default colormap to grayscale; this also affects the
# imshow calls in later cells.
plt.gray()
# Draw the first digit image without any pixel interpolation.
plt.imshow(img[0], interpolation='none')
# Hide the axes; the trailing ';' suppresses the cell's text output.
plt.axis('off');

In [None]:
# Gallery of the raw images: one row per digit class (0-9), showing the first
# 15 samples of that class in a 10 x 15 grid of subplots.
for digit in range(10):
    digit_images = data.images[data.target == digit]
    first_slot = digit * 15 + 1  # subplot positions are 1-based
    for offset in range(15):
        plt.subplot(10, 15, first_slot + offset)
        plt.axis('off')
        plt.imshow(digit_images[offset], interpolation='none')

In [None]:
from sklearn.model_selection import ShuffleSplit
# Same reproducible 80/20 shuffled hold-out as in the binary example, now
# applied to the digits data.
ss = ShuffleSplit(
    n_splits=1, train_size=0.8, test_size=0.2, random_state=0
)

# n_splits=1, so the generator produces exactly one (train, test) index pair.
[(train_index, test_index)] = ss.split(X, y)

X_train, y_train = X[train_index], y[train_index]
X_test, y_test = X[test_index], y[test_index]

In [None]:
# Re-fit the existing `clf` instance on the digits training split; this
# replaces whatever the estimator was fitted on before.
clf.fit(X_train, y_train)

In [None]:
# Mean accuracy on the digits test split.
clf.score(X_test, y_test)

In [None]:
# Predicted digit labels for the test split (overwrites the earlier y_pred).
y_pred = clf.predict(X_test)

In [None]:
# Fraction of correctly classified test digits (the same quantity as
# clf.score on the test split).
accuracy_score(y_test, y_pred)

In [None]:
# 10 x 10 confusion matrix for the digit classifier:
# rows are the true digit, columns are the predicted digit.
conf_mat = confusion_matrix(y_test, y_pred)

# Label both axes 0..9 so the printed table can be read directly.
df = pd.DataFrame(conf_mat, index=range(10), columns=range(10))
print(df)

In [None]:
from sklearn.decomposition import PCA
# Learn a whitening PCA on the training split only (fit returns the estimator
# itself), then project both splits with that same fitted transform so no
# information from the test data leaks into the projection.
pca = PCA(whiten=True).fit(X_train)

X_train_pca, X_test_pca = (pca.transform(part) for part in (X_train, X_test))

In [None]:
# Same 10 x 15 gallery as for the raw images, but each tile now shows a
# training sample's whitened PCA coefficients laid out as an 8 x 8 grid.
# These are coefficient values, not pixel intensities.
for digit in range(10):
    digit_coeffs = X_train_pca[y_train == digit]
    first_slot = digit * 15 + 1  # subplot positions are 1-based
    for offset in range(15):
        plt.subplot(10, 15, first_slot + offset)
        plt.axis('off')
        plt.imshow(digit_coeffs[offset].reshape(8, 8), interpolation='none')

In [None]:
# Re-fit the classifier on the whitened PCA features and report its test
# accuracy in one chained call (fit returns the estimator itself).
clf.fit(X_train_pca, y_train).score(X_test_pca, y_test)

In [None]:
# Confusion matrix of the classifier trained on whitened PCA features
# (rows: true digit, columns: predicted digit).
y_pred_pca = clf.predict(X_test_pca)
conf_mat = confusion_matrix(y_test, y_pred_pca)

df = pd.DataFrame(conf_mat, index=range(10), columns=range(10))
print(df)

In [None]:
# ZCA whitening: multiply the whitened PCA coordinates by the principal axes
# (pca.components_) to rotate them back into the original feature space.
X_train_zca = X_train_pca @ pca.components_
X_test_zca = X_test_pca @ pca.components_

In [None]:
# 10 x 15 gallery of ZCA-whitened training samples: one row per digit class,
# each sample reshaped back to an 8 x 8 image.
for digit in range(10):
    digit_zca = X_train_zca[y_train == digit]
    first_slot = digit * 15 + 1  # subplot positions are 1-based
    for offset in range(15):
        plt.subplot(10, 15, first_slot + offset)
        plt.axis('off')
        plt.imshow(digit_zca[offset].reshape(8, 8), interpolation='none')

In [None]:
# Re-fit the classifier on the ZCA-whitened features and report its test
# accuracy in one chained call (fit returns the estimator itself).
clf.fit(X_train_zca, y_train).score(X_test_zca, y_test)

In [None]:
# Confusion matrix of the classifier trained on ZCA-whitened features
# (rows: true digit, columns: predicted digit).
y_pred_zca = clf.predict(X_test_zca)
conf_mat = confusion_matrix(y_test, y_pred_zca)

df = pd.DataFrame(conf_mat, index=range(10), columns=range(10))
print(df)

In [None]:
# Test accuracy as a function of how many leading principal components are
# given to the classifier: for i = 1 .. n_components, re-fit on the first i
# columns of the PCA features and score on the same columns of the test set.
# The upper bound is taken from the data instead of being hard-coded, so the
# cell keeps working if the PCA output has a different number of columns.
# Note: `clf` is left fitted on all components once the loop finishes.
n_components = X_train_pca.shape[1]

scores = []
for i in range(1, n_components + 1):
    clf.fit(X_train_pca[:, :i], y_train)
    score = clf.score(X_test_pca[:, :i], y_test)
    print(i, score)
    scores.append(score)
scores = np.array(scores)  # scores[k] is the accuracy with k + 1 components

In [None]:
# scores[k] holds the accuracy obtained with k + 1 components, so plot against
# an explicit 1-based x axis; the default 0-based index would shift every
# point one component to the left.
component_counts = np.arange(1, len(scores) + 1)
plt.plot(component_counts, scores)
plt.xlabel("number of principal components")
plt.ylabel("test accuracy")
# Zoom in on the high-accuracy range; scores below 0.9 fall outside the view.
plt.ylim(0.9, 1);