# 비정형 빅데이터 응용과 실습 - Week 05
Scikit-learn 라이브러리를 활용한 Digit Recognition을 연습합니다.
- https://scikit-learn.org/stable/auto_examples/classification/plot_digits_classification.html
____

In [None]:
# Install TensorFlow using the pip module of the interpreter that runs this
# notebook kernel (sys.executable), via IPython's `!` shell escape.
import sys
!{sys.executable} -m pip install tensorflow

## 1. Dataset 다운로드

In [None]:
import tensorflow as tf

# load_data() yields a (train, test) pair, each of which is an
# (images, labels) tuple; unpack it in two explicit steps.
train_split, test_split = tf.keras.datasets.mnist.load_data()
train_images, train_labels = train_split
test_images, test_labels = test_split

In [None]:
# Keep only the first `sample_size` training examples (images and labels are
# sliced identically so they stay aligned); the test split is left untouched.
sample_size = 1000
train_images, train_labels = train_images[:sample_size], train_labels[:sample_size]

## 2. 관련 라이브러리 import

In [None]:

# Standard scientific Python imports
import matplotlib.pyplot as plt

# Import datasets, classifiers and performance metrics
from sklearn import metrics
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split

In [None]:
# Preview the first four training digits in a single row, each titled with
# its ground-truth label.
images_and_labels = list(zip(train_images, train_labels))
_, axes = plt.subplots(nrows=1, ncols=4)

for ax, sample in zip(axes, images_and_labels[:4]):
    image, label = sample
    ax.set_axis_off()  # hide ticks and frame; only the digit matters here
    ax.imshow(image, cmap=plt.cm.gray_r, interpolation='nearest')
    ax.set_title('Training: %i' % label)

## 3. 모델 훈련

In [None]:
# Flatten every image into a 1-D feature vector so the data has the
# (n_samples, n_features) layout expected by the estimator.
X_train = train_images.reshape(len(train_images), -1)
X_test = test_images.reshape(len(test_images), -1)

# Labels are used as-is.
y_train = train_labels
y_test = test_labels

# Fit a logistic-regression classifier on the flattened training images.
classifier = LogisticRegression(max_iter=10000)
classifier.fit(X_train, y_train)

## 4. 모델 검증

In [None]:
# Predict a class for every flattened test image.
predicted = classifier.predict(X_test)

# Show the first four test digits, each titled with the predicted class.
_, axes = plt.subplots(1, 4)
images_and_predictions = list(zip(test_images, predicted))
for ax, (image, prediction) in zip(axes, images_and_predictions[:4]):
    ax.set_axis_off()
    ax.imshow(image, cmap=plt.cm.gray_r, interpolation='nearest')
    ax.set_title('Prediction: %i' % prediction)

# Text summary of the classification metrics on the test split.
print("Classification report for classifier %s:\n%s\n"
      % (classifier, metrics.classification_report(y_test, predicted)))

# metrics.plot_confusion_matrix() was deprecated in scikit-learn 1.0 and
# removed in 1.2, so calling it raises AttributeError on current releases.
# ConfusionMatrixDisplay.from_predictions() (scikit-learn >= 1.0) is the
# documented replacement; it also reuses `predicted` instead of running
# classifier.predict(X_test) a second time.
disp = metrics.ConfusionMatrixDisplay.from_predictions(y_test, predicted)
disp.figure_.suptitle("Confusion Matrix")
print("Confusion matrix:\n%s" % disp.confusion_matrix)

plt.show()

## 5. 모델 저장

In [None]:
from joblib import dump, load

# Serialize the fitted classifier to a file in the current working directory.
dump(value=classifier, filename='my_digit_model.joblib')


## 6. 저장 확인

In [None]:
# Read the serialized classifier back from disk and print its repr to confirm
# that the round trip produced an estimator object.
clf = load(filename='my_digit_model.joblib')
print(clf)

In [None]:
# True labels of the first ten test samples.
y_test[:10]

In [None]:
# Shape of the flattened test matrix: (number of test images, features per image).
X_test.shape

In [None]:
# Run the classifier reloaded from disk on the first ten test samples.
clf.predict(X_test[:10])