# 9. MNIST 데이터셋에 SVM 분류기 OvR 전략을 사용해 훈련

In [1]:
from sklearn.datasets import fetch_openml
from matplotlib import pyplot as plt
from sklearn.svm import SVC
from sklearn.preprocessing import StandardScaler
from sklearn.multiclass import OneVsRestClassifier
from sklearn.model_selection import train_test_split, RandomizedSearchCV
from scipy.stats import reciprocal, uniform
from sklearn.metrics import accuracy_score

In [2]:
mnist = fetch_openml('mnist_784', version=1, as_frame=False)
train = mnist['data']
test = mnist['target']

# 학습, 테스트 데이터 분리
X_train, X_test, y_train, y_test = train_test_split(train, test, train_size=0.8, random_state=42)

In [3]:
scaler = StandardScaler()
Scaled_X_train = scaler.fit_transform(X_train)
Scaled_X_test = scaler.transform(X_test)

In [4]:
svm_clf = SVC(gamma="0.005", C=5)
ovr_clf = OneVsRestClassifier(svm_clf)
param_distribution = {
    "gamma": reciprocal(0.001, 0.1),
    "C": uniform(1, 10)
}
random_search = RandomizedSearchCV(svm_clf, param_distribution, n_iter=10, verbose=2, cv=3, random_state=42)
random_search.fit(Scaled_X_train[:5000], y_train[:5000])

Fitting 3 folds for each of 10 candidates, totalling 30 fits
[CV] END .....C=4.745401188473625, gamma=0.07969454818643935; total time=  24.1s
[CV] END .....C=4.745401188473625, gamma=0.07969454818643935; total time=  24.5s
[CV] END .....C=4.745401188473625, gamma=0.07969454818643935; total time=  13.7s
[CV] END .....C=8.31993941811405, gamma=0.015751320499779727; total time=  11.9s
[CV] END .....C=8.31993941811405, gamma=0.015751320499779727; total time=  12.5s
[CV] END .....C=8.31993941811405, gamma=0.015751320499779727; total time=   9.5s
[CV] END ....C=2.560186404424365, gamma=0.002051110418843397; total time=   4.6s
[CV] END ....C=2.560186404424365, gamma=0.002051110418843397; total time=   4.4s
[CV] END ....C=2.560186404424365, gamma=0.002051110418843397; total time=   5.0s
[CV] END ....C=1.5808361216819946, gamma=0.05399484409787434; total time=  11.4s
[CV] END ....C=1.5808361216819946, gamma=0.05399484409787434; total time=  13.9s
[CV] END ....C=1.5808361216819946, gamma=0.05399

In [5]:
y_pred = random_search.best_estimator_.predict(Scaled_X_test)
acc = accuracy_score(y_test, y_pred)
print(acc)

0.9274285714285714


# 10. 캘리포니아 주택 가격 데이터셋에 SVM 회귀를 훈련

In [3]:
from sklearn.datasets import fetch_california_housing
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from sklearn.svm import SVR
from sklearn.model_selection import RandomizedSearchCV
from scipy.stats import reciprocal, uniform

In [4]:
housing = fetch_california_housing()
train = housing.data
test = housing.target

# 학습, 테스트 데이터 분리
X_train, X_test, y_train, y_test = train_test_split(train, test, train_size=0.8, random_state=42)

In [9]:
housing.feature_names

['MedInc',
 'HouseAge',
 'AveRooms',
 'AveBedrms',
 'Population',
 'AveOccup',
 'Latitude',
 'Longitude']

In [11]:
train[0]

array([   8.3252    ,   41.        ,    6.98412698,    1.02380952,
        322.        ,    2.55555556,   37.88      , -122.23      ])

In [12]:
scaler = StandardScaler()
Scaled_X_train = scaler.fit_transform(X_train)
Scaled_X_test = scaler.transform(X_test)

In [13]:
param_distribution = {
    "gamma": reciprocal(0.001, 0.1),
    "C": uniform(1, 10)
}
random_search = RandomizedSearchCV(SVR(), param_distribution, n_iter=10, verbose=2, cv=3, random_state=42)
random_search.fit(Scaled_X_train, y_train)

Fitting 3 folds for each of 10 candidates, totalling 30 fits
[CV] END .....C=4.745401188473625, gamma=0.07969454818643935; total time=   9.0s
[CV] END .....C=4.745401188473625, gamma=0.07969454818643935; total time=   8.5s
[CV] END .....C=4.745401188473625, gamma=0.07969454818643935; total time=   8.4s
[CV] END .....C=8.31993941811405, gamma=0.015751320499779727; total time=   7.2s
[CV] END .....C=8.31993941811405, gamma=0.015751320499779727; total time=   8.0s
[CV] END .....C=8.31993941811405, gamma=0.015751320499779727; total time=   8.5s
[CV] END ....C=2.560186404424365, gamma=0.002051110418843397; total time=   6.6s
[CV] END ....C=2.560186404424365, gamma=0.002051110418843397; total time=   7.6s
[CV] END ....C=2.560186404424365, gamma=0.002051110418843397; total time=   7.0s
[CV] END ....C=1.5808361216819946, gamma=0.05399484409787434; total time=   7.2s
[CV] END ....C=1.5808361216819946, gamma=0.05399484409787434; total time=   8.2s
[CV] END ....C=1.5808361216819946, gamma=0.05399

In [14]:
y_pred = random_search.best_estimator_.predict(Scaled_X_test)
mse = mean_squared_error(y_test, y_pred)
print(mse)

NameError: name 'acc' is not defined