#### 서포트벡터머신 SVM 
- classification
- data :  breast-cancer-wisconsin.csv
- 유방암 데이터 정상, 환자 에측

In [2]:
import warnings
warnings.filterwarnings('ignore')
import pandas as pd
data = pd.read_csv('breast-cancer-wisconsin.csv')
X = data[data.columns[1:10]]
y = data[['Class']]

# train, test 나누기
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, stratify=y, random_state=42)

# 정규화
from sklearn.preprocessing import MinMaxScaler
scaler = MinMaxScaler()
scaler.fit(X_train)
X_scaled_train = scaler.transform(X_train)
X_scaled_test = scaler.transform(X_test)

In [4]:
# 모델적용
from sklearn.svm import SVC
model = SVC() # 기본설정 하이퍼파라미터 C=1
model.fit(X_scaled_train, y_train)
pred_train = model.predict(X_scaled_train)
model.score(X_scaled_train, y_train)

0.984375

In [6]:
from sklearn.metrics import confusion_matrix
confusion_train = confusion_matrix(y_train, pred_train)
print('train data 오차행렬 : \n', confusion_train)

train data 오차행렬 : 
 [[329   4]
 [  4 175]]


In [10]:
from sklearn.metrics import classification_report
cfreport_train = classification_report(y_train, pred_train)
print('train data 분류예측레포트:\n' ,cfreport_train)

train data 분류예측레포트:
               precision    recall  f1-score   support

           0       0.99      0.99      0.99       333
           1       0.98      0.98      0.98       179

    accuracy                           0.98       512
   macro avg       0.98      0.98      0.98       512
weighted avg       0.98      0.98      0.98       512



In [11]:
pred_test = model.predict(X_scaled_test)
model.score(X_scaled_test, y_test)

0.9649122807017544

In [13]:
confusion_test = confusion_matrix(y_test, pred_test)
print('test data 오차행렬:\n', confusion_test)

test data 오차행렬:
 [[106   5]
 [  1  59]]


In [15]:
cfreport_test = classification_report(y_test, pred_test)
print('testdata 분류에츨레포트:\n', cfreport_test)

testdata 분류에츨레포트:
               precision    recall  f1-score   support

           0       0.99      0.95      0.97       111
           1       0.92      0.98      0.95        60

    accuracy                           0.96       171
   macro avg       0.96      0.97      0.96       171
weighted avg       0.97      0.96      0.97       171



In [16]:
# Grid Search
param_grid = {'kernel': ['rbf'],
             'C': [0.001,0.01,0.1,1,10,100],
             'gamma': [0.001,0.01,0.1,1,10,100]}
from sklearn.model_selection import GridSearchCV
grid_search = GridSearchCV(SVC(),param_grid, cv=5)
grid_search.fit(X_scaled_train, y_train)

GridSearchCV(cv=5, estimator=SVC(),
             param_grid={'C': [0.001, 0.01, 0.1, 1, 10, 100],
                         'gamma': [0.001, 0.01, 0.1, 1, 10, 100],
                         'kernel': ['rbf']})

In [19]:
print("Best Parameter: {}".format(grid_search.best_params_))
print("Best socre {:.4f}".format(grid_search.best_score_))
print("TestSet score {:.4f}".format(grid_search.score(X_scaled_test, y_test)))

Best Parameter: {'C': 100, 'gamma': 0.01, 'kernel': 'rbf'}
Best socre 0.9746
TestSet score 0.9591


In [20]:
# Random Search
from scipy.stats import randint
param_distribs = {'kernel': ['rbf'],
                 'C': randint(low=0.001, high=100),
                 'gamma': randint(low=0.001, high=100)}
from sklearn.model_selection import RandomizedSearchCV
random_search = RandomizedSearchCV(SVC(),param_distributions=param_distribs,
                                  n_iter=100, cv=5)
random_search.fit(X_scaled_train, y_train)

RandomizedSearchCV(cv=5, estimator=SVC(), n_iter=100,
                   param_distributions={'C': <scipy.stats._distn_infrastructure.rv_frozen object at 0x7fb5301f1e10>,
                                        'gamma': <scipy.stats._distn_infrastructure.rv_frozen object at 0x7fb5301f1f90>,
                                        'kernel': ['rbf']})

In [21]:
print("Best Parameter: {}".format(random_search.best_params_))
print("Best Score: {:.4f}".format(random_search.best_score_))
print("Testset Score: {:.4f}".format(random_search.score(X_scaled_test,y_test)))

Best Parameter: {'C': 18, 'gamma': 1, 'kernel': 'rbf'}
Best Score: 0.9628
Testset Score: 0.9532
