## SVR 
<b>회귀의 경우</b>
> sklearn.svm.SVR(...)

<br>
- kernel : 알고리즘에서 사용할 커널 유형 (선형, 다항식, 방사 기저 함수(RBF), 시그모이드)<br>
&emsp;&emsp;&emsp;&emsp;({‘linear’, ‘poly’, ‘rbf’, ‘sigmoid’}, default=’rbf’)<br>
- degree : kernel이 'poly'일 때 다항식의 차수 (int, default=3)<br>
- gamma : 결정 경계의 곡률로 kernel이 'rbf', 'poly', 'sigmoid'일 때 튜닝 ({‘scale’, ‘auto’} or float, default=’scale’)<br>
- C : 규제(regularization) 파라미터로, 오차에 대한 벌칙의 크기. 값이 클수록 학습 데이터의 오차를 덜 허용하고 규제는 약해짐 (float, default=1.0)

### 장점

- 범주나 수치 예측 문제에 사용 가능
- 오류(노이즈) 데이터의 영향을 적게 받음
- 과적합되는 경우가 적음
- 신경망에 비해 사용하기 쉬움

<br>

### 단점

- 여러 개의 조합 테스트 필요
- 학습 속도가 느림
- 해석이 어렵고 복잡한 모델


<b>[예제]</b><br>kc_house_data 데이터의 train 데이터로 price를 예측하는 SVR 모델을 만들어라

In [7]:
import pandas as pd
from sklearn.model_selection import train_test_split

# Load the house data, discard the identifier and date columns,
# and keep only the first 1000 rows.
df = pd.read_csv('C:/adp/data/kc_house_data.csv').drop(columns=['id', 'date'])
df = df[:1000]

# Separate the target (price) from the features.
y = df['price']
X = df.drop(columns='price')

# One-hot encode the categorical 'waterfront' column.
X = pd.get_dummies(X, columns=['waterfront'])

# Hold out 30% of the rows for testing; the seed is fixed for reproducibility.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=2021
)

In [8]:
# Show the last five rows of the training features.
X_train.tail(n=5)

Unnamed: 0,bedrooms,bathrooms,sqft_living,sqft_lot,floors,view,condition,grade,sqft_above,sqft_basement,yr_built,yr_renovated,sqft_living15,sqft_lot15,waterfront_river_view,waterfront_standard
621,3,2.5,2060,5750,1.0,0,3,7,1330,730,1976,0,1920,5750,0,1
128,5,3.5,2320,4960,2.0,0,5,7,1720,600,1926,0,1700,4960,0,1
57,3,2.5,2420,4750,2.0,0,3,8,2420,0,2003,0,2690,4750,0,1
341,4,3.5,3040,8414,2.0,0,3,9,2420,620,2010,0,3470,8066,0,1
116,3,3.5,1590,1102,3.0,0,3,8,1590,0,2010,0,1620,3166,0,1


In [9]:
# Standardization: StandardScaler rescales each feature column
# (by default to zero mean and unit variance).
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()

# Fit on the training split only and reuse those statistics for the test split.
# The original row labels are passed through explicitly: without `index=`,
# the new frames would get a fresh 0..n-1 index and would no longer align
# by label with y_train / y_test in any later pandas operation.
X_train_scaled = pd.DataFrame(
    scaler.fit_transform(X_train),
    columns=X_train.columns,
    index=X_train.index,
)
X_test_scaled = pd.DataFrame(
    scaler.transform(X_test),
    columns=X_train.columns,
    index=X_test.index,
)

In [10]:
from sklearn.svm import SVR
from sklearn.model_selection import GridSearchCV

# Candidate hyper-parameter values, one dictionary per kernel.
# Wider alternative (not searched here): 'linear' with C up to 10000,
# 'rbf' with gamma up to 3.0.
linear_grid = {'kernel': ['linear'], 'C': [10, 30, 100, 300, 1000]}
rbf_grid = {
    'kernel': ['rbf'],
    'C': [1, 3, 10, 30, 100, 300],
    'gamma': [0.03, 0.1, 0.3, 1.0],
}
param_grid = [linear_grid, rbf_grid]

# Exhaustive search over both grids with 5-fold cross-validation.
grid_svm = GridSearchCV(estimator=SVR(), param_grid=param_grid, cv=5)
grid_svm.fit(X_train_scaled, y_train)

# One row per parameter combination, best mean CV score first.
cv_results = grid_svm.cv_results_
result = pd.DataFrame(cv_results['params']).assign(
    mean_test_score=cv_results['mean_test_score']
)
result.sort_values(by='mean_test_score', ascending=False)

Unnamed: 0,C,kernel,gamma,mean_test_score
4,1000,linear,,0.55995
3,300,linear,,0.470288
2,100,linear,,0.301292
1,30,linear,,0.075465
0,10,linear,,-0.020733
25,300,rbf,0.03,-0.04531
26,300,rbf,0.1,-0.0518
27,300,rbf,0.3,-0.071668
21,100,rbf,0.03,-0.071987
22,100,rbf,0.1,-0.074404


In [11]:
# Report the winning parameter combination and the estimator refit with it.
best_params = grid_svm.best_params_
best_model = grid_svm.best_estimator_
print("Best Parameters:\n", best_params)
print("Best Estimators:\n", best_model)

Best Parameters:
 {'C': 1000, 'kernel': 'linear'}
Best Estimators:
 SVR(C=1000, kernel='linear')


In [12]:
from sklearn.svm import SVR
from sklearn.model_selection import GridSearchCV

# Second search: much larger C values with smaller gamma values.
# No kernel is listed, so SVR's default kernel is used.
param_grid = [
    {
        'C': [10000, 100000],
        'gamma': [0.001, 0.01, 0.1],
    },
]

# 5-fold cross-validated search on the scaled training data.
grid_svm = GridSearchCV(estimator=SVR(), param_grid=param_grid, cv=5)
grid_svm.fit(X_train_scaled, y_train)

# One row per parameter combination, best mean CV score first.
cv_results = grid_svm.cv_results_
result = pd.DataFrame(cv_results['params']).assign(
    mean_test_score=cv_results['mean_test_score']
)
result.sort_values(by='mean_test_score', ascending=False)

Unnamed: 0,C,gamma,mean_test_score
4,100000,0.01,0.559529
5,100000,0.1,0.45784
3,100000,0.001,0.408848
1,10000,0.01,0.334372
2,10000,0.1,0.280932
0,10000,0.001,0.025947


In [13]:
from sklearn.svm import SVR
from sklearn.model_selection import GridSearchCV

# Third search: push C higher still, with the two smallest gamma values.
# No kernel is listed, so SVR's default kernel is used.
param_grid = [
    {
        'C': [10000000, 100000000],
        'gamma': [0.001, 0.01],
    },
]

# NOTE(review): this search uses cv=3 while the earlier searches use cv=5,
# so the mean scores are not strictly comparable -- presumably reduced to
# shorten the run time; confirm.
grid_svm = GridSearchCV(estimator=SVR(), param_grid=param_grid, cv=3)
grid_svm.fit(X_train_scaled, y_train)

# One row per parameter combination, best mean CV score first.
cv_results = grid_svm.cv_results_
result = pd.DataFrame(cv_results['params']).assign(
    mean_test_score=cv_results['mean_test_score']
)
result.sort_values(by='mean_test_score', ascending=False)

Unnamed: 0,C,gamma,mean_test_score
0,10000000,0.001,0.52636
2,100000000,0.001,0.452758
1,10000000,0.01,0.449436
3,100000000,0.01,0.412481
