## SVR 
<b>회귀의 경우</b>
> sklearn.svm.SVR(...)

<br>
- kernel : 알고리즘에서 사용할 커널 유형 (선형, 다항식, 방사 기저 함수(RBF), 시그모이드)<br>
&emsp;&emsp;&emsp;&emsp;({‘linear’, ‘poly’, ‘rbf’, ‘sigmoid’}, default=’rbf’)<br>
- degree : kernel이 poly일 때 그 차수 (int, default=3)<br>
- gamma : 결정 경계의 곡률로 kernel이 'rbf', 'poly', 'sigmoid'일 때 튜닝 ({‘scale’, ‘auto’} or float, default=’scale’)<br>
- C : 규제 파라미터로, 회귀에서는 샘플이 허용 오차 범위(ε-튜브)를 벗어나는 것을 허용하는 정도를 조절하며 값이 클수록 오차를 덜 허용함 (float, default=1.0)

### 장점

- 범주나 수치 예측 문제에 사용 가능
- 오류 데이터에 대한 영향이 적음
- 과적합되는 경우가 적음
- 신경망에 비해 사용하기 쉬움

<br>

### 단점

- 여러 개의 조합 테스트 필요
- 학습 속도가 느림
- 해석이 어렵고 복잡한 모델
- __데이터 간 거리를 기반으로 차원을 다루는 개념이기에 스케일링 등의 전처리가 필요함__


<b>[예제]</b><br>kc_house_data 데이터의 train 데이터로 price를 예측하는 SVR 모델을 만들어라

In [2]:
import pandas as pd
from sklearn.model_selection import train_test_split

# Load the kc_house_data CSV and discard the 'id' and 'date' columns,
# which are not used as features.
df = pd.read_csv('../data/kc_house_data.csv').drop(columns=['id', 'date'])

# 'price' is the regression target; every remaining column is a feature.
y = df['price']
X = df.drop(columns='price')

# One-hot encode the 'waterfront' column into indicator columns.
X = pd.get_dummies(X, columns=['waterfront'])

# Hold out 30% of the rows for testing; the fixed seed makes the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=2021
)

In [7]:
# Inspect the training feature matrix (rendered as a table by the notebook).
X_train

Unnamed: 0,bedrooms,bathrooms,sqft_living,sqft_lot,floors,view,condition,grade,sqft_above,sqft_basement,yr_built,yr_renovated,sqft_living15,sqft_lot15,waterfront_river_view,waterfront_standard
835,3,1.75,1920,11340,1.0,0,4,7,1230,690,1977,0,1980,9638,0,1
11093,5,3.50,3700,8504,2.0,0,3,8,2750,950,1950,2014,2370,6246,0,1
21186,6,4.50,3390,7200,2.0,0,3,8,2440,950,2007,0,2040,7214,0,1
1962,3,2.00,2660,4500,1.0,0,4,7,1330,1330,1922,0,1790,5000,0,1
14013,4,2.25,3780,5160,2.0,0,4,9,2510,1270,1907,0,2740,5160,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
16347,3,1.75,1500,6180,1.0,0,4,6,1060,440,1947,0,1740,6180,0,1
20318,4,2.50,2190,3855,2.0,0,3,8,2190,0,2010,0,2190,3600,0,1
2669,3,1.00,1220,6000,1.0,0,5,7,1220,0,1961,0,1420,13137,0,1
17536,3,1.75,1230,12000,1.0,0,3,6,1230,0,1970,0,1550,12000,0,1


In [8]:
from sklearn.preprocessing import StandardScaler

# Standardize every feature. The scaler is fitted on the training split only and
# then reused on the test split, so no statistics from the test rows leak into it.
scaler = StandardScaler()

# fit_transform()/transform() return plain NumPy arrays, so the results are
# re-wrapped as DataFrames. Passing the original index keeps each scaled row
# labelled like its counterpart in y_train / y_test; without it the frames would
# get a fresh 0..n-1 index and any index-aligned operation against the targets
# (concat, arithmetic, joins) would silently mismatch rows.
X_train_scaled = pd.DataFrame(
    scaler.fit_transform(X_train),
    columns=X_train.columns,
    index=X_train.index,
)
X_test_scaled = pd.DataFrame(
    scaler.transform(X_test),
    columns=X_train.columns,
    index=X_test.index,
)

In [10]:
from sklearn.svm import SVR
from sklearn.model_selection import GridSearchCV

# Define the parameter values to test as dictionaries: one sub-grid for the
# linear kernel (C only) and one for the RBF kernel (C x gamma).
# NOTE(review): the recorded output of this cell lists values that are not in
# this grid (linear C=3000/30000, rbf C=1000, gamma=0.01) - it appears to come
# from an earlier, wider grid. Re-run the cell to refresh the output.
param_grid = [
    {
        'kernel': ['linear'],
        'C': [10, 30, 100, 300, 1000, 10000],
    },
    {
        'kernel': ['rbf'],
        'C': [1, 3, 10, 30, 100, 300],
        'gamma': [0.03, 0.1, 0.3, 1.0, 3.0],
    },
]

# Exhaustive search over both sub-grids with 5-fold cross-validation.
grid_svm = GridSearchCV(estimator=SVR(), param_grid=param_grid, cv=5)
grid_svm.fit(X_train_scaled, y_train)

# One row per parameter combination, with its mean cross-validated score attached.
result = pd.DataFrame(grid_svm.cv_results_['params']).assign(
    mean_test_score=grid_svm.cv_results_['mean_test_score']
)

# Show the combinations from best to worst score.
result.sort_values('mean_test_score', ascending=False)

Unnamed: 0,C,kernel,gamma,mean_test_score
7,30000.0,linear,,0.623048
6,10000.0,linear,,0.622943
5,3000.0,linear,,0.622394
4,1000.0,linear,,0.620853
3,300.0,linear,,0.614536
2,100.0,linear,,0.597755
1,30.0,linear,,0.543296
0,10.0,linear,,0.417117
44,1000.0,rbf,0.01,0.395513
45,1000.0,rbf,0.03,0.390394


In [11]:
# Report the best parameter combination found by the grid search and the
# estimator object configured with those parameters.
print("Best Parameters:\n", grid_svm.best_params_)
print("Best Estimators:\n", grid_svm.best_estimator_)

Best Parameters:
 {'C': 30000.0, 'kernel': 'linear'}
Best Estimators:
 SVR(C=30000.0, kernel='linear')


In [14]:
from sklearn.svm import SVR
from sklearn.model_selection import GridSearchCV

# Define the parameter values to test as dictionaries. No kernel is listed, so
# SVR's default kernel is used; only C and gamma vary (2 x 3 combinations).
param_grid = [
    {
        'C': [10000, 100000],
        'gamma': [0.001, 0.01, 0.1],
    },
]

# Exhaustive search with 5-fold cross-validation.
grid_svm = GridSearchCV(estimator=SVR(), param_grid=param_grid, cv=5)
grid_svm.fit(X_train_scaled, y_train)

# One row per parameter combination, with its mean cross-validated score attached.
result = pd.DataFrame(grid_svm.cv_results_['params']).assign(
    mean_test_score=grid_svm.cv_results_['mean_test_score']
)

# Show the combinations from best to worst score.
result.sort_values('mean_test_score', ascending=False)

Unnamed: 0,C,gamma,mean_test_score
4,100000,0.01,0.665219
3,100000,0.001,0.623832
5,100000,0.1,0.594745
1,10000,0.01,0.568972
0,10000,0.001,0.491987
2,10000,0.1,0.45977


In [16]:
from sklearn.svm import SVR
from sklearn.model_selection import GridSearchCV

# Define the parameter values to test as dictionaries. No kernel is listed, so
# SVR's default kernel is used; this round probes much larger C values
# (2 x 2 combinations).
param_grid = [
    {
        'C': [10000000, 100000000],
        'gamma': [0.001, 0.01],
    },
]

# Exhaustive search, this time with 3-fold cross-validation.
grid_svm = GridSearchCV(estimator=SVR(), param_grid=param_grid, cv=3)
grid_svm.fit(X_train_scaled, y_train)

# One row per parameter combination, with its mean cross-validated score attached.
result = pd.DataFrame(grid_svm.cv_results_['params']).assign(
    mean_test_score=grid_svm.cv_results_['mean_test_score']
)

# Show the combinations from best to worst score.
result.sort_values('mean_test_score', ascending=False)

Unnamed: 0,C,gamma,mean_test_score
1,10000000,0.01,0.736007
2,100000000,0.001,0.722155
0,10000000,0.001,0.719326
3,100000000,0.01,0.714029
