In [2]:
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.preprocessing import StandardScaler
import pandas as pd

In [3]:
### data load
house = pd.read_csv("http://youngho.iwinv.net/data/house_price_prediction.csv")
house.head()

Unnamed: 0,date,price,price_cat,bedrooms,bathrooms,sqft_living,sqft_lot,floors,waterfront,view,condition,sqft_above,sqft_basement,yr_built,yr_renovated,street,city,statezip,country
0,2014-05-02 0:00,313000.0,Class3,3,1.5,1340,7912,1.5,0,0,3,1340,0,1955,2005,18810 Densmore Ave N,Shoreline,WA 98133,USA
1,2014-05-02 0:00,342000.0,Class3,3,2.0,1930,11947,1.0,0,0,4,1930,0,1966,0,26206-26214 143rd Ave SE,Kent,WA 98042,USA
2,2014-05-02 0:00,420000.0,Class3,3,2.25,2000,8030,1.0,0,0,4,1000,1000,1963,0,857 170th Pl NE,Bellevue,WA 98008,USA
3,2014-05-02 0:00,550000.0,Class2,4,2.5,1940,10500,1.0,0,0,4,1140,800,1976,1992,9105 170th Ave NE,Redmond,WA 98052,USA
4,2014-05-02 0:00,490000.0,Class2,2,1.0,880,6380,1.0,0,0,3,880,0,1938,1994,522 NE 88th St,Seattle,WA 98115,USA


In [4]:
### 입력변수(x)와 반응변수(y)
x = house.loc[:,['bedrooms','sqft_living','view','condition','yr_built']]
x_std= StandardScaler().fit_transform(x)
y = house.iloc[:,2]

In [5]:
### Train & Test data
x_train, x_test, y_train, y_test = train_test_split(x_std,y,test_size = 0.4)

#### -선형분류
- kernel='linear'

In [6]:
### SVM
svc = SVC(kernel = "linear", C = 1)
model = svc.fit(x_train,y_train)

In [7]:
### 예측
y_pred = model.predict(x_test)

In [8]:
###  accuracy
model.score(x_test,y_test)

0.47260774287801316

#### -비선형 분류
- kernel = 'rbf'

In [12]:
### SVM
svc = SVC(kernel = "rbf", gamma = 1)
model = svc.fit(x_train,y_train)

In [13]:
### 예측
y_pred = model.predict(x_test)

In [14]:
###  accuracy
model.score(x_test,y_test)

0.4996347699050402

#### -최적의 모델 결정

In [15]:
from sklearn.model_selection import GridSearchCV

- 최적의 매개변수

In [18]:
# Set the parameters by cross-validation
tuned_parameters = {'kernel':['rbf'],
                   'gamma':[0.01,0.05,0.1,0.5,1],
                    'C':[0.05,0.1,1,10]
                   }

In [20]:
grid = GridSearchCV(SVC(),tuned_parameters)
%time grid.fit(x_train,y_train)

Wall time: 8.66 s


GridSearchCV(cv=None, error_score=nan,
             estimator=SVC(C=1.0, break_ties=False, cache_size=200,
                           class_weight=None, coef0=0.0,
                           decision_function_shape='ovr', degree=3,
                           gamma='scale', kernel='rbf', max_iter=-1,
                           probability=False, random_state=None, shrinking=True,
                           tol=0.001, verbose=False),
             iid='deprecated', n_jobs=None,
             param_grid={'C': [0.05, 0.1, 1, 10],
                         'gamma': [0.01, 0.05, 0.1, 0.5, 1],
                         'kernel': ['rbf']},
             pre_dispatch='2*n_jobs', refit=True, return_train_score=False,
             scoring=None, verbose=0)

In [21]:
grid.best_params_

{'C': 10, 'gamma': 0.1, 'kernel': 'rbf'}

In [22]:
###SVM
svc = SVC(kernel='rbf',C = grid.best_params_['C'],gamma = grid.best_params_['gamma'])
model=svc.fit(x_train,y_train)

In [24]:
### 예측
y_pred=model.predict(x_test)

In [26]:
### accuracy
model.score(x_test,y_test)

0.48794740686632576