# Support Vector Machine

Links:
https://stackoverflow.com/questions/46556795/fitting-sklearns-svm-classifier-with-data-from-a-pandas-dataframe
https://scikit-learn.org/stable/modules/svm.html 
https://medium.com/pursuitnotes/support-vector-regression-in-6-steps-with-python-c4569acd062d 
https://medium.com/@niousha.rf/support-vector-regressor-theory-and-coding-exercise-in-python-ca6a7dfda927

In [24]:
# import the necessary libraries
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVR
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score

In [2]:
# read in the dataset
df = pd.read_csv('Resources/clean_realestate_data.csv') 

In [3]:
# reorder the columns
df = df[[ 'bed', 'bath','acre_lot','zip_code','house_size', 'price']]
df

Unnamed: 0,bed,bath,acre_lot,zip_code,house_size,price
0,3.0,3.0,0.45,1001.0,2314.0,525000.0
1,3.0,2.0,0.36,1001.0,1276.0,289900.0
2,3.0,2.0,0.46,1001.0,1476.0,384900.0
3,3.0,2.0,1.76,1001.0,1968.0,199999.0
4,4.0,2.0,2.00,1002.0,1607.0,419000.0
...,...,...,...,...,...,...
1086258,4.0,2.0,0.33,99354.0,3600.0,359900.0
1086259,3.0,2.0,0.10,99354.0,1616.0,350000.0
1086260,6.0,3.0,0.50,99354.0,3200.0,440000.0
1086261,2.0,1.0,0.09,99354.0,933.0,179900.0


In [4]:
df[(df.bed < 15) & (df.price < 10000000) & (df.acre_lot != 0)]

Unnamed: 0,bed,bath,acre_lot,zip_code,house_size,price
0,3.0,3.0,0.45,1001.0,2314.0,525000.0
1,3.0,2.0,0.36,1001.0,1276.0,289900.0
2,3.0,2.0,0.46,1001.0,1476.0,384900.0
3,3.0,2.0,1.76,1001.0,1968.0,199999.0
4,4.0,2.0,2.00,1002.0,1607.0,419000.0
...,...,...,...,...,...,...
1086258,4.0,2.0,0.33,99354.0,3600.0,359900.0
1086259,3.0,2.0,0.10,99354.0,1616.0,350000.0
1086260,6.0,3.0,0.50,99354.0,3200.0,440000.0
1086261,2.0,1.0,0.09,99354.0,933.0,179900.0


In [5]:
# split the dataframes into the features and the target
X = df.iloc[:,1:5].values  
y = df.iloc[:,5].values 

In [16]:
# split the data into training and testing sets

# Note: If the test size is any smaller that 0.9, it takes a long time to run

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.9, random_state=42)

In [17]:
# scale the data
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [18]:
# fit the data to a linear kernel for Support Vector Regressor
svr_lin = SVR(kernel = 'linear')
svr_lin.fit(X_train_scaled, y_train)


In [22]:
# fit the data to a radial kernel for Support Vector Regressor
svr_rbf = SVR(kernel = 'rbf')
svr_rbf.fit(X_train_scaled, y_train)

In [26]:
# fit the data to a polynomial kernal for Support Vector Regressor
svr_poly = SVR(kernel = 'poly')
svr_poly.fit(X_train_scaled, y_train)

In [19]:
# make predictions for the linear model
y_pred_linear = svr_lin.predict(X_test)

In [25]:
# make predictions for the radial model
y_pred_radial = svr_rbf.predict(X_test)

In [27]:
# make predictions for the polynomial model
y_pred_poly = svr_poly.predict(X_test)

In [35]:
# Evaluating the model

 
# Evaluating the linear model
print('Linear Kernel')
#mse_linear = mean_squared_error(y_test,y_pred_linear)
#print(f'Mean Squared Error: {mse_linear}')
 
#r2_linear = r2_score(y_test,y_pred_linear)
#print(f'R-squared: {r2_linear}')

# Evaluating the radial model
print('Radial Kernel')
mse_radial = mean_squared_error(y_test,y_pred_radial)
print(f'Mean Squared Error: {mse_radial}')
 
r2_radial = r2_score(y_test,y_pred_radial)
print(f'R-squared: {r2_radial}')

# Evaluating the polynomial model
print('Polynomial Kernel')
mse_poly = mean_squared_error(y_test,y_pred_poly)
print(f'Mean Squared Error: {mse_poly}')
 
r2_poly = r2_score(y_test,y_pred_poly)
print(f'R-squared: {r2_poly}')

Linear Kernel
Radial Kernel
Mean Squared Error: 1472448720110.057
R-squared: -0.02328704293147199
Polynomial Kernel
Mean Squared Error: 7.422832246634264e+34
R-squared: -5.158541656559026e+22
