# Support Vector Machine

Links:
https://stackoverflow.com/questions/46556795/fitting-sklearns-svm-classifier-with-data-from-a-pandas-dataframe
https://scikit-learn.org/stable/modules/svm.html 
https://medium.com/pursuitnotes/support-vector-regression-in-6-steps-with-python-c4569acd062d 
https://medium.com/@niousha.rf/support-vector-regressor-theory-and-coding-exercise-in-python-ca6a7dfda927

In [36]:
# import libraries
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVR
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score
from sqlalchemy.ext.automap import automap_base
from sqlalchemy import create_engine 
from sqlalchemy import text

In [37]:
#Read data and create dataframe

filtered_df = pd.read_csv("Resources/filtered_df.csv")
#filtered_df

# Create a SQLite database engine
engine = create_engine('sqlite:///filtered_df.sqlite')

# reflect an existing database into a new model
Base=automap_base()

# reflect the tables
Base.prepare(autoload_with=engine)

# Write DataFrame to SQLite database
filtered_df.to_sql('realtor_filtered', con=engine, if_exists='replace', index=False)

# Confirm the data has been written by querying the database
query = "SELECT * FROM realtor_filtered;"  # Example query
with engine.connect() as conn:
    result = conn.execute(text(query))

In [38]:
df = result
df = pd.DataFrame(df)
df

Unnamed: 0,status,price,bed,bath,acre_lot,city,state,zip_code,house_size,prev_sold_date
0,for_sale,180000.0,2.0,1.0,0.34,Agawam,Massachusetts,1001.0,676.0,
1,for_sale,239900.0,3.0,1.0,0.46,Agawam,Massachusetts,1001.0,1196.0,
2,for_sale,525000.0,3.0,3.0,0.45,Agawam,Massachusetts,1001.0,2314.0,2014-06-25
3,for_sale,289900.0,3.0,2.0,0.36,Agawam,Massachusetts,1001.0,1276.0,2012-10-12
4,for_sale,275000.0,4.0,2.0,0.11,Agawam,Massachusetts,1001.0,1732.0,
...,...,...,...,...,...,...,...,...,...,...
1353428,sold,359900.0,4.0,2.0,0.33,Richland,Washington,99354.0,3600.0,2022-03-25
1353429,sold,350000.0,3.0,2.0,0.10,Richland,Washington,99354.0,1616.0,2022-03-25
1353430,sold,440000.0,6.0,3.0,0.50,Richland,Washington,99354.0,3200.0,2022-03-24
1353431,sold,179900.0,2.0,1.0,0.09,Richland,Washington,99354.0,933.0,2022-03-24


In [39]:
# reorder the columns
df = df[[ 'bed', 'bath','acre_lot','zip_code','house_size', 'price']]
df

Unnamed: 0,bed,bath,acre_lot,zip_code,house_size,price
0,2.0,1.0,0.34,1001.0,676.0,180000.0
1,3.0,1.0,0.46,1001.0,1196.0,239900.0
2,3.0,3.0,0.45,1001.0,2314.0,525000.0
3,3.0,2.0,0.36,1001.0,1276.0,289900.0
4,4.0,2.0,0.11,1001.0,1732.0,275000.0
...,...,...,...,...,...,...
1353428,4.0,2.0,0.33,99354.0,3600.0,359900.0
1353429,3.0,2.0,0.10,99354.0,1616.0,350000.0
1353430,6.0,3.0,0.50,99354.0,3200.0,440000.0
1353431,2.0,1.0,0.09,99354.0,933.0,179900.0


In [40]:
df[(df.bed < 15) & (df.price < 10000000) & (df.acre_lot != 0)]

Unnamed: 0,bed,bath,acre_lot,zip_code,house_size,price
0,2.0,1.0,0.34,1001.0,676.0,180000.0
1,3.0,1.0,0.46,1001.0,1196.0,239900.0
2,3.0,3.0,0.45,1001.0,2314.0,525000.0
3,3.0,2.0,0.36,1001.0,1276.0,289900.0
4,4.0,2.0,0.11,1001.0,1732.0,275000.0
...,...,...,...,...,...,...
1353428,4.0,2.0,0.33,99354.0,3600.0,359900.0
1353429,3.0,2.0,0.10,99354.0,1616.0,350000.0
1353430,6.0,3.0,0.50,99354.0,3200.0,440000.0
1353431,2.0,1.0,0.09,99354.0,933.0,179900.0


In [41]:
# split the dataframes into the features and the target
X = df.iloc[:,1:5].values  
y = df.iloc[:,5].values 

In [42]:
# split the data into training and testing sets

# Note: If the test size is any smaller that 0.9, it takes a long time to run

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.9, random_state=42)

In [43]:
# scale the data
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [44]:
# fit the data to a linear kernel for Support Vector Regressor
svr_lin = SVR(kernel = 'linear')
svr_lin.fit(X_train_scaled, y_train)


In [45]:
# fit the data to a polynomial kernal for Support Vector Regressor
svr_poly = SVR(kernel = 'poly')
svr_poly.fit(X_train_scaled, y_train)

In [46]:
# make predictions for the linear model
y_pred_linear = svr_lin.predict(X_test)

In [None]:
# make predictions for the polynomial model
y_pred_poly = svr_poly.predict(X_test)

In [None]:
# Evaluating the model

 
# Evaluating the linear model
print('Linear Kernel')
mse_linear = mean_squared_error(y_test,y_pred_linear)
print(f'Mean Squared Error: {mse_linear}')
 
r2_linear = r2_score(y_test,y_pred_linear)
print(f'R-squared: {r2_linear}')

# Evaluating the polynomial model
print('Polynomial Kernel')
mse_poly = mean_squared_error(y_test,y_pred_poly)
print(f'Mean Squared Error: {mse_poly}')
 
r2_poly = r2_score(y_test,y_pred_poly)
print(f'R-squared: {r2_poly}')

Linear Kernel
Radial Kernel
Mean Squared Error: 1472448720110.057
R-squared: -0.02328704293147199
Polynomial Kernel
Mean Squared Error: 7.422832246634264e+34
R-squared: -5.158541656559026e+22
