In [1]:
import numpy as np
import operator
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler 
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import LinearRegression
from scipy import stats
import joblib

def min_max_normalization(x) :
    x_min = min(x)
    x_max = max(x)
    x = [(a - x_min)/(x_max - x_min) for a in x]
    return x

#Independent variables: livingAreaSqFt, numOfBathrooms, avgSchoolRating, numOfBedrooms, numOfHighSchools, and MedianStudentsPerTeacher
#Dependent varaibles: lateestPrice

df = pd.read_csv('austinHousingData.csv')
df_filtered = df[['livingAreaSqFt','numOfBathrooms', 'avgSchoolRating', 'numOfBedrooms', 'numOfHighSchools', 'MedianStudentsPerTeacher', 'latestPrice']]

z = np.abs(stats.zscore(df_filtered))
threshold = 3
df_filtered_o = df_filtered[(z < threshold).all(axis=1)]

X = df_filtered_o[['livingAreaSqFt', 'numOfBathrooms', 'avgSchoolRating', 'numOfBedrooms', 'numOfHighSchools', 'MedianStudentsPerTeacher']]
y = df_filtered_o.latestPrice
df_filtered_o.to_csv("Poly_test.csv")
y = y.to_numpy()

def createPolyModel(degree, X, y):
    y = min_max_normalization(y)
    poly = PolynomialFeatures(degree)
    X_poly = poly.fit_transform(X)

    tSize = int(len(df)*0.1)
    X_poly_train, X_poly_test, y_train, y_test = X_poly[:-tSize], X_poly[-tSize:], y[:-tSize], y[-tSize:]
    
    model = LinearRegression()
    model.fit(X_poly_train, y_train)

    y_train_pred = model.predict(X_poly_train)
    y_test_pred = model.predict(X_poly_test)

    mse_train = mean_squared_error(y_train,y_train_pred)
    mse_test = mean_squared_error(y_test,y_test_pred)
    print("Training MSE for degree", degree, "=", mse_train)
    print("Testing MSE for degree", degree, "=", mse_test, "\n")
    return model

joblib.dump(createPolyModel(2,X,y), "PolyModel2.sav")
joblib.dump(createPolyModel(3,X,y), "PolyModel3.sav")
joblib.dump(createPolyModel(4,X,y), "PolyModel4.sav")
joblib.dump(createPolyModel(5,X,y), "PolyModel5.sav")
joblib.dump(createPolyModel(6,X,y), "PolyModel6.sav")
joblib.dump(createPolyModel(7,X,y), "PolyModel7.sav")
joblib.dump(createPolyModel(8,X,y), "PolyModel8.sav")
joblib.dump(createPolyModel(9,X,y), "PolyModel9.sav")
joblib.dump(createPolyModel(10,X,y), "PolyModel10.sav")

Training MSE for degree 2 = 0.007679157581606601
Testing MSE for degree 2 = 0.03844624957358749 

Training MSE for degree 3 = 0.007425464090447762
Testing MSE for degree 3 = 0.0363898391569811 

Training MSE for degree 4 = 0.007210704416815869
Testing MSE for degree 4 = 0.03841091371152194 

Training MSE for degree 5 = 0.009064771727103958
Testing MSE for degree 5 = 0.13058023391002613 

Training MSE for degree 6 = 0.007217899081642155
Testing MSE for degree 6 = 0.04180482553235436 

Training MSE for degree 7 = 0.00849712357977055
Testing MSE for degree 7 = 0.17788952862342 

Training MSE for degree 8 = 0.008930166465545872
Testing MSE for degree 8 = 0.22092506911061327 

Training MSE for degree 9 = 0.008332126367766408
Testing MSE for degree 9 = 0.06471608443226295 

Training MSE for degree 10 = 0.010043715515754045
Testing MSE for degree 10 = 0.21442007397684637 



['PolyModel10.sav']