In [1]:
# standard library, plus the MySQL driver used to reach the database
import os
import pymysql
import warnings
# silence every warning for the rest of the session
warnings.filterwarnings('ignore')
import pickle
import numpy as np
# seed NumPy's global random number generator
np.random.seed(2830)
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
# plotting defaults: seaborn theme and a higher figure resolution
sns.set()
plt.rcParams['figure.dpi'] = 150
import sklearn
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split, GroupKFold, cross_val_score, cross_validate
from sklearn.metrics import mean_absolute_error
import lightgbm as lgb

<font size=7><b>Section 4: Interactive Demo</b></font>

This is the interactive demo of the CarMin database!

<br>

This notebook assumes:
* A valid MySQL Server is running.
* The password of 'root'@'localhost' is "insecure_password".
* `Section2_LoadIntoMySQL.sql` has already been run and the database `CarMin` is already created and populated in the MySQL Server.
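* `Section3_TrainModel.ipynb` has already been run and `./Models/MODEL_LGBM.txt` exists with trained parameters. The label-encoder file `./Models/encoder_dict.pickle`, which this notebook also loads, must exist as well.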

<br>

Notebook table of contents:
* **Part 1: Global variables and helper functions.**
* **Part 2: Interactive -- edit this!**

# Part 1: Global variables and helper functions

In [11]:
# global variables
# connection state starts empty; init() fills it in and end() resets it
mysql_connection = None
mysql_cursor = None
# LightGBM booster loaded from the model file on disk
lgbm_model = lgb.Booster(model_file="./Models/MODEL_LGBM.txt")
# dictionary of per-column encoders, indexed by column name in print_predicted_price
# NOTE(review): pickle.load can execute arbitrary code -- only load an encoder file
#               that you produced yourself.
# the with-block closes the file even when unpickling raises
with open("./Models/encoder_dict.pickle", "rb") as pickle_file:
    encoder_dict = pickle.load(pickle_file)

def init():
    """
    Connect to the MySQL Server and store the connection in the globals.
    
    Returns:
        None
    
    Side effects:
        Sets the globals mysql_connection and mysql_cursor.
        If a connection is already open, prints an error and leaves the
        globals untouched.
    """
    # globals
    global mysql_connection
    global mysql_cursor
    global lgbm_model
    global encoder_dict
    
    # sanity check
    if mysql_connection is not None:
        print("Error (init): Global mysql_connection must be None when connecting, did you call end?")
        # stop here, like end() does: connecting again would overwrite the globals
        # and leave the previous connection open with nothing referencing it
        return
    
    # connect to MySQL Server
    # NOTE: For demo purposes, at present, the password for MySQL server on my local machine
    #       is temporarily set to "insecure_password"
    # NOTE(review): hard-coded credentials -- move them out of the notebook before
    #               using this anywhere other than the local demo
    mysql_connection = pymysql.connect(host="localhost", user="root", password="insecure_password", db="CarMin")
    mysql_cursor = mysql_connection.cursor()

def end():
    """
    Close the MySQL cursor and connection held in the globals.
    
    Returns:
        None
    
    Side effects:
        Closes mysql_cursor and mysql_connection, then resets both globals to None.
        The globals are reset even if closing raises, so init() can be called again.
        If no connection is open, prints an error and does nothing else.
    """
    # globals
    global mysql_connection
    global mysql_cursor
    global lgbm_model
    global encoder_dict
    
    # sanity check
    if mysql_connection is None:
        print("Error (end): Global mysql_connection is None, did you call init?")
        return
    
    # close pymysql variables
    try:
        try:
            # the cursor can be None if connecting succeeded but creating the cursor did not
            if mysql_cursor is not None:
                mysql_cursor.close()
        finally:
            # always attempt to close the connection, even if the cursor failed to close
            mysql_connection.close()
    finally:
        # maintain globals
        mysql_connection = None
        mysql_cursor = None

def execute_sql_query(query, params=None):
    """
    Execute an arbitrary MySQL query.
    
    Params:
        query -- the query string
        params -- optional values for the placeholders in `query`, passed to the
                  cursor together with the query (default None: the query is
                  executed exactly as written)
        
    Returns:
        None
    
    Side effects:
        Prints the query result to the notebook, one row per line, each column
        followed by a single space.
        If no cursor is open, prints an error and does nothing else.
    """
    # globals
    global mysql_connection
    global mysql_cursor
    global lgbm_model
    global encoder_dict
    
    # sanity check
    if mysql_cursor is None:
        print("Error (execute_sql_query): Global mysql_cursor is None, did you call init?")
        return
    
    # pass params only when given, so the plain call stays exactly as before
    if params is None:
        mysql_cursor.execute(query)
    else:
        mysql_cursor.execute(query, params)
    
    for row in mysql_cursor.fetchall():
        print("".join([(str(item) + " ") for item in row]))

def print_predicted_price(make_name, model_name, production_year=np.nan, odometer=np.nan,
                          exterior_color=np.nan, transmission_type=np.nan, days_on_market=np.nan,
                          has_accidents=np.nan, is_certified_preowned="False", body_type=np.nan,
                          engine_type=np.nan, engine_displacement=np.nan, horsepower=np.nan,
                          fuel_type=np.nan, fuel_tank_gallons=np.nan, city_mpg=np.nan, highway_mpg=np.nan,
                          max_seats=np.nan, drivetrain=np.nan, wheelbase=np.nan,
                          vehicle_length=np.nan, vehicle_width=np.nan, vehicle_height=np.nan,
                          dealer_name=np.nan, dealer_total_listings=np.nan, dealer_avg_rating=np.nan,
                          dealer_zipcode="-1.0", is_franchise_dealer="False"):
    """
    Predict the price of one vehicle from its input information and print it.
    
    Params:
        See `Section1_DatabaseCreation.ipynb` for column descriptions
    
    Returns:
        None
        
    Side effects:
        Prints the predicted price to the notebook
    """
    # globals
    global mysql_connection, mysql_cursor, lgbm_model, encoder_dict
    
    # column name -> input value, listed in the column order of the frame given to the model
    features = {
        "days_on_market": days_on_market,
        "odometer": odometer,
        "is_certified_preowned": is_certified_preowned,
        "has_accidents": has_accidents,
        "transmission_type": transmission_type,
        "exterior_color": exterior_color,
        "horsepower": horsepower,
        "engine_type": engine_type,
        "engine_displacement": engine_displacement,
        "fuel_type": fuel_type,
        "city_mpg": city_mpg,
        "highway_mpg": highway_mpg,
        "make_name": make_name,
        "model_name": model_name,
        "production_year": production_year,
        "body_type": body_type,
        "max_seats": max_seats,
        "fuel_tank_gallons": fuel_tank_gallons,
        "drivetrain": drivetrain,
        "vehicle_length": vehicle_length,
        "vehicle_width": vehicle_width,
        "vehicle_height": vehicle_height,
        "wheelbase": wheelbase,
        "dealer_name": dealer_name,
        "dealer_total_listings": dealer_total_listings,
        "dealer_avg_rating": dealer_avg_rating,
        "dealer_zipcode": dealer_zipcode,
        "is_franchise_dealer": is_franchise_dealer,
    }
    
    # single-row DataFrame holding the vehicle
    vehicle = pd.DataFrame(columns=list(features.keys()), data=[list(features.values())])
    
    # label encode: each categorical column goes through the encoder stored under its name
    for col_name in ("is_certified_preowned", "has_accidents", "transmission_type", "exterior_color",
                     "engine_type", "fuel_type", "make_name", "model_name", "body_type", "drivetrain",
                     "dealer_name", "dealer_zipcode", "is_franchise_dealer"):
        vehicle[col_name] = encoder_dict[col_name].transform(vehicle[col_name])
    
    # the model returns one prediction for the one row; .item() unwraps it to a scalar
    predicted_price = lgbm_model.predict(vehicle).item()
    print(f"The predicted price of the input vehicle is: {predicted_price} dollars.")

# Part 2: Interactive -- edit this!

In [3]:
init()

# try/finally closes the connection even if the prediction raises,
# so the next cell can call init() again
try:
    print_predicted_price(make_name="Chevrolet", model_name="Camaro")
finally:
    end()

The predicted price of the input vehicle is: 38304.86727672687 dollars.


In [12]:
init()

# try/finally closes the connection even if the query raises,
# so the next cell can call init() again
try:
    execute_sql_query("""
SELECT *
FROM MMYT
WHERE make_name = "Ford" AND model_name = "Focus"
LIMIT 5;
""")
finally:
    end()

MMYT000358 Ford Focus 2005 ZX3 S Hatchback 5.0 14.0 FWD 168.5 66.7 56.8 103.0 2005 Ford Focus ZX3 S 
MMYT000359 Ford Focus 2005 ZX3 SE Hatchback 5.0 14.0 FWD 168.5 66.7 56.8 103.0 2005 Ford Focus ZX3 SE 
MMYT000361 Ford Focus 2005 ZX4 S Sedan 5.0 14.0 FWD 175.2 66.7 56.8 103.0 2005 Ford Focus ZX4 S 
MMYT000362 Ford Focus 2005 ZX4 SE Sedan 5.0 14.0 FWD 175.2 66.7 56.8 103.0 2005 Ford Focus ZX4 SE 
MMYT000363 Ford Focus 2005 ZX4 SES Sedan 5.0 14.0 FWD 175.2 66.7 56.8 103.0 2005 Ford Focus ZX4 SES 
