# **MODELING AND EVALUATION**

---

## Objectives

* Train and test ML models and predict the selling price

## Inputs

* Train dataset
* Test dataset
* Validation dataset

## Outputs

* Predicted price for test input data
* Predicted price for validation input data

---

## Import Python Libraries

In [2]:

import os
import numpy as np
import pandas as pd
from sklearn.model_selection import cross_val_score
from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.svm import SVR

## Set and confirm working directory

In [3]:
# Record the directory the notebook is currently running from. The bare
# expression on the last line makes the notebook display the value.
current_dir = os.getcwd()
current_dir

'/workspace/portfolio-project-5-price-predictor/jupyter_notebooks'

In [4]:
# Switch the working directory to the parent of the directory stored in
# `current_dir`, then refresh `current_dir` and display it.
# NOTE(review): because `current_dir` is refreshed here, re-running this cell
# moves up one more level each time; run it only once per session.
os.chdir(os.path.dirname(current_dir))
current_dir = os.getcwd()
current_dir

'/workspace/portfolio-project-5-price-predictor'

## Load dataset

In [5]:
def load_data(path):
    """Read the CSV at ``path`` and return its contents as a DataFrame.

    ``path`` is handed straight to ``pandas.read_csv`` with default options.
    """
    return pd.read_csv(path)

#### Drop Outliers

#### Save Model

#### Train and test model

In [14]:
# Run model selection and training only when data/ has no 'output' entry yet,
# so an existing set of saved models is not overwritten.
# NOTE(review): `save_model` is defined elsewhere in the notebook; since
# 'output' is absent whenever this branch runs, it presumably creates
# data/output itself. Confirm.
if 'output' not in os.listdir('data/'):
    x_train = load_data('data/input/x_train.csv')
    # squeeze() turns a one-column frame into a Series
    # (assumes y_train.csv holds only the target column; TODO confirm).
    y_train = load_data('data/input/y_train.csv').squeeze()

    # Candidate regressors, all with their default hyperparameters.
    models = {
        'Linear Regression': LinearRegression(),
        'Decision Tree Regression': DecisionTreeRegressor(),
        'Random Forest Regression': RandomForestRegressor(),
        'Gradient Boosting Regression': GradientBoostingRegressor(),
        'Support Vector Regression': SVR(),
    }

    # Mean 5-fold cross-validated R^2 for each candidate, keyed by the
    # estimator instance so the winner can be fitted directly afterwards.
    model_obj = {
        candidate: np.mean(
            cross_val_score(candidate, x_train, y_train, cv=5, scoring='r2')
        )
        for candidate in models.values()
    }

    # Pick the estimator with the highest mean R^2. On ties max() keeps the
    # first candidate, the same choice sorted(..., reverse=True)[0] makes.
    model = max(model_obj, key=model_obj.get)
    model.fit(x_train, y_train)
    save_model(model, 'data/output/model.pkl')

    # Refit the same estimator object without the 'owner' feature and store
    # that variant under its own file name.
    model.fit(x_train.drop(columns=['owner']), y_train)
    save_model(model, 'data/output/best_model.pkl')
