In [None]:
import pandas as pd
from sklearn.preprocessing import MinMaxScaler

In [65]:
#reading the csv file. Data can be extracted from database using sql as well
car_sales = pd.read_csv("clean_car_sales_data.csv")

In [66]:
#Implementing Linear Regression Model
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score
import pandas as pd

# Function to preprocess, train the model, and predict
def preprocess_and_predict(df, target_column):
    # Splitting into features and target
    X = df.drop(columns=[target_column])
    y = df[target_column]

    # One-hot encode categorical features
    X = pd.get_dummies(X, drop_first=True)

    # Splitting data into training and testing sets
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

    # Training a Linear Regression model
    model = LinearRegression()
    model.fit(X_train, y_train)

    # Making predictions
    y_pred = model.predict(X_test)

    # Evaluating the model
    print(f"Evaluation for {target_column}:")
    print("Mean Squared Error:", mean_squared_error(y_test, y_pred))
    print("R-squared Score:", r2_score(y_test, y_pred))
    print()
    mse = mean_squared_error(y_test, y_pred)
    r2 = r2_score(y_test, y_pred)

    return model, X_train, mse, r2

In [79]:
sales = car_sales
# Select relevant features and target for prediction
features = ['Car Year', 'Car Make','Car Model', 'Commission Rate', 'Sale Price']
data = sales[features]

In [80]:
# Initialize MinMaxScaler and normalize the Sale Price
scaler = MinMaxScaler()
data['Sale Price Normalized'] = scaler.fit_transform(data[['Sale Price']])

# Drop the original Sale Price column
data = data.drop(columns=['Sale Price'])

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['Sale Price Normalized'] = scaler.fit_transform(data[['Sale Price']])


In [81]:
# Train the model and preprocess
print("Car Sales Dataset")
sales_model, X_train, mse, r2 = preprocess_and_predict(
    data,
    target_column='Sale Price Normalized'
)

Car Sales Dataset
Evaluation for Sale Price Normalized:
Mean Squared Error: 0.08326763876314276
R-squared Score: -3.0215952129264423e-06



In [70]:
evaluation_metrics = {
    'Metric': ['Mean Squared Error', 'R-squared'],
    'Value': [mse, r2]
}
evaluation_df = pd.DataFrame(evaluation_metrics)
evaluation_df

Unnamed: 0,Metric,Value
0,Mean Squared Error,0.083268
1,R-squared,-3e-06


In [71]:
# Function to predict and reverse normalization
def predict_sale_price(input_data, model, scaler, X_train_columns):
    # Convert input data to DataFrame
    input_df = pd.DataFrame([input_data], columns=input_data.keys())

    # One-hot encode input data to match training set structure
    input_encoded = pd.get_dummies(input_df, drop_first=True)

    # Reindex the input_encoded DataFrame to match the training data columns
    # This will add missing columns with 0 and align the columns
    input_encoded = input_encoded.reindex(columns=X_train_columns, fill_value=0)

    # Predict normalized Sale Price
    sale_price_normalized = model.predict(input_encoded)[0]

    # Reverse normalization
    x_min = scaler.data_min_[0]
    x_max = scaler.data_max_[0]
    sale_price = sale_price_normalized * (x_max - x_min) + x_min

    return sale_price

In [72]:
# Example user input

print("Enter the Car Year: ")
c_year = int(input())
print()
print("Enter the Car Make: ")
c_make = input()
print()
print("Enter the Car Model: ")
c_model = input()
print()
print("Enter the Commission Rate: ")
c_rate = float(input())
print()


user_input = {
    'Car Year': c_year,
    'Car Make': c_make,
    'Car Model': c_model,
    'Commission Rate': c_rate
}

Enter the Car Year: 
2015

Enter the Car Make: 
Nissan

Enter the Car Model: 
Civic

Enter the Commission Rate: 
0.04



In [73]:
# Predict sale price for the user input
predicted_price = predict_sale_price(user_input, sales_model, scaler, X_train.columns)
print(f"Predicted Sale Price for input {user_input}: ${predicted_price:.2f}")

Predicted Sale Price for input {'Car Year': 2015, 'Car Make': 'Nissan', 'Car Model': 'Civic', 'Commission Rate': 0.04}: $30009.63


In [75]:
#predicted data
user_data = pd.DataFrame([user_input])
user_data['Sale Price'] = predicted_price
user_data

Unnamed: 0,Car Year,Car Make,Car Model,Commission Rate,Sale Price
0,2015,Nissan,Civic,0.04,30009.631524
