In [None]:
%pip install pandas
import pandas as pd
df = pd.read_csv('cleaned_usa_cars_dataset.csv');

print(df.head())

from sklearn.model_selection import train_test_split


Note: you may need to restart the kernel to use updated packages.
   price    model  mileage   color  car_age  brand_audi  brand_bmw  \
0   6300  cruiser   274117   black       17           0          0   
1   2899       se   190552  silver       14           0          0   
2   5350      mpv    39590  silver        7           0          0   
3  25000     door    64146    blue       11           0          0   
4  27700     1500     6654     red        7           0          0   

   brand_buick  brand_cadillac  brand_chevrolet  ...  state_south carolina  \
0            0               0                0  ...                     0   
1            0               0                0  ...                     0   
2            0               0                0  ...                     0   
3            0               0                0  ...                     0   
4            0               0                1  ...                     0   

   state_tennessee  state_texas  state_utah 

In [None]:
# Install scikit-learn (only needed if not installed)
%pip install scikit-learn  

# Import the correct module
from sklearn.model_selection import train_test_split  

# Define features (X) and target variable (y)
X = df.drop(columns=["price", "model", "color"])  
y = df["price"]  # Price is the target variable

# Split into 80% training and 20% testing
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Check the shape of the data
print("Training Data Shape:", X_train.shape)
print("Testing Data Shape:", X_test.shape)


In [None]:
from sklearn.linear_model import LinearRegression

# Fit a linear regression baseline on the training split. fit() returns the
# estimator itself, so construction and training are chained.
model = LinearRegression().fit(X_train, y_train)

# Price predictions for the held-out test rows
y_pred = model.predict(X_test)


In [None]:
from sklearn.metrics import mean_absolute_error

# Mean absolute error of the linear regression predictions on the test set
mae = mean_absolute_error(y_true=y_test, y_pred=y_pred)
print(f"Linear Regression MAE: ${mae:.2f}")


In [None]:
from sklearn.ensemble import RandomForestRegressor

# Build and train a 100-tree random forest in one step (fit() returns the
# estimator); the fixed seed makes the fitted forest repeatable.
rf_model = RandomForestRegressor(n_estimators=100, random_state=42).fit(X_train, y_train)

# Score the forest on the held-out test rows
y_pred_rf = rf_model.predict(X_test)
mae_rf = mean_absolute_error(y_true=y_test, y_pred=y_pred_rf)
print(f"Random Forest MAE: ${mae_rf:.2f}")


In [None]:
import numpy as np

# Describe one example car. Only the features listed here are set explicitly;
# every other training feature is filled with 0 further down.
sample_data = {
    "car_age": [10],  # Example: 10-year-old car
    "mileage": [50000],  # Example mileage
    "brand_toyota": [1],  # Example: Toyota car
    "brand_ford": [0],
    "title_status_clean": [1],  # Example: Clean title
    "title_status_salvage": [0],
    "state_texas": [1],  # Example: Car in Texas
    "state_california": [0],
    "state_florida": [0],
}

sample_car_df = pd.DataFrame(sample_data)

# Keys that are not training columns are dropped by the alignment below. Report
# them, because a misspelled one-hot column name would otherwise silently leave
# that feature at 0.
ignored_features = [col for col in sample_car_df.columns if col not in X_train.columns]
if ignored_features:
    print("Warning: ignoring features the model was not trained on:", ignored_features)

# Align to the training layout in a single step: columns missing from the sample
# are added with value 0, and the column order matches `X_train`.
sample_car_df = sample_car_df.reindex(columns=X_train.columns, fill_value=0)

# Predict price using the trained random forest
predicted_price = rf_model.predict(sample_car_df)
print(f"Estimated Car Price: ${predicted_price[0]:.2f}")


In [None]:
# Install Flask, which the API cell later in this notebook imports (only needed if not installed)
%pip install flask


In [None]:
import joblib

# Persist the fitted random forest so it can be reloaded without retraining
joblib.dump(value=rf_model, filename="car_price_model.pkl")

# Persist the training column names, in order, so later inputs can be laid out the same way
joblib.dump(value=X_train.columns.tolist(), filename="feature_columns.pkl")


In [None]:
from flask import Flask, request, jsonify
import joblib
import pandas as pd

# Restore the persisted estimator and the ordered list of feature names.
# NOTE: this rebinds `model`, the name an earlier cell used for the linear regression.
model = joblib.load(filename="car_price_model.pkl")
feature_columns = joblib.load(filename="feature_columns.pkl")

# Create the Flask application object that the route decorators attach to
app = Flask(import_name=__name__)

@app.route("/", methods=["GET"])
def home():
    """Landing route: answer GET / with a welcome string."""
    greeting = "Welcome to the Car Price Prediction API!"
    return greeting

@app.route("/predict", methods=["POST"])
def predict():
    """Estimate a car price from a JSON object of feature values.

    The request body must be a JSON object mapping feature names to values.
    Features in ``feature_columns`` that the object omits are filled with 0,
    and keys that are not in ``feature_columns`` are ignored.

    Returns:
        200 with ``{"estimated_price": "$<amount>"}`` on success;
        400 with ``{"error": ...}`` when the body is not a JSON object or its
        values cannot be used for prediction;
        500 with ``{"error": ...}`` on any other failure.
    """
    # silent=True makes a missing, non-JSON or malformed body come back as None
    # instead of raising, so it is reported through the same JSON error shape.
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        return jsonify({"error": "Request body must be a JSON object of feature values."}), 400

    try:
        # One-row frame laid out like the training data: columns absent from
        # the request are added as 0, keys that are not features are dropped.
        input_data = pd.DataFrame([data]).reindex(columns=feature_columns, fill_value=0)

        # Make prediction
        prediction = model.predict(input_data)[0]
    except (ValueError, TypeError) as e:
        # Typically non-numeric or otherwise unusable feature values: client error.
        return jsonify({"error": str(e)}), 400
    except Exception as e:
        # Keep the JSON error shape, but signal a server-side failure.
        return jsonify({"error": str(e)}), 500

    return jsonify({"estimated_price": f"${prediction:.2f}"})

# Start Flask's built-in development server when this code runs as the main module.
# Debug mode is left off: it enables the interactive debugger, which lets anyone who
# can reach the server execute arbitrary Python, and the auto-reloader, which
# restarts the process and does not work from inside a notebook kernel.
if __name__ == "__main__":
    app.run(debug=False)
