In [2]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn import metrics
from sklearn.metrics import mean_absolute_error
from xgboost import XGBRegressor
from sklearn.preprocessing import StandardScaler
import warnings
warnings.filterwarnings("ignore")

In [6]:
# Read ford.csv into a DataFrame and display it as the cell's output.
csv_path="C:\\Users\\Asus\\Downloads\\ford.csv"
dataset=pd.read_csv(csv_path)
dataset

Unnamed: 0,model,year,price,transmission,mileage,fuelType,tax,mpg,engineSize
0,Fiesta,2017,12000,Automatic,15944,Petrol,150,57.7,1.0
1,Focus,2018,14000,Manual,9083,Petrol,150,57.7,1.0
2,Focus,2017,13000,Manual,12456,Petrol,150,57.7,1.0
3,Fiesta,2019,17500,Manual,10460,Petrol,145,40.3,1.5
4,Fiesta,2019,16500,Automatic,1482,Petrol,145,48.7,1.0
...,...,...,...,...,...,...,...,...,...
17961,B-MAX,2017,8999,Manual,16700,Petrol,150,47.1,1.4
17962,B-MAX,2014,7499,Manual,40700,Petrol,30,57.7,1.0
17963,Focus,2015,9999,Manual,7010,Diesel,20,67.3,1.6
17964,KA,2018,8299,Manual,5007,Petrol,145,57.7,1.2


In [8]:
# Count the missing entries in every column (isna is the same check as isnull).
dataset.isna().sum()

model           0
year            0
price           0
transmission    0
mileage         0
fuelType        0
tax             0
mpg             0
engineSize      0
dtype: int64

In [10]:
# Print the distinct values of each text column that gets encoded next.
for column in ('transmission', 'fuelType'):
    print(dataset[column].unique())

['Automatic' 'Manual' 'Semi-Auto']
['Petrol' 'Diesel' 'Hybrid' 'Electric' 'Other']


In [12]:
# Integer codes for the two categorical columns, keyed by column name.
category_codes={
    'transmission':{'Automatic':0, 'Manual':1, 'Semi-Auto':2},
    'fuelType':{'Petrol':0, 'Diesel':1, 'Hybrid':2, 'Electric':3, 'Other':4},
}
# Substitute the codes in place (each inner mapping only touches its own column),
# then display the encoded frame.
dataset.replace(category_codes,inplace=True)
dataset

Unnamed: 0,model,year,price,transmission,mileage,fuelType,tax,mpg,engineSize
0,Fiesta,2017,12000,0,15944,0,150,57.7,1.0
1,Focus,2018,14000,1,9083,0,150,57.7,1.0
2,Focus,2017,13000,1,12456,0,150,57.7,1.0
3,Fiesta,2019,17500,1,10460,0,145,40.3,1.5
4,Fiesta,2019,16500,0,1482,0,145,48.7,1.0
...,...,...,...,...,...,...,...,...,...
17961,B-MAX,2017,8999,1,16700,0,150,47.1,1.4
17962,B-MAX,2014,7499,1,40700,0,30,57.7,1.0
17963,Focus,2015,9999,1,7010,1,20,67.3,1.6
17964,KA,2018,8299,1,5007,0,145,57.7,1.2


In [14]:
# Target: the price column.
y=dataset['price']
# Features: every remaining column except the model name and the target itself.
x=dataset.drop(columns=['model','price'])
y

0        12000
1        14000
2        13000
3        17500
4        16500
         ...  
17961     8999
17962     7499
17963     9999
17964     8299
17965     8299
Name: price, Length: 17966, dtype: int64

In [16]:
# Create a StandardScaler and fit it on the feature frame x.
# NOTE(review): the scaler is fitted on every row of x, i.e. before the
# train/test split performed further down, so the rows that later form the
# test set also contribute to the fitted scaling statistics.
scaler=StandardScaler()
scaler.fit(x)

In [18]:
# Apply the fitted scaler to the features and display the resulting array.
standardized_x=scaler.transform(x)
standardized_x

array([[ 0.06512772, -2.67003231, -0.38099808, ...,  0.59135805,
        -0.02044162, -0.81138621],
       [ 0.55286624,  0.04135139, -0.73335899, ...,  0.59135805,
        -0.02044162, -0.81138621],
       [ 0.06512772,  0.04135139, -0.56013157, ...,  0.59135805,
        -0.02044162, -0.81138621],
       ...,
       [-0.91034931,  0.04135139, -0.83982222, ..., -1.50505332,
         0.92766777,  0.57636151],
       [ 0.55286624,  0.04135139, -0.94269045, ...,  0.51072684,
        -0.02044162, -0.34880364],
       [-0.91034931,  0.04135139, -0.94269045, ..., -1.47280084,
        -0.02044162, -0.81138621]])

In [20]:
# From here on x is the standardized feature array; y remains the price column.
x,y=standardized_x,dataset['price']

In [22]:
# Hold out 10% of the rows for testing; the fixed random_state keeps the split repeatable.
x_train,x_test,y_train,y_test=train_test_split(x,y,test_size=0.1,random_state=42)
# Report the full / train / test shapes, first for the features and then for the target.
for whole,train_part,test_part in ((x,x_train,x_test),(y,y_train,y_test)):
    print(whole.shape, train_part.shape, test_part.shape)

(17966, 7) (16169, 7) (1797, 7)
(17966,) (16169,) (1797,)


In [24]:
# Instantiate an XGBoost regressor with its default constructor arguments
# (nothing is loaded from disk here; the model is still unfitted).
xgb_model=XGBRegressor()

In [26]:
# Fit the regressor on the training split: standardized features -> price.
xgb_model.fit(x_train,y_train)

In [28]:
# Score the fitted model on the same rows it was trained on.
training_data_pred=xgb_model.predict(x_train)
# R2 and mean absolute error of the predictions against the true training prices.
score_1=metrics.r2_score(y_train, training_data_pred)
mae=mean_absolute_error(y_train, training_data_pred)
print("R2 score on training data:",score_1)
print("Mean absolute error on trainig data:",mae)

R2 score on training data: 0.9531964659690857
Mean absolute error on trainig data: 740.4128602406732


In [30]:
# Predict prices for the held-out test rows.
test_data_pred=xgb_model.predict(x_test)
# R2 score on test data
score_1=metrics.r2_score(y_test, test_data_pred)
# Mean absolute error on test data
mae=metrics.mean_absolute_error(y_test, test_data_pred)
# The labels name the test split: these numbers come from x_test / y_test,
# not from the training rows evaluated in the previous cell.
print("R2 score on test data:",score_1)
print("Mean absolute error on test data:",mae)

R2 score on training data: 0.911628007888794
Mean absolute error on trainig data: 907.3636198473694


In [32]:
# One hand-written listing, in the same column order as the feature frame:
# year, transmission, mileage, fuelType, tax, mpg, engineSize.
input_data=(2019,1,10460,0,145,40.3,1.5)
# Wrap the tuple as a single-row 2-D array, which is the shape the scaler and model expect.
input_changed=np.array([input_data])
# Scale the row with the fitted scaler before asking the model for a price.
std_input=scaler.transform(input_changed)
prediction=xgb_model.predict(std_input)
print(prediction)
print("This car price estimation is:",prediction)

[18320.613]
This car price estimation is: [18320.613]


In [34]:
import joblib
# Save the trained regressor.
joblib.dump(xgb_model,'xgb_model.pkl')
# Save the fitted StandardScaler itself (not the model): anything that reuses
# the regressor needs the scaler to standardize raw inputs the same way the
# training features were standardized.
joblib.dump(scaler, 'scaler.pkl')

['scaler.pkl']

In [1]:
import sys
import numpy as np
import joblib
from PyQt6.QtWidgets import QApplication, QWidget, QLabel, QLineEdit, QPushButton, QVBoxLayout, QHBoxLayout, QMessageBox
from PyQt6.QtGui import QFont
from PyQt6.QtCore import Qt

# Load the trained regressor from its joblib file.
# NOTE: joblib.load unpickles the file, which can execute arbitrary code,
# so only point this at a file you created or otherwise trust.
# NOTE(review): the training cells standardize the features with a
# StandardScaler before fitting; confirm that inputs passed to this model
# are scaled the same way.
model = joblib.load("C:\\Users\\Asus\\xgb_model.pkl")

class CarPricePredictor(QWidget):
    """PyQt6 form that estimates a car price from seven numeric inputs.

    The module-level ``model`` was fitted on features that had been passed
    through a ``StandardScaler``. The form therefore loads that scaler and
    applies it to the user's raw values before calling ``model.predict``;
    feeding unscaled values to the model yields meaningless estimates.
    """

    # joblib file holding the scaler fitted during training. Assumed to sit
    # next to the model file loaded at module level -- adjust if it lives elsewhere.
    SCALER_PATH = "C:\\Users\\Asus\\scaler.pkl"

    def __init__(self):
        super().__init__()
        # Loaded on the first prediction so that a missing or invalid scaler
        # file is reported in a dialog instead of preventing the window from opening.
        self.scaler = None
        self.init_ui()

    def init_ui(self):
        """Build the window: title, one labelled text field per feature, buttons, result label."""
        self.setWindowTitle("Car Price Prediction")
        self.setGeometry(100, 100, 400, 500)

        font_label = QFont("Arial", 10)
        font_button = QFont("Arial", 12, QFont.Weight.Bold)

        # Title
        self.title_label = QLabel("Car Price Prediction", self)
        self.title_label.setFont(QFont("Arial", 16, QFont.Weight.Bold))
        self.title_label.setAlignment(Qt.AlignmentFlag.AlignCenter)

        # Input fields and labels: (text shown to the user, key into self.entries).
        self.entries = {}
        fields = [
            ("Year (e.g., 2014-2019)", "year"),
            ("Transmission (0=Auto, 1=Manual)", "transmission"),
            ("Mileage (in miles)", "mileage"),
            ("Fuel Type (0=Petrol, 1=Diesel, etc.)", "fuel_type"),
            ("Tax (e.g., 20-150)", "tax"),
            ("MPG (e.g., 40.3 - 67.3)", "mpg"),
            ("Engine Size (e.g., 1.0 - 1.6)", "enginesize"),
        ]

        layout = QVBoxLayout()
        layout.addWidget(self.title_label)

        # One horizontal row (label + line edit) per feature.
        for label_text, key in fields:
            label = QLabel(label_text, self)
            label.setFont(font_label)
            entry = QLineEdit(self)
            entry.setFont(font_label)
            self.entries[key] = entry
            h_layout = QHBoxLayout()
            h_layout.addWidget(label)
            h_layout.addWidget(entry)
            layout.addLayout(h_layout)

        # Button layout
        button_layout = QHBoxLayout()

        # Predict button
        self.predict_button = QPushButton("Check Estimated Price", self)
        self.predict_button.setFont(font_button)
        self.predict_button.setStyleSheet("background-color: #4A90E2; color: white; padding: 8px; border-radius: 5px;")
        self.predict_button.clicked.connect(self.car_price_prediction)
        button_layout.addWidget(self.predict_button)

        # Clear button
        self.clear_button = QPushButton("Clear", self)
        self.clear_button.setFont(font_button)
        self.clear_button.setStyleSheet("background-color: #D9534F; color: white; padding: 8px; border-radius: 5px;")
        self.clear_button.clicked.connect(self.clear_fields)
        button_layout.addWidget(self.clear_button)

        layout.addLayout(button_layout)

        # Result label
        self.result_label = QLabel("", self)
        self.result_label.setFont(QFont("Arial", 12, QFont.Weight.Bold))
        self.result_label.setAlignment(Qt.AlignmentFlag.AlignCenter)
        layout.addWidget(self.result_label)

        self.setLayout(layout)

    def _get_scaler(self):
        """Return the fitted scaler, loading it from ``SCALER_PATH`` on first use.

        Raises:
            OSError: the scaler file is missing or cannot be read.
            TypeError: the file holds an object without a ``transform`` method,
                i.e. something other than a fitted scaler.
        """
        if self.scaler is None:
            # joblib.load unpickles the file; only trusted files should be used.
            loaded = joblib.load(self.SCALER_PATH)
            if not hasattr(loaded, "transform"):
                raise TypeError(
                    f"{self.SCALER_PATH} contains a {type(loaded).__name__}, "
                    "not a fitted scaler with a transform() method"
                )
            self.scaler = loaded
        return self.scaler

    def car_price_prediction(self):
        """Read the seven fields, standardize them, predict, and show the price.

        Scaler-loading problems and non-numeric input are reported in an error
        dialog; the result label is only updated on success.
        """
        try:
            scaler = self._get_scaler()
        except (OSError, TypeError) as exc:
            QMessageBox.critical(self, "Scaler Error", f"Could not load the feature scaler: {exc}")
            return

        try:
            # Get inputs
            year = int(self.entries["year"].text())
            transmission = float(self.entries["transmission"].text())
            mileage = int(self.entries["mileage"].text())
            fuel_type = float(self.entries["fuel_type"].text())
            tax = int(self.entries["tax"].text())
            mpg = float(self.entries["mpg"].text())
            enginesize = float(self.entries["enginesize"].text())

            # Prepare input data as one row, in the column order used for training.
            input_data = np.array([[year, transmission, mileage, fuel_type, tax, mpg, enginesize]])

            # Standardize with the training-time scaler; the model never saw raw values.
            std_input = scaler.transform(input_data)

            # Predict
            prediction = model.predict(std_input)

            # Display result
            self.result_label.setText(f"Estimated Car Price: £{round(prediction[0], 2)}")

        except ValueError:
            QMessageBox.critical(self, "Input Error", "Please enter valid numeric values!")

    def clear_fields(self):
        """Empty every input field and blank the result label."""
        for entry in self.entries.values():
            entry.clear()
        self.result_label.setText("")

if __name__ == "__main__":
    # Create the Qt application and show the predictor window.
    app = QApplication(sys.argv)
    window = CarPricePredictor()
    window.show()
    # Run the event loop, then exit with the code it returned.
    exit_code = app.exec()
    sys.exit(exit_code)


SystemExit: 0

  warn("To exit: use 'exit', 'quit', or Ctrl-D.", stacklevel=1)
