#Load Functions

In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler, MinMaxScaler
from sklearn.metrics import mean_squared_error

#Locoal Function Build

#Data Wash

In [60]:
# Load data
df = pd.read_csv('cars.csv', encoding='utf-16')

In [61]:
df.head()

Unnamed: 0,Brand,Model,Year,Status,Mileage,Dealer,Price
0,Mazda,CX-5,2023,New,,,36703.0
1,Kia,Sportage,2023,New,,Classic Kia,28990.0
2,Chevrolet,Camaro,2024,New,,Classic Chevrolet Beaumont,41425.0
3,Ford,Bronco,2023,Used,1551.0,Mike Smith Chrysler Dodge Jeep RAM,58900.0
4,Acura,TLX,2021,Used,30384.0,Mike Smith Nissan,34499.0


In [62]:
df.isna().sum()

Unnamed: 0,0
Brand,0
Model,0
Year,0
Status,0
Mileage,88953
Dealer,328
Price,3911


In [63]:
# Change Mileage NaN to 0 if Status is new
df['Mileage'] = df.apply(lambda row: 0 if (pd.isna(row['Mileage']) and row['Status'] == 'New') else row['Mileage'], axis=1)
# Drop rows where 'Status' is 'Used' and 'Mileage' is NaN
df = df.drop(df[(df['Status'] == 'Used') & (df['Mileage'].isna())].index)
df.isna().sum()

Unnamed: 0,0
Brand,0
Model,0
Year,0
Status,0
Mileage,0
Dealer,328
Price,3911


In [64]:
# Drop rows with NaN in 'Price' column
df.dropna(subset=['Price'], inplace=True)

In [65]:
# Drop 'Dealer' column
df = df.drop('Dealer', axis=1)

In [66]:
df.isna().sum()

Unnamed: 0,0
Brand,0
Model,0
Year,0
Status,0
Mileage,0
Price,0


#Encodeing Features to Traning Lables

In [67]:
# Check unique brands
print(df['Brand'].unique())
print(df['Brand'].nunique())

['Mazda' 'Kia' 'Chevrolet' 'Ford' 'Acura' 'Volkswagen' 'GMC' 'BMW'
 'Hyundai' 'Jeep' 'Dodge' 'Infiniti' 'Honda' 'Lexus' 'Toyota' 'Cadillac'
 'Buick' 'Lincoln' 'Mercury' 'MINI' 'RAM' 'Land Rover' 'Audi' 'Nissan'
 'Mercedes' 'Porsche' 'Subaru' 'Jaguar' 'Maserati' 'Alfa Romeo' 'Tesla'
 'Bentley' 'Genesis' 'Volvo' 'Chrysler' 'Mitsubishi' 'FIAT' 'Rivian'
 'Aston Martin' 'Lamborghini' 'Hummer' 'Saturn' 'Rolls-Royce' 'Ferrari'
 'Polestar' 'Lucid' 'Scion' 'Pontiac' 'Karma' 'Smart' 'McLaren' 'Lotus'
 'Saab' 'Maybach' 'Suzuki' 'International Scout' 'Plymouth' 'Geo' 'Isuzu'
 'Oldsmobile' 'Bugatti' 'Eagle']
62


In [68]:
# Encoder the Brand to traning lables
Brand_encoder = LabelEncoder()
df['Brand'] = Brand_encoder.fit_transform(df['Brand'])
print(df['Brand'].unique())

[38 28  9 15  0 60 16  4 21 26 11 22 19 31 59  8  7 32 41 35 49 30  3 43
 40 48 56 25 36  1 58  5 17 61 10 42 13 50  2 29 20 53 51 14 46 34 54 47
 27 55 39 33 52 37 57 23 45 18 24 44  6 12]


In [69]:
# Check unique Models
print(df['Model'].unique())
print(df['Model'].nunique())

['CX-5' 'Sportage' 'Camaro' 'Bronco' 'TLX' 'Golf' 'Yukon' 'M340' 'Sonata'
 'Grand Cherokee' 'Wagoneer' 'Durango' 'Mustang' 'F-350' 'QX30' 'Passport'
 'ES' 'Sierra' 'CX-30' 'Tundra' 'CT5' 'Envista' 'Civic' 'Accord' 'Atlas'
 '4Runner' 'Mazda3' 'Sequoia' 'Continental' 'Marquis' 'IS' 'LX' 'RZ 450e'
 'Escalade' 'LS' 'Nautilus' 'Integra' 'MDX' '435 i' 'ILX' 'Palisade' 'X7'
 'Escape' 'Cooper' 'iX M60' 'X3' 'Canyon' 'Land Cruiser' 'LYRIQ' 'Trax'
 '1500' 'Navigator' 'Range Rover' 'RS 5' 'Leaf' 'Altima' 'E-Class'
 'GLS 600' 'Highlander' 'GLS 450' 'Cayenne' 'Q7' 'TT' 'Wrangler' 'GT'
 'F-150' 'F-250' 'Defender' 'QX80' 'QX60' 'Essentia' 'GLE' 'Panamera' 'M3'
 'Telluride' 'Forte' 'A7' 'X5' '330 i' 'iX xDrive50' '740' 'Juke' 'CT6'
 'WRX' 'Crosstrek' 'Ranger' 'Explorer' 'Town Car' 'Fusion' '2500'
 'Gladiator' 'Q5' 'F-PACE' 'Discovery' 'Silverado' 'GLA' 'G 550' 'e-tron'
 'S-Class' 'Ghibli' 'Tahoe' 'ID.4' '6' 'CR-V' 'Elantra' 'GX' 'Odyssey'
 'Giulia' 'Corvette' 'C-Class' 'CX-9' 'Traverse' 'CL' 'X4' '3' 

In [70]:
# Encoder the Model to traning lables
Model_encoder = LabelEncoder()
df['Model'] = Model_encoder.fit_transform(df['Model'])
print(df['Model'].unique())

[127 531 134 105 543 289 624 346 525 292 595 183 392 226 433 414 194 518
 126 571 122 211 151  72  84  18 376 516 156 371 311 331 457 217 329 399
 316 356  11 310 410 604 218 158 638 600 136 335 332 566   1 400 459 453
 338  77 185 270 305 268 142 431 545 597 276 224 225 181 439 437 219 267
 411 345 552 249  64 602   6 639  42 321 123 594 167 460 223 561 251   3
 288 429 228 182 521 264 252 631 473 285 547 309  37 118 201 281 404 287
 162 107 129 565 116 601   4 538 239 546 386 245 468 161 364 641 570 456
 326 365 551 448 406 280 259 146  82 618   0 415 266  67 269   5  17  70
 454 500  29  65 434 477 147 215 164 271 423  78 374 469 324 515 135  91
 419 222 153 528 339 160 517 168 609   7 382 553 464 210 139 527 385 446
 555 319  51  19 387 590 442 317 485 384 227 121 540 427  87 350 407  61
 556 108 585 169 607 125 325 620 234 261 503 413 367 465 120 101 483 581
 588 206  54 507 486 603 354  89 117 320  66 508 273 296 262 479 303 300
 432 488 522 221 447 340 143 155 148 170 619 349  4

In [71]:
# Check unique Status
print(df['Status'].unique())
print(df['Status'].nunique())

['New' 'Used' 'Certified']
3


In [72]:
# Encoder the Status to traning lables
Status_encoder = LabelEncoder()
df['Status'] = Status_encoder.fit_transform(df['Status'])
print(df['Status'].unique())

[1 2 0]


In [73]:
df.head()

Unnamed: 0,Brand,Model,Year,Status,Mileage,Price
0,38,127,2023,1,0.0,36703.0
1,28,531,2023,1,0.0,28990.0
2,9,134,2024,1,0.0,41425.0
3,15,105,2023,2,1551.0,58900.0
4,0,543,2021,2,30384.0,34499.0


#Normalize All Features & Target

In [74]:
# Split data into features (X) and target (y)
X = df.drop('Price', axis=1).values
y = df['Price'].values

# Normalize all features using MinMaxScaler
scaler_X = MinMaxScaler()
X = scaler_X.fit_transform(X)

# Normalize the target variable (y) using MinMaxScaler
scaler_y = MinMaxScaler()
y = scaler_y.fit_transform(y.reshape(-1, 1)).flatten()  # Flatten to match original shape

In [75]:
X[0:10]

array([[0.62295082, 0.19812793, 0.98461538, 0.5       , 0.        ],
       [0.45901639, 0.82839314, 0.98461538, 0.5       , 0.        ],
       [0.14754098, 0.20904836, 1.        , 0.5       , 0.        ],
       [0.24590164, 0.16380655, 0.98461538, 1.        , 0.00370082],
       [0.        , 0.84711388, 0.95384615, 1.        , 0.0724989 ],
       [0.98360656, 0.45085803, 0.96923077, 0.        , 0.03315469],
       [0.26229508, 0.97347894, 0.95384615, 1.        , 0.16346135],
       [0.06557377, 0.53978159, 0.98461538, 0.5       , 0.        ],
       [0.3442623 , 0.81903276, 0.98461538, 0.5       , 0.        ],
       [0.3442623 , 0.81903276, 0.98461538, 0.5       , 0.        ]])

In [76]:
y[0:10]

array([0.00454336, 0.00358857, 0.0051279 , 0.00729114, 0.00427053,
       0.00420876, 0.00705024, 0.00763961, 0.00469711, 0.00414624])

# Model Build

In [95]:
# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Convert data to PyTorch tensors
X_train = torch.tensor(X_train, dtype=torch.float32)
y_train = torch.tensor(y_train, dtype=torch.float32).view(-1, 1)
X_test = torch.tensor(X_test, dtype=torch.float32)
y_test = torch.tensor(y_test, dtype=torch.float32).view(-1, 1)

In [96]:
# Define Neural Network
class PricePredictionModel(nn.Module):
    def __init__(self):
        super(PricePredictionModel, self).__init__()
        self.fc1 = nn.Linear(X_train.shape[1], 64)
        self.dropout1 = nn.Dropout(0.5)  # 50% dropout
        self.fc2 = nn.Linear(64, 32)
        self.dropout2 = nn.Dropout(0.5)
        self.fc3 = nn.Linear(32, 1)

    def forward(self, x):
        x = torch.relu(self.fc1(x))
        x = self.dropout1(x)
        x = torch.relu(self.fc2(x))
        x = self.dropout2(x)
        x = self.fc3(x)
        return x

In [97]:
# Initialize the model, loss function, and optimizer
model = PricePredictionModel()
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

In [119]:
# Train the model
num_epochs = 300
for epoch in range(num_epochs):
    model.train()
    optimizer.zero_grad()
    outputs = model(X_train)
    loss = criterion(outputs, y_train)
    loss.backward()
    optimizer.step()

    if (epoch + 1) % 30 == 0:
        print(f'Epoch [{epoch + 1}/{num_epochs}], Loss: {loss.item():.4f}')

Epoch [10/100], Loss: 0.0005
Epoch [20/100], Loss: 0.0005
Epoch [30/100], Loss: 0.0005
Epoch [40/100], Loss: 0.0004
Epoch [50/100], Loss: 0.0004
Epoch [60/100], Loss: 0.0003
Epoch [70/100], Loss: 0.0003
Epoch [80/100], Loss: 0.0003
Epoch [90/100], Loss: 0.0003
Epoch [100/100], Loss: 0.0002


In [120]:
# Evaluate the model on the test set
model.eval()
with torch.no_grad():
    predictions = model(X_test)
    mse = mean_squared_error(y_test.numpy(), predictions.numpy())
    print(f'Test MSE: {mse:.4f}')

Test MSE: 0.0001


In [121]:
# Predict price for a new car entry
new_data = pd.DataFrame({
    'Brand': ['Toyota'],
    'Model': ['Camry'],
    'Year': [2020],
    'Status': ['Used'],
    'Mileage': [15000.0],
})

# Encode and normalize new data using the same MinMaxScaler
new_data['Brand'] = Brand_encoder.transform(new_data['Brand'])
new_data['Model'] = Model_encoder.transform(new_data['Model'])
new_data['Status'] = Status_encoder.transform(new_data['Status'])

new_data_scaled = scaler_X.transform(new_data.values)
new_data_tensor = torch.tensor(new_data_scaled, dtype=torch.float32)

In [122]:
new_data_scaled

array([[0.96721311, 0.21060842, 0.93846154, 1.        , 0.03579132]])

In [123]:
# Predict price and inverse transform
model.eval()
with torch.no_grad():
    predicted_price_normalized = model(new_data_tensor)
    predicted_price = scaler_y.inverse_transform(predicted_price_normalized.numpy().reshape(-1, 1))
    print(f"Predicted Price: {predicted_price[0][0]:.2f}")

Predicted Price: 5999.46


In [124]:
predicted_price_normalized

tensor([[0.0007]])