# Model Building

In [3]:
import pandas as pd
import numpy as np

In [4]:
df = pd.read_csv("cleaned_dataset.csv")

In [5]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 293 entries, 0 to 292
Data columns (total 6 columns):
 #   Column                  Non-Null Count  Dtype  
---  ------                  --------------  -----  
 0   Unnamed: 0              293 non-null    int64  
 1   Present_Price           293 non-null    float64
 2   Seller_Type_Individual  293 non-null    int64  
 3   Fuel_Type_Petrol        293 non-null    int64  
 4   age                     293 non-null    int64  
 5   Selling_Price           293 non-null    float64
dtypes: float64(2), int64(4)
memory usage: 13.9 KB


In [16]:
df.drop("Unnamed: 0",axis=1,inplace=True)

In [17]:
# split the data
X = df.drop("Selling_Price",axis=1)
Y=df.Selling_Price

In [39]:
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
scaled_df = scaler.fit_transform(X)
X  = pd.DataFrame(scaled_df,columns=X.columns)

In [40]:
X.head()

Unnamed: 0,Present_Price,Seller_Type_Individual,Fuel_Type_Petrol,age
0,-0.226614,-0.747336,0.491452,-0.133691
1,0.231527,-0.747336,-2.034785,0.209918
2,0.267482,-0.747336,0.491452,-1.164516
3,-0.393633,-0.747336,0.491452,0.897135
4,-0.078153,-0.747336,-2.034785,-0.133691


In [41]:
# Train and test data
from sklearn.model_selection import train_test_split
train_x,test_x,train_y,test_y = train_test_split(X,Y,test_size=0.2,random_state=7)

In [42]:
import keras

In [43]:
from keras.models import Sequential
from keras.layers import Dense,ReLU,Dropout

In [44]:
# Regression model building
model = Sequential()
model.add(Dense(units =4,kernel_initializer="he_uniform",activation="relu",input_dim=4))
model.add(Dense(units=20,kernel_initializer = "he_uniform",activation="relu"))
model.add(Dense(units=10,kernel_initializer = "he_uniform",activation= "relu"))
model.add(Dense(units=1,kernel_initializer="he_uniform"))
model.compile(
    optimizer="adam",loss="mae",metrics=["mae"])

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [45]:
model.summary()

In [46]:
model.fit(train_x,train_y,validation_split=0.2,batch_size=10,epochs=30)

Epoch 1/30
[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 40ms/step - loss: 4.7340 - mae: 4.7340 - val_loss: 3.6517 - val_mae: 3.6517
Epoch 2/30
[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 3.7702 - mae: 3.7702 - val_loss: 3.2170 - val_mae: 3.2170
Epoch 3/30
[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 3.6120 - mae: 3.6120 - val_loss: 2.7499 - val_mae: 2.7499
Epoch 4/30
[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 3.5114 - mae: 3.5114 - val_loss: 2.2643 - val_mae: 2.2643
Epoch 5/30
[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 2.5315 - mae: 2.5315 - val_loss: 1.7865 - val_mae: 1.7865
Epoch 6/30
[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 2.4192 - mae: 2.4192 - val_loss: 1.6393 - val_mae: 1.6393
Epoch 7/30
[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step - loss: 2.1843

<keras.src.callbacks.history.History at 0x20ce94270d0>

In [47]:
from sklearn.metrics import r2_score
print(r2_score(train_y,model.predict(train_x)))

[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 31ms/step
0.9133991781788622


In [48]:
print(r2_score(test_y,model.predict(test_x)))

[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13ms/step
0.8443577964641193


In [49]:
from keras_tuner import RandomSearch

In [50]:
def build_model(hp):
    model = Sequential()
    for i in range(hp.Int("num_layers",1,5)):
        model.add(Dense(units=hp.Int(f"units_{i}",8,128,step=8),
                        activation="relu",
                        kernel_initializer = "he_uniform"))
        model.add(Dropout(hp.Float(f"dropout_{i}",0.1,0.5,step=0.1)))
    model.add(Dense(units=1,kernel_initializer="he_uniform"))
    model.compile(optimizer=keras.optimizers.Adam(hp.Choice("learning_rate",[1e-1,1e-2,1e-3,1e-4])),
                  loss="mean_absolute_error",
                  metrics=["mae"])
    return model
    

In [51]:
tuner = RandomSearch(
    build_model,
    objective="mae",
    max_trials=10,
    executions_per_trial =3,
    directory="model_weights",
    project_name = "car_price_prediction_using_ANN"
)

In [52]:
tuner.search(train_x,train_y,epochs=100,validation_split=0.2)

Trial 10 Complete [00h 01m 03s]
mae: 2.4663071632385254

Best mae So Far: 0.6427371303240458
Total elapsed time: 00h 18m 07s


In [53]:
best_model=tuner.get_best_models(num_models=1)[0]

  saveable.load_own_variables(weights_store.get(inner_path))


In [54]:
best_model.summary()

In [55]:
print(r2_score(train_y,best_model.predict(train_x)))

[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step
0.9679239662169993


In [56]:
print(r2_score(test_y,best_model.predict(test_x)))

[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step 
0.9761178193111312


In [57]:
best_model.save("model.h5")



In [58]:
import joblib
joblib.dump(scaler,"scaler.pkl")

['scaler.pkl']

In [59]:
from keras.models import load_model

In [60]:
model = load_model("model.h5")



In [62]:
model.summary()

In [67]:
year = 2020
present_Price = 34
Fuel_Type = "Petrol"
Seller_Type = "Individual"
fuel = 1 if Fuel_Type == "Petrol" else 0
seller = 1 if Seller_Type == "Individual" else 0
age = 2024 -year


In [79]:
input = {
    "Present_Price":[present_Price],
    "Seller_Type_Individual":[seller],
    "Fuel_Type_Petrol":[fuel],
    "age":[age]
}
data = pd.DataFrame(input)

In [80]:
scaler_1 = joblib.load("scaler.pkl")

In [81]:
scaled = scaler_1.transform(data)

In [82]:
data_input = pd.DataFrame(scaled,columns=data.columns)


In [83]:
model.predict(data_input)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 228ms/step


array([[24.103874]], dtype=float32)