In [76]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns

In [77]:
tf.keras.backend.clear_session()

In [78]:
database = pd.read_csv('./cleaned_data.csv')

database

Unnamed: 0.1,Unnamed: 0,airline,flight_code,source_city,time_taken,stop,destinate_city,price,Class,Days_Left,dep_time_category,arr_time_category
0,0,SpiceJet,SG-8709,Delhi,130,non-stop,Mumbai,5953,Economy,1,Evening,Night
1,1,SpiceJet,SG-8157,Delhi,140,non-stop,Mumbai,5953,Economy,1,Early Morning,Morning
2,2,AirAsia,I5-764,Delhi,130,non-stop,Mumbai,5956,Economy,1,Early Morning,Early Morning
3,3,Vistara,UK-995,Delhi,135,non-stop,Mumbai,5955,Economy,1,Morning,Afternoon
4,4,Vistara,UK-963,Delhi,140,non-stop,Mumbai,5955,Economy,1,Morning,Morning
...,...,...,...,...,...,...,...,...,...,...,...,...
298921,301051,Vistara,UK-822,Chennai,605,1-stop,Hyderabad,69265,Business,49,Morning,Evening
298922,301052,Vistara,UK-826,Chennai,625,1-stop,Hyderabad,77105,Business,49,Afternoon,Night
298923,301053,Vistara,UK-832,Chennai,830,1-stop,Hyderabad,79099,Business,49,Early Morning,Night
298924,301054,Vistara,UK-828,Chennai,600,1-stop,Hyderabad,81585,Business,49,Early Morning,Evening


In [79]:
print(database['airline'].unique())
#print(database['flight_code'].unique())
print(database['source_city'].unique())
print(database['stop'].unique())
print(database['destinate_city'].unique())
print(database['Class'].unique())
print(database['dep_time_category'].unique())
print(database['arr_time_category'].unique())

['SpiceJet' 'AirAsia' 'Vistara' 'GO FIRST' 'Indigo' 'Air India' 'Trujet'
 'StarAir']
['Delhi' 'Mumbai' 'Bangalore' 'Kolkata' 'Hyderabad' 'Chennai']
['non-stop' '1-stop' '2+-stop']
['Mumbai' 'Bangalore' 'Kolkata' 'Hyderabad' 'Chennai' 'Delhi']
['Economy' 'Business']
['Evening' 'Early Morning' 'Morning' 'Afternoon' 'Night']
['Night' 'Morning' 'Early Morning' 'Afternoon' 'Evening']


modify all non-numeric features to numeric features for model fitting.

In [80]:
database['airline'] = database['airline'].replace({
    'SpiceJet':0, 
    'AirAsia':1,
    'Vistara':2,
    'GO FIRST':3,
    'Indigo':4,
    'Air India':5,
    'Trujet':6,
    'StarAir':7
    })

database['source_city'] = database['source_city'].replace({
    'Delhi':0,
    'Mumbai':1,
    'Bangalore':2,
    'Kolkata':3,
    'Hyderabad':4,
    'Chennai':5
})

database['destinate_city'] = database['destinate_city'].replace({
    'Delhi':0,
    'Mumbai':1,
    'Bangalore':2,
    'Kolkata':3,
    'Hyderabad':4,
    'Chennai':5
})

database['stop'] = database['stop'].replace({
    'non-stop':0,
    '1-stop':1,
    '2+-stop':2
})

database['Class'] = database['Class'].replace({
   'Economy':0,
   'Business':1
})

database['arr_time_category'] = database['arr_time_category'].replace({
    'Evening':0,
    'Early Morning':1,
    'Morning':2,
    'Afternoon':3,
    'Night':4
})

database['dep_time_category'] = database['dep_time_category'].replace({
    'Evening':0,
    'Early Morning':1,
    'Morning':2,
    'Afternoon':3,
    'Night':4
})

In [81]:
#drop flight code
database = database.drop(columns=database.columns[2], axis = 1)
database

Unnamed: 0.1,Unnamed: 0,airline,source_city,time_taken,stop,destinate_city,price,Class,Days_Left,dep_time_category,arr_time_category
0,0,0,0,130,0,1,5953,0,1,0,4
1,1,0,0,140,0,1,5953,0,1,1,2
2,2,1,0,130,0,1,5956,0,1,1,1
3,3,2,0,135,0,1,5955,0,1,2,3
4,4,2,0,140,0,1,5955,0,1,2,2
...,...,...,...,...,...,...,...,...,...,...,...
298921,301051,2,5,605,1,4,69265,1,49,2,0
298922,301052,2,5,625,1,4,77105,1,49,3,4
298923,301053,2,5,830,1,4,79099,1,49,1,4
298924,301054,2,5,600,1,4,81585,1,49,1,0


In [82]:
target_feature = ['price']

x = database.drop(columns=target_feature)
y = database[['price']]

scaler = StandardScaler()
x_scaled = scaler.fit_transform(x)

In [83]:
x_train, x_test, y_train, y_test = train_test_split(x_scaled, y, test_size=0.2, random_state=42)

In [84]:
input_shape = (x_train.shape[1],)

### Using tensorflow to create a model

In [85]:
model = keras.Sequential([
    layers.Dense(128, activation='relu', 
                 input_shape = input_shape, 
                 kernel_regularizer = tf.keras.regularizers.l2(0.01)),
    layers.Dropout(0.2),
    layers.Dense(64,
                 activation='relu',
                 kernel_regularizer = tf.keras.regularizers.l2(0.01)),
    layers.Dropout(0.2),
    layers.Dense(1)
])
optimizier = tf.keras.optimizers.RMSprop(0.001)

model.compile(loss = 'mse',
              optimizer = optimizier,
              metrics = ['mae', 'mse'])

early_stop = tf.keras.callbacks.EarlyStopping(monitor='val_loss',
                                              patience = 10,
                                              restore_best_weights = True)

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [86]:
model.summary()

In [88]:
model.fit(x_train, y_train, epochs=100, validation_split=0.2, batch_size=32, callbacks=[early_stop])

Epoch 1/100
[1m5979/5979[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 1ms/step - loss: 47005068.0000 - mae: 4390.9980 - mse: 47004984.0000 - val_loss: 35718384.0000 - val_mae: 3760.1443 - val_mse: 35718288.0000
Epoch 2/100
[1m5979/5979[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 1ms/step - loss: 43232276.0000 - mae: 4146.0566 - mse: 43232164.0000 - val_loss: 34064548.0000 - val_mae: 3635.3979 - val_mse: 34064424.0000
Epoch 3/100
[1m5979/5979[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 1ms/step - loss: 41487928.0000 - mae: 4013.6436 - mse: 41487820.0000 - val_loss: 33512462.0000 - val_mae: 3577.3479 - val_mse: 33512346.0000
Epoch 4/100
[1m5979/5979[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 1ms/step - loss: 41079164.0000 - mae: 3965.7473 - mse: 41079044.0000 - val_loss: 32808764.0000 - val_mae: 3535.7859 - val_mse: 32808634.0000
Epoch 5/100
[1m5979/5979[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 1ms/step - loss: 40569716.0000 - mae

<keras.src.callbacks.history.History at 0x13c5ab280>