In [28]:
import base64
import os
import io
import requests
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn import preprocessing
from sklearn import metrics
from sklearn.model_selection import train_test_split

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
    
# Encode text values to indexes(i.e. [1],[2],[3] for red,green,blue).
def encode_text_index(df, name):
    le = preprocessing.LabelEncoder()
    df[name] = le.fit_transform(df[name])
    return le.classes_

# Convert a Pandas dataframe to the X,y inputs that Keras needs
def to_xy(df, target):
    result = []
    for x in df.columns:
        if x != target:
            result.append(x)
    # find out the type of the target column.  Is it really this hard? :(
    target_type = df[target].dtypes
    target_type = target_type[0] if hasattr(
        target_type, '__iter__') else target_type
    # Encode to int for classification, float otherwise. TensorFlow likes 32 bits.
    if target_type in (np.int64, np.int32):
        # Classification
        dummies = pd.get_dummies(df[target])
        return df[result].values.astype(np.float32), dummies.values.astype(np.float32)
    # Regression
    return df[result].values.astype(np.float32), df[[target]].values.astype(np.float32)

def remove_outliers(df, name, sd):
    drop_rows = df.index[(np.abs(df[name] - df[name].mean())
                          >= (sd * df[name].std()))]
    df.drop(drop_rows, axis=0, inplace=True)

path = "."

filename_read = os.path.join(path, "Clean_Dataset.csv")
df = pd.read_csv(filename_read, index_col = 0)

df.isna().sum()

df.head()

flights = df['flight']
df.drop('flight', 1, inplace = True)

df['price'] = (df["price"]*0.011).round(2)

df.head()

  df.drop('flight', 1, inplace = True)


Unnamed: 0,airline,source_city,departure_time,stops,arrival_time,destination_city,class,duration,days_left,price
0,SpiceJet,Delhi,Evening,zero,Night,Mumbai,Economy,2.17,1,65.48
1,SpiceJet,Delhi,Early_Morning,zero,Morning,Mumbai,Economy,2.33,1,65.48
2,AirAsia,Delhi,Early_Morning,zero,Early_Morning,Mumbai,Economy,2.17,1,65.52
3,Vistara,Delhi,Morning,zero,Afternoon,Mumbai,Economy,2.25,1,65.5
4,Vistara,Delhi,Morning,zero,Morning,Mumbai,Economy,2.33,1,65.5


In [30]:
encode_text_index(df, "airline")
encode_text_index(df, "source_city")
encode_text_index(df, "departure_time")
encode_text_index(df, "arrival_time")
encode_text_index(df, "destination_city")
encode_text_index(df, "class")
df['stops'] = df['stops'].replace(["zero", "one", "two_or_more"], [0, 1, 2])
df.head()

Unnamed: 0,airline,source_city,departure_time,stops,arrival_time,destination_city,class,duration,days_left,price
0,4,2,2,0,5,5,1,2.17,1,65.48
1,4,2,1,0,4,5,1,2.33,1,65.48
2,0,2,1,0,1,5,1,2.17,1,65.52
3,5,2,4,0,0,5,1,2.25,1,65.5
4,5,2,4,0,4,5,1,2.33,1,65.5


In [31]:
X,y = to_xy(df, "price")

print(y.shape)
print(y[0:5])

(289222, 1)
[[65.48]
 [65.48]
 [65.52]
 [65.5 ]
 [65.5 ]]


In [32]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=0)

model = Sequential()
model.add(Dense(4, input_shape=X[1].shape, activation='sigmoid')) # Hidden 1
model.add(Dense(1)) # Output
model.summary() #note, only works if input shape specified, or Input layer given

Model: "sequential_12"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_24 (Dense)            (None, 4)                 40        
                                                                 
 dense_25 (Dense)            (None, 1)                 5         
                                                                 
Total params: 45
Trainable params: 45
Non-trainable params: 0
_________________________________________________________________


In [17]:
model.compile(loss='mean_squared_error', optimizer='adam')
model.fit(X_train,y_train,verbose=2,epochs=10)
model.summary()

Epoch 1/10
7035/7035 - 10s - loss: 111811.2109 - 10s/epoch - 1ms/step
Epoch 2/10
7035/7035 - 9s - loss: 105885.9688 - 9s/epoch - 1ms/step
Epoch 3/10
7035/7035 - 10s - loss: 98493.7969 - 10s/epoch - 1ms/step
Epoch 4/10
7035/7035 - 9s - loss: 91189.8125 - 9s/epoch - 1ms/step
Epoch 5/10
7035/7035 - 9s - loss: 84805.6641 - 9s/epoch - 1ms/step
Epoch 6/10
7035/7035 - 9s - loss: 77516.5938 - 9s/epoch - 1ms/step
Epoch 7/10
7035/7035 - 10s - loss: 69734.2500 - 10s/epoch - 1ms/step
Epoch 8/10
7035/7035 - 13s - loss: 62614.3477 - 13s/epoch - 2ms/step
Epoch 9/10
7035/7035 - 10s - loss: 55205.0156 - 10s/epoch - 1ms/step
Epoch 10/10
7035/7035 - 10s - loss: 48832.0586 - 10s/epoch - 1ms/step
Model: "sequential_6"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_12 (Dense)            (None, 4)                 40        
                                                                 
 dense_13 (Dense)            

In [18]:
model = Sequential()
model.add(Dense(4, input_dim=X.shape[1], activation='sigmoid')) # Hidden 1
model.add(Dense(1)) # Output
model.compile(loss='mean_squared_error', optimizer='adam')
model.fit(X_train,y_train,verbose=2,epochs=200)

Epoch 1/200
7035/7035 - 9s - loss: 109021.0547 - 9s/epoch - 1ms/step
Epoch 2/200
7035/7035 - 15s - loss: 98050.2578 - 15s/epoch - 2ms/step
Epoch 3/200
7035/7035 - 15s - loss: 88676.6484 - 15s/epoch - 2ms/step
Epoch 4/200
7035/7035 - 8s - loss: 80538.7422 - 8s/epoch - 1ms/step
Epoch 5/200
7035/7035 - 8s - loss: 72713.6016 - 8s/epoch - 1ms/step
Epoch 6/200
7035/7035 - 11s - loss: 65396.9766 - 11s/epoch - 2ms/step
Epoch 7/200
7035/7035 - 14s - loss: 58337.3086 - 14s/epoch - 2ms/step
Epoch 8/200
7035/7035 - 12s - loss: 51437.3086 - 12s/epoch - 2ms/step
Epoch 9/200
7035/7035 - 10s - loss: 45276.4766 - 10s/epoch - 1ms/step
Epoch 10/200
7035/7035 - 12s - loss: 39629.0859 - 12s/epoch - 2ms/step
Epoch 11/200
7035/7035 - 10s - loss: 34451.3086 - 10s/epoch - 1ms/step
Epoch 12/200
7035/7035 - 9s - loss: 29741.1543 - 9s/epoch - 1ms/step
Epoch 13/200
7035/7035 - 10s - loss: 25481.0938 - 10s/epoch - 1ms/step
Epoch 14/200
7035/7035 - 14s - loss: 21691.3125 - 14s/epoch - 2ms/step
Epoch 15/200
7035/7035

Epoch 119/200
7035/7035 - 8s - loss: 3780.0613 - 8s/epoch - 1ms/step
Epoch 120/200
7035/7035 - 10s - loss: 3781.4385 - 10s/epoch - 1ms/step
Epoch 121/200
7035/7035 - 9s - loss: 3781.4968 - 9s/epoch - 1ms/step
Epoch 122/200
7035/7035 - 16s - loss: 3781.9978 - 16s/epoch - 2ms/step
Epoch 123/200
7035/7035 - 12s - loss: 3779.8994 - 12s/epoch - 2ms/step
Epoch 124/200
7035/7035 - 12s - loss: 3779.4114 - 12s/epoch - 2ms/step
Epoch 125/200
7035/7035 - 14s - loss: 3777.8748 - 14s/epoch - 2ms/step
Epoch 126/200
7035/7035 - 12s - loss: 3778.8972 - 12s/epoch - 2ms/step
Epoch 127/200
7035/7035 - 8s - loss: 3778.0959 - 8s/epoch - 1ms/step
Epoch 128/200
7035/7035 - 16s - loss: 3780.0496 - 16s/epoch - 2ms/step
Epoch 129/200
7035/7035 - 9s - loss: 3779.4124 - 9s/epoch - 1ms/step
Epoch 130/200
7035/7035 - 12s - loss: 3778.5146 - 12s/epoch - 2ms/step
Epoch 131/200
7035/7035 - 9s - loss: 3778.2971 - 9s/epoch - 1ms/step
Epoch 132/200
7035/7035 - 8s - loss: 3780.4363 - 8s/epoch - 1ms/step
Epoch 133/200
7035

<keras.callbacks.History at 0x2361a261310>

In [19]:
model = Sequential()
model.add(Dense(32, input_dim=X.shape[1], activation='sigmoid')) # Hidden 1
model.add(Dense(1)) # Output
model.compile(loss='mean_squared_error', optimizer='adam')
model.fit(X_train,y_train,verbose=2,epochs=200)

Epoch 1/200
7035/7035 - 10s - loss: 85868.9531 - 10s/epoch - 1ms/step
Epoch 2/200
7035/7035 - 9s - loss: 43296.9531 - 9s/epoch - 1ms/step
Epoch 3/200
7035/7035 - 10s - loss: 15560.3271 - 10s/epoch - 1ms/step
Epoch 4/200
7035/7035 - 8s - loss: 5685.6050 - 8s/epoch - 1ms/step
Epoch 5/200
7035/7035 - 8s - loss: 4181.2954 - 8s/epoch - 1ms/step
Epoch 6/200
7035/7035 - 8s - loss: 3957.4385 - 8s/epoch - 1ms/step
Epoch 7/200
7035/7035 - 9s - loss: 3843.9451 - 9s/epoch - 1ms/step
Epoch 8/200
7035/7035 - 9s - loss: 3769.0085 - 9s/epoch - 1ms/step
Epoch 9/200
7035/7035 - 9s - loss: 3727.6345 - 9s/epoch - 1ms/step
Epoch 10/200
7035/7035 - 9s - loss: 3694.5974 - 9s/epoch - 1ms/step
Epoch 11/200
7035/7035 - 9s - loss: 3660.1780 - 9s/epoch - 1ms/step
Epoch 12/200
7035/7035 - 9s - loss: 3629.1357 - 9s/epoch - 1ms/step
Epoch 13/200
7035/7035 - 9s - loss: 3596.8088 - 9s/epoch - 1ms/step
Epoch 14/200
7035/7035 - 9s - loss: 3571.6650 - 9s/epoch - 1ms/step
Epoch 15/200
7035/7035 - 9s - loss: 3550.2805 - 9s

Epoch 121/200
7035/7035 - 9s - loss: 2937.6172 - 9s/epoch - 1ms/step
Epoch 122/200
7035/7035 - 9s - loss: 2936.3652 - 9s/epoch - 1ms/step
Epoch 123/200
7035/7035 - 10s - loss: 2934.9956 - 10s/epoch - 1ms/step
Epoch 124/200
7035/7035 - 9s - loss: 2933.1931 - 9s/epoch - 1ms/step
Epoch 125/200
7035/7035 - 9s - loss: 2931.3489 - 9s/epoch - 1ms/step
Epoch 126/200
7035/7035 - 9s - loss: 2929.1499 - 9s/epoch - 1ms/step
Epoch 127/200
7035/7035 - 8s - loss: 2928.2024 - 8s/epoch - 1ms/step
Epoch 128/200
7035/7035 - 9s - loss: 2925.6812 - 9s/epoch - 1ms/step
Epoch 129/200
7035/7035 - 9s - loss: 2923.8752 - 9s/epoch - 1ms/step
Epoch 130/200
7035/7035 - 9s - loss: 2922.7271 - 9s/epoch - 1ms/step
Epoch 131/200
7035/7035 - 9s - loss: 2920.9902 - 9s/epoch - 1ms/step
Epoch 132/200
7035/7035 - 9s - loss: 2919.2017 - 9s/epoch - 1ms/step
Epoch 133/200
7035/7035 - 9s - loss: 2918.1833 - 9s/epoch - 1ms/step
Epoch 134/200
7035/7035 - 9s - loss: 2916.7302 - 9s/epoch - 1ms/step
Epoch 135/200
7035/7035 - 9s - l

<keras.callbacks.History at 0x2361a40b9d0>

In [20]:
model = Sequential()
model.add(Dense(1024, input_dim=X.shape[1], activation='sigmoid')) # Hidden 1
model.add(Dense(1)) # Output
model.compile(loss='mean_squared_error', optimizer='adam')
model.fit(X_train,y_train,verbose=2,epochs=200)

Epoch 1/200
7035/7035 - 12s - loss: 13450.7998 - 12s/epoch - 2ms/step
Epoch 2/200
7035/7035 - 12s - loss: 4320.0547 - 12s/epoch - 2ms/step
Epoch 3/200
7035/7035 - 11s - loss: 3829.9514 - 11s/epoch - 2ms/step
Epoch 4/200
7035/7035 - 11s - loss: 3659.9304 - 11s/epoch - 2ms/step
Epoch 5/200
7035/7035 - 11s - loss: 3557.1680 - 11s/epoch - 2ms/step
Epoch 6/200
7035/7035 - 12s - loss: 3474.6519 - 12s/epoch - 2ms/step
Epoch 7/200
7035/7035 - 12s - loss: 3399.6448 - 12s/epoch - 2ms/step
Epoch 8/200
7035/7035 - 11s - loss: 3347.1772 - 11s/epoch - 2ms/step
Epoch 9/200
7035/7035 - 12s - loss: 3300.0647 - 12s/epoch - 2ms/step
Epoch 10/200
7035/7035 - 12s - loss: 3262.6790 - 12s/epoch - 2ms/step
Epoch 11/200
7035/7035 - 11s - loss: 3227.6304 - 11s/epoch - 2ms/step
Epoch 12/200
7035/7035 - 12s - loss: 3188.9280 - 12s/epoch - 2ms/step
Epoch 13/200
7035/7035 - 12s - loss: 3159.9932 - 12s/epoch - 2ms/step
Epoch 14/200
7035/7035 - 11s - loss: 3126.9636 - 11s/epoch - 2ms/step
Epoch 15/200
7035/7035 - 11s

Epoch 118/200
7035/7035 - 11s - loss: 1878.5967 - 11s/epoch - 2ms/step
Epoch 119/200
7035/7035 - 12s - loss: 1871.1234 - 12s/epoch - 2ms/step
Epoch 120/200
7035/7035 - 11s - loss: 1866.7911 - 11s/epoch - 2ms/step
Epoch 121/200
7035/7035 - 12s - loss: 1860.7711 - 12s/epoch - 2ms/step
Epoch 122/200
7035/7035 - 12s - loss: 1858.3892 - 12s/epoch - 2ms/step
Epoch 123/200
7035/7035 - 11s - loss: 1853.6613 - 11s/epoch - 2ms/step
Epoch 124/200
7035/7035 - 11s - loss: 1849.2887 - 11s/epoch - 2ms/step
Epoch 125/200
7035/7035 - 12s - loss: 1846.3876 - 12s/epoch - 2ms/step
Epoch 126/200
7035/7035 - 10s - loss: 1841.6141 - 10s/epoch - 1ms/step
Epoch 127/200
7035/7035 - 12s - loss: 1837.1270 - 12s/epoch - 2ms/step
Epoch 128/200
7035/7035 - 11s - loss: 1834.8226 - 11s/epoch - 2ms/step
Epoch 129/200
7035/7035 - 11s - loss: 1830.3282 - 11s/epoch - 2ms/step
Epoch 130/200
7035/7035 - 12s - loss: 1827.4651 - 12s/epoch - 2ms/step
Epoch 131/200
7035/7035 - 11s - loss: 1822.0067 - 11s/epoch - 2ms/step
Epoch 

<keras.callbacks.History at 0x23682311f40>

In [21]:
model = Sequential()
model.add(Dense(1024, input_dim=X.shape[1], activation='relu')) # Hidden 1
model.add(Dense(1)) # Output
model.compile(loss='mean_squared_error', optimizer='adam')
model.fit(X_train,y_train,verbose=2,epochs=200)

Epoch 1/200
7035/7035 - 11s - loss: 10195.2451 - 11s/epoch - 2ms/step
Epoch 2/200
7035/7035 - 10s - loss: 3645.5032 - 10s/epoch - 1ms/step
Epoch 3/200
7035/7035 - 11s - loss: 3466.7197 - 11s/epoch - 2ms/step
Epoch 4/200
7035/7035 - 11s - loss: 3364.5652 - 11s/epoch - 2ms/step
Epoch 5/200
7035/7035 - 10s - loss: 3292.9092 - 10s/epoch - 1ms/step
Epoch 6/200
7035/7035 - 11s - loss: 3236.7368 - 11s/epoch - 2ms/step
Epoch 7/200
7035/7035 - 11s - loss: 3200.2544 - 11s/epoch - 2ms/step
Epoch 8/200
7035/7035 - 11s - loss: 3155.8411 - 11s/epoch - 2ms/step
Epoch 9/200
7035/7035 - 11s - loss: 3121.8486 - 11s/epoch - 2ms/step
Epoch 10/200
7035/7035 - 10s - loss: 3089.0898 - 10s/epoch - 1ms/step
Epoch 11/200
7035/7035 - 11s - loss: 3059.3984 - 11s/epoch - 2ms/step
Epoch 12/200
7035/7035 - 11s - loss: 3031.6018 - 11s/epoch - 2ms/step
Epoch 13/200
7035/7035 - 10s - loss: 3001.0259 - 10s/epoch - 1ms/step
Epoch 14/200
7035/7035 - 11s - loss: 2971.3350 - 11s/epoch - 2ms/step
Epoch 15/200
7035/7035 - 10s

Epoch 118/200
7035/7035 - 12s - loss: 2121.5801 - 12s/epoch - 2ms/step
Epoch 119/200
7035/7035 - 11s - loss: 2113.5837 - 11s/epoch - 2ms/step
Epoch 120/200
7035/7035 - 11s - loss: 2113.4150 - 11s/epoch - 2ms/step
Epoch 121/200
7035/7035 - 11s - loss: 2111.3262 - 11s/epoch - 2ms/step
Epoch 122/200
7035/7035 - 11s - loss: 2113.8418 - 11s/epoch - 2ms/step
Epoch 123/200
7035/7035 - 13s - loss: 2105.4077 - 13s/epoch - 2ms/step
Epoch 124/200
7035/7035 - 11s - loss: 2104.4043 - 11s/epoch - 2ms/step
Epoch 125/200
7035/7035 - 11s - loss: 2099.3447 - 11s/epoch - 2ms/step
Epoch 126/200
7035/7035 - 10s - loss: 2104.9299 - 10s/epoch - 1ms/step
Epoch 127/200
7035/7035 - 11s - loss: 2096.3359 - 11s/epoch - 2ms/step
Epoch 128/200
7035/7035 - 11s - loss: 2094.5234 - 11s/epoch - 1ms/step
Epoch 129/200
7035/7035 - 10s - loss: 2091.0471 - 10s/epoch - 1ms/step
Epoch 130/200
7035/7035 - 12s - loss: 2091.7908 - 12s/epoch - 2ms/step
Epoch 131/200
7035/7035 - 13s - loss: 2091.8560 - 13s/epoch - 2ms/step
Epoch 

<keras.callbacks.History at 0x23682191730>

In [22]:
model = Sequential()
model.add(Dense(64, input_dim=X.shape[1], activation='relu')) # Hidden 1
model.add(Dense(1)) # Output
model.compile(loss='mean_squared_error', optimizer='adam')
model.fit(X_train,y_train,verbose=2,epochs=200)

Epoch 1/200
7035/7035 - 12s - loss: 26455.8555 - 12s/epoch - 2ms/step
Epoch 2/200
7035/7035 - 10s - loss: 5274.8022 - 10s/epoch - 1ms/step
Epoch 3/200
7035/7035 - 11s - loss: 4261.0352 - 11s/epoch - 2ms/step
Epoch 4/200
7035/7035 - 10s - loss: 3936.7229 - 10s/epoch - 1ms/step
Epoch 5/200
7035/7035 - 10s - loss: 3749.1316 - 10s/epoch - 1ms/step
Epoch 6/200
7035/7035 - 10s - loss: 3647.2864 - 10s/epoch - 1ms/step
Epoch 7/200
7035/7035 - 11s - loss: 3590.4834 - 11s/epoch - 2ms/step
Epoch 8/200
7035/7035 - 11s - loss: 3556.6465 - 11s/epoch - 2ms/step
Epoch 9/200
7035/7035 - 9s - loss: 3521.3059 - 9s/epoch - 1ms/step
Epoch 10/200
7035/7035 - 11s - loss: 3499.1289 - 11s/epoch - 2ms/step
Epoch 11/200
7035/7035 - 11s - loss: 3480.6760 - 11s/epoch - 2ms/step
Epoch 12/200
7035/7035 - 10s - loss: 3466.2617 - 10s/epoch - 1ms/step
Epoch 13/200
7035/7035 - 9s - loss: 3451.5889 - 9s/epoch - 1ms/step
Epoch 14/200
7035/7035 - 10s - loss: 3438.4595 - 10s/epoch - 1ms/step
Epoch 15/200
7035/7035 - 10s - l

Epoch 119/200
7035/7035 - 11s - loss: 3057.5134 - 11s/epoch - 2ms/step
Epoch 120/200
7035/7035 - 11s - loss: 3055.9756 - 11s/epoch - 2ms/step
Epoch 121/200
7035/7035 - 11s - loss: 3057.0928 - 11s/epoch - 2ms/step
Epoch 122/200
7035/7035 - 11s - loss: 3052.8071 - 11s/epoch - 1ms/step
Epoch 123/200
7035/7035 - 11s - loss: 3050.9351 - 11s/epoch - 1ms/step
Epoch 124/200
7035/7035 - 9s - loss: 3049.2507 - 9s/epoch - 1ms/step
Epoch 125/200
7035/7035 - 8s - loss: 3045.4131 - 8s/epoch - 1ms/step
Epoch 126/200
7035/7035 - 10s - loss: 3041.4539 - 10s/epoch - 1ms/step
Epoch 127/200
7035/7035 - 11s - loss: 3040.1975 - 11s/epoch - 2ms/step
Epoch 128/200
7035/7035 - 9s - loss: 3037.9966 - 9s/epoch - 1ms/step
Epoch 129/200
7035/7035 - 10s - loss: 3037.2812 - 10s/epoch - 1ms/step
Epoch 130/200
7035/7035 - 11s - loss: 3033.7393 - 11s/epoch - 2ms/step
Epoch 131/200
7035/7035 - 10s - loss: 3031.3689 - 10s/epoch - 1ms/step
Epoch 132/200
7035/7035 - 10s - loss: 3027.8345 - 10s/epoch - 1ms/step
Epoch 133/20

<keras.callbacks.History at 0x2368a38cc70>

In [23]:
pred = model.predict(X_test)
print("Shape: {}".format(pred.shape))
print(pred[:10])

Shape: (75039, 1)
[[115.86597 ]
 [667.745   ]
 [ 65.76815 ]
 [ 34.61377 ]
 [ 63.594482]
 [ 52.665   ]
 [620.89655 ]
 [592.5945  ]
 [ 31.84356 ]
 [ 48.964943]]


In [24]:
# Measure RMSE error.  RMSE is common for regression.
score = np.sqrt(metrics.mean_squared_error(pred,y_test))
print(f"Final score (RMSE): {score}")

Final score (RMSE): 53.47551727294922


In [26]:
# Sample predictions
for i in range(20):
    print(f"{i+1}. Flight: {flights[i]}, Price: {y[i]}, predicted price: {pred[i]}")

1. Flight: SG-8709, Price: [65.48], predicted price: [115.86597]
2. Flight: SG-8157, Price: [65.48], predicted price: [667.745]
3. Flight: I5-764, Price: [65.52], predicted price: [65.76815]
4. Flight: UK-995, Price: [65.5], predicted price: [34.61377]
5. Flight: UK-963, Price: [65.5], predicted price: [63.594482]
6. Flight: UK-945, Price: [65.5], predicted price: [52.665]
7. Flight: UK-927, Price: [66.66], predicted price: [620.89655]
8. Flight: UK-951, Price: [66.66], predicted price: [592.5945]
9. Flight: G8-334, Price: [65.49], predicted price: [31.84356]
10. Flight: G8-336, Price: [65.49], predicted price: [48.964943]
11. Flight: G8-392, Price: [65.49], predicted price: [524.3798]
12. Flight: G8-338, Price: [65.49], predicted price: [617.03625]
13. Flight: 6E-5001, Price: [65.5], predicted price: [506.98474]
14. Flight: 6E-6202, Price: [65.5], predicted price: [55.171577]
15. Flight: 6E-549, Price: [65.5], predicted price: [51.1165]
16. Flight: 6E-6278, Price: [65.5], predicted pr

In [33]:
print("Length before price outliers dropped: {}".format(len(df)))
remove_outliers(df,'price',2) #method call to method defined above
print("Length after price outliers dropped: {}".format(len(df)))

X,y = to_xy(df,"price")

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=0)

from sklearn.preprocessing import StandardScaler 

sc = StandardScaler()
sc.fit(X_train)
X_train= sc.transform(X_train)
X_test = sc.transform(X_test)

Length before price outliers dropped: 289222
Length after price outliers dropped: 274248


In [34]:
print(X.shape[1])

model = Sequential()
model.add(Dense(64, input_dim=X.shape[1], activation='relu')) # Hidden 1
model.add(Dense(64,activation='relu')) #Hidden 2
model.add(Dense(1)) # Output
model.compile(loss='mean_squared_error', optimizer='adam')
model.summary()

model.fit(X_train,y_train,verbose=2,epochs=250)

#With test data
pred = model.predict(X_test)
score = np.sqrt(metrics.mean_squared_error(pred,y_test))
print(f"Final score (RMSE): {score}")

9
Model: "sequential_13"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_26 (Dense)            (None, 64)                640       
                                                                 
 dense_27 (Dense)            (None, 64)                4160      
                                                                 
 dense_28 (Dense)            (None, 1)                 65        
                                                                 
Total params: 4,865
Trainable params: 4,865
Non-trainable params: 0
_________________________________________________________________
Epoch 1/250
6428/6428 - 11s - loss: 3881.3350 - 11s/epoch - 2ms/step
Epoch 2/250
6428/6428 - 10s - loss: 1757.6669 - 10s/epoch - 2ms/step
Epoch 3/250
6428/6428 - 9s - loss: 1692.5818 - 9s/epoch - 1ms/step
Epoch 4/250
6428/6428 - 8s - loss: 1645.8766 - 8s/epoch - 1ms/step
Epoch 5/250
6428/6428 - 9s - loss: 1613.6

Epoch 108/250
6428/6428 - 9s - loss: 1067.1707 - 9s/epoch - 1ms/step
Epoch 109/250
6428/6428 - 9s - loss: 1067.5165 - 9s/epoch - 1ms/step
Epoch 110/250
6428/6428 - 9s - loss: 1065.4056 - 9s/epoch - 1ms/step
Epoch 111/250
6428/6428 - 10s - loss: 1060.8121 - 10s/epoch - 1ms/step
Epoch 112/250
6428/6428 - 9s - loss: 1063.0514 - 9s/epoch - 1ms/step
Epoch 113/250
6428/6428 - 9s - loss: 1060.8599 - 9s/epoch - 1ms/step
Epoch 114/250
6428/6428 - 9s - loss: 1060.7736 - 9s/epoch - 1ms/step
Epoch 115/250
6428/6428 - 9s - loss: 1060.5800 - 9s/epoch - 1ms/step
Epoch 116/250
6428/6428 - 10s - loss: 1058.4027 - 10s/epoch - 1ms/step
Epoch 117/250
6428/6428 - 10s - loss: 1057.2717 - 10s/epoch - 2ms/step
Epoch 118/250
6428/6428 - 9s - loss: 1056.1503 - 9s/epoch - 1ms/step
Epoch 119/250
6428/6428 - 9s - loss: 1054.7465 - 9s/epoch - 1ms/step
Epoch 120/250
6428/6428 - 9s - loss: 1052.7675 - 9s/epoch - 1ms/step
Epoch 121/250
6428/6428 - 9s - loss: 1053.5519 - 9s/epoch - 1ms/step
Epoch 122/250
6428/6428 - 9s

Epoch 227/250
6428/6428 - 9s - loss: 978.4210 - 9s/epoch - 1ms/step
Epoch 228/250
6428/6428 - 9s - loss: 980.1567 - 9s/epoch - 1ms/step
Epoch 229/250
6428/6428 - 9s - loss: 978.8787 - 9s/epoch - 1ms/step
Epoch 230/250
6428/6428 - 9s - loss: 979.2774 - 9s/epoch - 1ms/step
Epoch 231/250
6428/6428 - 9s - loss: 978.3887 - 9s/epoch - 1ms/step
Epoch 232/250
6428/6428 - 9s - loss: 974.4634 - 9s/epoch - 1ms/step
Epoch 233/250
6428/6428 - 9s - loss: 976.3077 - 9s/epoch - 1ms/step
Epoch 234/250
6428/6428 - 9s - loss: 976.4410 - 9s/epoch - 1ms/step
Epoch 235/250
6428/6428 - 9s - loss: 974.0327 - 9s/epoch - 1ms/step
Epoch 236/250
6428/6428 - 9s - loss: 976.4469 - 9s/epoch - 1ms/step
Epoch 237/250
6428/6428 - 9s - loss: 972.3333 - 9s/epoch - 1ms/step
Epoch 238/250
6428/6428 - 9s - loss: 972.7818 - 9s/epoch - 1ms/step
Epoch 239/250
6428/6428 - 9s - loss: 971.9177 - 9s/epoch - 1ms/step
Epoch 240/250
6428/6428 - 9s - loss: 970.9288 - 9s/epoch - 1ms/step
Epoch 241/250
6428/6428 - 10s - loss: 972.0451 -