In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
from sklearn.ensemble import RandomForestRegressor
from sklearn import tree
from sklearn.tree import DecisionTreeRegressor
from sklearn.model_selection import cross_val_score,RandomizedSearchCV,RepeatedKFold
from sklearn.linear_model import Ridge,Lasso, LassoCV,LinearRegression
from xgboost import XGBRegressor
from model_functions import label_encoding,training_model,nn_models
from sklearn.model_selection import train_test_split
dataset=pd.read_excel("dataset.xlsx")

[NbConvertApp] Converting notebook model_functions.ipynb to script
[NbConvertApp] Writing 2362 bytes to model_functions.py


In [2]:
dataset.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 9376 entries, 0 to 9375
Data columns (total 27 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   Destination      9376 non-null   object 
 1   Origin           9376 non-null   object 
 2   To Area          9376 non-null   object 
 3   Flight Date      9376 non-null   object 
 4   day_name         9376 non-null   object 
 5   flight_month     9376 non-null   object 
 6   season           9376 non-null   object 
 7   year             9376 non-null   int64  
 8   Flight Code      9376 non-null   object 
 9   Days             9376 non-null   int64  
 10  Airline Company  9376 non-null   object 
 11  dpt              9376 non-null   object 
 12  Block            9376 non-null   int64  
 13  Sold             9376 non-null   int64  
 14  Left             9376 non-null   int64  
 15  Occ.(%)          9376 non-null   int64  
 16  dpt1             9376 non-null   object 
 17  Block1        

In [3]:
dataset["Origin"].value_counts()

Moscow             5443
S.Petersburg       1275
Kazan               637
Mineralnye Vodi     266
Chelyabinsk         211
Samara              191
Perm                183
Ekaterinburg        182
Kaliningrad         178
Rostov-na-Donu      137
Sochi               117
Ufa                 112
Tyumen               70
Novosibirsk          62
N.Novgorod           61
Krasnodar            44
Arkhangelsk          40
Voronezh             37
Belgorod             35
Saratov              30
Omsk                 26
Orenburg             15
Volgograd            14
Syktyvkar             3
Stavropol             3
Surgut                3
Nizhnekamsk           1
Name: Origin, dtype: int64

In [4]:
df_top_origins = dataset.groupby("Origin").size().reset_index(name="count")
df_top_origins = df_top_origins.sort_values("count", ascending=False).head(3)

In [5]:
df_top_origins

Unnamed: 0,Origin,count
8,Moscow,5443
16,S.Petersburg,1275
5,Kazan,637


In [6]:
top_origins = df_top_origins["Origin"].tolist()
dataset_top_origins = dataset[dataset["Origin"].isin(top_origins)]
dataset_top_origins

Unnamed: 0,Destination,Origin,To Area,Flight Date,day_name,flight_month,season,year,Flight Code,Days,...,Block1,Sold1,Left1,Occ.(%)1,Occ.,Netto,Netto Currency,Profit,prıce,day_convert
8,Turkey,Moscow,Antalya,10.11.2020,Tuesday,November,Fall,2020,U6 3001,2,...,220,220,0,100,30.00,148.00,EUR,-20.01,127.99,2020-11-10
9,Turkey,Moscow,Antalya,10.11.2020,Tuesday,November,Fall,2020,RL 7703,2,...,330,330,0,100,20.30,144.00,EUR,-21.77,122.23,2020-11-10
10,Turkey,Moscow,Antalya,11.11.2020,Wednesday,November,Fall,2020,RL 7705,3,...,330,330,0,100,42.73,145.00,EUR,-42.08,102.92,2020-11-11
11,Turkey,Moscow,Antalya,11.11.2020,Wednesday,November,Fall,2020,U6 3001,3,...,220,220,0,100,32.73,148.00,EUR,-39.18,108.82,2020-11-11
12,Turkey,Moscow,Antalya,12.11.2020,Thursday,November,Fall,2020,U6 3001,4,...,220,218,2,99,58.64,149.00,EUR,-35.64,113.36,2020-11-12
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9371,Turkey,S.Petersburg,Antalya,16.07.2022,Saturday,July,Summer,2022,TK 3673,6,...,65,65,0,100,100.00,377.18,EUR,420.95,798.13,2022-07-16
9372,Turkey,S.Petersburg,Antalya,16.07.2022,Saturday,July,Summer,2022,TK 3961,6,...,46,46,0,100,100.00,343.18,EUR,398.44,741.62,2022-07-16
9373,Turkey,S.Petersburg,Antalya,17.07.2022,Sunday,July,Summer,2022,TK 1234,7,...,34,34,0,100,100.00,370.00,USD,512.82,882.82,2022-07-17
9374,Turkey,S.Petersburg,Antalya,17.07.2022,Sunday,July,Summer,2022,TK 3657,7,...,54,54,0,100,100.00,377.18,EUR,467.51,844.69,2022-07-17


In [7]:
dataset.head()

Unnamed: 0,Destination,Origin,To Area,Flight Date,day_name,flight_month,season,year,Flight Code,Days,...,Block1,Sold1,Left1,Occ.(%)1,Occ.,Netto,Netto Currency,Profit,prıce,day_convert
0,Turkey,Belgorod,Antalya,02.01.2020,Thursday,January,Winter,2020,WZ 4035.,4,...,220,151,69,69,95.45,177.0,EUR,61.73,238.73,2020-01-02
1,Turkey,Belgorod,Antalya,09.01.2020,Thursday,January,Winter,2020,WZ 4035.,4,...,220,220,0,100,0.0,174.0,EUR,,174.0,2020-01-09
2,Turkey,Chelyabinsk,Antalya,02.01.2020,Thursday,January,Winter,2020,U6 1009,4,...,220,181,39,82,97.73,253.0,EUR,58.35,311.35,2020-01-02
3,Turkey,Chelyabinsk,Antalya,10.01.2020,Friday,January,Winter,2020,U6 1009,5,...,220,217,3,99,0.0,236.0,EUR,,236.0,2020-01-10
4,Turkey,Chelyabinsk,Antalya,15.03.2020,Sunday,March,Spring,2020,WZ 4009,7,...,220,1,219,0,99.09,251.0,EUR,-50.69,200.31,2020-03-15


In [8]:
top3=dataset[dataset["Origin"].isin(["Moscow", "S.Petersburg", "Kazan"])].copy()

In [9]:
top3.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 7355 entries, 8 to 9375
Data columns (total 27 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   Destination      7355 non-null   object 
 1   Origin           7355 non-null   object 
 2   To Area          7355 non-null   object 
 3   Flight Date      7355 non-null   object 
 4   day_name         7355 non-null   object 
 5   flight_month     7355 non-null   object 
 6   season           7355 non-null   object 
 7   year             7355 non-null   int64  
 8   Flight Code      7355 non-null   object 
 9   Days             7355 non-null   int64  
 10  Airline Company  7355 non-null   object 
 11  dpt              7355 non-null   object 
 12  Block            7355 non-null   int64  
 13  Sold             7355 non-null   int64  
 14  Left             7355 non-null   int64  
 15  Occ.(%)          7355 non-null   int64  
 16  dpt1             7355 non-null   object 
 17  Block1        

In [10]:
label_encoding(dataset, top3, 'Destination')
label_encoding(dataset, top3, 'Origin')
label_encoding(dataset, top3, 'To Area')
label_encoding(dataset, top3, 'day_name')
label_encoding(dataset, top3, 'flight_month')

label_encoding(dataset, top3, 'season')
label_encoding(dataset, top3, 'Netto Currency')
label_encoding(dataset, top3, 'Flight Code')

label_encoding(dataset, top3, 'Airline Company')
label_encoding(dataset, top3, 'Flight Date')

label_encoding(dataset, top3, 'dpt')
label_encoding(dataset, top3, 'dpt1')

In [11]:
dataset

Unnamed: 0,Destination,Origin,To Area,Flight Date,day_name,flight_month,season,year,Flight Code,Days,...,Block1,Sold1,Left1,Occ.(%)1,Occ.,Netto,Netto Currency,Profit,prıce,day_convert
0,0,0,0,0,0,0,0,2020,0,4,...,220,151,69,69,95.45,177.00,0,61.73,238.73,2020-01-02
1,0,0,0,1,0,0,0,2020,0,4,...,220,220,0,100,0.00,174.00,0,,174.00,2020-01-09
2,0,1,0,0,0,0,0,2020,1,4,...,220,181,39,82,97.73,253.00,0,58.35,311.35,2020-01-02
3,0,1,0,2,1,0,0,2020,1,5,...,220,217,3,99,0.00,236.00,0,,236.00,2020-01-10
4,0,1,0,3,2,1,1,2020,2,7,...,220,1,219,0,99.09,251.00,0,-50.69,200.31,2020-03-15
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9371,0,5,0,496,6,9,3,2022,152,6,...,65,65,0,100,100.00,377.18,0,420.95,798.13,2022-07-16
9372,0,5,0,496,6,9,3,2022,149,6,...,46,46,0,100,100.00,343.18,0,398.44,741.62,2022-07-16
9373,0,5,0,497,2,9,3,2022,151,7,...,34,34,0,100,100.00,370.00,1,512.82,882.82,2022-07-17
9374,0,5,0,497,2,9,3,2022,148,7,...,54,54,0,100,100.00,377.18,0,467.51,844.69,2022-07-17


In [12]:
top3

Unnamed: 0,Destination,Origin,To Area,Flight Date,day_name,flight_month,season,year,Flight Code,Days,...,Block1,Sold1,Left1,Occ.(%)1,Occ.,Netto,Netto Currency,Profit,prıce,day_convert
8,0,4,0,7,4,2,2,2020,5,2,...,220,220,0,100,30.00,148.00,0,-20.01,127.99,2020-11-10
9,0,4,0,7,4,2,2,2020,6,2,...,330,330,0,100,20.30,144.00,0,-21.77,122.23,2020-11-10
10,0,4,0,8,5,2,2,2020,7,3,...,330,330,0,100,42.73,145.00,0,-42.08,102.92,2020-11-11
11,0,4,0,8,5,2,2,2020,5,3,...,220,220,0,100,32.73,148.00,0,-39.18,108.82,2020-11-11
12,0,4,0,9,0,2,2,2020,5,4,...,220,218,2,99,58.64,149.00,0,-35.64,113.36,2020-11-12
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9371,0,5,0,496,6,9,3,2022,152,6,...,65,65,0,100,100.00,377.18,0,420.95,798.13,2022-07-16
9372,0,5,0,496,6,9,3,2022,149,6,...,46,46,0,100,100.00,343.18,0,398.44,741.62,2022-07-16
9373,0,5,0,497,2,9,3,2022,151,7,...,34,34,0,100,100.00,370.00,1,512.82,882.82,2022-07-17
9374,0,5,0,497,2,9,3,2022,148,7,...,54,54,0,100,100.00,377.18,0,467.51,844.69,2022-07-17


In [13]:
top3["Origin"].value_counts()

4     5443
5     1275
10     637
Name: Origin, dtype: int64

In [14]:
top3["Profit"]=top3["Profit"].fillna(top3["Profit"].median())
top3 = top3.drop(['day_convert'], axis=1)

In [15]:
top3

Unnamed: 0,Destination,Origin,To Area,Flight Date,day_name,flight_month,season,year,Flight Code,Days,...,dpt1,Block1,Sold1,Left1,Occ.(%)1,Occ.,Netto,Netto Currency,Profit,prıce
8,0,4,0,7,4,2,2,2020,5,2,...,0,220,220,0,100,30.00,148.00,0,-20.01,127.99
9,0,4,0,7,4,2,2,2020,6,2,...,1,330,330,0,100,20.30,144.00,0,-21.77,122.23
10,0,4,0,8,5,2,2,2020,7,3,...,2,330,330,0,100,42.73,145.00,0,-42.08,102.92
11,0,4,0,8,5,2,2,2020,5,3,...,1,220,220,0,100,32.73,148.00,0,-39.18,108.82
12,0,4,0,9,0,2,2,2020,5,4,...,0,220,218,2,99,58.64,149.00,0,-35.64,113.36
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9371,0,5,0,496,6,9,3,2022,152,6,...,12,65,65,0,100,100.00,377.18,0,420.95,798.13
9372,0,5,0,496,6,9,3,2022,149,6,...,26,46,46,0,100,100.00,343.18,0,398.44,741.62
9373,0,5,0,497,2,9,3,2022,151,7,...,47,34,34,0,100,100.00,370.00,1,512.82,882.82
9374,0,5,0,497,2,9,3,2022,148,7,...,188,54,54,0,100,100.00,377.18,0,467.51,844.69


In [16]:
X = top3.drop(['prıce'], axis=1)
y = top3.iloc[:, -1:]

In [17]:
X

Unnamed: 0,Destination,Origin,To Area,Flight Date,day_name,flight_month,season,year,Flight Code,Days,...,Occ.(%),dpt1,Block1,Sold1,Left1,Occ.(%)1,Occ.,Netto,Netto Currency,Profit
8,0,4,0,7,4,2,2,2020,5,2,...,30,0,220,220,0,100,30.00,148.00,0,-20.01
9,0,4,0,7,4,2,2,2020,6,2,...,20,1,330,330,0,100,20.30,144.00,0,-21.77
10,0,4,0,8,5,2,2,2020,7,3,...,43,2,330,330,0,100,42.73,145.00,0,-42.08
11,0,4,0,8,5,2,2,2020,5,3,...,33,1,220,220,0,100,32.73,148.00,0,-39.18
12,0,4,0,9,0,2,2,2020,5,4,...,59,0,220,218,2,99,58.64,149.00,0,-35.64
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9371,0,5,0,496,6,9,3,2022,152,6,...,100,12,65,65,0,100,100.00,377.18,0,420.95
9372,0,5,0,496,6,9,3,2022,149,6,...,100,26,46,46,0,100,100.00,343.18,0,398.44
9373,0,5,0,497,2,9,3,2022,151,7,...,100,47,34,34,0,100,100.00,370.00,1,512.82
9374,0,5,0,497,2,9,3,2022,148,7,...,100,188,54,54,0,100,100.00,377.18,0,467.51


In [18]:
X_train, X_test, y_train, y_test = train_test_split(X, y.values.ravel(), test_size=0.33, random_state=42)

In [19]:
xgb = XGBRegressor()

param_grid = {
    "learning_rate": [0.0001,0.001, 0.01, 0.1, 1] ,
    "max_depth": range(3,21,3),
    "gamma": [i/10.0 for i in range(0,5)],
    "colsample_bytree": [i/10.0 for i in range(3,10)],
    "reg_alpha": [1e-5, 1e-2, 0.1, 1, 10, 100],
    "reg_lambda": [1e-5, 1e-2, 0.1, 1, 10, 100]
}

xgb_cv = RandomizedSearchCV(xgb, param_grid, n_iter = 100, refit='recall',
         scoring='neg_mean_squared_error', cv = 5, verbose=2, random_state=42, n_jobs = -1) 
xgb_cv.fit(X_train, y_train)
xgb_reg = XGBRegressor(**xgb_cv.best_params_ )

Fitting 5 folds for each of 100 candidates, totalling 500 fits


In [20]:
rf2 = RandomForestRegressor(n_estimators = 25, max_depth = 30,min_samples_split=5,random_state = 42)
DT_Regressor = tree.DecisionTreeRegressor()
ridge = Ridge(alpha=1.0)
lasso_cv = LassoCV(alphas = None, cv = 10, max_iter = 100000, normalize = True)
lr = LinearRegression()

In [21]:
a=pd.read_excel("sample2.xlsx")

In [23]:
label_encoding(dataset, a, 'Destination')
label_encoding(dataset, a, 'Origin')
label_encoding(dataset, a, 'To Area')
label_encoding(dataset, a, 'day_name')
label_encoding(dataset, a, 'flight_month')

label_encoding(dataset, a, 'season')
label_encoding(dataset, a, 'Netto Currency')
label_encoding(dataset, a, 'Flight Code')

label_encoding(dataset, a, 'Airline Company')
label_encoding(dataset, a, 'Flight Date')

label_encoding(dataset, a, 'dpt')
label_encoding(dataset, a, 'dpt1')

In [26]:
sample2 = a.drop(['prıce'], axis=1) 
sample2

Unnamed: 0,Destination,Origin,To Area,Flight Date,day_name,flight_month,season,year,Flight Code,Days,...,Occ.(%),dpt1,Block1,Sold1,Left1,Occ.(%)1,Occ.,Netto,Netto Currency,Profit
0,1,27,1,776,7,12,4,2021,180,3,...,65,211,220,212,8,96,65.45,149.0,3,-32.53
1,1,28,1,777,8,13,5,2022,181,2,...,71,212,189,189,0,100,70.9,246.0,4,-42.98
2,1,29,1,778,9,14,6,2022,182,7,...,100,213,58,58,0,100,100.0,475.82,4,562.33
3,1,30,1,779,10,12,4,2022,183,6,...,100,214,14,14,0,100,100.0,660.36,4,647.46
4,1,31,1,780,11,15,4,2021,184,5,...,100,215,163,163,0,100,100.0,356.36,4,318.2
5,1,32,1,781,12,16,5,2022,185,1,...,99,216,478,8,470,2,99.39,162.0,4,81.5
6,1,33,1,782,9,15,4,2022,186,7,...,100,217,474,0,474,0,99.58,150.0,4,153.87
7,1,34,1,783,9,15,4,2022,187,7,...,99,218,328,322,6,98,99.39,162.0,3,22.08
8,1,35,1,784,7,17,5,2022,180,3,...,96,219,110,82,28,75,95.8,200.0,4,75.78
9,1,36,1,785,9,17,5,2022,188,7,...,23,220,235,235,0,100,23.4,259.0,4,68.88


In [27]:
models = { 'Random forest': rf2,'Decision tree': DT_Regressor,'Ridge regression': ridge,
          'Lasso regression': lasso_cv,'Linear regression': lr,'XGBOOST': xgb_reg}

In [28]:
model_outputs = {}
sample_predicted={}

for i, (key, model) in enumerate(models.items()):
    model, score, mse, rmse, mae = training_model(X_train, X_test, y_train, y_test, model=model)
    model_outputs[key] = {'R-squared': score, 'MSE': mse, 'RMSE': rmse, 'MAE': mae}
    sample_predicted[key]=model.predict(sample2)

R^squared: 0.9987514277671024
MSE:  117.65203362385581
RMSE:  10.846752215472419
MAE: 4.978441350264454
R^squared: 0.9974295249805388
MSE:  242.2139508237242
RMSE:  15.563224306798517
MAE: 6.869522240527185
R^squared: 0.9968226647227327
MSE:  299.3979419258536
RMSE:  17.30311942760188
MAE: 5.58951349321485


If you wish to scale the data, use Pipeline with a StandardScaler in a preprocessing stage. To reproduce the previous behavior:

from sklearn.pipeline import make_pipeline

model = make_pipeline(StandardScaler(with_mean=False), Lasso())

If you wish to pass a sample_weight parameter, you need to pass it as a fit parameter to each step of the pipeline as follows:

kwargs = {s[0] + '__sample_weight': sample_weight for s in model.steps}
model.fit(X, y, **kwargs)

Set parameter alpha to: original_alpha * np.sqrt(n_samples). 


R^squared: 0.9968006828216976
MSE:  301.4692801244445
RMSE:  17.362870733966904
MAE: 5.091592632343726
R^squared: 0.9968222405793103
MSE:  299.437908645346
RMSE:  17.304274288318073
MAE: 5.629307122470763
R^squared: 0.9992935642090611
MSE:  66.56691958922674
RMSE:  8.158855286694742
MAE: 3.940400027443197


In [29]:
model_outputs = pd.DataFrame(model_outputs)
model_outputs

Unnamed: 0,Random forest,Decision tree,Ridge regression,Lasso regression,Linear regression,XGBOOST
R-squared,0.998751,0.99743,0.996823,0.996801,0.996822,0.999294
MSE,117.652034,242.213951,299.397942,301.46928,299.437909,66.56692
RMSE,10.846752,15.563224,17.303119,17.362871,17.304274,8.158855
MAE,4.978441,6.869522,5.589513,5.091593,5.629307,3.9404


In [30]:
sample_predicted = pd.DataFrame(sample_predicted)

In [31]:
real_pred=pd.concat([a[['prıce']],sample_predicted], axis=1)
real_pred

Unnamed: 0,prıce,Random forest,Decision tree,Ridge regression,Lasso regression,Linear regression,XGBOOST
0,116.47,38.300011,116.47,99.261441,103.799635,-2353779000.0,29.115944
1,203.02,87.123828,167.55,186.65877,193.603085,-2353779000.0,86.264008
2,1038.15,938.562356,939.03,1030.336735,1035.121781,-2353778000.0,977.204102
3,1307.82,1318.548076,1314.24,1296.640466,1302.571228,-2353778000.0,1290.212769
4,674.56,673.286967,674.56,668.256102,672.724355,-2353779000.0,688.287231
5,243.5,242.687705,234.4,233.751177,244.352905,-2353779000.0,248.760849
6,303.87,299.028446,285.05,294.758844,303.605691,-2353779000.0,309.032898
7,184.08,186.170716,185.83,175.642728,186.497301,-2353779000.0,204.022141
8,275.78,274.53187,273.0,263.824455,275.261326,-2353779000.0,295.093506
9,327.88,303.184909,327.88,279.613018,291.524752,-2353779000.0,334.27948


In [32]:
from tensorflow import keras
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
from tensorflow.keras import optimizers
from tensorflow.keras.utils import plot_model
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.models import Sequential, Model
from tensorflow.python.keras.layers.convolutional import Conv1D, MaxPooling1D
from tensorflow.python.keras.layers import Dense, LSTM, RepeatVector, Flatten,SimpleRNN

In [33]:
scaler = MinMaxScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [34]:
print('Train shape', X_train_scaled.shape)
print('Test shape', X_test_scaled.shape)

Train shape (4927, 25)
Test shape (2428, 25)


In [35]:
# input [samples, time steps, features] for LSTM
X_train_items = np.reshape(X_train_scaled, (X_train_scaled.shape[0], X_train_scaled.shape[1], 1))
X_test_items = np.reshape(X_test_scaled, (X_test_scaled.shape[0], X_test_scaled.shape[1], 1))

In [36]:
from sklearn.neural_network import MLPRegressor
mlp = MLPRegressor(hidden_layer_sizes=(500, 50), max_iter=2000, random_state=42)

In [37]:
modellstm = Sequential()
modellstm.add(LSTM(50, input_shape=(X_train_items.shape[1], 1)))
modellstm.add(Dense(1))
modellstm.compile(optimizer='adam', loss='mean_squared_error')

In [38]:
rnn = Sequential()
rnn.add(SimpleRNN(units=20, input_shape=(X_train_items.shape[1], X_train_items.shape[2])))
rnn.add(Dense(1))
rnn.compile(loss='mean_squared_error', optimizer='adam')

In [39]:
models = { 'MLP': mlp,'LSTM': modellstm,'RNN': rnn}

In [40]:
model_outputs_nn = {}
sample_predicted_nn={}

for i, (key, model) in enumerate(models.items()):
    if key == 'MLP':
        model, score, mse, rmse, mae=training_model(X_train_scaled, X_test_scaled, y_train, y_test ,model)
        model_outputs_nn[key] = {'R-squared': score, 'MSE': mse, 'RMSE': rmse, 'MAE': mae}
        sample_data = scaler.transform(sample2)
        sample_predicted_nn[key]=model.predict(sample_data)
    else:
        model, score, mse, rmse, mae =nn_models(X_train_scaled, X_test_scaled, y_train, y_test, model, epoch_num=200, batch_size=16)
        model_outputs_nn[key] = {'R-squared': score, 'MSE': mse, 'RMSE': rmse, 'MAE': mae}
        sample_predicted_nn[key]=model.predict(sample2)

R^squared: 0.9975828911598803
MSE:  227.76236971914375
RMSE:  15.091798094300882
MAE: 7.591903280305099
Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 22/200
Epoch 23/200
Epoch 24/200
Epoch 25/200
Epoch 26/200
Epoch 27/200
Epoch 28/200
Epoch 29/200
Epoch 30/200
Epoch 31/200
Epoch 32/200
Epoch 33/200
Epoch 34/200
Epoch 35/200
Epoch 36/200
Epoch 37/200
Epoch 38/200
Epoch 39/200
Epoch 40/200
Epoch 41/200
Epoch 42/200
Epoch 43/200
Epoch 44/200
Epoch 45/200
Epoch 46/200
Epoch 47/200
Epoch 48/200
Epoch 49/200
Epoch 50/200
Epoch 51/200
Epoch 52/200
Epoch 53/200
Epoch 54/200
Epoch 55/200
Epoch 56/200
Epoch 57/200
Epoch 58/200
Epoch 59/200
Epoch 60/200
Epoch 61/200
Epoch 62/200
Epoch 63/200
Epoch 64/200
Epoch 65/200
Epoch 66/200
Epoch 67/200
Epoch 68/200
Epoch 69/200
Epoch 70

Epoch 93/200
Epoch 94/200
Epoch 95/200
Epoch 96/200
Epoch 97/200
Epoch 98/200
Epoch 99/200
Epoch 100/200
Epoch 101/200
Epoch 102/200
Epoch 103/200
Epoch 104/200
Epoch 105/200
Epoch 106/200
Epoch 107/200
Epoch 108/200
Epoch 109/200
Epoch 110/200
Epoch 111/200
Epoch 112/200
Epoch 113/200
Epoch 114/200
Epoch 115/200
Epoch 116/200
Epoch 117/200
Epoch 118/200
Epoch 119/200
Epoch 120/200
Epoch 121/200
Epoch 122/200
Epoch 123/200
Epoch 124/200
Epoch 125/200
Epoch 126/200
Epoch 127/200
Epoch 128/200
Epoch 129/200
Epoch 130/200
Epoch 131/200
Epoch 132/200
Epoch 133/200
Epoch 134/200
Epoch 135/200
Epoch 136/200
Epoch 137/200
Epoch 138/200
Epoch 139/200
Epoch 140/200
Epoch 141/200
Epoch 142/200
Epoch 143/200
Epoch 144/200
Epoch 145/200
Epoch 146/200
Epoch 147/200
Epoch 148/200
Epoch 149/200
Epoch 150/200
Epoch 151/200
Epoch 152/200
Epoch 153/200
Epoch 154/200
Epoch 155/200
Epoch 156/200
Epoch 157/200
Epoch 158/200
Epoch 159/200
Epoch 160/200
Epoch 161/200
Epoch 162/200
Epoch 163/200
Epoch 164/200

Epoch 185/200
Epoch 186/200
Epoch 187/200
Epoch 188/200
Epoch 189/200
Epoch 190/200
Epoch 191/200
Epoch 192/200
Epoch 193/200
Epoch 194/200
Epoch 195/200
Epoch 196/200
Epoch 197/200
Epoch 198/200
Epoch 199/200
Epoch 200/200
R^squared: -0.0009475101287657051
MSE:  94318.54002077722
RMSE:  307.11323647927844
MAE: 243.06929278856032
Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 22/200
Epoch 23/200
Epoch 24/200
Epoch 25/200
Epoch 26/200
Epoch 27/200
Epoch 28/200
Epoch 29/200
Epoch 30/200
Epoch 31/200
Epoch 32/200
Epoch 33/200
Epoch 34/200
Epoch 35/200
Epoch 36/200
Epoch 37/200
Epoch 38/200
Epoch 39/200
Epoch 40/200
Epoch 41/200
Epoch 42/200
Epoch 43/200
Epoch 44/200
Epoch 45/200
Epoch 46/200
Epoch 47/200
Epoch 48/200
Epoch 49/200
Epoch 50/200
Epoch 51/200
Epoch 52/200
E

Epoch 75/200
Epoch 76/200
Epoch 77/200
Epoch 78/200
Epoch 79/200
Epoch 80/200
Epoch 81/200
Epoch 82/200
Epoch 83/200
Epoch 84/200
Epoch 85/200
Epoch 86/200
Epoch 87/200
Epoch 88/200
Epoch 89/200
Epoch 90/200
Epoch 91/200
Epoch 92/200
Epoch 93/200
Epoch 94/200
Epoch 95/200
Epoch 96/200
Epoch 97/200
Epoch 98/200
Epoch 99/200
Epoch 100/200
Epoch 101/200
Epoch 102/200
Epoch 103/200
Epoch 104/200
Epoch 105/200
Epoch 106/200
Epoch 107/200
Epoch 108/200
Epoch 109/200
Epoch 110/200
Epoch 111/200
Epoch 112/200
Epoch 113/200
Epoch 114/200
Epoch 115/200
Epoch 116/200
Epoch 117/200
Epoch 118/200
Epoch 119/200
Epoch 120/200
Epoch 121/200
Epoch 122/200
Epoch 123/200
Epoch 124/200
Epoch 125/200
Epoch 126/200
Epoch 127/200
Epoch 128/200
Epoch 129/200
Epoch 130/200
Epoch 131/200
Epoch 132/200
Epoch 133/200
Epoch 134/200
Epoch 135/200
Epoch 136/200
Epoch 137/200
Epoch 138/200
Epoch 139/200
Epoch 140/200
Epoch 141/200
Epoch 142/200
Epoch 143/200
Epoch 144/200
Epoch 145/200
Epoch 146/200
Epoch 147/200
Epo

Epoch 168/200
Epoch 169/200
Epoch 170/200
Epoch 171/200
Epoch 172/200
Epoch 173/200
Epoch 174/200
Epoch 175/200
Epoch 176/200
Epoch 177/200
Epoch 178/200
Epoch 179/200
Epoch 180/200
Epoch 181/200
Epoch 182/200
Epoch 183/200
Epoch 184/200
Epoch 185/200
Epoch 186/200
Epoch 187/200
Epoch 188/200
Epoch 189/200
Epoch 190/200
Epoch 191/200
Epoch 192/200
Epoch 193/200
Epoch 194/200
Epoch 195/200
Epoch 196/200
Epoch 197/200
Epoch 198/200
Epoch 199/200
Epoch 200/200
R^squared: 0.8033933005254702
MSE:  18526.10318232954
RMSE:  136.1106284693798
MAE: 68.98714605717913


In [41]:
model_outputs_nn = pd.DataFrame(model_outputs_nn)
model_outputs_nn

Unnamed: 0,MLP,LSTM,RNN
R-squared,0.997583,-0.000948,0.803393
MSE,227.76237,94318.540021,18526.103182
RMSE,15.091798,307.113236,136.110628
MAE,7.591903,243.069293,68.987146


In [42]:
sample_predicted_nn

{'MLP': array([ 384.68259604,  497.44326105, 1151.54403004, 1383.98691321,
         876.71807175,  549.27587741,  604.33105714,  513.99141176,
         626.00839478,  591.55272244,  484.65806155]),
 'LSTM': array([[184.67273],
        [105.33587],
        [253.33878],
        [271.4123 ],
        [233.25691],
        [275.00385],
        [237.07637],
        [399.91672],
        [297.56808],
        [293.1799 ],
        [ 42.00238]], dtype=float32),
 'RNN': array([[-660.2687 ],
        [-672.1963 ],
        [ 837.3159 ],
        [ 837.3159 ],
        [ 837.3159 ],
        [ 837.3159 ],
        [ 837.3159 ],
        [ 919.31177],
        [ 837.3159 ],
        [ 837.3159 ],
        [-672.2483 ]], dtype=float32)}

In [43]:
sample_predicted_nn['LSTM'] = sample_predicted_nn['LSTM'].flatten()
sample_predicted_nn['RNN'] = sample_predicted_nn['RNN'].flatten()

In [44]:
sample_predicted_nn = pd.DataFrame(sample_predicted_nn)
real_pred_nn=pd.concat([a[['prıce']],sample_predicted_nn], axis=1)
real_pred_nn

Unnamed: 0,prıce,MLP,LSTM,RNN
0,116.47,384.682596,184.672729,-660.268677
1,203.02,497.443261,105.335869,-672.196289
2,1038.15,1151.54403,253.338776,837.315918
3,1307.82,1383.986913,271.412292,837.315918
4,674.56,876.718072,233.256912,837.315918
5,243.5,549.275877,275.003845,837.315918
6,303.87,604.331057,237.07637,837.315918
7,184.08,513.991412,399.916718,919.311768
8,275.78,626.008395,297.568085,837.315918
9,327.88,591.552722,293.179901,837.315918
