In [139]:
import pandas as pd
import numpy as np
import math
import datetime as dt
from sklearn.metrics import mean_squared_error, mean_absolute_error, explained_variance_score, r2_score 
from sklearn.metrics import mean_poisson_deviance, mean_gamma_deviance, accuracy_score
from sklearn.preprocessing import MinMaxScaler

import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import LSTM, GRU

from itertools import cycle

# ! pip install plotly
import plotly.graph_objects as go
import plotly.express as px
from plotly.subplots import make_subplots

In [142]:
bist100 = pd.read_csv("../src/daily_data/Si_230101_240831.csv", sep=";")
bist100.head()

Unnamed: 0,<TICKER>,<PER>,<DATE>,<TIME>,<OPEN>,<HIGH>,<LOW>,<CLOSE>,<VOL>
0,Si,D,230103,0,70105,72280,69873,72236,920700
1,Si,D,230104,0,72227,72600,71310,72530,846368
2,Si,D,230105,0,72510,72844,71675,72563,708928
3,Si,D,230106,0,72530,72707,71560,72108,848286
4,Si,D,230109,0,72100,72100,70001,70350,1354959


In [143]:
bist100.rename(columns={col: col.replace("<", "").replace(">", "") for col in bist100.columns}, inplace= True)
bist100.head()

Unnamed: 0,TICKER,PER,DATE,TIME,OPEN,HIGH,LOW,CLOSE,VOL
0,Si,D,230103,0,70105,72280,69873,72236,920700
1,Si,D,230104,0,72227,72600,71310,72530,846368
2,Si,D,230105,0,72510,72844,71675,72563,708928
3,Si,D,230106,0,72530,72707,71560,72108,848286
4,Si,D,230109,0,72100,72100,70001,70350,1354959


In [145]:
bist100['DATE'] = df["DATE"] = pd.to_datetime(bist100["DATE"], format="%y%m%d")
bist100.head()

Unnamed: 0,TICKER,PER,DATE,TIME,OPEN,HIGH,LOW,CLOSE,VOL
0,Si,D,2023-01-03,0,70105,72280,69873,72236,920700
1,Si,D,2023-01-04,0,72227,72600,71310,72530,846368
2,Si,D,2023-01-05,0,72510,72844,71675,72563,708928
3,Si,D,2023-01-06,0,72530,72707,71560,72108,848286
4,Si,D,2023-01-09,0,72100,72100,70001,70350,1354959


In [146]:
bist100.sort_values(by='DATE', inplace=True)
bist100.head()

Unnamed: 0,TICKER,PER,DATE,TIME,OPEN,HIGH,LOW,CLOSE,VOL
0,Si,D,2023-01-03,0,70105,72280,69873,72236,920700
1,Si,D,2023-01-04,0,72227,72600,71310,72530,846368
2,Si,D,2023-01-05,0,72510,72844,71675,72563,708928
3,Si,D,2023-01-06,0,72530,72707,71560,72108,848286
4,Si,D,2023-01-09,0,72100,72100,70001,70350,1354959


In [147]:
bist100.shape

(423, 9)

In [150]:
closedf = bist100[['DATE','CLOSE']]
print("Shape of close dataframe:", closedf.shape)

Shape of close dataframe: (423, 2)


In [151]:
close_stock = closedf.copy()
del closedf['DATE']
scaler=MinMaxScaler(feature_range=(0,1))
closedf=scaler.fit_transform(np.array(closedf).reshape(-1,1))
print(closedf.shape)

(423, 1)


In [152]:
training_size=int(len(closedf)*0.65)
test_size=len(closedf)-training_size
train_data,test_data=closedf[0:training_size,:],closedf[training_size:len(closedf),:1]
print("train_data: ", train_data.shape)
print("test_data: ", test_data.shape)

train_data:  (274, 1)
test_data:  (149, 1)


In [153]:
def create_dataset(dataset, time_step=1):
    dataX, dataY = [], []
    for i in range(len(dataset)-time_step-1):
        a = dataset[i:(i+time_step), 0]   ###i=0, 0,1,2,3-----99   100 
        dataX.append(a)
        dataY.append(dataset[i + time_step, 0])
    return np.array(dataX), np.array(dataY)

In [154]:
# reshape into X=t,t+1,t+2,t+3 and Y=t+4
time_step = 15
X_train, y_train = create_dataset(train_data, time_step)
X_test, y_test = create_dataset(test_data, time_step)

print("X_train: ", X_train.shape)
print("y_train: ", y_train.shape)
print("X_test: ", X_test.shape)
print("y_test", y_test.shape)

X_train:  (258, 15)
y_train:  (258,)
X_test:  (133, 15)
y_test (133,)


In [155]:
# LSTM
# reshape input to be [samples, time steps, features] which is required for LSTM
X_train =X_train.reshape(X_train.shape[0],X_train.shape[1] , 1)
X_test = X_test.reshape(X_test.shape[0],X_test.shape[1] , 1)

print("X_train: ", X_train.shape)
print("X_test: ", X_test.shape)

X_train:  (258, 15, 1)
X_test:  (133, 15, 1)


In [156]:
tf.keras.backend.clear_session()
model=Sequential()
model.add(LSTM(32,return_sequences=True,input_shape=(time_step,1)))
model.add(LSTM(32,return_sequences=True))
model.add(LSTM(32))
model.add(Dense(1))
model.compile(loss='mean_squared_error',optimizer='adam')




  super().__init__(**kwargs)


In [157]:
model.summary()

In [158]:
model.fit(X_train,y_train,validation_data=(X_test,y_test),epochs=200,batch_size=5,verbose=1)

Epoch 1/200
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 20ms/step - loss: 0.1011 - val_loss: 0.0027
Epoch 2/200
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 10ms/step - loss: 0.0046 - val_loss: 0.0026
Epoch 3/200
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 11ms/step - loss: 0.0047 - val_loss: 0.0052
Epoch 4/200
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 10ms/step - loss: 0.0041 - val_loss: 0.0044
Epoch 5/200
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 11ms/step - loss: 0.0041 - val_loss: 0.0024
Epoch 6/200
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 14ms/step - loss: 0.0048 - val_loss: 0.0026
Epoch 7/200
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 12ms/step - loss: 0.0037 - val_loss: 0.0024
Epoch 8/200
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 12ms/step - loss: 0.0037 - val_loss: 0.0043
Epoch 9/200
[1m52/52[0m [32m━━━━━━━━━

<keras.src.callbacks.history.History at 0x2805e23e610>

In [159]:
### Lets Do the prediction and check performance metrics
train_predict=model.predict(X_train)
test_predict=model.predict(X_test)
train_predict.shape, test_predict.shape

[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 78ms/step
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step 


((258, 1), (133, 1))

In [160]:
# Transform back to original form

train_predict = scaler.inverse_transform(train_predict)
test_predict = scaler.inverse_transform(test_predict)
original_ytrain = scaler.inverse_transform(y_train.reshape(-1,1)) 
original_ytest = scaler.inverse_transform(y_test.reshape(-1,1))

In [161]:
# Evaluation metrices RMSE and MAE
print("Train data RMSE: ", math.sqrt(mean_squared_error(original_ytrain,train_predict)))
print("Train data MSE: ", mean_squared_error(original_ytrain,train_predict))
print("Test data MAE: ", mean_absolute_error(original_ytrain,train_predict))
print("-------------------------------------------------------------------------------------")
print("Test data RMSE: ", math.sqrt(mean_squared_error(original_ytest,test_predict)))
print("Test data MSE: ", mean_squared_error(original_ytest,test_predict))
print("Test data MAE: ", mean_absolute_error(original_ytest,test_predict))

Train data RMSE:  1061.483886307057
Train data MSE:  1126748.040889533
Test data MAE:  827.6585816375969
-------------------------------------------------------------------------------------
Test data RMSE:  1213.9917891828914
Test data MSE:  1473776.0642034775
Test data MAE:  966.530662593985


In [162]:
print("Train data explained variance regression score:", explained_variance_score(original_ytrain, train_predict))
print("Test data explained variance regression score:", explained_variance_score(original_ytest, test_predict))

Train data explained variance regression score: 0.9887905143200233
Test data explained variance regression score: 0.9163178415780527


In [163]:
print("Train data R2 score:", r2_score(original_ytrain, train_predict))
print("Test data R2 score:", r2_score(original_ytest, test_predict))

Train data R2 score: 0.9836972498865658
Test data R2 score: 0.8450759847652795


In [164]:
print("Train data MGD: ", mean_gamma_deviance(original_ytrain, train_predict))
print("Test data MGD: ", mean_gamma_deviance(original_ytest, test_predict))
print("----------------------------------------------------------------------")
print("Train data MPD: ", mean_poisson_deviance(original_ytrain, train_predict))
print("Test data MPD: ", mean_poisson_deviance(original_ytest, test_predict))

Train data MGD:  0.0001433311691433012
Test data MGD:  0.00018543470443910666
----------------------------------------------------------------------
Train data MPD:  12.658530813505351
Test data MPD:  16.519223210835765


In [167]:
# shift train predictions for plotting

look_back=time_step
trainPredictPlot = np.empty_like(closedf)
trainPredictPlot[:, :] = np.nan
trainPredictPlot[look_back:len(train_predict)+look_back, :] = train_predict
print("Train predicted data: ", trainPredictPlot.shape)

# shift test predictions for plotting
testPredictPlot = np.empty_like(closedf)
testPredictPlot[:, :] = np.nan
testPredictPlot[len(train_predict)+(look_back*2)+1:len(closedf)-1, :] = test_predict
print("Test predicted data: ", testPredictPlot.shape)

names = cycle(['Original close price','Train predicted close price','Test predicted close price'])


plotdf = pd.DataFrame({'date': close_stock['DATE'],
                       'original_close': close_stock['CLOSE'],
                      'train_predicted_close': trainPredictPlot.reshape(1,-1)[0].tolist(),
                      'test_predicted_close': testPredictPlot.reshape(1,-1)[0].tolist()})

fig = px.line(plotdf,x=plotdf['date'], y=[plotdf['original_close'],plotdf['train_predicted_close'],
                                          plotdf['test_predicted_close']],
              labels={'value':'Stock price','date': 'Date'})
fig.update_layout(title_text='Comparision between original close price vs predicted close price',
                  plot_bgcolor='white', font_size=15, font_color='black', legend_title_text='Close Price')
fig.for_each_trace(lambda t:  t.update(name = next(names)))

fig.update_xaxes(showgrid=False)
fig.update_yaxes(showgrid=False)
fig.show()

Train predicted data:  (423, 1)
Test predicted data:  (423, 1)


ValueError: Mime type rendering requires nbformat>=4.2.0 but it is not installed