In [32]:
import pandas as pd
import matplotlib.pyplot as plt

In [33]:
#pip install pandas_ta

In [34]:
import pandas_ta as ta

In [35]:
import random
import os
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
from torch.utils.data import Dataset, DataLoader

In [39]:
# Load the raw BIST 100 daily data, report basic facts about it, clean the
# price columns, and append the technical indicators used later for training.
bist_daily = pd.read_csv("datasets/bist_100_daily.csv")
# Extract the number of rows and columns by using the shape of the data.
numRows, numColumns = bist_daily.shape
# Extract the time interval. The bounds are located by parsing the dates instead
# of relying on row position, so they are correct whatever the row order of the
# file is; the raw (unparsed) date strings are kept for printing.
parsed_dates = pd.to_datetime(bist_daily["Date"])
first_date = bist_daily.loc[parsed_dates.idxmin(), "Date"]
last_date = bist_daily.loc[parsed_dates.idxmax(), "Date"]
# Check the availability of the data.
na_cols = bist_daily.columns[bist_daily.isna().any()].tolist()

# Print the information (time frame is reported in chronological order).
print(f"There are {numRows} rows and {numColumns} columns in the initial dataset.")
print(f"The data represents the time frame between the dates '{first_date}' and '{last_date}'.")
if not na_cols:
    print("There are no NA rows.")
else:
    print(f"Columns in the dataset which include NA rows: {na_cols}.")
# Convert columns to numeric values: strip the thousands separator first.
# astype(str) keeps this working when pandas has already parsed a column as
# numeric (the .str accessor would raise on a non-string column).
column_names = ["Price", "Open", "High", "Low"]
for column in column_names:
    bist_daily[column] = pd.to_numeric(
        bist_daily[column].astype(str).str.replace(",", "", regex=False)
    )
# CONVERT TO DATETIME FORMAT AND SORT DATA BY DATE
bist_daily["Date"] = parsed_dates
bist_daily = bist_daily.sort_values(by="Date", ignore_index=True)
bist_daily = bist_daily.set_index(pd.DatetimeIndex(bist_daily["Date"]))
# Rename the price columns explicitly. Later cells select "close", "open",
# "high" and "low"; the recorded output shows the lowercase names, which
# presumably came from pandas_ta renaming them as a side effect. Doing it here
# removes that implicit dependency.
bist_daily = bist_daily.rename(
    columns={"Price": "close", "Open": "open", "High": "high", "Low": "low"}
)
# Calculate Returns and append to the df DataFrame
# CUMLOGRET_1 and CUMPCTRET_1 are added (NaN values exists)
bist_daily.ta.log_return(cumulative=True, append=True)
bist_daily.ta.percent_return(cumulative=True, append=True)
# Returns a list of indicators and utility functions (to check in future)
ind_list = bist_daily.ta.indicators(as_list=True)
# RSI_14, MACD_12_26_9, MACDh_12_26_9 and MACDs_12_26_9 are added (NaN values exists)
bist_daily.ta.rsi(append=True)
bist_daily.ta.macd(append=True)
# SMA values are added (use ta in the future)
sma_values = [5, 10, 15]
for i in sma_values:
    bist_daily['SMA'+str(i)] = bist_daily['close'].rolling(window=i).mean()
# Remove all NaN value rows (the warm-up rows of the rolling/indicator columns)
bist_daily = bist_daily.dropna()
bist_daily

There are 5000 rows and 7 columns in the initial dataset.
The data represents the time frame between the dates 'Dec 11, 2019' and 'Jan 04, 2000'.
There are no NA rows.


Unnamed: 0_level_0,date,close,open,high,low,Vol.,Change %,CUMLOGRET_1,CUMPCTRET_1,RSI_14,MACD_12_26_9,MACDh_12_26_9,MACDs_12_26_9,SMA5,SMA10,SMA15
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
2000-02-21,2000-02-21,145.69,153.64,153.64,142.79,25.75M,-5.17%,-0.183990,-0.160515,34.352566,-7.230371,-0.392050,-6.838321,153.936,155.116,157.896000
2000-02-22,2000-02-22,139.94,145.69,147.02,137.58,29.58M,-3.95%,-0.224258,-0.199983,31.136786,-8.027940,-0.951695,-7.076245,150.206,153.060,156.330667
2000-02-23,2000-02-23,134.47,139.94,141.26,131.71,26.95M,-3.91%,-0.264130,-0.239071,28.411996,-8.997682,-1.537150,-7.460532,145.432,151.074,154.602000
2000-02-24,2000-02-24,146.52,134.47,146.52,134.14,43.02M,8.96%,-0.178310,-0.149460,40.719161,-8.693661,-0.986503,-7.707158,144.052,149.896,153.638667
2000-02-25,2000-02-25,156.18,146.52,156.32,146.52,56.16M,6.59%,-0.114462,-0.083530,48.380526,-7.585796,0.097090,-7.682886,144.560,149.916,152.803333
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2019-12-05,2019-12-05,1086.59,1082.60,1090.32,1081.51,2.24B,0.89%,1.825328,2.894902,66.153619,16.788401,0.360019,16.428382,1076.116,1068.981,1068.217333
2019-12-06,2019-12-06,1088.69,1090.09,1093.15,1083.52,2.16B,0.19%,1.827259,2.896834,66.774984,17.187282,0.607120,16.580162,1080.046,1071.262,1070.543333
2019-12-09,2019-12-09,1087.86,1088.96,1091.25,1081.39,2.32B,-0.08%,1.826496,2.896072,66.257244,17.237719,0.526046,16.711673,1081.610,1074.666,1071.836667
2019-12-10,2019-12-10,1080.11,1089.12,1093.78,1076.18,2.07B,-0.71%,1.819347,2.888948,61.465048,16.462560,-0.199291,16.661851,1084.052,1076.694,1072.158000


In [40]:
# Seed every random number generator available in this notebook (Python's
# `random`, NumPy and PyTorch) with the same arbitrary value so that multiple
# runs give the same results. NumPy is imported by the notebook, so its global
# generator is seeded as well.
manualSeed = 999
random.seed(manualSeed)
np.random.seed(manualSeed)
torch.manual_seed(manualSeed)
print("Seed:", manualSeed)

Seed: 999


In [41]:
# Ask CUDA how many GPU devices it reports and print the count.
# `ngpu` is reused by later cells (listing the GPU names and choosing the device).
ngpu = torch.cuda.device_count()
print("Count of available GPUs:", ngpu)

Count of available GPUs: 1


In [42]:
# report every detected GPU by its 1-based position and its device name
for i in range(ngpu):
    gpu_name = torch.cuda.get_device_name(i)
    print("GPU {}: {}".format(i + 1, gpu_name))

GPU 1: GeForce RTX 2060


In [43]:
# --- training configuration ---

# number of samples per training batch
batch_size = 64

# size of the generator's input
z_dimension = 64

# optimizer settings: learning rate and betas
learning_rate = 2e-4
optimizer_betas = (0.5, 0.999)

# how many epochs to train for
num_epochs = 150

# pick the device to run on: the first CUDA GPU when CUDA is available and at
# least one GPU was counted, otherwise the CPU
if torch.cuda.is_available() and ngpu > 0:
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")

In [44]:
class TimeseriesDataset(Dataset):
    """Serve a DataFrame as consecutive, non-overlapping windows of rows.

    The frame's values are converted to one tensor; item ``k`` is the slice of
    rows ``k*q`` up to (but excluding) ``(k+1)*q``. Rows left over after the
    last complete window are never served.

    Args:
        data_frame: pandas DataFrame whose ``.values`` can be turned into a
            tensor by ``torch.tensor``.
        q: number of consecutive rows per item; must be positive.

    Raises:
        ValueError: if ``q`` is not positive.
    """

    def __init__(self, data_frame, q=2):
        if q <= 0:
            raise ValueError(f"q must be a positive integer, got {q}")
        self.data = torch.tensor(data_frame.values)
        self.q = q

    def __len__(self):
        """Return the number of complete windows of ``q`` rows."""
        return self.data.shape[0] // self.q

    def __getitem__(self, index):
        """Return the window at position ``index``.

        Negative indices count from the end, like for a list.

        Raises:
            IndexError: if ``index`` is outside ``[-len(self), len(self))``.
                Raising here (instead of returning an empty slice) is what
                lets plain iteration such as ``for x in dataset`` terminate.
        """
        length = len(self)
        position = index + length if index < 0 else index
        if not 0 <= position < length:
            raise IndexError(
                f"index {index} is out of range for a dataset with {length} items"
            )
        start = position * self.q
        return self.data[start: start + self.q]

In [45]:
# Build the PyTorch dataset from the selected columns of the pandas DataFrame.

# TODO: Convert change(%) and Volume columns to numeric values
columns_used_in_training = [
    "close",
    "open",
    "high",
    "low",
    "CUMLOGRET_1",
    "RSI_14",
    "MACD_12_26_9",
    "SMA5",
]
train_dataset = TimeseriesDataset(bist_daily[columns_used_in_training])
# Wrap the dataset in a dataloader that shuffles the items and batches them.
dataloader = DataLoader(train_dataset, shuffle=True, batch_size=batch_size)

In [47]:
# Sanity check: iterate over the whole dataloader once and print every batch.
# `i` (the batch counter from enumerate) is not used inside the loop.
# NOTE(review): this prints all batches; showing only the first one would keep
# the cell output short. Confirm that no later cell relies on `i` or `d` first.
for i, d in enumerate(dataloader):
    print(d)

tensor([[[ 363.9000,  368.3000,  368.3000,  ...,   42.1400,   -1.3379,
           368.7940],
         [ 369.1700,  363.9000,  370.8600,  ...,   48.1462,   -1.2836,
           368.0920]],

        [[ 768.3700,  765.7000,  769.7500,  ...,   36.2612,   -2.0614,
           787.8960],
         [ 749.9800,  766.2500,  766.9000,  ...,   31.0310,   -5.6244,
           777.0100]],

        [[ 276.9800,  277.9000,  279.2900,  ...,   54.1106,    2.3398,
           275.8720],
         [ 277.4600,  276.9800,  278.1200,  ...,   54.6165,    2.2807,
           276.9120]],

        ...,

        [[ 996.7700,  979.9100,  996.7700,  ...,   73.9314,   16.3255,
           976.6300],
         [1001.4100,  997.3800, 1004.8800,  ...,   74.8189,   19.1745,
           986.0900]],

        [[1025.5600, 1012.2400, 1029.1700,  ...,   66.6837,   20.6942,
          1013.7320],
         [1034.5700, 1028.1000, 1034.5700,  ...,   68.6353,   21.9900,
          1021.0340]],

        [[ 983.3700,  944.7800,  983.3700,  ..