In [72]:
import logging

import numpy as np
import pandas as pd
import tensorflow as tf
import seaborn as sns
import talib
from binance.enums import HistoricalKlinesType
from matplotlib import pyplot as plt
from sklearn import metrics, preprocessing
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

from main_funcs import get_and_update_data

In [73]:
# configs
country=None
joft = "ETHUSDT"
interval = "4h"

# remove small candle 
zero_remover_pct = 0.002

# add history of open_close_pct 
open_close_pct_history_count = 20

# add history of max_low_high_pct 
max_low_high_pct_history_count = -1

# calculate rsi with different lengths 
rsi_ranges = range(14, 15)

# add history of rsi
rsi_history_count = -1

# add moving average of rsi
rsi_ma_lengths = [3,7,9,10,14,20,50,100]
rsi_ma_ranges = []


In [74]:
# validation config

# valid rsi_ma_ranges
for r in rsi_ma_ranges:
    if not r in rsi_ranges:
        raise ValueError("rsi_ma_ranges items is not in rsi_ranges")
    

In [75]:
# get data from database
df_source = get_and_update_data(joft, interval, HistoricalKlinesType.SPOT, country=country)
df_source = df_source[["open", "high", "low", "close", "volume"]]
print(df_source.shape)
df_source.tail(1)



(13664, 5)


Unnamed: 0_level_0,open,high,low,close,volume
open_time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2023-11-14 03:30:00,2053.65,2058.47,2031.55,2055.54,61237.6987


In [76]:
# calculate parameters
df_cal = df_source.copy()

# change from open to other columns
df_cal['open_close'] = df_cal.close - df_cal.open
df_cal['open_low'] = df_cal.low - df_cal.open
df_cal['open_high'] = df_cal.high - df_cal.open

# change percent from open to other columns
df_cal['open_close_pct'] = df_cal.open_close / df_cal.open
df_cal['open_low_pct'] = df_cal.open_low / df_cal.open
df_cal['open_high_pct'] = df_cal.open_high / df_cal.open

# price direction
# df_target['open_close_direction'] = np.sign(df_target["open_close"])
# df_target['open_open_high_direction'] = np.sign(df_target["open_low"]).astype(int)
# df_target['open_high_direction'] = np.sign(df_target["open_high"]).astype(int)

# max abs low and high
df_cal['max_low_high_pct'] = np.where(
    df_cal['open_low_pct'].abs() > df_cal['open_high_pct'].abs(),
    df_cal['open_low_pct'],
    df_cal['open_high_pct']
)

# open_close_pct history
for h in range(0, open_close_pct_history_count + 1):
    df_cal[f"open_close_pct__{h}"] = df_cal.open_close_pct.shift(h)

# max_low_high_pct history
for h in range(0, max_low_high_pct_history_count + 1):
    df_cal[f"max_low_high_pct__{h}"] = df_cal.max_low_high_pct.shift(h)

# calculate multi rsi
for rsi in rsi_ranges:
    for h in range(0, rsi_history_count + 1):
        df_cal[f"rsi_{rsi}__{h}"] = talib.RSI(df_cal.close.shift(h), timeperiod=rsi)

# calculate rsi sma 
for ma_length in rsi_ma_lengths:
    for rsi in rsi_ma_ranges:
        df_cal[f"rsi_{rsi}_ma_{ma_length}"] = talib.SMA(df_cal[f"rsi_{rsi}__{0}"], timeperiod=ma_length)
df_cal


Unnamed: 0_level_0,open,high,low,close,volume,open_close,open_low,open_high,open_close_pct,open_low_pct,...,open_close_pct__11,open_close_pct__12,open_close_pct__13,open_close_pct__14,open_close_pct__15,open_close_pct__16,open_close_pct__17,open_close_pct__18,open_close_pct__19,open_close_pct__20
open_time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2017-08-17 07:30:00,301.13,307.96,298.00,307.96,1561.95305,6.83,-3.13,6.83,0.022681,-0.010394,...,,,,,,,,,,
2017-08-17 11:30:00,307.95,312.00,307.00,308.95,1177.71088,1.00,-0.95,4.05,0.003247,-0.003085,...,,,,,,,,,,
2017-08-17 15:30:00,308.95,310.51,303.56,307.06,1882.05267,-1.89,-5.39,1.56,-0.006117,-0.017446,...,,,,,,,,,,
2017-08-17 19:30:00,307.74,312.18,298.21,301.60,1208.05192,-6.14,-9.53,4.44,-0.019952,-0.030968,...,,,,,,,,,,
2017-08-17 23:30:00,301.60,310.85,299.01,302.00,1200.94182,0.40,-2.59,9.25,0.001326,-0.008588,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2023-11-13 11:30:00,2048.15,2061.42,2038.80,2046.73,44731.03360,-1.42,-9.35,13.27,-0.000693,-0.004565,...,0.005098,0.001251,0.004124,-0.015031,-0.004919,-0.000938,-0.003405,-0.002075,-0.009217,-0.000160
2023-11-13 15:30:00,2046.73,2107.42,2039.16,2105.24,143108.44630,58.51,-7.57,60.69,0.028587,-0.003699,...,0.003748,0.005098,0.001251,0.004124,-0.015031,-0.004919,-0.000938,-0.003405,-0.002075,-0.009217
2023-11-13 19:30:00,2105.23,2118.00,2065.35,2100.00,93610.46060,-5.23,-39.88,12.77,-0.002484,-0.018943,...,-0.010878,0.003748,0.005098,0.001251,0.004124,-0.015031,-0.004919,-0.000938,-0.003405,-0.002075
2023-11-13 23:30:00,2100.01,2102.72,2043.47,2053.65,82395.16270,-46.36,-56.54,2.71,-0.022076,-0.026924,...,-0.000847,-0.010878,0.003748,0.005098,0.001251,0.004124,-0.015031,-0.004919,-0.000938,-0.003405


In [77]:
# create calculate target
df_target = df_cal.copy()
mabna = 'open_close_pct'

# calculate target
# df_target["target"] = df_target[mabna]
df_target["target"] = df_target[mabna]


# add log
# df_target["target"] = np.log(df_target["target"])

# shift target the real place
df_target["target"] = df_target["target"].shift(-1)
df_target

Unnamed: 0_level_0,open,high,low,close,volume,open_close,open_low,open_high,open_close_pct,open_low_pct,...,open_close_pct__12,open_close_pct__13,open_close_pct__14,open_close_pct__15,open_close_pct__16,open_close_pct__17,open_close_pct__18,open_close_pct__19,open_close_pct__20,target
open_time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2017-08-17 07:30:00,301.13,307.96,298.00,307.96,1561.95305,6.83,-3.13,6.83,0.022681,-0.010394,...,,,,,,,,,,0.003247
2017-08-17 11:30:00,307.95,312.00,307.00,308.95,1177.71088,1.00,-0.95,4.05,0.003247,-0.003085,...,,,,,,,,,,-0.006117
2017-08-17 15:30:00,308.95,310.51,303.56,307.06,1882.05267,-1.89,-5.39,1.56,-0.006117,-0.017446,...,,,,,,,,,,-0.019952
2017-08-17 19:30:00,307.74,312.18,298.21,301.60,1208.05192,-6.14,-9.53,4.44,-0.019952,-0.030968,...,,,,,,,,,,0.001326
2017-08-17 23:30:00,301.60,310.85,299.01,302.00,1200.94182,0.40,-2.59,9.25,0.001326,-0.008588,...,,,,,,,,,,-0.001987
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2023-11-13 11:30:00,2048.15,2061.42,2038.80,2046.73,44731.03360,-1.42,-9.35,13.27,-0.000693,-0.004565,...,0.001251,0.004124,-0.015031,-0.004919,-0.000938,-0.003405,-0.002075,-0.009217,-0.000160,0.028587
2023-11-13 15:30:00,2046.73,2107.42,2039.16,2105.24,143108.44630,58.51,-7.57,60.69,0.028587,-0.003699,...,0.005098,0.001251,0.004124,-0.015031,-0.004919,-0.000938,-0.003405,-0.002075,-0.009217,-0.002484
2023-11-13 19:30:00,2105.23,2118.00,2065.35,2100.00,93610.46060,-5.23,-39.88,12.77,-0.002484,-0.018943,...,0.003748,0.005098,0.001251,0.004124,-0.015031,-0.004919,-0.000938,-0.003405,-0.002075,-0.022076
2023-11-13 23:30:00,2100.01,2102.72,2043.47,2053.65,82395.16270,-46.36,-56.54,2.71,-0.022076,-0.026924,...,-0.010878,0.003748,0.005098,0.001251,0.004124,-0.015031,-0.004919,-0.000938,-0.003405,0.000920


In [78]:
# clean data
df = df_target.copy()
df.dropna(inplace=True)
df.drop(columns=['open', 'high', 'low', 'close', 'volume', 'open_close', 'open_low', 'open_high',
                 'open_close_pct', 'open_low_pct', 'open_high_pct', 'max_low_high_pct'], inplace=True)
df

Unnamed: 0_level_0,open_close_pct__0,open_close_pct__1,open_close_pct__2,open_close_pct__3,open_close_pct__4,open_close_pct__5,open_close_pct__6,open_close_pct__7,open_close_pct__8,open_close_pct__9,...,open_close_pct__12,open_close_pct__13,open_close_pct__14,open_close_pct__15,open_close_pct__16,open_close_pct__17,open_close_pct__18,open_close_pct__19,open_close_pct__20,target
open_time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2017-08-20 15:30:00,0.000339,0.006757,0.014272,0.010366,0.001067,0.016179,0.025765,-0.026124,-0.015372,-0.000307,...,-0.029177,0.005029,0.016058,-0.001987,0.001326,-0.019952,-0.006117,0.003247,0.022681,0.009421
2017-08-20 19:30:00,0.009421,0.000339,0.006757,0.014272,0.010366,0.001067,0.016179,0.025765,-0.026124,-0.015372,...,-0.016633,-0.029177,0.005029,0.016058,-0.001987,0.001326,-0.019952,-0.006117,0.003247,0.004129
2017-08-20 23:30:00,0.004129,0.009421,0.000339,0.006757,0.014272,0.010366,0.001067,0.016179,0.025765,-0.026124,...,-0.005077,-0.016633,-0.029177,0.005029,0.016058,-0.001987,0.001326,-0.019952,-0.006117,0.002508
2017-08-21 03:30:00,0.002508,0.004129,0.009421,0.000339,0.006757,0.014272,0.010366,0.001067,0.016179,0.025765,...,-0.000307,-0.005077,-0.016633,-0.029177,0.005029,0.016058,-0.001987,0.001326,-0.019952,0.061561
2017-08-21 07:30:00,0.061561,0.002508,0.004129,0.009421,0.000339,0.006757,0.014272,0.010366,0.001067,0.016179,...,-0.015372,-0.000307,-0.005077,-0.016633,-0.029177,0.005029,0.016058,-0.001987,0.001326,0.044255
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2023-11-13 07:30:00,0.004118,-0.002421,-0.002162,-0.003889,0.004689,-0.004962,0.003086,-0.000847,-0.010878,0.003748,...,0.004124,-0.015031,-0.004919,-0.000938,-0.003405,-0.002075,-0.009217,-0.000160,0.050913,-0.000693
2023-11-13 11:30:00,-0.000693,0.004118,-0.002421,-0.002162,-0.003889,0.004689,-0.004962,0.003086,-0.000847,-0.010878,...,0.001251,0.004124,-0.015031,-0.004919,-0.000938,-0.003405,-0.002075,-0.009217,-0.000160,0.028587
2023-11-13 15:30:00,0.028587,-0.000693,0.004118,-0.002421,-0.002162,-0.003889,0.004689,-0.004962,0.003086,-0.000847,...,0.005098,0.001251,0.004124,-0.015031,-0.004919,-0.000938,-0.003405,-0.002075,-0.009217,-0.002484
2023-11-13 19:30:00,-0.002484,0.028587,-0.000693,0.004118,-0.002421,-0.002162,-0.003889,0.004689,-0.004962,0.003086,...,0.003748,0.005098,0.001251,0.004124,-0.015031,-0.004919,-0.000938,-0.003405,-0.002075,-0.022076


In [79]:
# create x and y
x = df.iloc[:, 0:-1]
y = df.iloc[:, -1]

In [80]:
# clean x
from sklearn.preprocessing import MinMaxScaler
# mms = MinMaxScaler(feature_range=(0,1))
# x = mms.fit_transform(x)

In [81]:
# clean y
# y = pd.get_dummies(y)

In [82]:
# create test and train
from sklearn.model_selection import train_test_split
x_train, x_test, y_train, y_test = train_test_split(x, y, random_state=1398)
print(x_test.shape)
print(y_test.shape)

(3411, 21)
(3411,)


In [83]:
# create model
model = tf.keras.models.Sequential([
    tf.keras.layers.LSTM(units=50, return_sequences=True,input_shape=(x_test.shape[1],1)),
    tf.keras.layers.Dropout(0.2),

    tf.keras.layers.LSTM(units=50, return_sequences=True),
    tf.keras.layers.Dropout(0.2),

    tf.keras.layers.LSTM(units=50, return_sequences=True),
    tf.keras.layers.Dropout(0.2),

    tf.keras.layers.LSTM(units=50, ),
    tf.keras.layers.Dropout(0.2),

    tf.keras.layers.Dense(units=1),
])
model.compile(optimizer=tf.keras.optimizers.Adam(0.001), loss=tf.keras.losses.CategoricalCrossentropy(), metrics=[tf.keras.metrics.CategoricalAccuracy()])
model.fit(x_train, y_train, epochs=10, validation_data=(x_test, y_test), batch_size=200)


Epoch 1/10


  return dispatch_target(*args, **kwargs)




  return dispatch_target(*args, **kwargs)


Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.src.callbacks.History at 0x26ccd8d5290>

In [84]:
y_pred = model.predict(x_test)
y_pred



array([[-4.9817850e-06],
       [ 1.8647146e-05],
       [-3.0447522e-05],
       ...,
       [ 2.6493694e-04],
       [ 2.7052154e-05],
       [ 9.1659873e-05]], dtype=float32)

In [85]:
# ??????????????????????????

In [86]:
df_last = df_target.copy()
df_last.drop(columns=['open', 'high', 'low', 'close', 'volume', 'open_close', 'open_low', 'open_high',
                 'open_close_pct', 'open_low_pct', 'open_high_pct', 'max_low_high_pct'], inplace=True)
df_last=df_last.iloc[-1:,:]
df_last

Unnamed: 0_level_0,open_close_pct__0,open_close_pct__1,open_close_pct__2,open_close_pct__3,open_close_pct__4,open_close_pct__5,open_close_pct__6,open_close_pct__7,open_close_pct__8,open_close_pct__9,...,open_close_pct__12,open_close_pct__13,open_close_pct__14,open_close_pct__15,open_close_pct__16,open_close_pct__17,open_close_pct__18,open_close_pct__19,open_close_pct__20,target
open_time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2023-11-14 03:30:00,0.00092,-0.022076,-0.002484,0.028587,-0.000693,0.004118,-0.002421,-0.002162,-0.003889,0.004689,...,-0.000847,-0.010878,0.003748,0.005098,0.001251,0.004124,-0.015031,-0.004919,-0.000938,


In [89]:
y_last_pred = model.predict(df_last.iloc[:,0:-1])
y_last_pred



array([[-1.2127607e-05]], dtype=float32)

In [88]:
y_last_pred