In [1]:
!pip install streamlit pyngrok pyspark tensorflow pycoingecko pandas numpy scikit-learn plotly

!ngrok authtoken 2oNBajSooRk2Lkdz6OAOIg5ze8P_3yNGjJemssBoYazXjxRQf

Collecting streamlit
  Downloading streamlit-1.40.0-py2.py3-none-any.whl.metadata (8.5 kB)
Collecting pyngrok
  Downloading pyngrok-7.2.1-py3-none-any.whl.metadata (8.3 kB)
Collecting pycoingecko
  Downloading pycoingecko-3.1.0-py3-none-any.whl.metadata (12 kB)
Collecting pydeck<1,>=0.8.0b4 (from streamlit)
  Downloading pydeck-0.9.1-py2.py3-none-any.whl.metadata (4.1 kB)
Collecting watchdog<6,>=2.1.5 (from streamlit)
  Downloading watchdog-5.0.3-py3-none-manylinux2014_x86_64.whl.metadata (41 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m41.9/41.9 kB[0m [31m1.0 MB/s[0m eta [36m0:00:00[0m
Downloading streamlit-1.40.0-py2.py3-none-any.whl (8.6 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m8.6/8.6 MB[0m [31m32.9 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading pyngrok-7.2.1-py3-none-any.whl (22 kB)
Downloading pycoingecko-3.1.0-py3-none-any.whl (8.8 kB)
Downloading pydeck-0.9.1-py2.py3-none-any.whl (6.9 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━

In [2]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


#Export an url

In [3]:
%%writefile Crypto.py

import streamlit as st
import pandas as pd
import numpy as np
import time
import datetime
import plotly.graph_objects as go
from pyspark.sql import SparkSession
from pyspark.sql.types import StructType, StructField, LongType, DoubleType, TimestampType, FloatType
from pyspark.sql.functions import col, to_timestamp, asc
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from tensorflow.keras.models import load_model, Sequential
from tensorflow.keras.layers import Dense, LSTM, GRU, SimpleRNN, Conv1D, Dropout, Flatten, BatchNormalization
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.optimizers import Adam
from keras.regularizers import l2
from xgboost import XGBRegressor
from pycoingecko import CoinGeckoAPI
from google.colab import drive


cg = CoinGeckoAPI()
spark = SparkSession.builder.appName("CryptoData").getOrCreate()

coins_to_fetch = {
    'Bitcoin': 'bitcoin',
    'Coinbase Wrapped BTC': 'coinbase-wrapped-btc',
    'Wrapped Bitcoin': 'wrapped-bitcoin',
    'Solv Protocol SolvBTC': 'solv-btc',
    'Wrapped stETH': 'wrapped-steth',
    'Rocket Pool ETH': 'rocket-pool-eth',
    'Wrapped eETH': 'wrapped-eeth',
    'Mantle Staked Ether': 'mantle-staked-ether',
    'Renzo Restaked ETH': 'renzo-restaked-eth',
    'Ethereum': 'ethereum'
}

def fetch_data(coin_id, vs_currency="usd", hours_to_fetch=1439, days_per_request=30):
    chunks_needed = hours_to_fetch // (days_per_request * 24) + 1
    end_date = datetime.datetime.now()
    all_data_rdd = spark.sparkContext.emptyRDD()

    for _ in range(chunks_needed):
        start_date = end_date - datetime.timedelta(days=days_per_request)

        data = cg.get_coin_market_chart_range_by_id(
            id=coin_id,
            vs_currency=vs_currency,
            from_timestamp=int(start_date.timestamp()),
            to_timestamp=int(end_date.timestamp())
        )

        current_data = [(price_data[0], price_data[1], volume_data[1])
                        for price_data, volume_data in zip(data['prices'], data['total_volumes'])]

        current_rdd = spark.sparkContext.parallelize(current_data)
        all_data_rdd = all_data_rdd.union(current_rdd)
        end_date = start_date
        time.sleep(0.5)

    all_data_rdd = all_data_rdd.take(1439)
    all_data_rdd = spark.sparkContext.parallelize(all_data_rdd)

    def convert_timestamp(row):
        timestamp_ms = row[0]
        dt_object = datetime.datetime.fromtimestamp(timestamp_ms / 1000)
        return (dt_object, row[1], row[2])

    converted_rdd = all_data_rdd.map(convert_timestamp)

    new_schema = StructType([
        StructField("timestamp", TimestampType(), True),
        StructField("price", DoubleType(), True),
        StructField("volume", FloatType(), True)
    ])

    df_spark = spark.createDataFrame(converted_rdd, schema=new_schema)
    df_spark = df_spark.orderBy("timestamp")
    df = df_spark.toPandas()
    df['timestamp'] = pd.to_datetime(df['timestamp'])
    df.set_index("timestamp", inplace=True)
    return df

def calculate_macd(df, short_window=12, long_window=26, signal_window=9):
    df['ema_short'] = df['price'].ewm(span=short_window, adjust=False).mean()
    df['ema_long'] = df['price'].ewm(span=long_window, adjust=False).mean()
    df['macd'] = df['ema_short'] - df['ema_long']
    df['signal'] = df['macd'].ewm(span=signal_window, adjust=False).mean()
    df['change'] = df['macd'] - df['signal']
    return df

def add_indicators(df, window=20):
    df['moving_avg'] = df['price'].rolling(window=window).mean()
    df['std_dev'] = df['price'].rolling(window=window).std()
    df['upper_band'] = df['moving_avg'] + (df['price'].rolling(window=window).std() * 2)
    df['lower_band'] = df['moving_avg'] - (df['price'].rolling(window=window).std() * 2)
    df['sma20'] = df['price'].rolling(window=window).mean()
    df['sma50'] = df['price'].rolling(window=50).mean()
    return df
def calculate_signals(df_coin):
    short_window = int(0.025 * len(df_coin))
    long_window = int(0.05 * len(df_coin))
    signals = pd.DataFrame(index=df_coin.index)
    signals['signal'] = 0.0
    signals['short_ma'] = df_coin['price'].rolling(window=short_window, min_periods=1, center=False).mean()
    signals['long_ma'] = df_coin['price'].rolling(window=long_window, min_periods=1, center=False).mean()
    signals['signal'][short_window:] = np.where(signals['short_ma'][short_window:] > signals['long_ma'][short_window:], 1.0, 0.0)
    signals['positions'] = signals['signal'].diff().fillna(0)
    return signals

def buy_coin(real_movement, signal, df, initial_money=40000, max_buy=1, max_sell=1):
    starting_money = initial_money
    states_sell = []
    states_buy = []
    current_inventory = 0

    def buy(i, initial_money, current_inventory):
        shares = initial_money // real_movement[i]
        if shares < 1:
            print(
                'day %d: total balances %f, not enough money to buy a unit price %f'
                % (i, initial_money, real_movement[i])
            )
        else:
            if shares > max_buy:
                buy_units = max_buy
            else:
                buy_units = shares
            initial_money -= buy_units * real_movement[i]
            current_inventory += buy_units
            print(
                'day %d: buy %d units at price %f, total balance %f'
                % (i, buy_units, buy_units * real_movement[i], initial_money)
            )
            states_buy.append(0)
        return initial_money, current_inventory

    for i in range(real_movement.shape[0] - int(0.025 * len(df))):
        state = signal[i]
        if state == 1:
            initial_money, current_inventory = buy(i, initial_money, current_inventory)
            states_buy.append(i)
        elif state == -1:
            if current_inventory == 0:
                print('day %d: cannot sell anything, inventory 0' % (i))
            else:
                if current_inventory > max_sell:
                    sell_units = max_sell
                else:
                    sell_units = current_inventory
                current_inventory -= sell_units
                total_sell = sell_units * real_movement[i]
                initial_money += total_sell
                try:
                    invest = (
                        (real_movement[i] - real_movement[states_buy[-1]])
                        / real_movement[states_buy[-1]]
                    ) * 100
                except:
                    invest = 0
                print(
                    'day %d, sell %d units at price %f, investment %f %%, total balance %f,'
                    % (i, sell_units, total_sell, invest, initial_money)
                )
            states_sell.append(i)
    invest = ((initial_money - starting_money) / starting_money) * 100
    total_gains = initial_money - starting_money
    return states_buy, states_sell, total_gains, invest
def plot_signals(df_coin, Signals, states_buy, states_sell, coin_selection):
    close = df_coin['price']
    fig2 = go.Figure()

    fig2.add_trace(go.Scatter(
        x=df_coin.index,
        y=close,
        mode='lines',
        line=dict(color='steelblue', width=1),
        name='Price'
    ))

    fig2.add_trace(go.Scatter(
        x=df_coin.index[states_buy],
        y=close[states_buy],
        mode='markers',
        marker=dict(symbol='triangle-up', size=10, color='green'),
        name='Buying Signal'
    ))

    fig2.add_trace(go.Scatter(
        x=df_coin.index[states_sell],
        y=close[states_sell],
        mode='markers',
        marker=dict(symbol='triangle-down', size=10, color='red'),
        name='Selling Signal'
    ))

    fig2.update_layout(
        title=f'BUY / SELL INDICATORS for {selected_coin}',
        title_x=0.5,
        xaxis_title='Year',
        yaxis_title='Price',
        width=1200,
        height=500,
        legend=dict(
            x=0.01,
            y=0.99,
            bgcolor='rgba(255, 255, 255, 0.5)',
            bordercolor='rgba(0, 0, 0, 0.1)',
            borderwidth=1
        )
    )
    return fig2
def lstm(df_coin, coin_id):
    split_date = int(len(df_coin) * 0.8)
    train = df_coin.iloc[:split_date]
    test = df_coin.iloc[split_date:]
    train_processed = df_coin.iloc[:, 0:1].values
    train_processed = train_processed[0:len(train):1]
    test_processed = df_coin.iloc[:, 0:1].values
    test_processed = test_processed[len(train)-1:len(df_coin)-1:1]

    scaler = MinMaxScaler(feature_range=(-1, 1))
    train_sc = scaler.fit_transform(train_processed)
    test_sc = scaler.transform(test_processed)
    X_train = train_sc[:-1]
    y_train = train_sc[1:]
    X_test = test_sc[:-1]
    y_test = test_sc[1:]
    X_train_model = X_train.reshape(X_train.shape[0], 1, 1)
    X_test_model = X_test.reshape(X_test.shape[0], 1, 1)
    model_path = f'/content/drive/Shareddrives/Bigdata/{coin_id}_model.h5'
    lstm_model = load_model(model_path)
    y_pred_test_lstm = lstm_model.predict(X_test_model)
    y_train_pred_lstm = lstm_model.predict(X_train_model)
    lstm_y_pred_test = lstm_model.predict(X_test_model)
    lstm_y_pred_test_original = scaler.inverse_transform(lstm_y_pred_test)
    lstm_mae = mean_absolute_error(y_test, lstm_y_pred_test)
    lstm_rmse = np.sqrt(mean_squared_error(y_test, lstm_y_pred_test))
    last_price = cg.get_price(ids=coin_id, vs_currencies='usd')[coin_id]['usd']
    last_price_scaled = scaler.transform(np.array([[last_price]]))
    X_last = last_price_scaled.reshape(1, 1, 1)
    next_price_scaled = lstm_model.predict(X_last)
    next_price = scaler.inverse_transform(next_price_scaled)[0][0]

    symbol = 'triangle-up' if next_price > last_price else 'triangle-down'
    return y_test, test_processed, lstm_y_pred_test_original, next_price, symbol

st.title('Crypto Tracking')

selected_coin = st.selectbox('Select a coin:', list(coins_to_fetch.keys()))

indicator_options = {
    'Moving Average': 'moving_avg',
    'Upper Band': 'upper_band',
    'Lower Band': 'lower_band',
    'SMA 20': 'sma20',
    'SMA 50': 'sma50',
    'MACD': 'MACD',
    'Signal': 'Signal',
    'LSTM': 'LSTM'
}
selected_indicators = st.multiselect('Select indicators to display:', list(indicator_options.keys()))

if selected_coin:
    coin_id = coins_to_fetch[selected_coin]
    df = fetch_data(coin_id)
    df = add_indicators(df)

    if 'MACD' in selected_indicators:
        df = calculate_macd(df)

    fig = go.Figure()

    fig.add_trace(go.Bar(x=df.index, y=df['volume'] / 1e9, name='Volume', marker_color='silver', yaxis='y1'))
    fig.add_trace(go.Scatter(x=df.index, y=df['price'], mode='lines', name='Price', line=dict(width=1), yaxis='y2'))

    for indicator in selected_indicators:
        if indicator in ['MACD', 'Signal', 'LSTM']:
            continue
        fig.add_trace(go.Scatter(
            x=df.index, y=df[indicator_options[indicator]], mode='lines', name=indicator, line=dict(width=1), yaxis='y2'))

    fig.update_layout(
        title=f'{selected_coin} Price and Selected Indicators', title_x=0.5,
        xaxis_title="Date", yaxis=dict(title="Volume"),
        yaxis2=dict(title="Price", overlaying='y', side='right'),
        legend=dict(x=1, y=1, bgcolor='rgba(255, 255, 255, 0.5)'),
        plot_bgcolor='rgba(0, 0, 0, 0)')
    st.plotly_chart(fig)

    if 'MACD' in selected_indicators:
        fig1 = go.Figure()
        fig1.add_trace(go.Scatter(x=df.index, y=df['macd'], mode='lines', name='MACD', line=dict(color='blue', width=1)))
        fig1.add_trace(go.Scatter(x=df.index, y=df['signal'], mode='lines', name='Signal Line', line=dict(color='orange', width=1)))
        df['positive'] = df['change'].where(df['change'] > 0, 0)
        df['negative'] = df['change'].where(df['change'] < 0, 0)
        fig1.add_trace(go.Bar(x=df.index, y=df['positive'], name='Positive', marker=dict(color='green')))
        fig1.add_trace(go.Bar(x=df.index, y=df['negative'], name='Negative', marker=dict(color='red')))
        fig1.update_layout(
            title='MACD',
            title_x=0.5,
            xaxis_title='Date',
            yaxis_title='Value',
            legend_title='Indicators',
            legend=dict(bgcolor='rgba(255, 255, 255, 0.5)'),
            plot_bgcolor='rgba(0, 0, 0, 0)'
        )
        st.plotly_chart(fig1)

    if 'Signal' in selected_indicators:
        signals = calculate_signals(df)
        states_buy, states_sell, total_gains, invest = buy_coin(df['price'].values, signals['positions'].values, df)
        fig2 = plot_signals(df, signals, states_buy, states_sell, selected_coin)
        st.plotly_chart(fig2)

    if 'LSTM' in selected_indicators:
        y_test, test_processed, lstm_y_pred_test_original, next_price, symbol = lstm(df, coin_id)
        fig3 = go.Figure()
        fig3.add_trace(go.Scatter(x=np.arange(len(test_processed)), y=test_processed.flatten(), mode='lines', name='Actual Price'))
        fig3.add_trace(go.Scatter(x=np.arange(len(lstm_y_pred_test_original)), y=lstm_y_pred_test_original.flatten(), mode='lines', name='Predicted Price'))
        fig3.add_trace(go.Scatter(x=[len(y_test)], y=[next_price], mode='markers', name='Next Price', marker_symbol=symbol, marker_size=10))
        st.plotly_chart(fig3)

else:
    st.warning("Please select a coin.")


Writing Crypto.py


In [4]:
from pyngrok import ngrok
import os

public_url = ngrok.connect(8501)
print("Public URL:", public_url)
os.system("streamlit run Crypto.py &")

Public URL: NgrokTunnel: "https://69c7-35-184-124-128.ngrok-free.app" -> "http://localhost:8501"


0

#Save the Best Model



In [5]:
import pandas as pd
import numpy as np
import time
import datetime
import plotly.graph_objects as go
from pyspark.sql import SparkSession
from pyspark.sql.types import StructType, StructField, LongType, DoubleType, TimestampType, FloatType
from pyspark.sql.functions import col, to_timestamp, asc
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from tensorflow.keras.models import load_model, Sequential
from tensorflow.keras.layers import Dense, LSTM, GRU, SimpleRNN, Conv1D, Dropout, Flatten, BatchNormalization
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.optimizers import Adam
from keras.regularizers import l2
from xgboost import XGBRegressor
from pycoingecko import CoinGeckoAPI
from google.colab import drive


In [6]:
spark = SparkSession.builder.appName("CryptoData").getOrCreate()
cg = CoinGeckoAPI()

In [7]:
cg = CoinGeckoAPI()

def list_all_coins():
    coins = cg.get_coins_markets(vs_currency='usd', order='market_cap_desc', per_page=100, page=1)
    coins.sort(key=lambda x: x['current_price'], reverse=True)

    print("Available Coins:")
    for idx, coin in enumerate(coins[:10], start=1):
      print(f"{idx}. {coin['name']} ({coin['id']})")
    return coins

def select_coin(coins):
    try:
        choice = int(input("\nSelect a coin by entering the corresponding number: ")) - 1
        selected_coin = coins[choice]
        print(f"\nYou selected: {selected_coin['name']} ({selected_coin['id']})\n")
        return selected_coin['id']
    except (IndexError, ValueError):
        print("Invalid choice. Please select a valid number.")
        return select_coin(coins)

In [8]:
coins = list_all_coins()
coin_id = select_coin(coins)

Available Coins:
1. Coinbase Wrapped BTC (coinbase-wrapped-btc)
2. Wrapped Bitcoin (wrapped-bitcoin)
3. Bitcoin (bitcoin)
4. Solv Protocol SolvBTC (solv-btc)
5. Wrapped stETH (wrapped-steth)
6. Rocket Pool ETH (rocket-pool-eth)
7. Wrapped eETH (wrapped-eeth)
8. Mantle Staked Ether (mantle-staked-ether)
9. Renzo Restaked ETH (renzo-restaked-eth)
10. Binance-Peg WETH (binance-peg-weth)

Select a coin by entering the corresponding number: 3

You selected: Bitcoin (bitcoin)



In [9]:
vs_currency = "usd"
hours_to_fetch = 1439
days_per_request = 30

chunks_needed = hours_to_fetch // (days_per_request * 24) + 1

end_date = datetime.datetime.now()

all_data_rdd = spark.sparkContext.emptyRDD()


for _ in range(chunks_needed):
    start_date = end_date - datetime.timedelta(days=days_per_request)

    data = cg.get_coin_market_chart_range_by_id(
        id=coin_id,
        vs_currency=vs_currency,
        from_timestamp=int(start_date.timestamp()),
        to_timestamp=int(end_date.timestamp())
    )

    current_data = []
    for price_data, volume_data in zip(data['prices'], data['total_volumes']):
        current_data.append((price_data[0], price_data[1], volume_data[1]))

    current_rdd = spark.sparkContext.parallelize(current_data)

    all_data_rdd = all_data_rdd.union(current_rdd)
    end_date = start_date
    time.sleep(0.5)

all_data_rdd = all_data_rdd.take(1439)
all_data_rdd = spark.sparkContext.parallelize(all_data_rdd)

def convert_timestamp(row):
    timestamp_ms = row[0]
    dt_object = datetime.datetime.fromtimestamp(timestamp_ms / 1000)
    return (dt_object, row[1], row[2])

converted_rdd = all_data_rdd.map(convert_timestamp)
new_schema = StructType([
    StructField("timestamp", TimestampType(), True),
    StructField("price", DoubleType(), True),
    StructField("volume", FloatType(), True)
])

df_spark = spark.createDataFrame(converted_rdd, schema=new_schema)
df_spark = df_spark.orderBy(asc("timestamp"))
df_spark.show()

+--------------------+------------------+-------------+
|           timestamp|             price|       volume|
+--------------------+------------------+-------------+
|2024-09-11 15:03:...|  55973.8859302244| 3.5871486E10|
|2024-09-11 16:13:...| 56918.38810970602| 3.7152985E10|
|2024-09-11 17:14:...| 57513.11497228514| 3.7123731E10|
|2024-09-11 18:19:...|  57749.3173909131| 3.9330771E10|
|2024-09-11 19:23:...| 57571.61409552569| 3.6477575E10|
|2024-09-11 20:24:...| 57583.58942979908| 3.4973499E10|
|2024-09-11 21:08:...| 57561.05343117227|  3.802599E10|
|2024-09-11 22:05:...| 57314.02410795248|  3.857587E10|
|2024-09-11 23:04:...| 57479.54836290061| 3.6059582E10|
|2024-09-12 00:25:...|57510.798830398475|3.30129633E10|
|2024-09-12 01:05:...| 57624.35189196091|3.36252334E10|
|2024-09-12 02:02:...| 57935.45646898966| 3.6688286E10|
|2024-09-12 04:09:...| 58218.67880572494|  3.746868E10|
|2024-09-12 05:13:...| 58120.23290678855| 3.6932231E10|
|2024-09-12 06:00:...|57938.797549689305|  3.758

In [10]:
df = df_spark.toPandas()
df['timestamp'] = pd.to_datetime(df['timestamp'])
df_chg= df.set_index(['timestamp'], drop=True)
df_chg.head()

Unnamed: 0_level_0,price,volume
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1
2024-09-11 15:03:42.377,55973.88593,35871490000.0
2024-09-11 16:13:30.253,56918.38811,37152990000.0
2024-09-11 17:14:57.475,57513.114972,37123730000.0
2024-09-11 18:19:14.598,57749.317391,39330770000.0
2024-09-11 19:23:47.078,57571.614096,36477580000.0


In [11]:
split_date = int(len(df) * 0.8)
df1 = df_chg['price']
train = df1.iloc[:split_date]
test = df1.iloc[split_date:]

In [12]:
train_processed = df_chg.iloc[:, 0:1].values
train_processed = train_processed[0:len(train):1]
test_processed = df_chg.iloc[:, 0:1].values
test_processed = test_processed[len(train)-1:len(df)-1:1]

scaler = MinMaxScaler(feature_range=(-1, 1))
train_sc = scaler.fit_transform(train_processed)
test_sc = scaler.transform(test_processed)

X_train = train_sc[:-1]
y_train = train_sc[1:]

X_test = test_sc[:-1]
y_test = test_sc[1:]

In [13]:
X_train_model = X_train.reshape(X_train.shape[0], 1, 1)
X_test_model = X_test.reshape(X_test.shape[0], 1, 1)

print('Train shape: ', X_train_model.shape)
print('Test shape: ', X_test_model.shape)

Train shape:  (1150, 1, 1)
Test shape:  (287, 1, 1)


In [14]:
lstm_model = Sequential()
lstm_model.add(LSTM(7, input_shape=(1, X_train_model.shape[1]), activation='relu', kernel_initializer='lecun_uniform', return_sequences=False))
lstm_model.add(Dense(1))
lstm_model.compile(loss='mean_squared_error', optimizer='adam')
early_stop = EarlyStopping(monitor='loss', patience=2, verbose=1)
history_lstm_model = lstm_model.fit(X_train_model, y_train, epochs=100, batch_size=1, verbose=1, shuffle=False, callbacks=[early_stop])

Epoch 1/100


  super().__init__(**kwargs)


[1m1150/1150[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - loss: 0.1379
Epoch 2/100
[1m1150/1150[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - loss: 0.0214
Epoch 3/100
[1m1150/1150[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - loss: 0.0066
Epoch 4/100
[1m1150/1150[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 3ms/step - loss: 0.0049
Epoch 5/100
[1m1150/1150[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 4ms/step - loss: 0.0040
Epoch 6/100
[1m1150/1150[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 4ms/step - loss: 0.0035
Epoch 7/100
[1m1150/1150[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 5ms/step - loss: 0.0031
Epoch 8/100
[1m1150/1150[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 5ms/step - loss: 0.0028
Epoch 9/100
[1m1150/1150[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 2ms/step - loss: 0.0026
Epoch 10/100
[1m1150/1150[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s

In [15]:
lstm_y_pred_test = lstm_model.predict(X_test_model)
lstm_mae = mean_absolute_error(y_test, lstm_y_pred_test)
lstm_rmse = np.sqrt(mean_squared_error(y_test, lstm_y_pred_test))

[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 22ms/step


In [16]:
gru_model = Sequential()

gru_model.add(GRU(units=40, return_sequences=True, input_shape=(X_train_model.shape[1], 1), kernel_regularizer=l2(0.01)))
gru_model.add(Dropout(0.2))
gru_model.add(BatchNormalization())

gru_model.add(GRU(units=40, return_sequences=True, kernel_regularizer=l2(0.01)))
gru_model.add(Dropout(0.2))
gru_model.add(BatchNormalization())

gru_model.add(GRU(units=40, kernel_regularizer=l2(0.01)))
gru_model.add(Dropout(0.2))
gru_model.add(BatchNormalization())

gru_model.add(Dense(units=40, activation='relu'))
gru_model.add(Dropout(0.2))

gru_model.add(Dense(units=1))

gru_model.compile(optimizer='adam', loss='mean_squared_error')
early_stopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)

gru_model.fit(
    X_train_model, y_train,
    epochs=100,
    batch_size=64,
    verbose=1,
    validation_data=(X_test_model, y_test),
    callbacks=[early_stopping]
)

  super().__init__(**kwargs)


Epoch 1/100
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 63ms/step - loss: 2.2250 - val_loss: 2.5002
Epoch 2/100
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 1.4260 - val_loss: 2.3797
Epoch 3/100
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step - loss: 1.2124 - val_loss: 2.2654
Epoch 4/100
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step - loss: 1.0962 - val_loss: 2.1796
Epoch 5/100
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step - loss: 1.0034 - val_loss: 2.1093
Epoch 6/100
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step - loss: 0.9132 - val_loss: 2.0470
Epoch 7/100
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step - loss: 0.8115 - val_loss: 1.9921
Epoch 8/100
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step - loss: 0.7421 - val_loss: 1.9574
Epoch 9/100
[1m18/18[0m [32m━━━━━━━━━

<keras.src.callbacks.history.History at 0x7ebedcff0a60>

In [17]:
y_pred_test_gru = gru_model.predict(X_test_model)
gru_mae = mean_absolute_error(y_test, y_pred_test_gru)
gru_rmse = np.sqrt(mean_squared_error(y_test, y_pred_test_gru))

[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 103ms/step


In [18]:
rnn_model = Sequential()
rnn_model.add(SimpleRNN(units=20, activation='tanh',
kernel_regularizer=l2(0.01),
return_sequences=True,
input_shape=(X_train_model.shape[1], X_train_model.shape[2])))
rnn_model.add(Dropout(0.3))
rnn_model.add(SimpleRNN(units=20, activation='tanh'))
rnn_model.add(Dropout(0.3))
rnn_model.add(Dense(units=1))

rnn_model.compile(optimizer='adam', loss='mean_squared_error')
early_stopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)

rnn_model.fit(X_train_model, y_train, epochs=100, batch_size=64, verbose=1,
               validation_split=0.2, callbacks=[early_stopping])

Epoch 1/100


  super().__init__(**kwargs)


[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 32ms/step - loss: 0.0986 - val_loss: 0.0728
Epoch 2/100
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - loss: 0.0523 - val_loss: 0.0201
Epoch 3/100
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - loss: 0.0409 - val_loss: 0.0178
Epoch 4/100
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - loss: 0.0404 - val_loss: 0.0199
Epoch 5/100
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - loss: 0.0359 - val_loss: 0.0196
Epoch 6/100
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - loss: 0.0336 - val_loss: 0.0172
Epoch 7/100
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - loss: 0.0297 - val_loss: 0.0156
Epoch 8/100
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - loss: 0.0302 - val_loss: 0.0162
Epoch 9/100
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37

<keras.src.callbacks.history.History at 0x7ebede079d80>

In [19]:
y_pred_test_rnn = rnn_model.predict(X_test_model)
rnn_mae = mean_absolute_error(y_test, y_pred_test_rnn)
rnn_rmse = np.sqrt(mean_squared_error(y_test, y_pred_test_rnn))

[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 39ms/step


In [20]:
tcn_model = Sequential()

tcn_model.add(Conv1D(filters=32, kernel_size=3, dilation_rate=1, activation='relu',
                     input_shape=(X_train_model.shape[1], 1), padding='causal', kernel_regularizer=l2(0.01)))
tcn_model.add(BatchNormalization())
tcn_model.add(Dropout(0.2))

tcn_model.add(Conv1D(filters=32, kernel_size=3, dilation_rate=2, activation='relu', padding='causal', kernel_regularizer=l2(0.01)))
tcn_model.add(BatchNormalization())
tcn_model.add(Dropout(0.2))

tcn_model.add(Conv1D(filters=32, kernel_size=3, dilation_rate=4, activation='relu', padding='causal', kernel_regularizer=l2(0.01)))
tcn_model.add(BatchNormalization())
tcn_model.add(Dropout(0.2))

tcn_model.add(Flatten())

tcn_model.add(Dense(1))

tcn_model.compile(optimizer=Adam(), loss='mean_squared_error')
early_stopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)

tcn_model.fit(X_train_model, y_train, epochs=100, batch_size=64, verbose=1,
               validation_split=0.2, callbacks=[early_stopping])

Epoch 1/100


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 28ms/step - loss: 2.8171 - val_loss: 0.8939
Epoch 2/100
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 1.6213 - val_loss: 0.8165
Epoch 3/100
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - loss: 1.2926 - val_loss: 0.7389
Epoch 4/100
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - loss: 1.1412 - val_loss: 0.6738
Epoch 5/100
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 0.8958 - val_loss: 0.6076
Epoch 6/100
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 1.0248 - val_loss: 0.5560
Epoch 7/100
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 0.7965 - val_loss: 0.5298
Epoch 8/100
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - loss: 0.7787 - val_loss: 0.5094
Epoch 9/100
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37

<keras.src.callbacks.history.History at 0x7ebedcb27310>

In [21]:
y_pred_test_tcn = tcn_model.predict(X_test_model)
tcn_mae = mean_absolute_error(y_test, y_pred_test_tcn)
tcn_rmse = np.sqrt(mean_squared_error(y_test, y_pred_test_tcn))

[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step


In [22]:
data_results = {
    'Model': ['LSTM', 'GRU', 'RNN', 'TCN'],
    'MAE': [lstm_mae, gru_mae, rnn_mae, tcn_mae],
    'RMSE': [lstm_rmse, gru_rmse, rnn_rmse, tcn_rmse],
    'R2 Score': [
        r2_score(y_test, lstm_y_pred_test),
        r2_score(y_test, y_pred_test_gru),
        r2_score(y_test, y_pred_test_rnn),
        r2_score(y_test, y_pred_test_tcn)
    ]
}

df_results = pd.DataFrame(data_results)

df_results

Unnamed: 0,Model,MAE,RMSE,R2 Score
0,LSTM,0.050518,0.069311,0.972144
1,GRU,0.287397,0.320885,0.402952
2,RNN,0.078704,0.097774,0.944569
3,TCN,0.25185,0.29006,0.512151


In [23]:
best_model_index = df_results['R2 Score'].idxmax()
best_model_name = df_results.loc[best_model_index]
print(f"The best model is: {best_model_name['Model']} with MAE: {best_model_name['MAE']}, RMSE: {best_model_name['RMSE']} and R2 Score: {best_model_name['R2 Score']}")

The best model is: LSTM with MAE: 0.05051784971803387, RMSE: 0.06931104294745655 and R2 Score: 0.9721442387193628


In [24]:
if best_model_name['Model'] == 'GRU':
    best_model = gru_model
elif best_model_name['Model'] == 'LSTM':
    best_model = lstm_model
elif best_model_name['Model'] == 'RNN':
    best_model = rnn_model
elif best_model_name['Model'] == 'TCN':
    best_model = tcn_model
else:
    print("Error: Best model not found.")
    best_model = None

In [25]:
best_model

<Sequential name=sequential, built=True>

In [26]:
!mkdir -p /content/drive/Shareddrives/Bigdata/
best_model.save(f'/content/drive/Shareddrives/Bigdata/{coin_id}_model.h5')



In [27]:
model_path = f'/content/drive/Shareddrives/Bigdata/{coin_id}_model.h5'
best_model = load_model(model_path)



In [28]:
y_pred_test = best_model.predict(X_test_model)
y_pred_test_original = scaler.inverse_transform(y_pred_test)
lstm_y_pred_test_original = scaler.inverse_transform(y_pred_test)

[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 24ms/step


PRICE CHART

In [29]:
def create_initial_figure(df, title='PRICE CHART'):
    window = 20
    df['moving_avg'] = df['price'].rolling(window=window).mean()
    df['std_dev'] = df['price'].rolling(window=window).std()
    df['upper_band'] = df['moving_avg'] + (df['std_dev'] * 2)
    df['lower_band'] = df['moving_avg'] - (df['std_dev'] * 2)
    df['sma20'] = df['price'].rolling(window=20).mean()
    df['sma50'] = df['price'].rolling(window=50).mean()

    fig = go.Figure()
    fig.add_trace(go.Bar(
        x=df['timestamp'],
        y=df['volume'],
        name='Volume',
        marker_color='grey',
        yaxis='y1'
    ))


    fig.add_trace(go.Scatter(
        x=df['timestamp'],
        y=df['price'],
        mode='lines',
        name='Price',
        line=dict(color='blue', width=1),
        yaxis='y2'
    ))
    fig.add_trace(go.Scatter(
        x=df['timestamp'],
        y=df['upper_band'],
        mode='lines',
        name='Upper Band',
        line=dict(color='green', dash='dash', width=1),
        yaxis='y2'
    ))
    fig.add_trace(go.Scatter(
        x=df['timestamp'],
        y=df['lower_band'],
        mode='lines',
        name='Lower Band',
        line=dict(color='red', dash='dash', width=1),
        yaxis='y2'
    ))


    fig.add_trace(go.Scatter(
        x=df['timestamp'],
        y=df['moving_avg'],
        mode='lines',
        name='Moving Average',
        line=dict(color='orange', width=1),
        yaxis='y2'
    ))
    fig.add_trace(go.Scatter(
        x=df['timestamp'],
        y=df['sma20'],
        mode='lines',
        name='SMA 20',
        line=dict(color='purple', width=1),
        yaxis='y2'
    ))
    fig.add_trace(go.Scatter(
        x=df['timestamp'],
        y=df['sma50'],
        mode='lines',
        name='SMA 50',
        line=dict(color='brown', width=1),
        yaxis='y2'
    ))


    fig.update_layout(
        title="CHART PRICE",
        xaxis_title="Date",
        yaxis=dict(
            title="Volume"),
        yaxis2=dict(
            title="Price",
            overlaying='y',
            side='right',
        ),
        legend=dict(
            x=1.05, y=1,
            bgcolor='rgba(255, 255, 255, 0.5)',
        ),
        plot_bgcolor='rgba(0, 0, 0, 0)',
    )
    return fig

UPDATE CHART PRICE COMBINE INDICATORS

In [30]:
def update_chart_with_new_data(df, new_data, fig, window_bollinger=20, window_sma20=20, window_sma50=50):
    df = pd.concat([df, new_data]).reset_index(drop=True)

    fig.data[0].x = df['timestamp']
    fig.data[0].y = df['volume'] / 1e9
    fig.data[0].marker.color = 'rgba(255, 0, 0, 0.6)'
    fig.data[1].x = df['timestamp']
    fig.data[1].y = df['price']

    if len(df) >= window_bollinger:
        df['moving_avg'] = df['price'].rolling(window=window_bollinger).mean()
        df['std_dev'] = df['price'].rolling(window=window_bollinger).std()
        df['upper_band'] = df['moving_avg'] + (df['std_dev'] * 2)
        df['lower_band'] = df['moving_avg'] - (df['std_dev'] * 2)

        fig.data[2].x = df['timestamp']
        fig.data[2].y = df['upper_band']
        fig.data[3].x = df['timestamp']
        fig.data[3].y = df['lower_band']
        fig.data[4].x = df['timestamp']
        fig.data[4].y = df['moving_avg']

    if len(df) >= window_sma20:
        df['sma20'] = df['price'].rolling(window=window_sma20).mean()
        fig.data[5].x = df['timestamp']
        fig.data[5].y = df['sma20']

    if len(df) >= window_sma50:
        df['sma50'] = df['price'].rolling(window=window_sma50).mean()
        fig.data[6].x = df['timestamp']
        fig.data[6].y = df['sma50']

MACD

In [31]:
def calculate_macd(df, short_window=12, long_window=26, signal_window=9):
    df['ema_short'] = df['price'].ewm(span=short_window, adjust=False).mean()
    df['ema_long'] = df['price'].ewm(span=long_window, adjust=False).mean()
    df['macd'] = df['ema_short'] - df['ema_long']
    df['signal'] = df['macd'].ewm(span=signal_window, adjust=False).mean()
    df['change'] = df['macd'] - df['signal']
    return df

In [32]:
def create_macd_figure(df, title='MACD'):
    df = calculate_macd(df, short_window=12, long_window=26, signal_window=9)

    fig1 = go.Figure()
    fig1.add_trace(go.Scatter(x=df['timestamp'], y=df['macd'], mode='lines', name='MACD', line=dict(color='blue', width=1)))
    fig1.add_trace(go.Scatter(x=df['timestamp'], y=df['signal'], mode='lines', name='Signal Line', line=dict(color='orange', width=1)))

    df['positive'] = df['change'].where(df['change'] > 0, 0)
    df['negative'] = df['change'].where(df['change'] < 0, 0)

    fig1.add_trace(go.Bar(x=df['timestamp'], y=df['positive'], name='Dương', marker=dict(color='green')))
    fig1.add_trace(go.Bar(x=df['timestamp'], y=df['negative'], name='Âm', marker=dict(color='red')))
    fig1.update_layout(
        title="MACD",
        xaxis_title='Date',
        yaxis_title='Value',
        legend_title='Indicators',
        legend=dict(bgcolor='rgba(255, 255, 255, 0.5)'),
        plot_bgcolor='rgba(0, 0, 0, 0)'
    )
    return fig1

UPDATE MACD

In [33]:
def update_macd_figure(df, new_data, fig1, short_window=12, long_window=26, signal_window=9):
    df = pd.concat([df, new_data]).reset_index(drop=True)
    if len(df) >= long_window:
        df['ema_short'] = df['price'].ewm(span=short_window, adjust=False).mean()
        df['ema_long'] = df['price'].ewm(span=long_window, adjust=False).mean()
        df['macd'] = df['ema_short'] - df['ema_long']
        df['signal'] = df['macd'].ewm(span=signal_window, adjust=False).mean()
        df['change'] = df['macd'] - df['signal']

    fig1.data[0].x = df['timestamp']
    fig1.data[0].y = df['macd']
    fig1.data[1].x = df['timestamp']
    fig1.data[1].y = df['signal']
    fig1.data[2].x = df['timestamp']
    fig1.data[2].y = df['change'].where(df['change'] > 0, 0)
    fig1.data[3].x = df['timestamp']
    fig1.data[3].y = df['change'].where(df['change'] < 0, 0)

BUY/SELL

In [34]:
df['timestamp'] = pd.to_datetime(df['timestamp'])

In [35]:
short_window = int(0.025 * len(df))
long_window = int(0.05 * len(df))
signals = pd.DataFrame(index=df.index)
signals['signal'] = 0.0
signals['short_ma'] = df['price'].rolling(window=short_window, min_periods=1, center=False).mean()
signals['long_ma'] = df['price'].rolling(window=long_window, min_periods=1, center=False).mean()
signals['signal'][short_window:] = np.where(signals['short_ma'][short_window:]
                                            > signals['long_ma'][short_window:], 1.0, 0.0)
signals['positions'] = signals['signal'].diff()

You are setting values through chained assignment. Currently this works in certain cases, but when using Copy-on-Write (which will become the default behaviour in pandas 3.0) this will never work to update the original DataFrame or Series, because the intermediate object on which we are setting values will behave as a copy.
A typical example is when you are setting values in a column of a DataFrame, like:

df["col"][row_indexer] = value

Use `df.loc[row_indexer, "col"] = values` instead, to perform the assignment in a single step and ensure this keeps updating the original `df`.

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

  signals['signal'][short_window:] = np.where(signals['short_ma'][short_window:]


CALCULATING BUY/SELL POINTS

In [36]:
def buy_coin(
    real_movement,
    signal,
    initial_money = 40000,
    max_buy = 1,
    max_sell = 1,
):
    starting_money = initial_money
    states_sell = []
    states_buy = []
    current_inventory = 0

    def buy(i, initial_money, current_inventory):
        shares = initial_money // real_movement[i]
        if shares < 1:
            print(
                'day %d: total balances %f, not enough money to buy a unit price %f'
                % (i, initial_money, real_movement[i])
            )
        else:
            if shares > max_buy:
                buy_units = max_buy
            else:
                buy_units = shares
            initial_money -= buy_units * real_movement[i]
            current_inventory += buy_units
            print(
                'day %d: buy %d units at price %f, total balance %f'
                % (i, buy_units, buy_units * real_movement[i], initial_money)
            )
            states_buy.append(0)
        return initial_money, current_inventory

    for i in range(real_movement.shape[0] - int(0.025 * len(df))):
        state = signal[i]
        if state == 1:
            initial_money, current_inventory = buy(
                i, initial_money, current_inventory
            )
            states_buy.append(i)
        elif state == -1:
            if current_inventory == 0:
                    print('day %d: cannot sell anything, inventory 0' % (i))
            else:
                if current_inventory > max_sell:
                    sell_units = max_sell
                else:
                    sell_units = current_inventory
                current_inventory -= sell_units
                total_sell = sell_units * real_movement[i]
                initial_money += total_sell
                try:
                    invest = (
                        (real_movement[i] - real_movement[states_buy[-1]])
                        / real_movement[states_buy[-1]]
                    ) * 100
                except:
                    invest = 0
                print(
                    'day %d, sell %d units at price %f, investment %f %%, total balance %f,'
                    % (i, sell_units, total_sell, invest, initial_money)
                )
            states_sell.append(i)
    invest = ((initial_money - starting_money) / starting_money) * 100
    total_gains = initial_money - starting_money
    return states_buy, states_sell, total_gains, invest

In [37]:
states_buy, states_sell, total_gains, invest = buy_coin(df.price, signals['positions'])

day 35: total balances 40000.000000, not enough money to buy a unit price 58045.756058
day 112: cannot sell anything, inventory 0
day 156: total balances 40000.000000, not enough money to buy a unit price 60514.123181
day 252: cannot sell anything, inventory 0
day 253: total balances 40000.000000, not enough money to buy a unit price 63143.390932
day 254: cannot sell anything, inventory 0
day 276: total balances 40000.000000, not enough money to buy a unit price 64306.459583
day 438: cannot sell anything, inventory 0
day 559: total balances 40000.000000, not enough money to buy a unit price 62022.471620
day 656: cannot sell anything, inventory 0
day 732: total balances 40000.000000, not enough money to buy a unit price 62599.199064
day 968: cannot sell anything, inventory 0
day 1040: total balances 40000.000000, not enough money to buy a unit price 68205.623015
day 1079: cannot sell anything, inventory 0
day 1114: total balances 40000.000000, not enough money to buy a unit price 67615.

BUY/SELL CHART

In [38]:
def chart_buy_sell_signals(df, states_buy, states_sell):
    close = df['price']
    fig2 = go.Figure()
    fig2.add_trace(go.Scatter(
        x=df.index,
        y=close,
        mode='lines',
        line=dict(color='steelblue', width=1),
        name='Price'))
    fig2.add_trace(go.Scatter(
        x=df.index[states_buy],
        y=close[states_buy],
        mode='markers',
        marker=dict(symbol='triangle-up', size=10, color='green'),
        name='Buying Signal'))
    fig2.add_trace(go.Scatter(
        x=df.index[states_sell],
        y=close[states_sell],
        mode='markers',
        marker=dict(symbol='triangle-down', size=10, color='red'),
        name='Selling Signal'))

    fig2.update_layout(
    title='Buy / Sell Indicators',
    title_x=0.5,
    xaxis_title='Year',
    yaxis_title='Price',
    legend=dict(
        x=1.05,  # Đặt vị trí của legend nằm ngoài bên phải
        y=1,  # Đặt vị trí của legend ở trên cùng
        xanchor="left",  # Cố định legend ở bên trái vị trí x
        bgcolor='rgba(255, 255, 255, 0.5)',
        bordercolor='rgba(0, 0, 0, 0.1)',
        borderwidth=1
    ),
)

    fig2.show()

    return fig2

UPDATE BUY/SELL CHART

In [39]:
def update_chart_buy_sell_signals(fig2, df, new_data, signals, short_window, long_window):
    # Cập nhật dataframe với dữ liệu mới
    df = pd.concat([df, new_data], ignore_index=True)

    # Tính toán lại tín hiệu mua/bán
    signals['short_ma'] = df['price'].rolling(window=short_window, min_periods=1, center=False).mean()
    signals['long_ma'] = df['price'].rolling(window=long_window, min_periods=1, center=False).mean()
    signals['signal'] = np.where(signals['short_ma'] > signals['long_ma'], 1.0, 0.0)
    signals['positions'] = signals['signal'].diff()

    states_buy = signals[signals['positions'] == 1].index
    states_sell = signals[signals['positions'] == -1].index

    # Cập nhật biểu đồ với tín hiệu mới
    fig2.data[0].y = df['price']
    fig2.data[1].x = df.index[states_buy]
    fig2.data[1].y = df['price'][states_buy]
    fig2.data[2].x = df.index[states_sell]
    fig2.data[2].y = df['price'][states_sell]

    fig2.show()

TRAIN/TEST CHART

In [40]:
def chart_train_test_split(train, test, title='Train and Test Data'):
    fig3 = go.Figure()

    fig3.add_trace(go.Scatter(
        x=train.index,
        y=train.values,
        mode='lines',
        line=dict(color='royalblue', width=1),
        name='train'
    ))

    fig3.add_trace(go.Scatter(
        x=test.index,
        y=test.values,
        mode='lines',
        line=dict(color='firebrick', width=1),
        name='test'
    ))

    fig3.update_layout(
        title='Train and Test Data',
        xaxis_title='Timestamp',
        yaxis_title='Price',
        legend_title='Data'
    )

    fig3.show()
    return fig3

UPDATE TRAIN/TEST

In [41]:
def update_chart_train_test(fig3, new_test_data):
    fig3.data[1].x = new_test_data.index
    fig3.data[1].y = new_test_data.values
    fig3.show()

PREDICTION CHART

In [42]:
def chart_lstm_prediction(test_processed, lstm_y_pred_test_original, title="Prediction"):
    last_price = cg.get_price(ids=coin_id, vs_currencies='usd')[coin_id]['usd']
    last_price_scaled = scaler.transform(np.array([[last_price]]))

    X_last = last_price_scaled.reshape(1, 1, 1)
    next_price_scaled = best_model.predict(X_last)
    next_price = scaler.inverse_transform(next_price_scaled)[0][0]

    if next_price > last_price:
        symbol = 'triangle-up'
    else:
        symbol = 'triangle-down'

    fig4 = go.Figure()
    fig4.add_trace(go.Scatter(
        y=test_processed.flatten(),
        mode='lines',
        line=dict(color='royalblue', width=1),
        name='True'
    ))

    fig4.add_trace(go.Scatter(
        y=lstm_y_pred_test_original.flatten(),
        mode='lines',
        line=dict(color='firebrick', width=1),
        name=best_model_name["Model"],
    ))

    fig4.add_trace(go.Scatter(
            x=[len(lstm_y_pred_test_original) - 1],
            y=[lstm_y_pred_test_original[-1][0]],
            mode='markers',
            marker= dict(symbol=symbol, size=10, color='green')
                  if symbol=='triangle-up' else dict(symbol=symbol, size=10, color='red'),
            showlegend=False
        ))

    fig4.update_layout(
        title="Crypto Prediction",
        xaxis_title='Observation',
        yaxis_title='Adj Close scaled',
        legend_title='Legend'
    )

    fig4.show()
    return fig4

UPDATE CHART PREDICT

In [48]:
def update_chart_lstm_predict(fig4, new_test_processed, new_lstm_y_pred):
    fig4.data[0].y = new_test_processed.flatten()
    fig4.data[1].y = new_lstm_y_pred.flatten()
    fig4.data[2].x = [len(new_lstm_y_pred) - 1]
    fig4.data[2].y = [new_lstm_y_pred.flatten()[-1]]
    fig.data[2].marker = dict(symbol='triangle-up' if lstm_y_pred_test_original[-1] > test_processed[-1] else 'triangle-down',
                              size=10, color='green' if lstm_y_pred_test_original[-1] > test_processed[-1] else 'red')
    fig4.show()

STREAMING


In [None]:
from IPython.display import display, clear_output

def stream_and_update(fig, fig1, fig2, fig3, fig4, coin_id, train, test):
    while True:
        price_data = cg.get_price(ids=coin_id, vs_currencies='usd')
        price = price_data[coin_id]['usd']
        timestamp = datetime.datetime.now()
        new_row = {'timestamp': timestamp, 'price': price}
        new_data = pd.DataFrame([new_row])

        test = pd.concat([test, pd.Series(price, index=[timestamp])])

        update_chart_train_test(fig3, test)
        update_chart_with_new_data(df, new_data, fig, window_bollinger=20, window_sma20=20, window_sma50=50)
        update_macd_figure(df, new_data, fig1, short_window=12, long_window=26, signal_window=9)
        update_chart_lstm_predict(fig4, test.values, lstm_y_pred_test_original)
        update_chart_buy_sell_signals(fig2, df, new_data, signals, short_window, long_window)

        clear_output(wait=True)
        display(fig)
        display(fig1)
        display(fig2)
        display(fig3)
        display(fig4)

        time.sleep(300)

fig = create_initial_figure(df)
fig1 = create_macd_figure(df)
fig2 = chart_buy_sell_signals(df, states_buy, states_sell)
fig3 = chart_train_test_split(train, test)
fig4 = chart_lstm_prediction(test_processed, lstm_y_pred_test_original)

stream_and_update(fig, fig1, fig2, fig3, fig4, coin_id, train, test)