# Loading modules and libraries

In [1]:
# Data manipulation 
import pandas as pd
# Dates handling
from datetime import datetime, timedelta
import pytz
from dateutil.relativedelta import relativedelta
# Chart modules
import matplotlib.pyplot as plt
from bokeh.plotting import figure
from bokeh.io import push_notebook,show, output_notebook
from bokeh.models import HoverTool
output_notebook()

# Linear algrebra
import numpy as np

# ML and Metrics module

from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error
from sklearn.model_selection import train_test_split

# Data APIs
import yfinance as yf
from pytrends.request import TrendReq

# Technical indicators
from ta.momentum import rsi, stochrsi_d,stochrsi_k
from ta.trend import ema_indicator, macd_diff, vortex_indicator_neg, vortex_indicator_pos, adx, cci
from ta.volatility import bollinger_hband, bollinger_lband
from ta.volume import ease_of_movement

# Remove warnings
import warnings

warnings.filterwarnings('ignore')

# Utils functions

In [60]:

def polynomialRegression(dataframe, column, order):
    indexs_2 = np.array([i for i in range(len(dataframe[column].values))])
    indexs_2 = np.reshape(indexs_2, newshape=(1,-1))[0]

    RMSEs = []
    for i in range(order):
        features = PolynomialFeatures(degree=i+1)
        x_train_transformed = features.fit_transform(indexs_2.reshape(-1, 1))
        
        model = LinearRegression().fit(x_train_transformed, dataframe[column].values)

        train_pred = model.predict(x_train_transformed)
        rmse_poly_4_train = mean_squared_error(dataframe[column].values, train_pred, squared = False)
        RMSEs.append(rmse_poly_4_train)

    features = PolynomialFeatures(degree=RMSEs.index(min(RMSEs))+1)
    x_train_transformed = features.fit_transform(indexs_2.reshape(-1, 1))
    model = LinearRegression().fit(x_train_transformed, dataframe[column].values)

    x_test_transformed = features.fit_transform(indexs_2.reshape(-1, 1))

    train_pred = model.predict(x_train_transformed)

    test_pred = model.predict(x_test_transformed)
    
    dataframe[f'Poly_{column}'] = test_pred
    return test_pred        



def fourrierFeatureGeneration(df, column, order=30) -> list:
    """Generate a fourrier transformed column in a dataframe with a specified order and column.

    Args:
        df (pd.DataFrame): The dataframe which you want to add a fourrier column
        column (str): The column of the dataframe which you want to compute fft
        order (int, optional): The order of the fourrier transformation. Defaults to 30.

    Returns:
        list: _description_
    """
    close_fft = np.fft.rfft(np.asarray(df[column].tolist()))
    fft_df = pd.DataFrame({'fft':close_fft})
    fft_df['absolute'] = fft_df['fft'].apply(lambda x: np.abs(x))
    fft_df['angle'] = fft_df['fft'].apply(lambda x: np.angle(x))

    fft_list = np.asarray(fft_df['fft'].tolist())

    fft_list_m10= np.copy(fft_list)
    fft_list_m10[order:-order] = 0
    if len(df) % 2 ==0:
        #df['fft'] = np.fft.irfft(fft_list_m10)
        return np.fft.irfft(fft_list_m10)#, fft_df
    else:
        #df['fft'] = np.insert(np.fft.irfft(fft_list_m10),0,np.fft.irfft(fft_list_m10)[0],axis=0)
        return np.insert(np.fft.irfft(fft_list_m10),0,np.fft.irfft(fft_list_m10)[0],axis=0)#, fft_df
        
def generateMovingLinearRegression(df,column,window=20)->None:
    """Generate the moving linear regression on a dataframe. This function is inplace=True (returns nothing)

    Args:
        df (pd.DataFrame): The dataframe which you want to add the moving linear regression
        column (str): The column which you want to compute Linear regression on.
        window (int, optional): The window's size on which you want to perform linear regression. Defaults to 20.
    """
    coefs = []
    intercepts = []

    for i in range(len(df)-window):
        ys = df[column].values[i:i+window]
        xs = df.Timestamp.values[i:i+window].reshape(-1, 1)
        
        model = LinearRegression().fit(xs,ys)
        
        coefs.append(model.coef_[0])
        intercepts.append(model.intercept_)
        
    ratio = sum([intercepts[i]/[-coef for coef in coefs][i] for i in range(len(coefs))]) / len([intercepts[i]/[-coef for coef in coefs][i] for i in range(len(coefs))])
    df['MLR'] = np.nan
    df['MLR'].iloc[window:] = [intercepts[i]/ratio+coefs[i] for i in range(len(coefs))]

def isBearishCandleStick(candle) -> bool:
    """Check whether a candle is a bearish candle or not

    Args:
        candle (pd.Series): The current candle that contains OHLC

    Returns:
        bool: A boolean representing if the candle is bearish candle (True) or not (False)
    """
    return candle['Close']<candle['Open']

def isBullishCandleStick(candle) -> bool:
    """Check whether a candle is a bullish candle or not

    Args:
        candle (pd.Series): The current candle that contains OHLC

    Returns:
        bool: A boolean representing if the candle is bullish candle (True) or not (False)
    """
    return candle['Close']>candle['Open']

def isBullishEngulfing(previous_candle,current_candle) -> int:
    """A function that check for bullish engulfing pattern through candle stick

    Args:
        previous_candle (pd.Series): The previous candle that contains OHLC
        current_candle (pd.Series): The current candle that contains OHLC

    Returns:
        int: represent the pattern spotting : 1 bullish engulfing, 0 not.
    """
    return 1 if isBearishCandleStick(previous_candle) and isBullishCandleStick(current_candle) and previous_candle['Open']<current_candle['Close'] and previous_candle['Close']>current_candle['Open'] else 0
    
def isBearishEngulfing(previous_candle,current_candle) -> int:
    """A function that check for bearish engulfing pattern through candle stick

    Args:
        previous_candle (pd.Series): The previous candle that contains OHLC
        current_candle (pd.Series): The current candle that contains OHLC

    Returns:
        int: represent the pattern spotting : 1 bearish engulfing, 0 not.
    """
    return 1 if isBullishCandleStick(previous_candle) and isBearishCandleStick(current_candle) and previous_candle['Close']<current_candle['Open'] and previous_candle['Open']>current_candle['Close'] else 0    
          
def addIndicators(df) -> pd.DataFrame:
    """Apply indicators to the whole dataframe

    Args:
        df (pd.DataFrame): The dataframe you want to add indicators

    Returns:
        pd.DataFrame: The dataframe with the indicators
    """
    #df = df.sort_values(by='timestamp')
    df['RSI'] = rsi(df.Close,14,fillna=True)
    df['EMA20'] = ema_indicator(df.Close,20)
    df['EMA50'] = ema_indicator(df.Close,50)
    df['EMA100'] = ema_indicator(df.Close,100)
    df['EMA200'] = ema_indicator(df.Close,200)
    df['MACD'] = macd_diff(df.Close)
    df['%D'] = stochrsi_d(df.Close,20,fillna=True)
    df['%K'] = stochrsi_k(df.Close,20,fillna=True)
    df['Vortex'] = (vortex_indicator_pos(df.High,df.Low,df.Close,20,fillna=True)-1)-(vortex_indicator_neg(df.High,df.Low,df.Close,20,fillna=True)-1)
    df['Bollinger_low'] = bollinger_hband(df.Close,20,fillna=True)
    df['Bollinger_high'] = bollinger_lband(df.Close,20,fillna=True)
    df['Slope'] = df.Close.diff()
    df['Acceleration'] = df.Slope.diff()
    df['FFT'] = fourrierFeatureGeneration(df,'Close',30)
    generateMovingLinearRegression(df, 'Close',30)
    df['Bullish_engulfing'] = np.nan
    df['Bullish_engulfing'].iloc[1:] = [isBullishEngulfing(df.iloc[i-1],df.iloc[i]) for i in range(1,len(df))]
    df['Bearish_engulfing'] = np.nan
    df['Bearish_engulfing'].iloc[1:] = [isBearishEngulfing(df.iloc[i-1],df.iloc[i]) for i in range(1,len(df))]
    df['ADX'] = adx(df.High,df.Low,df.Close)
    df['CCI'] = cci(df.High,df.Low,df.Close,14)
    df['EVM'] = ease_of_movement(df.High,df.Low,df.Volume,14)
    df.dropna(inplace=True)
    return df
    
def is_far_from_level(value, levels, df):
    ave =  np.mean(df['High'] - df['Low'])
    return np.sum([abs(value - level) < ave for _, level in levels]) == 0

def detectSupportAndResistance(df)->list:
    """This function detect using window approach all supports and resistance from a dataframe over a time period.

    Args:
        df (pd.DataFrame): The DataFrame that we want to extract supports and resistances, it must containe OHLC at least.

    Returns:
        list: The list of all supports and resistances timestamped
    """
    pivots = []
    max_list = []
    min_list = []
    for i in range(5, len(df)-5):
        high_range = df['High'][i-5:i+4]
        current_max = high_range.max()

        if current_max not in max_list:
            max_list = []
        max_list.append(current_max)
        if len(max_list) == 5 and is_far_from_level(current_max, pivots, df):
            pivots.append((high_range.idxmax(), current_max))
        
        low_range = df['Low'][i-5:i+5]
        current_min = low_range.min()
        if current_min not in min_list:
            min_list = []
        min_list.append(current_min)
        if len(min_list) == 5 and is_far_from_level(current_min, pivots, df):
            pivots.append((low_range.idxmin(), current_min))
    return list({x:y} for x, y in pivots)

def addTimestampToDf(df)->pd.DataFrame:
    """add timestamp to dataframe as column

    Args:
        df (pd.DataFrame): The Dataframe we want to add timestamp as col

    Returns:
        pd.DataFrame: The Dataframe with the new column
    """
    df['Timestamp'] = [round(datetime.timestamp(df.index[ind])) for ind in range(len(df.index))]
    df['Timestamp'] = df['Timestamp'].astype(int)
    return df

# Load the data

In [122]:
def prepareDataFrame(symbol='ETH',google_trend=True,interval='15m')->pd.DataFrame:
    """Get Datafram BTC and symbol data from yFinance and optionnaly google_trend data.

    Args:
        symbol (str, optional): The Symbol you want to predict from BTC. Defaults to 'ETH'.
        google_trend (bool, optional): Whether or not you want to add Google Trend Data to the Dataframe, is that case you need to put interval to 1h. Defaults to True.
        interval (str, optional): The interval to construct candlesticks : 1h, 15m, 1m... Defaults to '15m'.

    Returns:
        pd.DataFrame: The Dataframe containing all the data and the features.
    """
    n = 2
    current_date = datetime.today()
    past_date = current_date - relativedelta(days=n)

    hist_BTC = yf.Ticker("BTC-USD").history(interval=interval,start=past_date,end=current_date,tzinfo=pytz.utc)
    addTimestampToDf(hist_BTC)
    hist_BTC = hist_BTC.drop(columns=['Dividends', 'Stock Splits',])
    hist_ETH = yf.Ticker(f"{symbol}-USD").history(interval=interval,start=past_date,end=current_date,tzinfo=pytz.utc)
    addTimestampToDf(hist_ETH)
    hist_ETH = hist_ETH.drop(columns=['Dividends', 'Stock Splits',])
    
    if google_trend==True and interval=='1h':
        merged_BTC_ETH = pd.merge(hist_BTC,hist_ETH, left_on='Timestamp',right_on='Timestamp',how='inner',suffixes=('',f'_{symbol}')).dropna()
        starting_date = datetime.fromtimestamp(merged_BTC_ETH['Timestamp'].iloc[0])
        ending_date = datetime.fromtimestamp(merged_BTC_ETH['Timestamp'].iloc[-1])

        pytrends = TrendReq(hl='en-US', tz=360) 
        kw_list = ["BTC","Blockchain","Bitcoin"] # list of keywords to get data 

        pytrends.build_payload(kw_list, cat=0, timeframe='today 12-m') 
        data = pytrends.get_historical_interest(kw_list, year_start=starting_date.year, month_start=starting_date.month, day_start=starting_date.day, hour_start=starting_date.hour, year_end=ending_date.year, month_end=ending_date.month, day_end=ending_date.day, hour_end=ending_date.hour, cat=0, sleep=0)
        data = data.reset_index() 
        data.set_index('date',inplace=True)
        data.BTC = data.BTC + data.Bitcoin

        data.drop(columns=['Bitcoin','isPartial'],inplace=True)
        data.dropna(inplace=True)

        polynomialRegression(data,'BTC',20)
        polynomialRegression(data,'Blockchain',20)
        addTimestampToDf(data)
        merged_BTC_ETH_Gtrend = pd.merge(merged_BTC_ETH,data, left_on='Timestamp',right_on='Timestamp',how='outer').dropna()
        merged_BTC_ETH_Gtrend = addIndicators(merged_BTC_ETH_Gtrend)
        return merged_BTC_ETH_Gtrend
    else:
        return  addIndicators(pd.merge(hist_BTC,hist_ETH, left_on='Timestamp',right_on='Timestamp',how='inner',suffixes=('',f'_{symbol}')).dropna())
        
    

In [135]:
symbol = 'KDA'

merged_BTC_ETH_Gtrend = prepareDataFrame(symbol,False, '1m')
merged_BTC_ETH_Gtrend.head()

Unnamed: 0,Open,High,Low,Close,Volume,Timestamp,Open_KDA,High_KDA,Low_KDA,Close_KDA,...,Bollinger_high,Slope,Acceleration,FFT,MLR,Bullish_engulfing,Bearish_engulfing,ADX,CCI,EVM
200,29537.619141,29537.619141,29537.619141,29537.619141,1781760,1653485340,2.254774,2.254774,2.254774,2.254774,...,29533.534622,-14.544922,-6.398438,29585.980958,2e-05,0.0,0.0,17.141961,-122.443356,-0.0
203,29528.525391,29528.525391,29528.525391,29528.525391,12488704,1653485520,2.248037,2.248037,2.248037,2.248037,...,29520.837133,12.304688,19.470703,29597.218718,-5.3e-05,0.0,0.0,19.206503,-101.920718,0.0
205,29585.798828,29585.798828,29585.798828,29585.798828,59555840,1653485640,2.255344,2.255344,2.255344,2.255344,...,29517.210901,43.441406,29.609375,29605.48622,-6.7e-05,0.0,0.0,18.890128,179.125969,0.0
206,29604.716797,29604.716797,29604.716797,29604.716797,17012736,1653485700,2.256248,2.256248,2.256248,2.256248,...,29511.380427,18.917969,-24.523438,29610.53317,-4.1e-05,0.0,0.0,20.460262,204.753437,0.0
207,29600.421875,29600.421875,29600.421875,29600.421875,3209216,1653485760,2.261791,2.261791,2.261791,2.261791,...,29508.76548,-4.294922,-23.212891,29614.022499,2e-06,0.0,0.0,21.603759,145.314207,-0.0


# Choosing a SHIFT

In [136]:
SHIFT = 15
p = figure(title=f"BTC vs {symbol} with Shiffting", x_axis_label=f'{symbol} value ($)', y_axis_label='BTC value ($)',width=1500, height=600,)

p.circle(merged_BTC_ETH_Gtrend[f'Close_{symbol}'],merged_BTC_ETH_Gtrend['Close'],color='blue',legend_label=f'BTC vs {symbol}',alpha=0.3,size=4)
p.circle(merged_BTC_ETH_Gtrend[f'Close_{symbol}'].iloc[:-SHIFT].values,merged_BTC_ETH_Gtrend['Close'].shift(SHIFT).dropna().values,color='red',legend_label=f'BTC vs {symbol} shiffted',alpha=0.5,size=4)

p.legend.location = "top_left"

# show the results
show(p)

# Separing Features and Labels

In [137]:
X = merged_BTC_ETH_Gtrend.drop(columns=[f'Open_{symbol}',f'Close_{symbol}',f'High_{symbol}',f'Low_{symbol}',f'Volume_{symbol}','Timestamp','EVM']).iloc[:-SHIFT].values
y = merged_BTC_ETH_Gtrend[f'Close_{symbol}'].shift(SHIFT).dropna().values
X_train, X_test, y_train, y_test =  train_test_split(X,y,shuffle=True,test_size=0.5,)

# Training models

## Linear regression

In [138]:
LR_model = LinearRegression().fit(X_train,y_train)
print(f'Final score for Linear Regression on test set {round(LR_model.score(X_test,y_test)*100)} %')
y_pred = LR_model.predict(X_test)
print(f'Final MAE for Linear Regression : {mean_absolute_error(y_test, y_pred)}')
print(f'Final MSE for Linear Regression : {mean_squared_error(y_test, y_pred,squared=False)}')


Final score for Linear Regression on test set 86 %
Final MAE for Linear Regression : 0.026663435660884027
Final MSE for Linear Regression : 0.03388681677595172


## Random Forest

In [139]:
RF_model = RandomForestRegressor().fit(X_train,y_train)
print(f'\nFinal score for Random Forest on test set {round(RF_model.score(X_test,y_test)*100)} %')
y_pred = RF_model.predict(X_test)
print(f'Final MAE for Random Forest  : {mean_absolute_error(y_test, y_pred)}')
print(f'Final MSE for Random Forest  : {mean_squared_error(y_test, y_pred,squared=False)}')


Final score for Random Forest on test set 97 %
Final MAE for Random Forest  : 0.00961422115859418
Final MSE for Random Forest  : 0.016539364953326894


# Evalutate the model

In [140]:
df_ETH_with_pred = merged_BTC_ETH_Gtrend.shift(SHIFT).dropna()
df_ETH_with_pred['Close_pred_RF'] = RF_model.predict(X)
df_ETH_with_pred['Close_pred_LR'] = LR_model.predict(X)
p = figure(title=f"Prediction on {symbol} from BTC", x_axis_label='value ($)', y_axis_label='value ($)',width=1500, height=600,)
# Add the HoverTool to the figure
p.add_tools(HoverTool(
    tooltips=[
    ("Price", "@y{0.00} $"),
    ("Price", "@y{0.00} $"),
], formatters={
        '$x': 'printf',
        '$y' : 'printf',
    },
    mode='vline'
))
p.line(df_ETH_with_pred.index, df_ETH_with_pred[f'Close_{symbol}'], legend_label="real data", line_width=2,)
p.line(df_ETH_with_pred.index, df_ETH_with_pred.Close_pred_RF, legend_label="prediction data RF", line_width=2, color='orange')
#p.line(df_ETH_with_pred.index, df_ETH_with_pred.Close_pred_LR, legend_label="prediction data LR", line_width=2, color='red')
p.legend.location = "top_left"

show(p)

In [141]:

from kucoin.client import Market
import pandas as pd
import numpy as np
from datetime import datetime
import time
import math
client = Market(url='https://api.kucoin.com')

def getLastestData(symbol="BTC",timeframe='15min')-> pd.DataFrame:
    """Function that uses Kucoin API to get the latest data for a specific symbol and timeframe.

    Args:
        symbol (str, optional): The symbol for the data we want to extract. Defaults to "BTC".
        indicators (bool, optional): Whether we want to add indicators to the dataframe or not. Defaults to False.

    Returns:
        pd.DataFrame: The dataframe containing history.
    """
    klines = client.get_kline(f'{symbol}-USDT',timeframe, startAt=round(datetime.now().timestamp())-1950000, endAt=round(datetime.now().timestamp()))
    df = pd.DataFrame(klines,columns=['Date','Open','High','Low','Close','Volume','Transaction amount'],dtype=float)
    df = df.sort_values(by='Date')
    df = df.drop(columns=['Transaction amount'])
    df['Timestamp'] = df['Date'].astype(int)
    df['Date'] = df['Date'].astype(int).apply(datetime.fromtimestamp)
    df = df.set_index('Date')
    return df

In [143]:
symbol = 'KDA'

df_SOL = getLastestData(symbol,'1min',)
df_BTC = addIndicators(getLastestData('BTC','1min',)).drop(columns=['Timestamp','EVM'])

SHIFT = 20
p = figure(title=f"BTC vs {symbol} with Shiffting", x_axis_label=f'{symbol} value ($)', y_axis_label='BTC value ($)',width=1500, height=600,)

p.circle(df_SOL[f'Close'],df_BTC['Close'],color='blue',legend_label=f'BTC vs {symbol}',alpha=0.3,size=4)
p.circle(df_SOL[f'Close'].iloc[:-SHIFT].values,df_BTC['Close'].shift(SHIFT).dropna().values,color='red',legend_label=f'BTC vs {symbol} shiffted',alpha=0.5,size=4)

p.legend.location = "top_left"

# show the results
show(p)

In [144]:
df_SOL = addIndicators(df_SOL)
len(df_SOL[f'Close'].shift(SHIFT).dropna().values)

1279

In [146]:
p = figure(title=f"Prediction on {symbol} from BTC", x_axis_label='value ($)',x_axis_type='datetime', y_axis_label='value ($)',width=1500, height=600,)
# Add the HoverTool to the figure
p.add_tools(HoverTool(
    tooltips=[
    ("Price", "@y{0.00} $"),
    ("Date", "$x{%F}"),
], formatters={
        '$x': 'datetime',
        '$y' : 'printf',
    },
    mode='vline'
))
p.line(df_SOL.index, df_SOL[f'Close'], legend_label="real data", line_width=2,)
p.line(df_SOL.index, RF_model.predict(df_BTC.values), legend_label="prediction data RF", line_width=2, color='orange')
#p.line(df_ETH_with_pred.index, df_ETH_with_pred.Close_pred_LR, legend_label="prediction data LR", line_width=2, color='red')
p.legend.location = "top_left"

show(p)