# Import

In [1]:
# Data manipulation 
import pandas as pd
# Dates handling
from datetime import datetime, timedelta
import pytz
from dateutil.relativedelta import relativedelta

# Chart modules
import matplotlib.pyplot as plt
import seaborn as sns
from bokeh.plotting import figure
from bokeh.io import push_notebook,show, output_notebook
from bokeh.models import HoverTool
output_notebook()

# Linear algrebra
import numpy as np

# ML and Metrics module

from sklearn.preprocessing import PolynomialFeatures, StandardScaler,MinMaxScaler
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error
from sklearn.model_selection import train_test_split

# Data APIs
import yfinance as yf
from pytrends.request import TrendReq

# Technical indicators
from ta.momentum import rsi, stochrsi_d,stochrsi_k
from ta.trend import ema_indicator, macd_diff, vortex_indicator_neg, vortex_indicator_pos, adx, cci
from ta.volatility import bollinger_hband, bollinger_lband
from ta.volume import ease_of_movement

# Remove warnings
import warnings

warnings.filterwarnings('ignore')

In [15]:
         
def addIndicators(df) -> pd.DataFrame:
    """Apply indicators to the whole dataframe

    Args:
        df (pd.DataFrame): The dataframe you want to add indicators

    Returns:
        pd.DataFrame: The dataframe with the indicators
    """
    #df = df.sort_values(by='timestamp')
    df['RSI'] = rsi(df.Close,14,fillna=True)
    df['EMA20'] = ema_indicator(df.Close,20)
    df['EMA50'] = ema_indicator(df.Close,50)
    df['EMA100'] = ema_indicator(df.Close,100)
    df['EMA200'] = ema_indicator(df.Close,200)
    df['MACD'] = macd_diff(df.Close)
    df['%D'] = stochrsi_d(df.Close,20,fillna=True)
    df['%K'] = stochrsi_k(df.Close,20,fillna=True)
    df['Vortex'] = (vortex_indicator_pos(df.High,df.Low,df.Close,20,fillna=True)-1)-(vortex_indicator_neg(df.High,df.Low,df.Close,20,fillna=True)-1)
    df['Bollinger_low'] = bollinger_hband(df.Close,20,fillna=True)
    df['Bollinger_high'] = bollinger_lband(df.Close,20,fillna=True)
    df['Slope'] = df.Close.diff()
    df['Acceleration'] = df.Slope.diff()
    df['ADX'] = adx(df.High,df.Low,df.Close)
    df['CCI'] = cci(df.High,df.Low,df.Close,14)
    df['EVM'] = ease_of_movement(df.High,df.Low,df.Volume,14)
    df.dropna(inplace=True)
    return df

In [2]:
def prepareDataFrame(symbol='ETH',intraday=True, interval='15m')->pd.DataFrame:
    """Get symbol data from yFinance.

    Args:
        symbol (str, optional): The Symbol you want. Defaults to 'ETH'.
        interval (str, optional): The interval to construct candlesticks : 1h, 15m, 1m... Defaults to '15m'.

    Returns:
        pd.DataFrame: The Dataframe containing all the data and the features.
    """
    if intraday==True:
        n = 60
        current_date = datetime.today()
        past_date = current_date - relativedelta(days=n)

        return yf.Ticker(f"{symbol}-USD").history(interval=interval,start=past_date,end=current_date,tzinfo=pytz.utc).drop(columns=['Dividends','Stock Splits'])  
    else:
        return yf.Ticker(f"{symbol}-USD").history(period='max',interval='1d',tzinfo=pytz.utc).drop(columns=['Dividends','Stock Splits'])  
        
    

In [16]:
df = prepareDataFrame(symbol='ETH',intraday=False)
df  = addIndicators(df)
df['Type'] = [ -1 if df['Open'][i]>df['Close'][i] else 1 for i in range(len(df))]
df.head()

Unnamed: 0_level_0,Open,High,Low,Close,Volume,RSI,EMA20,EMA50,EMA100,EMA200,...,%K,Vortex,Bollinger_low,Bollinger_high,Slope,Acceleration,ADX,CCI,EVM,Type
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2018-05-27,588.52002,590.328003,562.866028,572.66803,1788790016,36.094151,652.250158,650.381056,658.729026,638.250919,...,0.02462,-0.406079,792.562075,561.02963,-14.612,-15.15802,23.439717,-114.032786,-28.012492,-1
2018-05-28,573.044983,576.049011,512.552002,516.036011,2356900096,30.311866,639.277382,645.112623,655.903422,637.034851,...,0.014086,-0.467001,794.772608,535.136999,-56.632019,-42.02002,25.026881,-133.156608,-87.009701,-1
2018-05-29,516.14801,572.263977,516.14801,565.388,2330820096,39.419814,632.240298,641.986168,654.111037,636.321947,...,0.092238,-0.432299,785.845395,525.37551,49.35199,105.984009,26.500677,-96.427344,-0.227546,1
2018-05-30,566.830017,583.135986,545.43103,559.590027,2053970048,38.77859,625.321225,638.754946,652.239334,635.558445,...,0.178756,-0.405914,779.501262,514.950947,-5.797974,-55.149963,27.473232,-73.048169,36.856516,-1
2018-05-31,558.497009,585.538025,557.065979,577.64502,1985040000,41.945509,620.780634,636.358479,650.762219,634.982192,...,0.298544,-0.307156,776.857192,507.40092,18.054993,23.852966,28.286504,-53.909125,10.066844,1


In [17]:
df['Trend_F'] = np.nan
df['Trend_F'].iloc[1:] = [1 if np.sign(df['Type'][i]) ==np.sign(df['Type'][i-1]) else -1 for i in range(1,len(df))]
df.head()

Unnamed: 0_level_0,Open,High,Low,Close,Volume,RSI,EMA20,EMA50,EMA100,EMA200,...,Vortex,Bollinger_low,Bollinger_high,Slope,Acceleration,ADX,CCI,EVM,Type,Trend_F
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2018-05-27,588.52002,590.328003,562.866028,572.66803,1788790016,36.094151,652.250158,650.381056,658.729026,638.250919,...,-0.406079,792.562075,561.02963,-14.612,-15.15802,23.439717,-114.032786,-28.012492,-1,
2018-05-28,573.044983,576.049011,512.552002,516.036011,2356900096,30.311866,639.277382,645.112623,655.903422,637.034851,...,-0.467001,794.772608,535.136999,-56.632019,-42.02002,25.026881,-133.156608,-87.009701,-1,1.0
2018-05-29,516.14801,572.263977,516.14801,565.388,2330820096,39.419814,632.240298,641.986168,654.111037,636.321947,...,-0.432299,785.845395,525.37551,49.35199,105.984009,26.500677,-96.427344,-0.227546,1,-1.0
2018-05-30,566.830017,583.135986,545.43103,559.590027,2053970048,38.77859,625.321225,638.754946,652.239334,635.558445,...,-0.405914,779.501262,514.950947,-5.797974,-55.149963,27.473232,-73.048169,36.856516,-1,-1.0
2018-05-31,558.497009,585.538025,557.065979,577.64502,1985040000,41.945509,620.780634,636.358479,650.762219,634.982192,...,-0.307156,776.857192,507.40092,18.054993,23.852966,28.286504,-53.909125,10.066844,1,-1.0


In [18]:
df_filtered = df.dropna()
print(df_filtered[df_filtered['Trend_F']==1].count())
print(df_filtered[df_filtered['Trend_F']==-1].count())

Open              661
High              661
Low               661
Close             661
Volume            661
RSI               661
EMA20             661
EMA50             661
EMA100            661
EMA200            661
MACD              661
%D                661
%K                661
Vortex            661
Bollinger_low     661
Bollinger_high    661
Slope             661
Acceleration      661
ADX               661
CCI               661
EVM               661
Type              661
Trend_F           661
dtype: int64
Open              793
High              793
Low               793
Close             793
Volume            793
RSI               793
EMA20             793
EMA50             793
EMA100            793
EMA200            793
MACD              793
%D                793
%K                793
Vortex            793
Bollinger_low     793
Bollinger_high    793
Slope             793
Acceleration      793
ADX               793
CCI               793
EVM               793
Type              7

In [20]:
p = figure(title=f"Prediction on ETH from BTC", x_axis_label='value ($)', y_axis_label='value ($)',x_axis_type='datetime',width=1500, height=600,)
# Add the HoverTool to the figure
p.add_tools(HoverTool(
    tooltips=[
    ("Price", "@y{0.00} $"),
    ("Date", "$x{%F}"),
], formatters={
        '$x': 'datetime',
        '$y' : 'printf',
    },
    mode='vline'
))
p.line(df.index, df[f'EMA200'], legend_label="EMA_200", line_width=2,)
p.line(df.index, df[f'EMA100'], legend_label="EMA_100", line_width=2, color='orange')
p.line(df.index, df[f'EMA50'], legend_label="EMA_50", line_width=2, color='red')
p.line(df.index, df[f'EMA20'], legend_label="EMA_20", line_width=2, color='blue')
p.line(df.index, df[f'Close'], legend_label="Close", line_width=2, color='gray')
#p.line(df_ETH_with_pred.index, df_ETH_with_pred.Close_pred_LR, legend_label="prediction data LR", line_width=2, color='red')
p.legend.location = "top_left"

show(p)