In [12]:
# load model
from tensorflow.keras.models import load_model
import numpy as np
import pandas as pd
import json
import requests
import datetime
import time
import nltk
#nltk.download('vader_lexicon')
from nltk.sentiment.vader import SentimentIntensityAnalyzer
from sklearn.preprocessing import MaxAbsScaler

sia = SentimentIntensityAnalyzer()

In [4]:
# # parameters
sequence_length = 50
ticker_num = 24
coin = 'BTC'
coinID = '1182'
timeperiod = 'histohour'
timefrequency = 'hour'
agg = 1
apiKey = 'bf7c04a024b244dea99e95798fa8e102b7c9738c0933795253c2c8f39f2d160c'
cat = 'BTC'
lang = 'EN'

In [5]:
def price_to_return(df, target_col):
    # get price change
    s_test = df[target_col]
    log_return = np.log(s_test/s_test.shift())
    df['price change'] = log_return

    # feature extraction from OHLC
    # '''high/open'''
    df['high/open'] = np.log(df['high']/df['open'])
    # '''low/open'''
    df['low/open'] = np.log(df['low']/df['open'])
    # '''close/high'''
    df['close/high'] = np.log(df['close']/df['high'])
    # '''close/low'''
    df['close/low'] = np.log(df['close']/df['low'])
    df = df.drop(['close','high','low','open','volumefrom'],axis=1)
    df.rename(columns = {'volumeto':'volumn'},inplace=True)
    df = df.dropna()
    return df

In [6]:
def get_ticker_sentiment(df,news):
    holder = []
    for i in range(df.shape[0]):
        to_t = df.time[i]
        from_t = to_t - 86400
        news_interval = news[(news['published_on'] >= from_t) & (news['published_on'] <= to_t)]
        score = news_interval.mean()[-4:]
        score['time'] = to_t
        holder.append(score)
    score_df = pd.concat(holder,axis=1).T
    score_df['time'] = score_df['time'].apply(int)
    complete_df = pd.merge(df, score_df, on = 'time')
    return complete_df

In [7]:
def get_ticker_social(df,coin_social):
    coin_social = coin_social.pct_change()
    coin_social = coin_social.add(1)
    coin_social = np.log(coin_social)
    coin_social = coin_social.reset_index()
    complete_df = pd.merge(df,coin_social,on='time', how = 'inner')
    return complete_df

In [8]:
def time_series_to_supervised(df,sequence_length):

    temp = df.values
    temp = temp.tolist()
    result = []
    for index in range(len(temp) - sequence_length +1):
        result.append(temp[index: index + sequence_length])
    data = np.array(result)
    
    # get x and y
    x = data[:, :-1]
    y = data[:, -1]
    y = y[:, 1] #second column is the price change, which is what we want to predict
    
    return x, y

In [9]:
def data_preparation(coin, coinID, time_period, time_frequency, agg, apiKey, cat, lang, ticker_num, sequence_length):
    

    def get_news_data_spec(cat, timestamp, apiKey, lang):
        news_url = "https://min-api.cryptocompare.com/data/v2/news/?categories={}&lang={}&lTs={}&api_key={}".format(cat, lang, timestamp ,apiKey)        
        r = requests.get(news_url)
        ipdata = r.json()
        return ipdata

    # get current time
    timestamp = int(time.time())
    
    prediction_length = ticker_num + sequence_length -1
    
    # APIs
    price_url = "https://min-api.cryptocompare.com/data/{}?fsym={}&tsym=USD&limit={}&toTs={}&api_key={}".format(time_period, coin, prediction_length, timestamp,apiKey)
    
    social_url = "https://min-api.cryptocompare.com/data/social/coin/histo/{}?coinId={}&aggregate={}&limit={}&toTs={}&api_key={}"\
        .format(time_frequency, coinID, agg, prediction_length, timestamp ,apiKey)
    

    # get data
    price = requests.get(price_url).json()
    social = requests.get(social_url).json()
    
    # deal with price
    price_df = pd.DataFrame(price['Data'])
    price_df['date/hour'] = pd.to_datetime(price_df['time'], unit='s') 
    price_df.set_index('date/hour', inplace=True)
    return_df = price_to_return(price_df,'close')
    
    # deal with news
    news_ts = timestamp
    from_t = news_ts - (prediction_length * 3600) - 86400
    holder = []
    while news_ts > from_t:
        news_data = get_news_data_spec(cat, news_ts, apiKey, lang)
        news_df = pd.DataFrame(news_data['Data'])
        holder.append(news_df)
        news_ts = news_df['published_on'].min()        
    df = pd.concat(holder, axis = 0)
    df = df[df['published_on']>from_t]
    df['time'] = df['published_on'].apply(lambda x: datetime.datetime.fromtimestamp(x).strftime("%Y-%m-%d"))
    news = df.set_index('time')
    # get news sentiment
    news['news_content'] = news['title'] + news['body']
    news['vader_polarity'] = news['news_content'].apply(lambda x: sia.polarity_scores(x))
    news['vader_compound'] = news['vader_polarity'].apply(lambda x: x['compound'])
    news['vader_neg'] = news['vader_polarity'].apply(lambda x: x['neg'])
    news['vader_neu'] = news['vader_polarity'].apply(lambda x: x['neu'])
    news['vader_pos'] = news['vader_polarity'].apply(lambda x: x['pos'])
    news = news[['id','published_on','vader_compound','vader_neg','vader_neu','vader_pos']]
    return_news_df = get_ticker_sentiment(return_df, news)
    
    # deal with social
    social_df = pd.DataFrame(social['Data'])
    social_df.set_index('time',inplace = True)
    coin_complete = get_ticker_social(return_news_df,social_df)
    
    # complete df
    coin_complete['time'] = pd.to_datetime(coin_complete['time'], unit='s')
    coin_complete['time'] = coin_complete['time'].dt.strftime('%Y-%m-%d %r')
    coin_complete.set_index('time', inplace=True)
    
    return coin_complete
    

In [27]:

def pipeline(coin_complete, sequence_length, model):
    
    coin_complete = coin_complete.replace([np.inf, -np.inf], np.nan)
    coin_complete = coin_complete.fillna(0)
    
    # scale data
    max_abs_scaler = MaxAbsScaler()
    temp = max_abs_scaler.fit_transform(coin_complete)
    temp = pd.DataFrame(temp,columns=coin_complete.columns)
    coin_complete = temp.set_index(coin_complete.index)
    
    x,y = time_series_to_supervised(coin_complete, sequence_length)
    
    y_predict = model.predict(x)
    
    return y_predict, y

In [28]:
coin_complete = data_preparation(coin, coinID, timeperiod, timefrequency, agg, apiKey, cat, lang, ticker_num, sequence_length)

In [29]:
coin_model = load_model('coin_model_1.h5')

ValueError: Unknown layer:name

In [22]:
x, y = pipeline(coin_complete, sequence_length, 1)

(24,)

In [199]:
x.shape

(1, 49, 42)

(73, 42)