# Initial check for CryptoQuant data
   * tasks
       * classify tables 
       * classify data types
       * build an initial data loader
   * data structure
       * instrument_list ['btc', 'stablecoin', 'erc20', 'eth']
       * exchange_list = ['coinbase_pro', 'derivative_exchange', 'deribit', 'binance', 'all_exchange', 'spot_exchange']
       * datatype (number of tables)
           * exchange-flows (5)
           * flow-indicator (5)
           * Market Indicator (3)
           * Network Indicator (5)
           * miner-flows (3)
           * Bank Flows (0)
           * Inter Entity Flows (1)
           * Fund Data (1)
           * market data (5)
           * network data (4)

In [None]:
import pandas as pd
from croqr.common.config import LOCAL_DATA_DIR, LOCAL_FIGURE_DIR
import os
from croqr.data.cq.config import CryptoQuantData
from datetime import datetime
import matplotlib.pyplot as plt
from croqr.common.utils import timeit, save_df
import numpy as np
import cufflinks as cf
# cufflinks setup for the .iplot() calls used later in this notebook.
# go_offline() presumably switches plotly to offline (in-notebook) rendering — confirm.
cf.go_offline()
# NOTE(review): offline=False here looks at odds with go_offline() above, and
# world_readable=True only matters for online plots — confirm the intended mode.
cf.set_config_file(offline=False, world_readable=True)

### Calculate correlation between features and returns
   * for each feature, get the change rate of the feature values
   * calculate corr between change in features vs. change in close price (ret)
   * try different time scales / sampling frequencies: 1Min, 5Min
   * try different lags
   * rolling historical corr
 

In [None]:
@timeit
def align_feature_df(raw_feature_df, feature_list, ret_df):
    """Align a raw feature table to the index of the return series.

    Steps: reverse the row order, round each ``datetime`` up to the next
    full minute (``signal_time``), keep the first row per ``signal_time``,
    reindex onto ``ret_df.index`` and forward-fill the gaps.

    Args:
        raw_feature_df: DataFrame exposing ``datetime`` either as a column or
            as its (named) index; ``reset_index()`` is applied before the
            column is read. Presumably stored newest-first, hence the
            reversal — TODO confirm.
        feature_list: Column names to keep. Any iterable of labels is
            accepted (list, tuple, ``df.columns``).
        ret_df: Object whose ``index`` defines the target timeline.

    Returns:
        DataFrame indexed like ``ret_df`` containing only ``feature_list``
        columns. Rows before the first available signal stay NaN.
    """
    # reverse the row order and expose the index as ordinary columns
    feature_df = raw_feature_df[::-1].reset_index()

    # signal_time = observation time rounded up to the minute; vectorised
    # .dt.ceil replaces a per-row Python lambda
    feature_df['signal_time'] = pd.to_datetime(feature_df['datetime']).dt.ceil('min')

    # list(...) so that an Index/tuple of names is concatenated, not
    # broadcast element-wise (Index + list would append to every label)
    selected_columns = list(feature_list) + ['signal_time']
    feature_df = feature_df[selected_columns].set_index('signal_time')

    # several observations can land in the same minute: keep the first one
    feature_df = feature_df[~feature_df.index.duplicated(keep='first')]

    # project onto the return timeline and carry the last known value forward
    return feature_df.reindex(index=ret_df.index).ffill()

In [None]:
@timeit
def get_feature_df_corr_with_ret(df_features, feature_table_name, ret_df, look_back_window = 60*24, lags=(0, 10, 20, 30)):
    """Plot smoothed rolling correlations between feature changes and returns.

    For every column of ``df_features`` the percentage change is shifted by
    each lag, correlated with ``ret_df`` over a rolling window, smoothed with
    a second rolling mean and drawn on that feature's own subplot. The
    figure is shown and then saved as ``<feature_table_name>.png`` under
    ``LOCAL_FIGURE_DIR``.

    Args:
        df_features: Feature DataFrame already aligned to ``ret_df``.
        feature_table_name: Base name (without extension) of the saved figure.
        ret_df: Return series to correlate against.
        look_back_window: Rolling-correlation window in rows. The smoothing
            window is 100x this value; both use ``look_back_window // 2`` as
            ``min_periods``.
        lags: Shifts (in rows) applied to the feature changes before
            correlating; one line per lag is drawn.

    Returns:
        None.
    """
    feature_list = df_features.columns
    # ffill() takes keyword-only arguments from pandas 2.0 on, so the axis
    # must be named; the former positional 0 was the axis.
    # NOTE(review): if the intent was "replace NaN by 0", this should be
    # fillna(0) instead — confirm.
    df_features_chg = df_features.pct_change().ffill(axis=0)
    n = len(feature_list)
    min_periods = look_back_window // 2
    smoothing_window = look_back_window * 100
    # clear=True: figure 1 is reused on every call, so without clearing,
    # repeated calls in one session would stack subplots on top of old ones
    fig = plt.figure(num=1, figsize=(6, 4 * n), clear=True)

    for i, feature in enumerate(feature_list):
        ax1 = fig.add_subplot(n, 1, i + 1)
        f_chg = df_features_chg[feature]

        for lag in lags:
            # shift(0) is a no-op, so lag 0 needs no special case
            rolling_corr = f_chg.shift(lag).rolling(
                look_back_window, min_periods=min_periods).corr(ret_df)
            rolling_corr.rolling(
                smoothing_window, min_periods=min_periods).mean().plot(ax=ax1)

        ax1.legend(['lag={}'.format(lag) for lag in lags])
        ax1.title.set_text(feature)
        ax1.get_xaxis().set_visible(False)
        ax1.grid(True, axis='x')

    fig.show()
    fig.savefig(os.path.join(LOCAL_FIGURE_DIR, '{}.png'.format(feature_table_name)))

### plot correlation

In [None]:
# Load the pickled CryptoQuant data; below it is used as a dict keyed by table name.
data_dict = pd.read_pickle(os.path.join(LOCAL_DATA_DIR, 'cq2.pkl'))

In [None]:
# Every key of data_dict is a candidate feature table.
all_feature_list = list(data_dict.keys())

In [None]:
# Drop the price table: it is the source of the return series below.
all_feature_list.remove('btc-all_exchange-market-data-price-usd')

In [None]:
all_feature_list

In [None]:
# Close prices with the row order reversed ([::-1]) — presumably to make them
# chronological; TODO confirm the stored ordering.
btc_close = data_dict['btc-all_exchange-market-data-price-usd']['price_usd_close'][::-1]
# Period-over-period percentage change of the close price.
ret_df = btc_close.pct_change()

In [None]:
# Hand the raw price table to the project helper save_df under the name 'df0'.
df0 = data_dict['btc-all_exchange-market-data-price-usd']
save_df(df0, 'df0')

In [None]:
# Columns that are not treated as features.
feature_exlusion_list = ['blockheight', 'datetime']
# NOTE(review): feature_table is not defined in any visible cell — presumably
# one entry of data_dict; confirm before running.
feature_list = [x for x in feature_table.columns if x not in feature_exlusion_list]
display(feature_list)

In [None]:
# Put the selected feature columns on the same index as ret_df.
aligned_feature_df = align_feature_df(feature_table, feature_list, ret_df)

In [None]:
# NOTE(review): feature_table_name is likewise not defined in any visible cell.
get_feature_df_corr_with_ret(aligned_feature_df, feature_table_name, ret_df)

In [None]:
# Log file name stamped with the current local time (YYYYmmddHHMMSS).
_run_stamp = datetime.now().strftime('%Y%m%d%H%M%S')
log_file_name = 'cq_corr_{}.log'.format(_run_stamp)

In [None]:
# Synthetic stand-in for feature change rates: 100 rows x 4 columns (A-D).
# Levels are drawn from 1..99 rather than 0..99: a zero level makes
# pct_change() divide by zero, which puts inf/NaN into the frame and turns
# the rolling correlations computed from it into NaN.
df_features_chg = pd.DataFrame(np.random.randint(1, 100, size=(100, 4)), columns=list('ABCD')).pct_change()
df_features_chg.head()

In [None]:
# Toy run of the lagged rolling-correlation plot on the synthetic frame
# df_features_chg: one subplot per column, one line per lag.
feature_list = ['A', 'B', 'C', 'D']
n = len(feature_list)
# Target series: column A shifted by one row.
# NOTE(review): this rebinds the notebook-level name ret_df.
ret_df = df_features_chg.shift(1)['A']
look_back_window = 20
lags = [0, 1, 5, 30]
fig = plt.figure(figsize=(6, 4 * n))

for i, feature in enumerate(feature_list):
    ax1 = fig.add_subplot(n, 1, i + 1)
    ax1.grid(True, axis='x')
    f_chg = df_features_chg[feature]
    for lag in lags:
        rolling_corr = f_chg.shift(lag).rolling(look_back_window).corr(ret_df)
        # Same smoothing window for every lag; a window of
        # look_back_window * 100 is longer than the data and yields only NaN.
        rolling_corr.rolling(look_back_window).mean().plot(
            ax=ax1, label='lag={}'.format(lag))
    # Zero reference line. The keyword is lowercase `linestyles`, and the
    # label must be a plain string to appear in the legend.
    ax1.hlines(y=0, xmin=f_chg.index.min(), xmax=f_chg.index.max(), linewidth=2, color='r', label='0', linestyles='dotted')
    ax1.legend()
    ax1.title.set_text(feature)
    #ax1.get_xaxis().set_visible(False)

# Lay out once all subplots exist; calling plt.tight_layout() before the
# figure is created acts on a different (implicit, empty) figure.
fig.tight_layout(pad=0.5, w_pad=2.5, h_pad=2.0)

In [None]:
# Base names of the files already present in the figure directory.
# os.path.splitext strips only the final extension, so a name that itself
# contains dots is not truncated the way split('.')[0] would truncate it.
existing_files = [os.path.splitext(x)[0] for x in os.listdir(LOCAL_FIGURE_DIR)]

In [None]:
# Interactive plot of the close-price series (iplot is provided by cufflinks).
btc_close.iplot()

In [None]:
ret_df

In [None]:
# IPython `??` source introspection (not plain Python syntax).
# NOTE(review): addbb is imported below as a function from croqr.common.utils,
# so looking it up as an attribute of ret_df probably finds nothing — confirm.
ret_df.addbb??

In [None]:
from croqr.common.utils import addbb

In [None]:
# Wrap the return series in a single-column DataFrame for addbb.
df1 = ret_df.to_frame()

In [None]:
# DataFrame.pipe takes the function itself and calls it as
# addbb(df1, **kwargs). Passing the *result* of addbb(df1, ...) makes pipe
# try to call that result, which is not what is wanted.
df1.pipe(addbb, lags=50, methodma='ewm', methodstd='ewm', nstdh=2, nstdl=2)

In [None]:
# Plot whatever addbb returns for df1 — presumably the series plus
# EWM-based bands (2 std above/below, going by the arguments); confirm
# against croqr.common.utils.addbb.
addbb(df1, lags=50, methodma='ewm', methodstd='ewm', nstdh=2, nstdl=2).plot()

In [None]:
# Add a second column, 'fff', holding the return series.
df1['fff'] = ret_df

In [None]:
def triangle(self, nans=True):
    """Blank out every cell strictly below the main diagonal of a DataFrame.

    A cell at row position ``i`` and column position ``j`` is replaced when
    ``i > j``; cells on or above the diagonal are kept. Positions are used,
    not index/column labels.

    Args:
        self: DataFrame to mask (any shape, square or not). It is not
            modified; a masked copy is returned.
        nans: If True the masked cells become ``np.nan``, otherwise the
            empty string ``''``.

    Returns:
        A new DataFrame of the same shape with the lower triangle blanked.
    """
    n_rows, n_cols = self.shape
    # Build the condition with the frame's real shape (rows x columns); an
    # n_rows x n_rows matrix only matches square frames and makes mask()
    # raise a shape-mismatch error for every other frame.
    below_diagonal = np.arange(n_rows)[:, None] > np.arange(n_cols)
    return self.mask(below_diagonal, np.nan if nans else '')

In [None]:
# Small sample of df1 (head() with its default row count) for experimenting.
df2 = df1.head()

In [None]:
df2

In [None]:
# Try the triangle mask on the same sample.
triangle(df1.head())

In [None]:
# Number of rows in the sample.
n=len(df2)

In [None]:
# n x n boolean matrix: True where the row position exceeds the column
# position, i.e. strictly below the diagonal.
np.arange(n)[:,None] > np.arange(n)