# Cryptocurrency 2018 Bear Market Analysis


- Author: Vanessa Liu (vanessaliu124@gmail.com)
- Date: Feb 14, 2018

# Project Setup


In [61]:
#import the required dependencies
import requests
import datetime
import pandas as pd
import pickle
import matplotlib.pyplot as plt
import plotly.offline as py
import plotly.graph_objs as go
import plotly.figure_factory as ff
py.init_notebook_mode(connected=True)

%matplotlib inline
plt.style.use('fivethirtyeight')

# import Plotly and enable the offline mode.
import plotly.offline as py
import plotly.graph_objs as go
import plotly.figure_factory as ff
py.init_notebook_mode(connected=True)

In [58]:
# Pretty print the JSON
import uuid
from IPython.display import display_javascript, display_html, display
import json

class RenderJSON(object):
    '''Display JSON data as an interactive tree in the notebook output area.

    Parameters
    ----------
    json_data : str or any JSON-serialisable object
        A string is taken to be JSON text already and is used verbatim.
        Anything else (dict, list, number, bool, None) is serialised with
        ``json.dumps`` so that it is embedded in the page as valid JavaScript.

    Attributes
    ----------
    json_str : str
        The JSON text handed to the ``renderjson`` JavaScript function.
    uuid : str
        Random id of the ``<div>`` the tree is attached to.
    '''

    def __init__(self, json_data):
        # Only text is passed through untouched. Previously just dicts were
        # serialised, so a list such as [None, True] was interpolated as its
        # Python repr, which is not valid JavaScript.
        if isinstance(json_data, str):
            self.json_str = json_data
        else:
            self.json_str = json.dumps(json_data)
        self.uuid = str(uuid.uuid4())

    def _ipython_display_(self):
        '''Emit the placeholder <div> and the script that fills it.'''
        display_html('<div id="{}" style="height: 600px; width:100%;"></div>'.format(self.uuid), raw=True)
        # NOTE(review): renderjson.js is fetched from rawgit.com every time the
        # object is displayed -- confirm that host is still serving the file.
        display_javascript("""
        require(["https://rawgit.com/caldwell/renderjson/master/renderjson.js"], function() {
        document.getElementById('%s').appendChild(renderjson(%s))
        });
        """ % (self.uuid, self.json_str), raw=True)


In [81]:
# daily_price_historical returns the full daily price history of a cryptocurrency
# (the author notes the price is updated every day at 8pm -- presumably local time)
# (prices are quoted in USD in this study)
def daily_price_historical(symbol, comparison_symbol, limit=1, aggregate=1, exchange='', allData='true'):
    '''Download daily price bars from the CryptoCompare ``histoday`` endpoint.

    Parameters
    ----------
    symbol : str
        Ticker of the coin to price (upper-cased before sending), e.g. 'BTC'.
    comparison_symbol : str
        Ticker of the quote currency (upper-cased before sending), e.g. 'USD'.
    limit, aggregate, allData
        Forwarded unchanged as the query parameters of the same name.
    exchange : str
        Optional exchange name, sent as the ``e`` parameter when non-empty.

    Returns
    -------
    pandas.DataFrame
        One row per record in the response's ``Data`` list, plus a ``date``
        column derived from the ``time`` column.

    Raises
    ------
    requests.HTTPError
        If the server answers with an HTTP error status.
    ValueError
        If the API reports an error or returns no rows.
    '''
    url = 'https://min-api.cryptocompare.com/data/histoday'
    # Let requests build and encode the query string instead of formatting it by hand.
    params = {
        'fsym': symbol.upper(),
        'tsym': comparison_symbol.upper(),
        'limit': limit,
        'aggregate': aggregate,
        'allData': allData,
    }
    if exchange:
        params['e'] = exchange
    # A timeout keeps a stalled connection from hanging the notebook forever.
    page = requests.get(url, params=params, timeout=30)
    page.raise_for_status()
    payload = page.json()
    data = payload.get('Data')
    if payload.get('Response') == 'Error' or not data:
        # Fail with a message naming the pair rather than an AttributeError on df.time below.
        raise ValueError('No histoday data returned for {}/{}: {}'.format(
            symbol, comparison_symbol, payload.get('Message', 'empty response')))
    df = pd.DataFrame(data)
    # NOTE(review): fromtimestamp() converts using the kernel's local timezone, so
    # the calendar date given to each row depends on where the notebook runs.
    df['date'] = [datetime.datetime.fromtimestamp(d).date() for d in df['time']]
    return df

In [None]:
# Example: download the daily price history of Bitcoin (BTC) quoted in USD
# and preview the first rows.
df = daily_price_historical('BTC','USD')
df.head()

In [None]:
# Pull the Bitcoin history and keep only the bear-market window:
# rows dated strictly after 2017-11-30, i.e. from 2017-12-01 onwards.
btcprice = daily_price_historical('BTC','USD')
btcpriceb = btcprice[(btcprice['date'] > datetime.date(2017,11,30))]

btcpriceb.head()

In [89]:
# Chart the BTC closing price since the start of the bear market
btc_trace = go.Scatter(x=btcpriceb['date'], y=btcpriceb['close'])
# A plain list of traces and a plain dict annotation replace the deprecated
# go.Data / go.Annotation wrappers; Plotly accepts both forms.
data_trace = [btc_trace]
layout = go.Layout(title="Bitcoin Price Since Dec 2017 (USD)", xaxis={'title':'Date'}, yaxis={'title':'Bitcoin Price in USD'})
# The annotation's y position is given as a number so it is placed on the numeric price axis.
layout.update(dict(annotations=[dict(text="Highest Point -- 2017-12-15", x="2017-12-15 19:00:00", y=19345.49)]))
# Alternative annotation for the bottom of the drop (only one is applied at a time):
#layout.update(dict(annotations=[dict(text="Lowest Point -- 2018-02-04", x="2018-02-04 19:00:00", y=6937.08)]))
figure = go.Figure(data=data_trace, layout=layout)
py.iplot(figure)

# link of plot on plot.ly: https://plot.ly/~vanessaliu124/1

We can see that the price dropped significantly, from almost 20k (peak on 2017-12-15) 
<br>to below 7k (bottom on 2018-02-04).
<br>For my data selection I will use data since Dec 2017,
<br>so that we include the 2018 bear market data and a little bit of bull market data (the run-up to the mid-Dec 2017 peak).

# Coins selection

### In the analysis I will choose both mainstream coins and small coins, listed below:

##### Mainstream Coins
- BTC -- Bitcoin
- ETH -- Ethereum
- LTC -- Litecoin
- XRP -- Ripple
- ETC -- Ethereum Classic

##### Non - Mainstream Coins
- XLM -- Stellar
- INK
- ELF
- XRB
- INS
- SRN
- BCD
- DBC
- BCPT

In [90]:
# Download the pricing data of every coin in the study.
# For each coin, keep the rows dated after 2017-11-30 and index them by date.
coins = ['BTC','ETH','LTC','XRP','XLM','XRB','BCD','BCPT','ZCL','LSK','OMG']
# Tickers currently left out of the list above:
# 'DBC','ELF','INK','INS','SRN','ETC',
coin_data = {}
for coin in coins:
    crypto_price_df = daily_price_historical(coin, 'USD')
    coin_data[coin] = crypto_price_df[(crypto_price_df['date'] > datetime.date(2017,11,30))].set_index('date')

# Preview the most recent Bitcoin rows
coin_data['BTC'].tail()

Unnamed: 0_level_0,close,high,low,open,time,volumefrom,volumeto
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2018-02-09,8569.29,9081.49,8176.25,8696.83,1518220800,155616.78,1348923000.0
2018-02-10,8084.61,8573.35,7862.31,8569.32,1518307200,123293.84,1013772000.0
2018-02-11,8911.27,8997.34,8084.41,8084.61,1518393600,124923.98,1085922000.0
2018-02-12,8544.69,8955.15,8379.35,8911.17,1518480000,98632.88,853320400.0
2018-02-13,9268.66,9382.37,8542.98,8544.69,1518566400,116219.67,1056246000.0


In [98]:
# In this study we only use daily close data for pricing analysis
# This step merge daily close price of all coins

def merge_dfs_on_column(dataframes, labels, col):
    '''Build one dataframe out of the same column taken from several dataframes.

    The i-th dataframe contributes its ``col`` column, which appears in the
    result under the name ``labels[i]``.
    '''
    picked = {
        labels[position]: frame[col]
        for position, frame in enumerate(dataframes)
    }
    return pd.DataFrame(picked)

# One column of daily closing prices per coin, labelled with the coin's ticker.
combined_df = merge_dfs_on_column(list(coin_data.values()), list(coin_data.keys()), 'close')
combined_df.head()

Unnamed: 0_level_0,BCD,BCPT,BTC,ETH,LSK,LTC,OMG,XLM,XRB,XRP,ZCL
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
2017-12-01,44.59,0.2743,10912.73,457.96,7.55,99.32,9.28,0.09321,0.07039,0.2441,2.02
2017-12-02,45.84,0.2467,11246.21,462.81,7.55,100.7,9.42,0.08946,0.07254,0.2449,2.22
2017-12-03,45.46,0.2441,11623.91,466.93,7.8,103.87,10.48,0.09701,0.07497,0.2462,2.42
2017-12-04,37.02,0.2209,11667.13,453.96,9.61,100.49,9.97,0.1225,0.07525,0.2337,2.25
2017-12-05,33.69,0.2073,13749.57,422.48,9.3,98.97,8.56,0.1398,0.08868,0.2182,2.32


In [95]:
# saving the result to excel since the cryptocompare API is unstable sometimes
# combined_df.to_excel('coinprices2.xlsx')

In [99]:
# function to make a neat plot of coins price

def df_scatter(df, title, seperate_y_axis=False, y_axis_label='', scale='linear', initial_hide=False):
    '''Plot every column of a dataframe as its own trace on one Plotly chart.

    Parameters
    ----------
    df : pandas.DataFrame
        Each column becomes one trace named after the column; the index
        supplies the x values and is drawn on a date axis.
    title : str
        Chart title.
    seperate_y_axis : bool
        When True, each trace is assigned its own y axis overlaid on the
        first one, and y tick labels are hidden.
    y_axis_label : str
        Title of the y axis.
    scale : str
        Plotly axis type applied to the y axes, e.g. 'linear' or 'log'.
    initial_hide : bool
        When True, traces start hidden and are only listed in the legend.

    The figure is displayed inline with ``py.iplot``; nothing is returned.
    '''
    label_arr = list(df)

    layout = go.Layout(
        title=title,
        legend=dict(orientation="h"),
        xaxis=dict(type='date'),
        yaxis=dict(
            title=y_axis_label,
            showticklabels=not seperate_y_axis,
            type=scale
        )
    )

    # Plotly's `visible` attribute only accepts True, False or 'legendonly';
    # the string 'visible' used before is not a valid value.
    visibility = 'legendonly' if initial_hide else True

    # Form one trace per column
    trace_arr = []
    for index, label in enumerate(label_arr):
        series = df[label]
        trace = go.Scatter(
            x=series.index,
            y=series,
            name=label,
            visible=visibility
        )

        # Add a separate axis for the series
        if seperate_y_axis:
            axis_number = index + 1
            trace['yaxis'] = 'y{}'.format(axis_number)
            # Build a fresh config per axis so no two axes share one dict object.
            layout['yaxis{}'.format(axis_number)] = dict(
                overlaying='y',
                showticklabels=False,
                type=scale
            )
        trace_arr.append(trace)

    fig = go.Figure(data=trace_arr, layout=layout)
    py.iplot(fig)

In [100]:

df_scatter(combined_df, 'Cryptocurrency Prices (USD)', seperate_y_axis=False, y_axis_label='Coin Value (USD)', scale='log')
# link of plot on plot.ly:  https://plot.ly/~vanessaliu124/3

### I'm pretty happy with this plot; it nicely shows the coin prices in USD
 However, on this scale the prices do not appear to fluctuate much, and it's difficult to see the overall trend.
<br> Later I will use a min-max scaler to scale all coin prices between 0 and 1.
<br> This can also help us better understand the correlation and how sensitive coin prices are.

### However, before doing that, let's look at the Pearson correlation of the daily price changes first

In [102]:
combined_df.pct_change().corr(method='pearson')

Unnamed: 0,BCD,BCPT,BTC,ETH,LSK,LTC,OMG,XLM,XRB,XRP,ZCL
BCD,1.0,0.105505,0.147676,0.229645,0.019818,0.199238,0.264402,0.08472,-0.032425,0.124707,0.067581
BCPT,0.105505,1.0,0.333882,0.379094,0.316747,0.267289,0.450353,0.410483,0.216246,0.184859,0.256657
BTC,0.147676,0.333882,1.0,0.543854,0.285025,0.479254,0.459421,0.41591,0.180996,0.213949,0.20976
ETH,0.229645,0.379094,0.543854,1.0,0.444152,0.753897,0.789921,0.478554,0.017901,0.423388,0.344826
LSK,0.019818,0.316747,0.285025,0.444152,1.0,0.256409,0.486154,0.297973,0.08595,0.402816,0.139394
LTC,0.199238,0.267289,0.479254,0.753897,0.256409,1.0,0.601014,0.32456,0.010138,0.325668,0.241565
OMG,0.264402,0.450353,0.459421,0.789921,0.486154,0.601014,1.0,0.504738,-0.073945,0.407156,0.323604
XLM,0.08472,0.410483,0.41591,0.478554,0.297973,0.32456,0.504738,1.0,-0.088838,0.545251,0.238797
XRB,-0.032425,0.216246,0.180996,0.017901,0.08595,0.010138,-0.073945,-0.088838,1.0,-0.076465,0.05432
XRP,0.124707,0.184859,0.213949,0.423388,0.402816,0.325668,0.407156,0.545251,-0.076465,1.0,0.311101


In [103]:
# Heatmap visualization to more clearly see the correlation, 
def correlation_heatmap(df, title, absolute_bounds=True):
    '''Plot the Pearson correlation matrix of a dataframe's columns as a heatmap.

    Parameters
    ----------
    df : pandas.DataFrame
        Data whose pairwise column correlations are plotted.
    title : str
        Chart title.
    absolute_bounds : bool
        When True, the colour scale is pinned to the range [-1, 1];
        otherwise Plotly picks the range from the data.

    The figure is displayed inline with ``py.iplot``; nothing is returned.
    '''
    corr = df.corr(method='pearson')
    heatmap = go.Heatmap(
        # `.values` replaces `.as_matrix()`, which newer pandas no longer provides.
        z=corr.values,
        # Take the labels from the correlation matrix itself so they always
        # line up with the rows and columns of z.
        x=corr.columns,
        y=corr.index,
        colorbar=dict(title='Pearson Coefficient'),
    )

    layout = go.Layout(title=title)

    if absolute_bounds:
        heatmap['zmax'] = 1.0
        heatmap['zmin'] = -1.0

    fig = go.Figure(data=[heatmap], layout=layout)
    py.iplot(fig)

In [123]:
correlation_heatmap(combined_df.pct_change(), "Cryptocurrency Correlations in Bear Market")
# link of plot on plot.ly:  https://plot.ly/~vanessaliu124/5

From the correlation matrix we can see that most coins are either positively correlated with each other or slightly (potentially not statistically significantly) negatively correlated.
<br> Negative correlations could be significant between a small coin and other coins.
<br> Mainstream coins are all positively correlated with each other.

In [111]:
# import MinMaxScaler to scale price from 0 to 1
from sklearn.preprocessing import MinMaxScaler
scaler = MinMaxScaler()


In [116]:
# Build combined_df_scaled from combined_df: the scaler maps prices into the
# 0-to-1 range, and the original column labels and date index are kept.
combined_df_scaled = pd.DataFrame(scaler.fit_transform(combined_df), columns=combined_df.columns, index = combined_df.index)
combined_df_scaled.head()

Unnamed: 0_level_0,BCD,BCPT,BTC,ETH,LSK,LTC,OMG,XLM,XRB,XRP,ZCL
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
2017-12-01,0.265816,0.075333,0.3204,0.03819,0.0,0.012695,0.092945,0.004715,0.0,0.015758,0.001149
2017-12-02,0.274583,0.058606,0.347275,0.043222,0.0,0.017973,0.100784,0.0,6.7e-05,0.016068,0.002033
2017-12-03,0.271918,0.05703,0.377714,0.047496,0.007676,0.030094,0.160134,0.009493,0.000143,0.016573,0.002917
2017-12-04,0.212723,0.04297,0.381197,0.03404,0.063248,0.01717,0.131579,0.041542,0.000151,0.011721,0.002166
2017-12-05,0.189367,0.034727,0.549022,0.00138,0.05373,0.011357,0.052632,0.063294,0.000569,0.005705,0.002475


In [114]:
# Plot it
df_scatter(combined_df_scaled, 'Cryptocurrency Prices Scaled', seperate_y_axis=False, y_axis_label='Coin Value Scaled')
#  link of plot on plot.ly: https://plot.ly/~vanessaliu124/7

### I'm pretty happy with the result, 
However, it's still a little difficult to see the sensitivity since the price moves all the time.
<br> It would be better if we plot the daily percentage change in price.

In [124]:
# Plot the daily percentage change of every coin.
# XRB and BCD are dropped because they are outliers that stretch the y axis too much;
# I will talk about them separately later.
# `axis` is passed by keyword: newer pandas rejects it as a positional argument.
df_scatter(combined_df.pct_change().drop(['XRB', 'BCD'], axis=1),
           'Cryptocurrency Prices Change %', seperate_y_axis=False, y_axis_label='Coin Value % change')
# link of plot on plot.ly:  https://plot.ly/~vanessaliu124/9

### I'm happy with this plot! 
We can tell a lot from this plot.
- First, we can easily tell that there is almost no lag in the price moves of non-mainstream coins.
<br> Especially when there are lots of fluctuations.
<br> For example, at the significant ups/downs: Dec 9, Dec 21, Jan 15, Feb 4.
<br> A possible reason is that cryptocurrencies can be traded instantly, so there is basically no lead time in trading coins if people think cryptocurrency is not doing well.
 
- Second, we can see the most volatile coins -- BCPT, XRP, LSK, ZCL -- are all non-mainstream coins.





## Future Studies:
- Data wise:
<br>import price of all coins and use PCA to cluster them into groups 
<br>import some google trend data, twitter data and stock price 

- Analysis wise:
<br> Classify non-mainstream coins into scam/non-scam and analyze them separately

# Scratch area

Coin List

In [None]:
def coin_list():
    '''Return the ``Data`` entry of CryptoCompare's coin-list response.'''
    response = requests.get('https://www.cryptocompare.com/api/data/coinlist/')
    return response.json()['Data']

In [None]:
# Scratch: reload a wider coin list and keep only rows dated after 2017-12-15.
# NOTE: this rebinds `coins` and `coin_data`, which the analysis cells above also use.
coins = ['BTC','ETH','LTC','XRP','ETC','XLM','INK','ELF','XRB','INS','SRN','BCD','DBC','BCPT']
coin_data = {}
for coin in coins:
    crypto_price_df = daily_price_historical(coin, 'USD')
    # The frame has a 'date' column of datetime.date values and no 'timestamp'
    # column, so the filter must use 'date' and compare against a date object.
    coin_data[coin] = crypto_price_df[(crypto_price_df['date'] > datetime.date(2017, 12, 15))]

coin_data['BTC'].tail()

In [None]:
coin_data['BTC']

In [None]:
import statsmodels.api as sm

In [None]:
from statsmodels.tsa.api import VAR, DynamicVAR

In [None]:
mdata = sm.datasets.macrodata.load_pandas().data
mdata

In [None]:
# Chart the BTC closing price since the start of the bear market
# ('date' is the column daily_price_historical adds; there is no 'timestamp' column)
btc_trace = go.Scatter(x=btcpriceb['date'], y=btcpriceb['close'])
# A plain list of traces replaces the deprecated go.Data wrapper.
data = [btc_trace]
layout = go.Layout(title="Bitcoin Price Since Dec 2017 (USD)", xaxis={'title':'Date'}, yaxis={'title':'Bitcoin Price in USD'})
figure = go.Figure(data=data, layout=layout)
py.iplot(figure)

#we can see that the price dropped significantly from almost 20k (peak at Dec 17th) to below 8k 

In [None]:
def price(symbol, comparison_symbols=['USD'], exchange=''):
    '''Fetch the current price of one coin in one or more quote currencies.

    Parameters
    ----------
    symbol : str
        Ticker of the coin to price (upper-cased before sending).
    comparison_symbols : str or iterable of str
        Quote currency tickers. A single ticker may be given as a bare string.
    exchange : str
        Optional exchange name, appended as the ``e`` parameter when non-empty.

    Returns
    -------
    The decoded JSON body of the response.
    '''
    # A bare string would otherwise be joined character by character
    # ('USD' -> 'U,S,D'), so wrap it in a list first.
    if isinstance(comparison_symbols, str):
        comparison_symbols = [comparison_symbols]
    url = 'https://min-api.cryptocompare.com/data/price?fsym={}&tsyms={}'\
            .format(symbol.upper(), ','.join(comparison_symbols).upper())
    if exchange:
        url += '&e={}'.format(exchange)
    page = requests.get(url)
    data = page.json()
    return data

In [None]:
# Scratch: download the unfiltered history of every coin in the wider list.
# NOTE: this rebinds `coins` and `coin_data`, which the analysis cells above also use.
coins = ['BTC','ETH','LTC','XRP','ETC','XLM','INK','ELF','XRB','INS','SRN','BCD','DBC','BCPT']
coin_data = {}
for coin in coins:
    crypto_price_df = daily_price_historical(coin, 'USD')
    # Earlier experiments, disabled. They refer to a 'timestamp' column, which
    # daily_price_historical does not create (it adds 'date').
#    coin_data[coin] = crypto_price_df.set_index('timestamp')
#    coin_data[coin] = crypto_price_df[(crypto_price_df['timestamp'] > '2017-12-15')]
    coin_data[coin] = crypto_price_df
#coin_data['BTC'].tail()
# Displays the frame of the last coin fetched by the loop.
crypto_price_df

### Useful links:
    
- CryptoCompare API Quick Start 
<br>This is a very useful link for the cryptocompare API, which is where I got data from this study
<br>https://github.com/agalea91/cryptocompare-api/blob/master/CryptoCompare.API.2017.08.ipynb
<br>
<br>
- Cryptocurrency Analysis Python
<br>https://github.com/triestpa/Cryptocurrency-Analysis-Python
<br>
<br>
- CryptoAsset Portfolios: Identifying Highly Correlated Cryptocurrencies using PCA
<br>http://www.quantatrisk.com/2017/03/31/cryptocurrency-portfolio-correlation-pca-python/
<br>
<br>Future studies
- Useful link of tsa lag and correlation
<br>https://stackoverflow.com/questions/25320773/time-series-correlation-and-lag-time
<br>
<br>