In [1]:
import datetime as dt
import os
import pickle

import numpy as np
import pandas as pd
import quandl
import plotly.offline as py
import plotly.graph_objs as go
import plotly.figure_factory as ff

# Initialise plotly's offline notebook mode once, before any figure is drawn with py.iplot.
# NOTE(review): connected=True presumably loads plotly.js from a CDN instead of embedding it — confirm.
py.init_notebook_mode(connected=True)

In [39]:
def get_quandl_data(quandl_id):
    """Download and cache a Quandl dataseries.

    Parameters
    ----------
    quandl_id : str
        Quandl dataset code, e.g. ``'BCHARTS/KRAKENUSD'``.

    Returns
    -------
    The object unpickled from the cache file when that file can be opened;
    otherwise the result of ``quandl.get(quandl_id, returns="pandas")``, which
    is also pickled to the cache path so the next call is served locally.
    """
    # NOTE(review): `.replace` runs on the whole formatted string, so the slash
    # after "data" is replaced too and the cache file is written to the working
    # directory as "data-<id>.pkl" rather than inside a "data/" folder. Left
    # unchanged so cache files written by earlier runs are still found.
    cache_path = 'data/{}.pkl'.format(quandl_id).replace('/','-')

    try:
        # The context manager closes the handle even when unpickling fails.
        # pickle can execute arbitrary code: only load caches this notebook wrote.
        with open(cache_path, 'rb') as cache_file:
            df = pickle.load(cache_file)
        print(f"Loaded {quandl_id} from cache")
    except OSError:
        # IOError is an alias of OSError and FileNotFoundError a subclass of it,
        # so this single clause covers a missing or unreadable cache file.
        print(f"Downloading {quandl_id} from Quandl")
        df = quandl.get(quandl_id, returns="pandas")
        df.to_pickle(cache_path)
        print(f"Cached {quandl_id} at {cache_path}")

    return df


def merge_dfs_on_column(dataframes, labels, col):
    """Build one dataframe out of the `col` column of every input dataframe.

    The column taken from the i-th dataframe is stored under the i-th label.
    """
    picked_columns = {}
    for position, frame in enumerate(dataframes):
        label = labels[position]
        picked_columns[label] = frame[col]

    return pd.DataFrame(picked_columns)

In [42]:
def df_scatter(df, title, seperate_y_axis=False, y_axis_label='', scale='linear', initial_hide=False):
    """Draw every column of `df` as its own scatter trace in a single figure.

    Parameters
    ----------
    df : dataframe whose index supplies the x values and whose columns
        supply one series each.
    title : figure title.
    seperate_y_axis : when true, each trace is bound to its own y axis and
        the tick labels of the main y axis are hidden.
    y_axis_label : title of the main y axis.
    scale : value passed as the `type` of every y axis (e.g. 'linear', 'log').
    initial_hide : when true, traces are created with visible='legendonly'.
    """
    column_names = list(df)
    column_values = [df[name] for name in column_names]

    trace_visibility = 'legendonly' if initial_hide else True

    main_y_axis = dict(
        title=y_axis_label,
        showticklabels= not seperate_y_axis,
        type=scale
    )
    layout = go.Layout(
        title=title,
        legend=dict(orientation='h'),
        xaxis=dict(type='date'),
        yaxis=main_y_axis
    )

    # Settings reused for every per-series axis when seperate_y_axis is set
    overlay_axis = dict(
        overlaying='y',
        showticklabels=False,
        type=scale
    )

    # One trace per column; axis numbers are 1-based
    traces = []
    numbered_columns = enumerate(zip(column_names, column_values), start=1)
    for axis_number, (name, values) in numbered_columns:
        trace = go.Scatter(
            x=values.index,
            y=values,
            name=name,
            visible=trace_visibility
        )

        if seperate_y_axis:
            trace['yaxis'] = 'y{}'.format(axis_number)
            layout['yaxis{}'.format(axis_number)] = overlay_axis

        traces.append(trace)

    py.iplot(go.Figure(data=traces, layout=layout))
    

def correlation_heatmap(df, title, absolute_bounds=True):
    """Render the Pearson correlation matrix of `df`'s columns as a heatmap.

    When `absolute_bounds` is true the colour range is pinned to [-1.0, 1.0];
    otherwise zmin/zmax are left unset on the heatmap.
    """
    pearson_matrix = df.corr(method='pearson').to_numpy()
    axis_labels = df.columns

    heatmap = go.Heatmap(
        z=pearson_matrix,
        x=axis_labels,
        y=axis_labels,
        colorbar=dict(title='Pearson Coefficient'),
    )

    if absolute_bounds:
        heatmap['zmax'] = 1.0
        heatmap['zmin'] = -1.0

    figure = go.Figure(data=[heatmap], layout=go.Layout(title=title))
    py.iplot(figure)

In [14]:
# Pull pricing data for 2 major BTC/USD exchanges, to even out exchange-specific prices or spikes
exchanges = ['KRAKEN','BITSTAMP']
currency = 'USD'

# Map each exchange name to the dataframe fetched for its BCHARTS/<exchange><currency> code
exchange_data = {
    exchange: get_quandl_data(f'BCHARTS/{exchange}{currency}')
    for exchange in exchanges
}

exchange_data['KRAKEN'].head()

Loaded BCHARTS/KRAKENUSD from cache
Loaded BCHARTS/BITSTAMPUSD from cache


Unnamed: 0_level_0,Open,High,Low,Close,Volume (BTC),Volume (Currency),Weighted Price
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2014-01-07,874.6704,892.06753,810.0,810.0,15.622378,13151.472844,841.835522
2014-01-08,810.0,899.84281,788.0,824.98287,19.182756,16097.329584,839.156269
2014-01-09,825.56345,870.0,807.42084,841.86934,8.158335,6784.249982,831.572913
2014-01-10,839.99,857.34056,817.0,857.33056,8.02451,6780.220188,844.938794
2014-01-11,858.2,918.05471,857.16554,899.84105,18.748285,16698.566929,890.671709


In [15]:
# Merge the BTC price dataseries into a single dataframe, one column per exchange
exchange_frames = list(exchange_data.values())
exchange_labels = list(exchange_data.keys())
btc_usd_datasets = merge_dfs_on_column(exchange_frames, exchange_labels, 'Weighted Price')

# Notice how the prices are in a similar range but still differ noticeably between exchanges
btc_usd_datasets.tail()

Unnamed: 0_level_0,KRAKEN,BITSTAMP
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2021-06-16,39141.737747,39266.920444
2021-06-17,38433.468618,38449.36421
2021-06-18,36734.445103,36802.295855
2021-06-19,35743.971502,35743.751401
2021-06-20,34804.086189,34791.393581


In [20]:
# Clean up the data, then derive and plot the average price.

# Zero prices are treated as missing values
btc_usd_datasets.replace(to_replace=0, value=np.nan, inplace=True)

# Row-wise mean over the columns, stored as a new column
row_means = btc_usd_datasets.mean(axis='columns')
btc_usd_datasets['avg_btc_price_usd'] = row_means

# Plot all of the BTC exchange prices
df_scatter(df=btc_usd_datasets, title='Bitcoin Price (USD) By Exchange')

In [21]:
# Plot the average BTC price on its own
avg_btc_price = btc_usd_datasets['avg_btc_price_usd']
btc_trace = go.Scatter(x=avg_btc_price.index, y=avg_btc_price)
py.iplot([btc_trace])

### Step 2 - Retrieve Altcoin Pricing Data

Now that we have a solid time series dataset for the price of Bitcoin, let's pull in some data for non-Bitcoin cryptocurrencies, commonly referred to as altcoins.

In [31]:
# Poloniex public `returnChartData` URL template. The four `{}` placeholders are filled,
# in order, with the currencyPair, start, end and period query parameters.
base_polo_url = 'https://poloniex.com/public?command=returnChartData&currencyPair={}&start={}&end={}&period={}'


def get_json_data(json_url, cache_path):
    """Download and cache JSON data, return as a dataframe.

    Parameters
    ----------
    json_url : str
        Location handed to ``pd.read_json`` when no cache file can be read.
    cache_path : str
        Pickle file used as the cache. Its parent directory is created on
        demand before the file is written.

    Returns
    -------
    The object unpickled from `cache_path` when that file can be opened;
    otherwise the dataframe parsed from `json_url`, which is also pickled to
    `cache_path` so the next call is served locally.
    """
    try:
        # The context manager closes the handle even when unpickling fails.
        # pickle can execute arbitrary code: only load caches this notebook wrote.
        with open(cache_path, 'rb') as cache_file:
            df = pickle.load(cache_file)
        print(f"Loaded {json_url} from cache")
    except OSError:
        # IOError is an alias of OSError, so one clause covers a missing or unreadable cache.
        print(f"Downloading {json_url}")
        df = pd.read_json(json_url)

        # Make sure the target folder exists so a fresh checkout can write its first cache file
        cache_dir = os.path.dirname(cache_path)
        if cache_dir:
            os.makedirs(cache_dir, exist_ok=True)

        df.to_pickle(cache_path)
        print(f"Cached {json_url} at {cache_path}")

    return df


def get_crypto_data(poloniex_pair: str, start_date: str, end_date: str = None):
    """Retrieve cryptocurrency data from poloniex.

    Parameters
    ----------
    poloniex_pair : str
        Currency pair code inserted into the request URL, e.g. ``'BTC_ETH'``.
    start_date : str
        First day of the requested range, formatted ``YYYY-MM-DD``.
    end_date : str, optional
        Last day of the requested range, formatted ``YYYY-MM-DD``. When
        omitted (or empty) the current time is used. An object that already
        provides ``.timestamp()`` (such as a ``datetime``) is used as-is.

    Returns
    -------
    The dataframe produced by ``get_json_data``, re-indexed on its ``date``
    column.

    Raises
    ------
    ValueError
        If a date string does not match ``YYYY-MM-DD``.
    """
    date_format = '%Y-%m-%d'
    start = dt.datetime.strptime(start_date, date_format)

    if not end_date:
        end = dt.datetime.now()
    elif isinstance(end_date, str):
        # A string end date has to be parsed like start_date; str has no .timestamp()
        end = dt.datetime.strptime(end_date, date_format)
    else:
        end = end_date

    period = 86400  # pull daily data (86,400 seconds per day)

    json_url = base_polo_url.format(poloniex_pair, start.timestamp(), end.timestamp(), period)

    # NOTE: the cache file is keyed by pair only, so once a pair is cached the
    # cached frame is returned whatever date range is requested.
    data_df = get_json_data(json_url, f'data/{poloniex_pair}.pkl')

    return data_df.set_index('date')

### Step 3.2 - Download Trading Data From Poloniex

Most altcoins cannot be bought directly with USD; to acquire these coins individuals often buy Bitcoins and then trade the Bitcoins for altcoins on cryptocurrency exchanges. For this reason, we'll be downloading the exchange rate to BTC for each coin, and then we'll use our existing BTC pricing data to convert this value to USD.

We'll download exchange data for nine of the top cryptocurrencies:
Ethereum, Litecoin, Ripple, Ethereum Classic, Stellar, Dash, Siacoin, Monero, and NEM.

In [26]:
# Tickers of the nine coins to download, in order:
# Ethereum, Litecoin, Ripple, Ethereum Classic, Stellar, Dash, Siacoin, Monero, and NEM.
altcoins = ['ETH','LTC','XRP','ETC','STR','DASH','SC','XMR','XEM']

# Fetch the BTC_<ticker> pair for every coin, keyed by ticker
altcoin_data = {}
for altcoin in altcoins:
    altcoin_data[altcoin] = get_crypto_data(f'BTC_{altcoin}', start_date='2016-01-01')

Downloading {json_url}
Cached {json_url} at {cache_path}
Downloading {json_url}
Cached {json_url} at {cache_path}
Downloading {json_url}
Cached {json_url} at {cache_path}
Downloading {json_url}
Cached {json_url} at {cache_path}
Downloading {json_url}
Cached {json_url} at {cache_path}
Downloading {json_url}
Cached {json_url} at {cache_path}
Downloading {json_url}
Cached {json_url} at {cache_path}
Downloading {json_url}
Cached {json_url} at {cache_path}
Downloading {json_url}
Cached {json_url} at {cache_path}


In [29]:
# Preview the last rows of the frame stored for ETH
altcoin_data['ETH'].tail()

Unnamed: 0_level_0,high,low,open,close,volume,quoteVolume,weightedAverage
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2021-08-07,0.071511,0.067051,0.0675,0.070886,213.372171,3059.374436,0.069744
2021-08-08,0.071326,0.067921,0.070891,0.06881,560.274508,8069.569589,0.069431
2021-08-09,0.069061,0.066913,0.068751,0.068357,285.637415,4200.095168,0.068007
2021-08-10,0.070137,0.067723,0.068348,0.068912,281.463249,4075.912575,0.069055
2021-08-11,0.069316,0.068881,0.068994,0.069293,3.524464,50.997262,0.069111


In [36]:
# Add a USD price column to each altcoin dataframe: weighted average rate times the average BTC price
avg_btc_price_usd = btc_usd_datasets['avg_btc_price_usd']
for altcoin_df in altcoin_data.values():
    altcoin_df['price_usd'] = altcoin_df['weightedAverage'] * avg_btc_price_usd

# Collect the USD price of every altcoin into one dataframe, then add BTC itself
combined_df = merge_dfs_on_column(
    list(altcoin_data.values()),
    list(altcoin_data.keys()),
    'price_usd',
)
combined_df['BTC'] = avg_btc_price_usd

# Chart all of the altcoin prices
df_scatter(
    combined_df,
    'Cryptocurrency Prices (USD)',
    seperate_y_axis=False,
    y_axis_label='Coin Value (USD)',
    scale='log',
)

In [47]:
# Keep only the rows dated 2016, then correlate the row-to-row percentage changes (Pearson)
in_2016 = combined_df.index.year == 2016
combined_df_2016 = combined_df[in_2016]

pct_changes_2016 = combined_df_2016.pct_change()
correlation_heatmap(pct_changes_2016, "Cryptocurrency Correlations in 2016")