# Exploratory Data Analysis

### Objective and Hypotheses:

#### To analyze the price relationships between certain cryptocurrencies and the equities market over a 32-month period and to determine possible correlations.

#### Hypothesis (1): Prices of cryptocurrencies (BTC and ETH) correlate strongly with each other, but do not correlate with the broader equities market (SPY).

#### Hypothesis (2): Prices of blockchain-related stocks correlate more strongly with BTC than with the prices of other stocks.

In [2]:
# Don't modify this cell.
# Dependencies and Setup
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import numpy as np

#import statistics
import requests
import time
#import math
#from scipy import stats
from pprint import pprint

# Import API key
from api_keys import api_key

#import ready packages
from alpha_vantage.timeseries import TimeSeries
from alpha_vantage.cryptocurrencies import CryptoCurrencies

#### The above code imports all the packages we might need.

In [4]:
#Using existing alpha_vantage packages:

def get_crypto(sym, mar):
    """Request a cryptocurrency's daily history and keep only its USD close price.

    Parameters
    ----------
    sym : str
        Symbol of the cryptocurrency, e.g. 'BTC'.
    mar : str
        Market passed through to the Alpha Vantage request, e.g. 'USD'.

    Returns
    -------
    tuple
        ``(data, meta_data)``. ``data`` is the DataFrame returned by
        ``get_digital_currency_daily`` reduced to a single column named
        ``'<sym> (USD)'``; ``meta_data`` is passed back unchanged.

    Raises
    ------
    KeyError
        If the response does not contain a '4b. close (USD)' column.
    """
    close_col = '4b. close (USD)'  # the only column this analysis keeps

    ccb = CryptoCurrencies(key=api_key, output_format='pandas')
    data, meta_data = ccb.get_digital_currency_daily(symbol=sym, market=mar)

    # Select the column we keep rather than dropping a hard-coded list of the
    # others: a drop list that spells every label with '(USD)' raises KeyError
    # as soon as one of those labels is missing from the response.
    # NOTE(review): presumably the 'a' columns carry the requested market's
    # currency code, so the old drop list only worked for mar='USD' -- confirm
    # against the Alpha Vantage response for another market.
    data = data[[close_col]]
    data = data.rename(columns={close_col: f'{sym} (USD)'})  # label the series by its symbol
    return data, meta_data

# Request the daily close prices for Bitcoin and Ethereum through get_crypto.
b_data, b_meta_data = get_crypto(sym='BTC', mar='USD')
e_data, e_meta_data = get_crypto(sym='ETH', mar='USD')

# Place the two price frames side by side in one dataframe so they can be
# handled together.
crypto_data = pd.concat([b_data, e_data], axis='columns')
crypto_data.head(1)

Unnamed: 0_level_0,BTC (USD),ETH (USD)
date,Unnamed: 1_level_1,Unnamed: 2_level_1
2021-11-11,64535.8,4592.74


#### Here we defined a function that requests and loads the crypto data we will need

In [5]:
def plot_correlations(df, roll, location1, location2, delta, fsize, lsize):
    columns = df.columns.to_list()
# this function takes multiple parameters, the dataframe to work with, the rolling period for the rolling correlation,
# the (x coordinate) location of the line representing the global correlation, the (x coordinate) location of the anno-
# tation, their difference (delta) in y coordinates and title fontsize as well as tick labelsize.
    
    fig = plt.figure(figsize = (22, 18)) # create figure
    
    ax0 = fig.add_subplot(3, 1, 1) # create 3 subplots in a column
    ax2 = fig.add_subplot(3, 1, 2)
    ax3 = fig.add_subplot(3, 1, 3, sharex = ax2) #
    
    fig.tight_layout(pad = 7.0) # set the spacing between plots
    
    ax0.scatter(df[columns[0]], df[columns[1]], c = df.index, cmap = 'viridis') # create scatterplot using the 2 columns of df, set color hue by index (which is the date)
    ax0.set_xlabel(f'Price of {columns[0]}', color = 'r', fontsize = fsize) # set labels with appropriate color and size
    ax0.set_ylabel(f'Price of {columns[1]}', color = 'b', fontsize = fsize)
    ax0.set_title(f'Scatterplot of {columns[0]} and {columns[1]} Prices, Hues Representing Time, Lighter more Recent', fontsize = fsize)
    ax0.tick_params('x', colors = 'r', labelbottom = True, labelsize = lsize) # set the tick parameters
    ax0.tick_params('y', colors = 'b', labelsize = lsize)