In [1]:
# Render our plots inline
%matplotlib inline

import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

# Make the graphs a bit prettier, and bigger
plt.style.use('ggplot')
plt.rcParams['figure.figsize'] = (15, 5)

# Panda plot sample
# https://pandas.pydata.org/pandas-docs/version/0.13/visualization.html

# 1 Process the raw stock trading history csv file
## 1.1 Load dataframe and combine data from past

In [82]:
df_hourly_21_03_2021 = pd.read_csv("../data/btc_gbtc_hour_3months_combined_21_03_2021.csv", sep=",", index_col='begins_at')
df_hourly_21_03_2021.index.astype('datetime64[ns]')

df_hourly_28_03_2021 = pd.read_csv("../data/btc_gbtc_hour_3months_combined_28_03_2021.csv", sep=",", index_col='begins_at')
df_hourly_28_03_2021.index.astype('datetime64[ns]')

df_5min_21_03_2021 = pd.read_csv("../data/btc_gbtc_5min_weekly_combined_21_03_2021.csv", sep=",", index_col='begins_at')
df_5min_21_03_2021.index.astype('datetime64[ns]')

df_5min_28_03_2021 = pd.read_csv("../data/btc_gbtc_5min_weekly_combined_28_03_2021.csv", sep=",", index_col='begins_at')
df_5min_28_03_2021.index.astype('datetime64[ns]')

df_hourly = pd.concat([df_hourly_21_03_2021, df_hourly_28_03_2021]).drop_duplicates().sort_index(ascending=True)
df_5min = pd.concat([df_5min_21_03_2021, df_5min_28_03_2021]).drop_duplicates().sort_index(ascending=True)
# df_5min
df_hourly[:3]
# _x is btc price, _y is gbtc price, btc trades with 24x7 but gbtc only trades when stock market open.

Unnamed: 0_level_0,open_price_x,close_price_x,high_price_x,low_price_x,volume_x,session_x,interpolated_x,symbol_x,open_price_y,close_price_y,high_price_y,low_price_y,volume_y,session_y,interpolated_y,symbol_y
begins_at,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
2020-12-22T00:00:00Z,22725.005,22562.82,23036.395,22461.105795,0,reg,False,BTCUSD,,,,,,,,
2020-12-22T01:00:00Z,22562.82,22762.055,22995.995,22447.084751,0,reg,False,BTCUSD,,,,,,,,
2020-12-22T02:00:00Z,22715.595,22913.89,23055.39,22691.88,0,reg,False,BTCUSD,,,,,,,,


## 1.2 Add calculation NAV column and rename columns

In [86]:
# add btc nav per share column
btc_per_share = 0.00094607

df_hourly = df_hourly.rename(columns={"open_price_y": "gbtc_open_price", "close_price_y": "gbtc_close_price", "high_price_y": "gbtc_high_price", "low_price_y": "gbtc_low_price", "volume_y": "gbtc_volume"})
df_hourly["nav_open_price"] = df_hourly["open_price_x"] * btc_per_share
df_hourly["nav_close_price"] = df_hourly["close_price_x"] * btc_per_share
df_hourly["nav_high_price"] = df_hourly["high_price_x"] * btc_per_share
df_hourly["nav_low_price"] = df_hourly["low_price_x"] * btc_per_share
#df_hourly[:3]

df_5min = df_5min.rename(columns={"open_price_y": "gbtc_open_price", "close_price_y": "gbtc_close_price", "high_price_y": "gbtc_high_price", "low_price_y": "gbtc_low_price", "volume_y": "gbtc_volume"})
df_5min["nav_open_price"] = df_5min["open_price_x"] * btc_per_share
df_5min["nav_close_price"] = df_5min["close_price_x"] * btc_per_share
df_5min["nav_high_price"] = df_5min["high_price_x"] * btc_per_share
df_5min["nav_low_price"] = df_5min["low_price_x"] * btc_per_share

df_5min[:4]

Unnamed: 0_level_0,open_price_x,close_price_x,high_price_x,low_price_x,volume_x,session_x,interpolated_x,symbol_x,gbtc_open_price,gbtc_close_price,gbtc_high_price,gbtc_low_price,gbtc_volume,session_y,interpolated_y,symbol_y,nav_open_price,nav_close_price,nav_high_price,nav_low_price
begins_at,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
2021-03-15T00:50:00Z,59928.765,59716.695,60042.66,59297.42,0,reg,False,BTCUSD,,,,,,,,,56.696807,56.496174,56.804559,56.09951
2021-03-15T00:55:00Z,59716.695,59548.09,59918.305,59116.865,0,reg,False,BTCUSD,,,,,,,,,56.496174,56.336662,56.686911,55.928692
2021-03-15T01:00:00Z,59548.09,59573.8,60060.125,59114.36,0,reg,False,BTCUSD,,,,,,,,,56.336662,56.360985,56.821082,55.926323
2021-03-15T01:05:00Z,59447.45,59520.595,59794.595,59011.885,0,reg,False,BTCUSD,,,,,,,,,56.241449,56.310649,56.569872,55.829374


# 2. Graph analysis

## 2.1 Hourly graph

In [100]:
import plotly.graph_objects as go
pd.options.plotting.backend = "plotly"


In [107]:
# btc hourly price history graph (2020-12 / 2021-3)
df_hourly["nav_open_price"].plot()

# TODO: show max diff and percentage in each hour between high and low

fig = df_hourly.loc[:,["nav_open_price", "nav_high_price", "nav_low_price"]].plot.line()
fig.update_layout(
    autosize=False,
    width=5000,
    height=600,
    paper_bgcolor="LightSteelBlue",
)

fig.update_xaxes(nticks=300)          
fig.show()

In [109]:
# hourly chart comparison
import matplotlib.dates as mdates
import matplotlib as plt

fig = df_hourly.loc[:,["nav_open_price", "nav_high_price", "nav_low_price", "gbtc_open_price", "gbtc_high_price", "gbtc_low_price"]].plot.line()
fig.update_layout(
    autosize=False,
    width=5000,
    height=600,
    paper_bgcolor="LightSteelBlue",
)

fig.update_xaxes(nticks=300)          
fig.show()

## 2.2 5 Minutes chart analysis

In [99]:
# 5 minutes chart comparison

# 
df_5min_03_21_2021 = pd.read_csv("../data/btc_gbtc_5min_weekly_combined_21_03_2021.csv", sep=",", index_col='begins_at')

df_5min_03_21_2021 = pd.read_csv("../data/btc_gbtc_5min_weekly_combined_21_03_2021.csv", sep=",", index_col='begins_at')

df_5min["nav"] = df_5min["open_price_x"] * btc_per_share
df_5min.loc[:,['nav','open_price_y']].plot(figsize=(100, 5), grid=True, x_compat=True)

KeyError: "Passing list-likes to .loc or [] with any missing labels is no longer supported. The following labels were missing: Index(['open_price_y'], dtype='object'). See https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#deprecate-loc-reindex-listlike"

In [45]:
df_5min.shape

(2016, 17)

In [47]:
# Try use plotly for interactive graph


fig = df_5min.loc[:,['nav','open_price_y']].plot.line()
fig.update_layout(
    autosize=False,
    width=5000,
    height=800,
    paper_bgcolor="LightSteelBlue",
)

fig.update_xaxes(nticks=200)          
fig.show()

In [34]:
# daily chart comparison
df_day = pd.read_csv("../data/btc_gbtc_daily_year_combined_21_03_2021.csv", sep=",", index_col='begins_at')
df_day["nav"] = df_day["open_price_x"] * btc_per_share

# import plotly.express as px
# df = px.data.stocks()

# fig = px.line(df_day, x="date", y=['nav', 'open_price_y'],
#               hover_data={"begins_at": "|%B %d, %Y"},
#               title='custom tick labels with ticklabelmode="period"')
# fig.update_xaxes(
#     dtick="M1",
#     tickformat="%b\n%Y",
#     ticklabelmode="period")
# fig.show()

fig = df_day.loc[:,['nav','open_price_y', 'close_price_y']].plot.line()
fig.update_layout(
    autosize=False,
    width=5000,
    height=800,
    paper_bgcolor="LightSteelBlue",
)
                 
fig.show()

In [None]:
# Thoughts
# Check premium, low premium (ex: -5%) could be buying areas, high premium (+/- 1%) could be selling areas. 
#  Need to verify the expected number. 
# The thoughts is, arbitrage suppose to buy when the premium is low while retail buyer is selling.  arbitrage suppose to sell when retail buyer wants to buy. 
# So we could use premium change rates as an reverse indicator to decide to buy or sell0