# Bitcoin dataset analysis

Inspired by:
<a href="https://medium.com/activewizards-machine-learning-company/bitcoin-price-forecasting-with-deep-learning-algorithms-eb578a2387a3">Bitcoin price forecasting with deep learning algorithms</a>

In [5]:
# download Bitcoin dataset from Kaggle
# !kaggle datasets download -d mczielinski/bitcoin-historical-data
! pip install plotly

Collecting plotly
  Downloading https://files.pythonhosted.org/packages/a7/3d/4dcdbafc9d5c01f468d41999cd9ab733f38e9ea4e4bea5a62841fedf5f0e/plotly-2.5.1.tar.gz (24.9MB)
Building wheels for collected packages: plotly
  Running setup.py bdist_wheel for plotly: started
  Running setup.py bdist_wheel for plotly: finished with status 'done'
  Stored in directory: C:\Users\Lord\AppData\Local\pip\Cache\wheels\33\be\39\f82c0f53ea29777fdc29afaf7bfad87442488a280662d355fb
Successfully built plotly
Installing collected packages: plotly
Successfully installed plotly-2.5.1


In [21]:
import warnings
# Installed before the remaining imports so the filter also applies to warnings raised while they load.
warnings.filterwarnings("ignore")

from datetime import date
from math import sqrt
from random import randint

import numpy as np
import pandas as pd
import tensorflow as tf
import statsmodels.api as sm
from matplotlib import pyplot as plt
import plotly.offline as py
import plotly.graph_objs as go

py.init_notebook_mode(connected=True)
%matplotlib inline

In [7]:
# import the data
csv_path = './datasets/bitstampUSD_1-min_data_2012-01-01_to_2018-03-27.csv'
data = pd.read_csv(csv_path)

# Sanity check on the load: evaluates to True if any cell of the frame is null,
# so a displayed False means no null values were found.
data.isnull().values.any()

False

In [8]:
# preview the first 10 rows of the loaded frame
data.head(n=10)

Unnamed: 0,Timestamp,Open,High,Low,Close,Volume_(BTC),Volume_(Currency),Weighted_Price
0,1325317920,4.39,4.39,4.39,4.39,0.455581,2.0,4.39
1,1325317980,4.39,4.39,4.39,4.39,0.455581,2.0,4.39
2,1325318040,4.39,4.39,4.39,4.39,0.455581,2.0,4.39
3,1325318100,4.39,4.39,4.39,4.39,0.455581,2.0,4.39
4,1325318160,4.39,4.39,4.39,4.39,0.455581,2.0,4.39
5,1325318220,4.39,4.39,4.39,4.39,0.455581,2.0,4.39
6,1325318280,4.39,4.39,4.39,4.39,0.455581,2.0,4.39
7,1325318340,4.39,4.39,4.39,4.39,0.455581,2.0,4.39
8,1325318400,4.39,4.39,4.39,4.39,0.455581,2.0,4.39
9,1325318460,4.39,4.39,4.39,4.39,0.455581,2.0,4.39


In [10]:
# Transform the data to get the average price by day
data['date'] = (
    pd.to_datetime(data['Timestamp'], unit='s')  # Timestamp holds seconds -> datetime
    .dt.date                                     # keep only the calendar day (y-m-d)
)

# One group per calendar day; the daily price is the mean of that day's Weighted_Price rows.
group = data.groupby('date')
Daily_Price = group['Weighted_Price'].mean()

Daily_Price.head(10)

date
2011-12-31    4.472579
2012-01-01    4.680778
2012-01-02    5.000000
2012-01-03    5.145917
2012-01-04    5.195443
2012-01-05    6.124426
2012-01-06    6.540973
2012-01-07    6.192222
2012-01-08    6.842292
2012-01-09    6.697618
Name: Weighted_Price, dtype: float64

In [55]:
# define the time-intervals of training and testing data
def _inclusive_days(start, end):
    """Return the number of calendar days from `start` to `end`, counting both endpoints.

    start, end: `datetime.date` objects; `end` is expected to be on or after `start`.
    """
    return (end - start).days + 1


# 2016-01-01 .. 2017-12-25, both days included
days_look = _inclusive_days(date(2016, 1, 1), date(2017, 12, 25))
print(days_look)

# 2017-10-31 .. 2017-12-30, both days included
days_from_train = _inclusive_days(date(2017, 10, 31), date(2017, 12, 30))
print(days_from_train)

# 2017-12-25 .. 2017-12-30, both days included
days_from_end = _inclusive_days(date(2017, 12, 25), date(2017, 12, 30))
print(days_from_end)

725
61
6


In [56]:
# Split the dataset into Training and testing data with specific time intervals
# NOTE(review): both slices are row positions counted back from the last row of
# Daily_Price, not calendar-date lookups. The CSV file name suggests the series runs to
# 2018-03-27, so the selected rows may not be the 2016-01-01..2017-12-30 window the day
# counts were derived from — confirm the intended window.
n_days = len(Daily_Price)
split_at = n_days - days_from_train                 # first row of the test set
train_start = n_days - days_look - days_from_end    # first row of the training set

df_train = Daily_Price.iloc[train_start:split_at]
df_test = Daily_Price.iloc[split_at:]

print(len(df_train), len(df_test))

670 61


In [57]:
# Concatenate train and test data to make analysis and transformations simultaneously.

working_data = (
    pd.concat([df_train, df_test])
    .reset_index()                                              # 'date' index -> ordinary column
    .assign(date=lambda frame: pd.to_datetime(frame['date']))   # convert the column to datetimes
    .set_index('date')                                          # and index the frame by it again
)

Perform a seasonal decomposition of the data to estimate its trend and seasonality:

In [60]:
def _line_trace(values, name, color, width=2):
    """Return a plotly line trace of `values` plotted against positions 0..len(values)-1.

    values: 1-D sequence used for the y axis.
    name: legend label of the trace.
    color: colour string for the line, e.g. 'rgb(0, 0, 0)'.
    width: line width passed to plotly.
    """
    return go.Scatter(
        x=np.arange(0, len(values), 1),
        y=values,
        mode='lines',
        name=name,
        line=dict(color=color, width=width),
    )


# Decompose the daily price values; freq=40 is the number of samples taken as one seasonal cycle.
s = sm.tsa.seasonal_decompose(working_data.Weighted_Price.values, freq=40)

trace1 = _line_trace(s.trend, 'Trend', 'rgb(244, 146, 65)', width=4)
trace2 = _line_trace(s.seasonal, 'Sezónní složka', 'rgb(0, 200,0)')
trace3 = _line_trace(s.resid, 'Reziduální složka', 'rgb(255, 0, 150)')
trace4 = _line_trace(s.observed, 'Originál', 'rgb(20, 100, 244)')

# Deliberately NOT named `data`: that name holds the DataFrame loaded from the CSV, and
# overwriting it with a list of traces makes the earlier cells fail when they are re-run.
traces = [trace1, trace2, trace3, trace4]
layout = dict(
    title='Sezónní dekompozice ceny bitcoinu v letech 2016 a 2017',
    xaxis=dict(title='Čas (dny)'),
    yaxis=dict(title='Cena ($)'),
)
fig = dict(data=traces, layout=layout)
py.iplot(fig, filename='seasonal_decomposition')