In [5]:
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
import statsmodels.api as sm
import random
import datetime

In [32]:
# Read one raw frame per coin from the local pickle files, in BTC, ETH, LTC order.
# NOTE(review): the "-60" suffix presumably means 60-second candles — confirm.
# NOTE(review): unpickling can execute arbitrary code; only load files that
# come from a trusted source.
df_BTC, df_ETH, df_LTC = (
    pd.read_pickle(pickle_path)
    for pickle_path in ('BTC-USD-60.pkl', 'ETH-USD-60.pkl', 'LTC-USD-60.pkl')
)

In [33]:
# Interpret each frame's 'time' column as a count of seconds (unit='s') and
# overwrite it with the parsed datetime values.
for coin_frame in (df_BTC, df_ETH, df_LTC):
    coin_frame['time'] = pd.to_datetime(coin_frame['time'], unit='s')

# Construct BTC dataframe with 6 attributes

In [4]:
# Preview the first rows of the BTC frame after the 'time' column was parsed.
df_BTC.head()

Unnamed: 0,time,low,high,open,close,volume
0,2016-01-01 04:59:00,436.12,436.13,436.12,436.12,6.139053
1,2016-01-01 04:58:00,436.02,436.36,436.02,436.13,0.312749
2,2016-01-01 04:57:00,436.03,437.12,437.02,436.03,23.06055
3,2016-01-01 04:56:00,437.02,437.07,437.02,437.07,1.017
4,2016-01-01 04:55:00,437.02,437.11,437.11,437.02,1.3087


In [34]:
# Move every BTC timestamp 10 hours earlier, then preview the result.
# Caution: the column is rewritten from its current values, so running this
# cell a second time shifts the timestamps by a further 10 hours.
# NOTE(review): the notebook does not say why the offset is 10 hours —
# presumably a time-zone adjustment; confirm.
btc_time_offset = datetime.timedelta(hours=10)
df_BTC['time'] = df_BTC['time'] - btc_time_offset
df_BTC.head()

Unnamed: 0,time,low,high,open,close,volume
0,2015-12-31 18:59:00,436.12,436.13,436.12,436.12,6.139053
1,2015-12-31 18:58:00,436.02,436.36,436.02,436.13,0.312749
2,2015-12-31 18:57:00,436.03,437.12,437.02,436.03,23.06055
3,2015-12-31 18:56:00,437.02,437.07,437.02,437.07,1.017
4,2015-12-31 18:55:00,437.02,437.11,437.11,437.02,1.3087


In [35]:
# Make 'time' the index and order the rows chronologically.
# This is needed because the daily grouping further down uses pd.Grouper(freq='D'),
# which buckets on the frame's DatetimeIndex.
# Once 'time' is the index it is no longer a column, so this cell cannot be
# re-run without reloading the data first.
df_BTC['time'] = pd.DatetimeIndex(df_BTC['time'])
df_BTC = df_BTC.set_index('time').sort_index()

In [36]:
# Bucket the BTC rows into calendar days using the DatetimeIndex.
# `daily_grouped` is a GroupBy object; each aggregate below is a Series with
# one value per day, indexed by 'time'.
daily_grouped = df_BTC.groupby(pd.Grouper(freq='D'))
daily_volume = daily_grouped['volume'].sum()   # total traded volume per day
daily_mean = daily_grouped['open'].mean()      # unweighted mean of 'open' per day
daily_high = daily_grouped['high'].max()       # highest 'high' per day
daily_low = daily_grouped['low'].min()         # lowest 'low' per day

# Volume-weighted mean of 'open' per day: sum(open * volume) / sum(volume).
# This uses vectorised group sums and a Series division instead of a per-group
# Python lambda. A day whose total volume is 0 (for example a day bin with no
# rows) then yields NaN through pandas' own division, rather than a scalar 0/0
# that emits a RuntimeWarning. It also avoids calling Python once per day.
weighted_open_sum = (df_BTC["open"] * df_BTC["volume"]).groupby(pd.Grouper(freq='D')).sum()
daily_weighted_mean = weighted_open_sum / daily_volume


In [37]:
# Turn each daily Series into a single-column DataFrame indexed by 'time'.
# The column keeps the Series name ('volume', 'open', 'high', 'low').
daily_volume, daily_mean, daily_high, daily_low = (
    daily_series.reset_index().set_index('time')
    for daily_series in (daily_volume, daily_mean, daily_high, daily_low)
)

# The weighted mean has no name yet, so name it before converting.
daily_weighted_mean = (
    daily_weighted_mean
    .rename('BTC_weighted_mean')
    .reset_index()
    .set_index('time')
)

In [38]:
# Place the five daily frames side by side (aligned on the 'time' index), then
# add the day-over-day relative change of the unweighted daily mean.
# The first day has no predecessor, so its percent change is NaN.
btc_daily_parts = [daily_high, daily_low, daily_volume, daily_mean, daily_weighted_mean]
df_daily_BTC = pd.concat(btc_daily_parts, axis='columns')
df_daily_BTC['BTC_percent_change'] = daily_mean.pct_change()

In [39]:
# Prefix the generic aggregate column names with the coin symbol so they stay
# distinguishable once the per-coin frames are joined. Note that the daily
# mean of 'open' becomes 'BTC_mean'.
btc_column_names = {
    'high': 'BTC_high',
    'low': 'BTC_low',
    'volume': 'BTC_volume',
    'open': 'BTC_mean',
}
df_daily_BTC = df_daily_BTC.rename(columns=btc_column_names)

# Construct ETH dataframe

In [40]:
# --- Index ETH by time -------------------------------------------------------
# Move timestamps 10 hours earlier, make 'time' the DatetimeIndex and sort
# chronologically (the same preparation the BTC cells perform).
# Caution: this cell is not re-runnable. The shift is applied to the current
# values, and 'time' stops being a column after set_index.
df_ETH.time = df_ETH.time - datetime.timedelta(hours=10)
df_ETH.time = pd.DatetimeIndex(df_ETH.time)
df_ETH = df_ETH.set_index('time').sort_index()

# --- Daily aggregates --------------------------------------------------------
# `daily_grouped` is a GroupBy object; each aggregate is a Series with one
# value per calendar day.
daily_grouped = df_ETH.groupby(pd.Grouper(freq='D'))
daily_volume = daily_grouped['volume'].sum()
daily_mean = daily_grouped['open'].mean()
daily_high = daily_grouped['high'].max()
daily_low = daily_grouped['low'].min()

# Volume-weighted mean of 'open' per day: sum(open * volume) / sum(volume).
# Vectorised group sums and a Series division replace the per-group lambda.
# A day whose total volume is 0 (for example a day bin with no rows) gives NaN
# without the scalar 0/0 RuntimeWarning the lambda produced.
weighted_open_sum = (df_ETH["open"] * df_ETH["volume"]).groupby(pd.Grouper(freq='D')).sum()
daily_weighted_mean = weighted_open_sum / daily_volume

# --- Series -> single-column DataFrames indexed by 'time' --------------------
daily_volume = daily_volume.reset_index().set_index('time')
daily_mean = daily_mean.reset_index().set_index('time')
daily_high = daily_high.reset_index().set_index('time')
daily_low = daily_low.reset_index().set_index('time')
daily_weighted_mean = daily_weighted_mean.rename('ETH_weighted_mean').reset_index().set_index('time')

# --- Fill gaps ---------------------------------------------------------------
# interpolate() fills NaN values from neighbouring days (pandas' default is
# linear interpolation).
# NOTE(review): the sum over a day with no rows is presumably 0 rather than
# NaN, in which case interpolating `daily_volume` changes nothing and
# missing days keep a volume of 0 — confirm that this is intended.
daily_volume = daily_volume.interpolate()
daily_mean = daily_mean.interpolate()
daily_high = daily_high.interpolate()
daily_low = daily_low.interpolate()
daily_weighted_mean = daily_weighted_mean.interpolate()

# --- Assemble ----------------------------------------------------------------
# Columns side by side, plus the day-over-day relative change of the
# (interpolated) daily mean. The first day has no predecessor, so it is NaN.
df_daily_ETH = pd.concat([daily_high, daily_low, daily_volume, daily_mean, daily_weighted_mean], axis=1)
df_daily_ETH['ETH_percent_change'] = daily_mean.pct_change()

# Prefix the generic column names with the coin symbol for the later join.
df_daily_ETH = df_daily_ETH.rename(columns={'high':'ETH_high', 'low':'ETH_low', 'volume':'ETH_volume', 'open':'ETH_mean'})

  # This is added back by InteractiveShellApp.init_path()


In [30]:
# Count the missing values in each column of the daily ETH frame.
df_daily_ETH.isna().sum()

ETH_high              0
ETH_low               0
ETH_volume            0
ETH_mean              0
ETH_weighted_mean     0
ETH_percent_change    1
dtype: int64

# Construct LTC dataframe

In [31]:
# Prepare LTC like the other coins: move timestamps 10 hours earlier, make
# 'time' the DatetimeIndex and sort chronologically.
# Caution: this cell is not re-runnable. The shift is applied to the current
# values, and 'time' stops being a column after set_index.
df_LTC['time'] = pd.DatetimeIndex(df_LTC['time'] - datetime.timedelta(hours=10))
df_LTC = df_LTC.set_index('time').sort_index()

# Bucket rows into calendar days. `daily_grouped` is a GroupBy object and each
# aggregate below is a Series with one value per day.
daily_grouped = df_LTC.groupby(pd.Grouper(freq='D'))
daily_volume = daily_grouped['volume'].sum()   # total traded volume per day
daily_mean = daily_grouped['open'].mean()      # unweighted mean of 'open' per day
daily_high = daily_grouped['high'].max()       # highest 'high' per day
daily_low = daily_grouped['low'].min()         # lowest 'low' per day

In [14]:
# Volume-weighted mean of 'open' per day: sum(open * volume) / sum(volume).
# Vectorised group sums and a Series division replace the per-group lambda.
# A day whose total volume is 0 (for example a day bin with no rows) gives NaN
# without the scalar 0/0 RuntimeWarning the lambda produced. The denominator
# is taken from `daily_grouped` so this cell does not depend on whether
# `daily_volume` is still a Series.
weighted_open_sum = (df_LTC["open"] * df_LTC["volume"]).groupby(pd.Grouper(freq='D')).sum()
daily_weighted_mean = weighted_open_sum / daily_grouped["volume"].sum()

# Series -> single-column DataFrames indexed by 'time'.
daily_volume = daily_volume.reset_index().set_index('time')
daily_mean = daily_mean.reset_index().set_index('time')
daily_high = daily_high.reset_index().set_index('time')
daily_low = daily_low.reset_index().set_index('time')
daily_weighted_mean = daily_weighted_mean.rename('LTC_weighted_mean').reset_index().set_index('time')

# Fill NaN values from neighbouring days (pandas' default is linear
# interpolation).
# NOTE(review): the sum over a day with no rows is presumably 0 rather than
# NaN, in which case interpolating `daily_volume` changes nothing and
# missing days keep a volume of 0 — confirm that this is intended.
daily_volume = daily_volume.interpolate()
daily_mean = daily_mean.interpolate()
daily_high = daily_high.interpolate()
daily_low = daily_low.interpolate()
daily_weighted_mean = daily_weighted_mean.interpolate()

# Columns side by side, plus the day-over-day relative change of the
# (interpolated) daily mean. The first day has no predecessor, so it is NaN.
df_daily_LTC = pd.concat([daily_high, daily_low, daily_volume, daily_mean, daily_weighted_mean], axis=1)
df_daily_LTC['LTC_percent_change'] = daily_mean.pct_change()

# Prefix the generic column names with the coin symbol for the later join.
df_daily_LTC = df_daily_LTC.rename(columns={'high':'LTC_high', 'low':'LTC_low', 'volume':'LTC_volume', 'open':'LTC_mean'})

  """Entry point for launching an IPython kernel.


In [15]:
# Count the missing values in each column of the daily LTC frame.
df_daily_LTC.isna().sum()

LTC_high              0
LTC_low               0
LTC_volume            0
LTC_mean              0
LTC_weighted_mean     0
LTC_percent_change    1
dtype: int64

# Concat and write to file

In [16]:
# Combine the three per-coin daily frames on their 'time' index.
# These are left joins anchored on BTC: every BTC day is kept, and days with
# no ETH/LTC row get NaN in that coin's columns.
df_daily = (
    df_daily_BTC
    .join(df_daily_ETH, how='left')
    .join(df_daily_LTC, how='left')
)

In [17]:
# Inspect the combined frame's index (date range, length and frequency).
df_daily.index

DatetimeIndex(['2015-12-31', '2016-01-01', '2016-01-02', '2016-01-03',
               '2016-01-04', '2016-01-05', '2016-01-06', '2016-01-07',
               '2016-01-08', '2016-01-09',
               ...
               '2018-03-25', '2018-03-26', '2018-03-27', '2018-03-28',
               '2018-03-29', '2018-03-30', '2018-03-31', '2018-04-01',
               '2018-04-02', '2018-04-03'],
              dtype='datetime64[ns]', name='time', length=825, freq='D')

In [18]:
# Count the missing values in each column of the combined daily frame.
df_daily.isna().sum()

BTC_high                0
BTC_low                 0
BTC_volume              0
BTC_mean                0
BTC_weighted_mean       0
BTC_percent_change      1
ETH_high              140
ETH_low               140
ETH_volume            138
ETH_mean              140
ETH_weighted_mean     140
ETH_percent_change    141
LTC_high              229
LTC_low               229
LTC_volume            229
LTC_mean              229
LTC_weighted_mean     229
LTC_percent_change    230
dtype: int64

In [56]:
# Persist the combined daily frame to a pickle file in the working directory.
# NOTE(review): the null counts displayed earlier in the notebook show the ETH
# and LTC columns still containing NaN after the join, despite the
# "interpolated" file name — confirm downstream consumers expect that.
df_daily.to_pickle('df_daily_interpolated.pickle')