In [1]:
%run ../utils/commonImports.py
%run ../utils/tradingImports.py
%matplotlib inline

# Properties

In [2]:
# Input (labeled candles) and output (engineered features) directories.
# NOTE(review): both are absolute paths on one Windows machine; the notebook
# will not run elsewhere until these are pointed at a local copy of the data.
dataRoot, featuresRoot = (
    'D:\\Dropbox\\My work\\krypl-project\\dataLabeled\\poloniex\\5min',
    'D:\\Dropbox\\My work\\krypl-project\\dataFeatures\\poloniex\\5min',
)

In [3]:
# The same file name is used for the labeled input and the feature output;
# only the directory differs.
fileName = 'USDT_BTC_5min_2016-01-01_2017-12-31.tsv'
file = dataRoot + '\\' + fileName
featuresFile = featuresRoot + '\\' + fileName

# The labeled data is tab-separated.
data = pd.read_csv(filepath_or_buffer=file, sep='\t')

In [4]:
# Work on an independent copy so the loaded `data` frame is never mutated
# by the feature columns added below.
featured = data.copy(deep=True)

# Return

In [8]:
def lag(arr, l):
    """Shift a 1-D array forward by ``l`` positions, padding the front with 0.

    Element ``i`` of the result holds ``arr[i - l]``; the first ``l`` slots
    are filled with the integer ``0``. The result always has the same length
    as ``arr``.

    Parameters
    ----------
    arr : numpy.ndarray
        One-dimensional input array.
    l : int
        Number of positions to shift by.

    Returns
    -------
    numpy.ndarray
        New array built from a Python list, so the dtype is inferred by numpy.
    """
    n = arr.shape[0]
    # Clamp the number of kept elements at 0: with l > n the bound n - l is
    # negative, which would slice from the end and make the result longer
    # than the input.
    keep = max(n - l, 0)
    laged = np.array([0] * (n - keep) + arr[:keep].tolist())
    return laged

In [9]:
def _return(arr):
    arr0 = arr[:arr.shape[0]-1]
    arr1 = arr[1:]
    r = (arr1 / arr0) - 1
    r = np.insert(r, 0, 0., axis=0)
    return r

In [10]:
# Return of the close price for the current bar.
featured['return'] = _return(featured['close'].values)

# The same return delayed by one and by two bars.
for column, shift in (('return_1', 1), ('return_2', 2)):
    featured[column] = lag(featured['return'].values, shift)

# Z-Score

In [11]:
from scipy import stats
def zscore(x, window):
    """Rolling z-score of ``x`` against the preceding ``window`` observations.

    The mean and population standard deviation (``ddof=0``) are taken over a
    trailing window and then shifted by one step, so the statistics used for
    position ``i`` come from the window ending at ``i - 1`` and exclude the
    current value.

    Parameters
    ----------
    x : pandas.Series
        Input series.
    window : int
        Number of observations in the rolling window.

    Returns
    -------
    pandas.Series
        ``(x - mean) / std``, NaN wherever the statistics are unavailable or
        the standard deviation is exactly zero.
    """
    r = x.rolling(window=window)
    m = r.mean().shift(1)
    s = r.std(ddof=0).shift(1)
    # A flat window has std == 0; dividing by it would produce +/-inf (or NaN
    # for 0/0). Mask those positions so the result is consistently NaN.
    s = s.where(s != 0)
    z = (x - m) / s
    return z

In [12]:
# Z-score of the return over 36-bar and 288-bar windows.
for column, window in (('zscore_36_return', 36), ('zscore_288_return', 288)):
    featured[column] = zscore(featured['return'], window)

In [39]:
# Z-score of the traded volume over 36-, 288- and 2016-bar windows.
for column, window in (
    ('zscore_36_volume', 36),
    ('zscore_288_volume', 288),
    ('zscore_2016_volume', 2016),
):
    featured[column] = zscore(featured['volume'], window)

# Sliding Mean

In [23]:
def sliding_mean(x, window):
    """Trailing (non-centered) rolling mean of ``x`` over ``window`` observations."""
    trailing = x.rolling(window=window, center=False)
    return trailing.mean()

In [24]:
# Sanity check: 36-bar rolling mean of the close price (display only).
sliding_mean(x=featured['close'], window=36)

0                nan
1                nan
2                nan
3                nan
4                nan
5                nan
6                nan
7                nan
8                nan
9                nan
10               nan
11               nan
12               nan
13               nan
14               nan
15               nan
16               nan
17               nan
18               nan
19               nan
20               nan
21               nan
22               nan
23               nan
24               nan
25               nan
26               nan
27               nan
28               nan
29               nan
             ...    
210211   12,615.4672
210212   12,624.7966
210213   12,631.5188
210214   12,638.4447
210215   12,642.7276
210216   12,648.6442
210217   12,658.0222
210218   12,662.8855
210219   12,666.2646
210220   12,673.2135
210221   12,683.6028
210222   12,697.2139
210223   12,708.3330
210224   12,719.0861
210225   12,725.7357
210226   12,732.9932
210227   12,7

# Change In Price

$ pma = zscore(\frac{p}{avg(p, w_m)} - 1, w_z) $

In [25]:
def change_in_price(x, mean_window, zscore_window):
    """Z-scored relative deviation of ``x`` from its own rolling mean.

    Computes ``x / sliding_mean(x, mean_window) - 1`` and passes the result
    through ``zscore`` with ``zscore_window``.
    """
    relative_deviation = x / sliding_mean(x, mean_window) - 1
    return zscore(relative_deviation, zscore_window)

In [26]:
# Close price relative to its 36/288/2016-bar mean, each z-scored over 288 bars.
for column, mean_window in (('pma36', 36), ('pma288', 288), ('pma2016', 2016)):
    featured[column] = change_in_price(featured['close'], mean_window, 288)

In [26]:
# Volume relative to its 36/288/2016-bar mean, each z-scored over 288 bars.
# NOTE(review): the first rows of the data show volume 0.0, so volume / mean
# can be 0/0 there; confirm how the resulting NaN values are handled downstream.
for column, mean_window in (('vma36', 36), ('vma288', 288), ('vma2016', 2016)):
    featured[column] = change_in_price(featured['volume'], mean_window, 288)

# Sliding means change

$ ma = zscore(\frac{avg(p, w_1)}{avg(p, w_2)}-1, w_z) $

In [28]:
def sliding_means_change(x, w1, w2, zscore_window):
    """Z-scored relative difference between two rolling means of ``x``.

    Computes ``sliding_mean(x, w1) / sliding_mean(x, w2) - 1`` and passes the
    result through ``zscore`` with ``zscore_window``.
    """
    numerator_mean = sliding_mean(x, w1)
    denominator_mean = sliding_mean(x, w2)
    return zscore((numerator_mean / denominator_mean) - 1, zscore_window)

In [30]:
# Ratio of two rolling means of the close price, z-scored over 288 bars.
# The column name encodes the two mean windows: ma_<w1>_<w2>.
featured['ma_12_108'] = sliding_means_change(featured['close'], 12, 108, 288)
# Windows must match the column name (36, 288); this line previously reused
# (12, 108) and so duplicated ma_12_108.
featured['ma_36_288'] = sliding_means_change(featured['close'], 36, 288, 288)

# Price acceleration

$ pavg = \frac{p}{avg(p, w_m)} $

$ ac = zscore(\frac{pavg}{avg(pavg, w_m)}, w_z) $

In [31]:
def price_acceleration(x, w_mean, zscore_window):
    """Z-scored "acceleration" of ``x`` relative to its rolling mean.

    Implements the formula given in the notebook text:

        pavg = x / avg(x, w_mean)
        ac   = zscore(pavg / avg(pavg, w_mean), zscore_window)

    Parameters
    ----------
    x : pandas.Series
        Input series (e.g. close price).
    w_mean : int
        Window used for both rolling means.
    zscore_window : int
        Window passed to ``zscore``.

    Returns
    -------
    pandas.Series
        The z-scored ratio, aligned with ``x``.
    """
    # Price relative to its rolling mean. Previously this was the rolling
    # mean alone, which did not match the documented definition of pavg.
    pavg = x / sliding_mean(x, w_mean)
    pavg_mean = sliding_mean(pavg, w_mean)
    acceleration = pavg / pavg_mean
    zscored = zscore(acceleration, zscore_window)
    return zscored

In [33]:
# Price acceleration with 36- and 288-bar mean windows, z-scored over 288 bars.
for column, mean_window in (('ac36', 36), ('ac288', 288)):
    featured[column] = price_acceleration(featured['close'], mean_window, 288)

# Volatility

In [43]:
def std(x, window):
    """Trailing (non-centered) rolling standard deviation of ``x``.

    Uses the default ``ddof`` of ``Rolling.std`` (no ``ddof`` is passed).
    """
    trailing = x.rolling(window=window, center=False)
    return trailing.std()


def volatility(x, w_std, w_zscore):
    """Z-score (window ``w_zscore``) of the rolling std of ``x`` (window ``w_std``)."""
    return zscore(std(x, w_std), w_zscore)

In [44]:
# Volatility of the return over 36/288/2016-bar windows, z-scored over 288 bars.
for column, std_window in (('vol36', 36), ('vol288', 288), ('vol2016', 2016)):
    featured[column] = volatility(featured['return'], std_window, 288)

# Change in Volatility

$ sd = std(r, w_s) $

$ dv = zscore(\frac{sd}{avg(sd, w_a)}, w_z) $

In [46]:
def volatility_change(x, w_std, w_a,  w_zscore):
    """Z-scored ratio of the rolling std of ``x`` to that std's own rolling mean.

    Computes ``sd = std(x, w_std)``, divides it by ``sliding_mean(sd, w_a)``
    and passes the ratio through ``zscore`` with ``w_zscore``.
    """
    rolling_sd = std(x, w_std)
    ratio = rolling_sd / sliding_mean(rolling_sd, w_a)
    return zscore(ratio, w_zscore)

In [47]:
# Change in volatility of the return; the column name encodes the std window
# and the averaging window: dv_<w_std>_<w_a>. Both are z-scored over 288 bars.
featured['dv_36_288'] = volatility_change(featured['return'], 36, 288, 288)
# Store the (288, 2016) variant under its own name; it previously overwrote
# 'dv_36_288', silently discarding the (36, 288) feature.
# NOTE(review): add 'dv_288_2016' to write_cols if it should be exported.
featured['dv_288_2016'] = volatility_change(featured['return'], 288, 2016, 288)

In [48]:
# Preview the first five rows with all engineered columns attached.
featured.head(n=5)

Unnamed: 0,close,timestamp,high,low,open,quoteVolume,volume,weightedAverage,date,entryReturn_001,...,ma_36_288,ac36,ac288,zscore_36_volume,zscore_288_volume,zscore_2016_volume,vol36,vol288,vol2016,dv_36_288
0,430.5732,1451602800,430.5732,430.5732,430.5732,0.0,0.0,430.5732,2016-01-01 00:00:00,0,...,,,,,,,,,,
1,430.5732,1451603100,430.5732,430.5732,430.5732,0.0,0.0,430.5732,2016-01-01 00:05:00,0,...,,,,,,,,,,
2,430.5732,1451603400,430.5732,430.5732,430.5732,0.0,0.0,430.5732,2016-01-01 00:10:00,0,...,,,,,,,,,,
3,430.5732,1451603700,430.5732,430.5732,430.5732,0.0,0.0,430.5732,2016-01-01 00:15:00,0,...,,,,,,,,,,
4,430.5732,1451604000,430.5732,430.5732,430.5732,0.0,0.0,430.5732,2016-01-01 00:20:00,0,...,,,,,,,,,,


# Save Features

In [50]:
# List every column currently on `featured`, to choose which ones to export.
featured.columns

Index(['close', 'timestamp', 'high', 'low', 'open', 'quoteVolume', 'volume',
       'weightedAverage', 'date', 'entryReturn_001', 'entryReturn_003',
       'entryReturn_006', 'entryReturn_01', 'entryReturn_03', 'entryReturn_05',
       'entryReturn_07', 'entryReturn_1', 'entryReturn_12', 'entryReturn_15',
       'entryReturn_18', 'entryReturn_2', 'entryReturn_22', 'entryReturn_25',
       'entryReturn_28', 'entryReturn_3', 'labelSlope_close_12', 'returnOpen',
       'returnClose', 'returnHigh', 'returnLow', 'labelSlope_returnClose_12',
       'return', 'return_1', 'return_2', 'zscore_36_return',
       'zscore_288_return', 'pma36', 'ma_12_108', 'ma_36_288', 'ac36', 'ac288',
       'zscore_36_volume', 'zscore_288_volume', 'zscore_2016_volume', 'vol36',
       'vol288', 'vol2016', 'dv_36_288'],
      dtype='object')

In [54]:
# Columns written to the features file, in output order: identifiers first,
# then the engineered features grouped by family.
write_cols = [
    'date', 'close',
    'return', 'return_1', 'return_2',
    'zscore_36_return', 'zscore_288_return',
    'pma36',
    'ma_12_108', 'ma_36_288',
    'ac36', 'ac288',
    'zscore_36_volume', 'zscore_288_volume', 'zscore_2016_volume',
    'vol36', 'vol288', 'vol2016',
    'dv_36_288',
]

In [55]:
# Persist the selected columns as a tab-separated file, without the row index.
featured.loc[:, write_cols].to_csv(featuresFile, sep='\t', index=False)

# Explore Features

In [8]:
# List the columns available for exploration.
# NOTE(review): the recorded output of this cell stops at 'return_2' and its
# execution count is In [8], so it appears to have been run before the later
# feature cells; re-run the notebook top to bottom to refresh it.
featured.columns

Index(['close', 'timestamp', 'high', 'low', 'open', 'quoteVolume', 'volume',
       'weightedAverage', 'date', 'entryReturn_001', 'entryReturn_003',
       'entryReturn_006', 'entryReturn_01', 'entryReturn_03', 'entryReturn_05',
       'entryReturn_07', 'entryReturn_1', 'entryReturn_12', 'entryReturn_15',
       'entryReturn_18', 'entryReturn_2', 'entryReturn_22', 'entryReturn_25',
       'entryReturn_28', 'entryReturn_3', 'labelSlope_close_12', 'returnOpen',
       'returnClose', 'returnHigh', 'returnLow', 'labelSlope_returnClose_12',
       'return', 'return_1', 'return_2'],
      dtype='object')