In [1]:
%run ../../utils/commonImports.py
%matplotlib inline

# Properties

In [2]:
# Absolute locations of the labeled input data and of the feature output.
dataRoot = 'D:\\Dropbox\\My work\\krypl-project\\dataLabeled'
featuresRoot = 'D:\\Dropbox\\My work\\krypl-project\\dataFeatures'

# The same file name is used for the labeled input and the feature output;
# only the directory differs.
fileName = 'return-01-loss-max-01-within-day.tsv'
featuresFile = '\\'.join((featuresRoot, fileName))

In [3]:
# Read the labeled tab-separated table and work on a copy, so the raw frame
# in `data` stays untouched while feature columns are appended to `featured`.
file = '\\'.join((dataRoot, fileName))
data = pd.read_csv(file, sep='\t')
featured = data.copy()

# Return

In [4]:
def lag(arr, l):
    """Shift a 1-D array ``l`` positions towards the end, zero-filling the front.

    The output always has the same length as ``arr``: the first ``l`` entries
    are 0 and entry ``i`` (for ``i >= l``) equals ``arr[i - l]``.

    Parameters
    ----------
    arr : numpy.ndarray
        One-dimensional input values.
    l : int
        Number of positions to shift by; must be non-negative.

    Returns
    -------
    numpy.ndarray
        The lagged array, same length and dtype as ``arr``.

    Raises
    ------
    ValueError
        If ``l`` is negative.
    """
    if l < 0:
        raise ValueError(f'lag must be non-negative, got {l}')
    n = arr.shape[0]
    # Clamp the padding: with l > n the naive slice ``arr[:n - l]`` has a
    # negative stop and counts from the end, producing a wrong-length result.
    pad = min(l, n)
    laged = np.concatenate([np.zeros(pad, dtype=arr.dtype), arr[:n - pad]])
    return laged

In [5]:
def _return(arr):
    arr0 = arr[:arr.shape[0]-1]
    arr1 = arr[1:]
    r = (arr1 / arr0) - 1
    r = np.insert(r, 0, 0., axis=0)
    return r

In [6]:
# Per-row simple return of the close price, followed by its one- and
# two-row lags (rows without history are zero-filled by the helpers).
featured['return'] = _return(featured['close'].values)
for _col, _steps in (('return_1', 1), ('return_2', 2)):
    featured[_col] = lag(featured['return'].values, _steps)

# Z-Score

In [7]:
from scipy import stats
def zscore(x, window):
    """Z-score each value against the statistics of the preceding window.

    The rolling mean and population standard deviation (``ddof=0``) are
    shifted by one row, so the value at row ``i`` is standardised with the
    ``window`` observations ending at row ``i - 1`` and never with itself.

    Parameters
    ----------
    x : pandas.Series
        Values to standardise.
    window : int
        Number of rows in the rolling window.

    Returns
    -------
    pandas.Series
        Z-scores aligned with ``x``. The result is NaN while the preceding
        window is not yet full and wherever that window has zero variance.
    """
    r = x.rolling(window=window)
    m = r.mean().shift(1)
    s = r.std(ddof=0).shift(1)
    # A flat window has s == 0; dividing by it would give +/-inf (or NaN for
    # 0/0). Mask those rows to NaN so they are treated like any other missing
    # value downstream instead of leaking infinities into the features.
    s = s.where(s != 0)
    z = (x - m) / s
    return z

In [8]:
# Z-scored return over two look-back windows (in rows).
for _col, _window in (('zscore_36_return', 36), ('zscore_288_return', 288)):
    featured[_col] = zscore(featured['return'], _window)

In [9]:
# Z-scored volume over three look-back windows (in rows).
for _col, _window in (
    ('zscore_36_volume', 36),
    ('zscore_288_volume', 288),
    ('zscore_2016_volume', 2016),
):
    featured[_col] = zscore(featured['volume'], _window)

# Sliding Mean

In [10]:
def sliding_mean(x, window):
    """Trailing rolling mean of ``x`` over ``window`` rows.

    The window is right-aligned (``center=False``), so the first
    ``window - 1`` results are NaN.
    """
    roller = x.rolling(window=window, center=False)
    return roller.mean()

In [11]:
# Spot-check the helper: 36-row rolling mean of close (leading rows are NaN until the window fills).
sliding_mean(featured['close'], 36)

0               nan
1               nan
2               nan
3               nan
4               nan
5               nan
6               nan
7               nan
8               nan
9               nan
10              nan
11              nan
12              nan
13              nan
14              nan
15              nan
16              nan
17              nan
18              nan
19              nan
20              nan
21              nan
22              nan
23              nan
24              nan
25              nan
26              nan
27              nan
28              nan
29              nan
            ...    
381212   6,580.6714
381213   6,580.6349
381214   6,580.5917
381215   6,580.5871
381216   6,580.7261
381217   6,580.9746
381218   6,581.1413
381219   6,581.6468
381220   6,581.9646
381221   6,582.2760
381222   6,582.5024
381223   6,583.0001
381224   6,583.3261
381225   6,583.7007
381226   6,584.3477
381227   6,584.9905
381228   6,585.5269
381229   6,585.9986
381230   6,586.4973


# Change In Price

$ pma = zscore(\frac{p}{avg(p, w_m)} - 1, w_z) $

In [12]:
def change_in_price(x, mean_window, zscore_window):
    """Z-scored relative deviation of ``x`` from its own moving average.

    Computes ``x / sliding_mean(x, mean_window) - 1`` and standardises the
    result with ``zscore`` over ``zscore_window`` rows.
    """
    relative_deviation = (x / sliding_mean(x, mean_window)) - 1
    return zscore(relative_deviation, zscore_window)

In [13]:
# Close price relative to its 36/288/2016-row moving average, each
# standardised over a 288-row z-score window.
for _col, _mean_window in (('pma36', 36), ('pma288', 288), ('pma2016', 2016)):
    featured[_col] = change_in_price(featured['close'], _mean_window, 288)

In [14]:
# Same construction as the pma* columns, applied to traded volume.
for _col, _mean_window in (('vma36', 36), ('vma288', 288), ('vma2016', 2016)):
    featured[_col] = change_in_price(featured['volume'], _mean_window, 288)

# Sliding means change

$ ma = zscore(\frac{avg(p, w_1)}{avg(p, w_2)}-1, w_z) $

In [15]:
def sliding_means_change(x, w1, w2, zscore_window):
    """Z-scored relative gap between two moving averages of ``x``.

    Computes ``sliding_mean(x, w1) / sliding_mean(x, w2) - 1`` and
    standardises it with ``zscore`` over ``zscore_window`` rows.
    """
    ratio = sliding_mean(x, w1) / sliding_mean(x, w2)
    return zscore(ratio - 1, zscore_window)

In [16]:
# Moving-average crossover features: the column name encodes the short and
# long window (ma_<w1>_<w2>); both use a 288-row z-score window.
featured['ma_12_108'] = sliding_means_change(featured['close'], 12, 108, 288)
# The short window must be 36 to match the column name; it was previously 12,
# which made this feature a 12-vs-288 crossover mislabeled as 36-vs-288.
featured['ma_36_288'] = sliding_means_change(featured['close'], 36, 288, 288)

# Price acceleration

$ pavg = \frac{p}{avg(p, w_m)} $

$ ac = zscore(\frac{pavg}{avg(pavg, w_m)}, w_z) $

In [17]:
def price_acceleration(x, w_mean, zscore_window):
    """Z-scored "acceleration" of ``x`` relative to its moving average.

    Follows the formula stated for this feature:

        pavg = x / avg(x, w_mean)
        ac   = zscore(pavg / avg(pavg, w_mean), zscore_window)

    Parameters
    ----------
    x : pandas.Series
        Input series (e.g. close prices).
    w_mean : int
        Window, in rows, used for both moving averages.
    zscore_window : int
        Window, in rows, passed to ``zscore``.

    Returns
    -------
    pandas.Series
        The standardised acceleration, aligned with ``x``.
    """
    # pavg is the value relative to its own moving average. It was previously
    # set to the bare moving average, which disagreed with the formula above.
    pavg = x / sliding_mean(x, w_mean)
    pavg_mean = sliding_mean(pavg, w_mean)
    acceleration = pavg / pavg_mean
    zscored = zscore(acceleration, zscore_window)
    return zscored

In [18]:
# Price acceleration for a 36- and a 288-row averaging window, each
# standardised over a 288-row z-score window.
for _col, _w_mean in (('ac36', 36), ('ac288', 288)):
    featured[_col] = price_acceleration(featured['close'], _w_mean, 288)

# Volatility

In [19]:
def std(x, window):
    """Trailing rolling standard deviation of ``x`` over ``window`` rows.

    Uses the pandas default for ``ddof`` and a right-aligned window
    (``center=False``).
    """
    roller = x.rolling(window=window, center=False)
    return roller.std()


def volatility(x, w_std, w_zscore):
    """Z-scored rolling standard deviation of ``x``.

    ``std(x, w_std)`` gives the rolling volatility, which is then
    standardised with ``zscore`` over ``w_zscore`` rows.
    """
    return zscore(std(x, w_std), w_zscore)

In [20]:
# Return volatility over 36/288/2016-row windows, each standardised over a
# 288-row z-score window.
for _col, _w_std in (('vol36', 36), ('vol288', 288), ('vol2016', 2016)):
    featured[_col] = volatility(featured['return'], _w_std, 288)

# Change in Volatility

$ sd = std(r, w_s) $

$ dv = zscore(\frac{sd}{avg(sd, w_a)}, w_z) $

In [21]:
def volatility_change(x, w_std, w_a, w_zscore):
    """Z-scored ratio of rolling volatility to its own moving average.

    The rolling standard deviation ``std(x, w_std)`` is divided by its
    ``w_a``-row moving average, and that ratio is standardised with
    ``zscore`` over ``w_zscore`` rows.
    """
    rolling_sd = std(x, w_std)
    return zscore(rolling_sd / sliding_mean(rolling_sd, w_a), w_zscore)

In [22]:
# Change-in-volatility features: the column name encodes the std window and
# the averaging window (dv_<w_std>_<w_a>); both use a 288-row z-score window.
for _col, _w_std, _w_avg in (('dv_36_288', 36, 288), ('dv_288_2016', 288, 2016)):
    featured[_col] = volatility_change(featured['return'], _w_std, _w_avg, 288)

In [23]:
# Preview the first rows of the feature table (the newest feature columns are still NaN here).
featured.head()

Unnamed: 0,timestamp,period,open,high,low,close,volume,quoteVolume,weightedAverage,date,...,vma2016,ma_12_108,ma_36_288,ac36,ac288,vol36,vol288,vol2016,dv_36_288,dv_288_2016
0,1424373000,5min,225.0,225.0,225.0,225.0,1.0,0.0044,225.0,2015-02-19 19:10:00,...,,,,,,,,,,
1,1424373300,5min,225.0,225.0,225.0,225.0,0.0,0.0,225.0,2015-02-19 19:15:00,...,,,,,,,,,,
2,1424373600,5min,225.0,225.0,225.0,225.0,0.0,0.0,225.0,2015-02-19 19:20:00,...,,,,,,,,,,
3,1424373900,5min,225.0,225.0,225.0,225.0,0.0,0.0,225.0,2015-02-19 19:25:00,...,,,,,,,,,,
4,1424374200,5min,225.0,225.0,225.0,225.0,0.0,0.0,225.0,2015-02-19 19:30:00,...,,,,,,,,,,


# Save Features

In [24]:
# List every column currently in the feature table before it is written out.
featured.columns

Index(['timestamp', 'period', 'open', 'high', 'low', 'close', 'volume',
       'quoteVolume', 'weightedAverage', 'date', 'maxReturn', 'tillMax',
       'minReturn', 'label', 'return', 'return_1', 'return_2',
       'zscore_36_return', 'zscore_288_return', 'zscore_36_volume',
       'zscore_288_volume', 'zscore_2016_volume', 'pma36', 'pma288', 'pma2016',
       'vma36', 'vma288', 'vma2016', 'ma_12_108', 'ma_36_288', 'ac36', 'ac288',
       'vol36', 'vol288', 'vol2016', 'dv_36_288', 'dv_288_2016'],
      dtype='object')

In [25]:
# Persist the feature table to featuresFile. write_tsv and fillna are not defined in
# this notebook (presumably provided by the %run'd commonImports.py — TODO confirm);
# fillna(featured, 0) presumably replaces NaN with 0 before writing.
# NOTE(review): confirm whether +/-inf values are also handled by fillna.
write_tsv(fillna(featured, 0), featuresFile)

In [26]:
# (rows, columns) of the full feature table.
featured.shape

(381242, 37)

In [27]:
# Shape after dropna(featured); dropna is not defined in this notebook —
# presumably it drops rows containing NaN (TODO confirm against commonImports.py).
dropna(featured).shape

(344339, 37)