## ETF Compete:  
## Feature engineering: 
## VR (Volume Ratio, 容量指標)
## OBV (On Balance Volume 能量潮)

http://blog.cnyes.com/my/victor888victor/article163587

## n 天內漲跌關於 量 的相對指標
### Date: 18426_L

# 使用變數


$$\mathrm{vol\_up} = (n\ \text{天內上漲日的量總和}) + \tfrac{1}{2}\,(n\ \text{天的量總和})$$

$$\mathrm{vol\_dn} = (n\ \text{天內下跌日的量總和}) + \tfrac{1}{2}\,(n\ \text{天的量總和})$$

$$\mathrm{VR} = \frac{\mathrm{vol\_up}}{\mathrm{vol\_dn}}$$

# note: VR 可能大於100% ，一般介於0.7 ~ 3.5 之間
# 計算完 VR , 使用 min_max 做normalize

$$\mathrm{OBV}_{\text{new}} = \mathrm{OBV}_{\text{old}} \pm \text{Today's Volume}$$

# Sign: + if today's price is up or unchanged, − if it is down; accumulated with `.cumsum()`

# Using min_max to do normalization

# OBV_MA12 =  OBV moving average 12
# OBV_diff_MA12 = OBV - OBV_MA12 (計算 OBV 與 OBV 移動平均的差異)

### 參數: 
### price, volume: pd.Series (收盤價、每日成交量)
### n: n days window (normally 24; 12 is used here)


###  return df 
### feature columns = ['VR', 'OBV', 'OBV_MA12', 'OBV_diff_MA12']


In [1]:
import numpy as np
import pandas as pd
from collections import OrderedDict
from time import time
from sklearn.preprocessing import MinMaxScaler

In [2]:
# English column names for the ETF price table. Only the keys are used below;
# the mapped types are not passed to read_csv in this cell.
col_dtypes = OrderedDict(
    code=str,
    date=str,
    name=str,
    open=float,
    high=float,
    low=float,
    close=float,
    volume=int,
)

# Load the raw CSV, then replace its header with the English names above.
etf = pd.read_csv('/Users/LarryGuo/Desktop/nano_degree/Capstone_Talk/ETF_compete/twetf_utf8/tetfp.csv')
etf.columns = list(col_dtypes)

etf.head()

Unnamed: 0,code,date,name,open,high,low,close,volume
0,50,20130102,元大台灣50,54.0,54.65,53.9,54.4,16487
1,50,20130103,元大台灣50,54.9,55.05,54.65,54.85,29020
2,50,20130104,元大台灣50,54.85,54.85,54.4,54.5,9837
3,50,20130107,元大台灣50,54.55,54.55,53.9,54.25,8910
4,50,20130108,元大台灣50,54.0,54.2,53.65,53.9,12507


In [3]:

# Keep only the leading 1286 rows, which hold the 台灣50 (code 50) history.
etf = etf.iloc[:1286]
etf.tail()

Unnamed: 0,code,date,name,open,high,low,close,volume
1281,50,20180327,元大台灣50,83.0,83.4,82.95,83.4,3277
1282,50,20180328,元大台灣50,82.9,82.9,82.2,82.25,4161
1283,50,20180329,元大台灣50,82.25,82.35,81.8,82.1,4099
1284,50,20180330,元大台灣50,82.65,83.05,82.65,82.85,4994
1285,50,20180331,元大台灣50,82.85,83.05,82.75,82.95,878


In [4]:
# Display the column labels of the ETF frame after renaming.
etf.columns

Index(['code', 'date', 'name', 'open', 'high', 'low', 'close', 'volume'], dtype='object')

In [5]:




def _min_max_scale(values):
    """Linearly rescale a Series to the range [0, 1], keeping its index.

    A series with zero range (all values equal) is mapped to all zeros
    instead of dividing by zero.
    """
    lowest = values.min()
    span = values.max() - lowest
    if not span > 0:
        return (values - lowest).astype(float)
    return (values - lowest) / span


def VR_OBV(price, volume, n=12):
    """Compute Volume Ratio (VR) and On-Balance Volume (OBV) features.

    A day counts as "up" when the close did not fall versus the previous
    row (``diff >= 0``); the first row has no previous close, so its diff
    is filled with 0 and it counts as up. Every other day is "down".

    VR over an ``n``-row window is::

        (sum of up-day volume   + 0.5 * sum of all volume)
        ------------------------------------------------
        (sum of down-day volume + 0.5 * sum of all volume)

    Rows without a full window (and any infinite/undefined ratio) are set
    to 1.0 before scaling. OBV is the running sum of volume signed by the
    day's direction. Both VR and OBV are min-max scaled to [0, 1] using the
    minimum and maximum of the series passed in, so the scaled values
    depend on the whole input, not only on past rows.

    Params:
        price: pd.Series of close prices.
        volume: pd.Series of daily transaction volume, indexed like
            ``price``.
        n: rolling window length in rows, type: int.

    Return:
        pd.DataFrame with columns
        ['VR', 'OBV', 'OBV_MA12', 'OBV_diff_MA12'], carrying the index of
        the inputs so it can be joined back to the source frame.
        ``OBV_MA12`` is the ``n``-row rolling mean of the scaled OBV
        (warm-up rows are filled with the first OBV value) and
        ``OBV_diff_MA12`` is ``OBV - OBV_MA12``. An empty input yields an
        empty frame with the same columns.
    """
    columns = ['VR', 'OBV', 'OBV_MA12', 'OBV_diff_MA12']
    if price.empty:
        return pd.DataFrame(columns=columns, index=price.index, dtype=float)

    diff = price.diff().fillna(0)
    is_up = diff >= 0
    is_down = diff < 0

    # Half of every day's volume is credited to both sides of the ratio.
    half_volume = 0.5 * volume
    total_up_vol = (is_up * volume + half_volume).rolling(window=n).sum()
    total_dn_vol = (is_down * volume + half_volume).rolling(window=n).sum()

    ratio = (total_up_vol / total_dn_vol).replace([np.inf, -np.inf], np.nan)
    VR = _min_max_scale(ratio.fillna(1.0)).rename('VR')

    # OBV: add the volume on up days, subtract it on down days.
    signed_volume = np.where(is_up, 1, -1) * volume
    OBV = _min_max_scale(signed_volume.cumsum()).rename('OBV')

    # Positional access: the index is no longer guaranteed to start at 0.
    OBV_MA12 = OBV.rolling(window=n).mean().fillna(OBV.iloc[0]).rename('OBV_MA12')
    OBV_diff_MA12 = (OBV - OBV_MA12).rename('OBV_diff_MA12')

    return pd.concat([VR, OBV, OBV_MA12, OBV_diff_MA12], axis=1)



In [6]:
# Build the VR/OBV feature frame from the close price and volume columns,
# using the default window n=12.
df= VR_OBV(etf['close'], etf['volume'])



In [7]:
# Display the column labels of the feature frame returned by VR_OBV.
df.columns

Index(['VR', 'OBV', 'OBV_MA12', 'OBV_diff_MA12'], dtype='object')

In [8]:
# Display the computed feature frame.
df

Unnamed: 0,VR,OBV,OBV_MA12,OBV_diff_MA12
0,0.209005,0.015977,0.015977,0.000000
1,0.209005,0.033894,0.015977,0.017917
2,0.209005,0.027821,0.015977,0.011844
3,0.209005,0.022319,0.015977,0.006343
4,0.209005,0.014597,0.015977,-0.001379
5,0.209005,0.019246,0.015977,0.003269
6,0.209005,0.027861,0.015977,0.011884
7,0.209005,0.020552,0.015977,0.004576
8,0.209005,0.025048,0.015977,0.009072
9,0.209005,0.020968,0.015977,0.004991
