## ETF Compete:  
## Feature engineering: KD/RSV 
### Date: 18424_L

# 使用變數

KD 公式
$$$$
# $RSV = \frac{(price_{now} - price_{min})}{(price_{max} - price_{min})}$
# $K_{new} = w * K_{old} + (1-w) * RSV$
# $D_{new} = w*D_{old} + (1-w) * K_{new}$


### variable: 
### df: dataframe 
### n: n days window (normally, 9, but I used to use 5) 
### w: 加權比重 (一般公式 選2/3, ..我傾向用5/4)
### price_now = 現在 （今日） 價格 
### price_max= n days 最高價
### price_min = n days 最低價

###  return df 
### columns =  ['code', 'date', 'name', 'open', 'high', 'low', 'close', 'volume','K_old', 'K_new', 'D_old', 'D_new', 'K_signal', 'n_max', 'n_min', 'nd_max-min', 'close-n_min', 'RSV', 'K-RSV', 'K-D']



In [1]:
import numpy as np
import pandas as pd
from collections import OrderedDict
from time import time

In [2]:
# read in csv, change column names
etf = pd.read_csv('/Users/LarryGuo/Desktop/nano_degree/Capstone_Talk/ETF_compete/twetf_utf8/tetfp.csv')
col_dtypes = OrderedDict(code=str, date=str, name=str, open=float, high=float, low=float, close=float, volume=int)

etf.columns=col_dtypes.keys()

etf.head()

Unnamed: 0,code,date,name,open,high,low,close,volume
0,50,20130102,元大台灣50,54.0,54.65,53.9,54.4,16487
1,50,20130103,元大台灣50,54.9,55.05,54.65,54.85,29020
2,50,20130104,元大台灣50,54.85,54.85,54.4,54.5,9837
3,50,20130107,元大台灣50,54.55,54.55,53.9,54.25,8910
4,50,20130108,元大台灣50,54.0,54.2,53.65,53.9,12507


In [3]:

etf= etf[:1286] # select 台灣50 
etf.tail()

Unnamed: 0,code,date,name,open,high,low,close,volume
1281,50,20180327,元大台灣50,83.0,83.4,82.95,83.4,3277
1282,50,20180328,元大台灣50,82.9,82.9,82.2,82.25,4161
1283,50,20180329,元大台灣50,82.25,82.35,81.8,82.1,4099
1284,50,20180330,元大台灣50,82.65,83.05,82.65,82.85,4994
1285,50,20180331,元大台灣50,82.85,83.05,82.75,82.95,878


In [4]:
etf.columns

Index(['code', 'date', 'name', 'open', 'high', 'low', 'close', 'volume'], dtype='object')

In [5]:
# calculate KD_RSV

def KD_RSV(df,n,w, high = 'high', low='low'):
    start_time=time()
    
    # create new column and assign initial value 
    # K_signal = 1 if K_new > K_old
    
    df['K_old'] = 0.5
    df['K_new'] = 0
    df['D_old'] = 0.5
    df['D_new'] = 0
    df['K_signal'] =1
    
    df['n_max'] = df[high].rolling(window=5).max()
    df['n_min'] = df[low].rolling(window=5).min()
    

    df['nd_max-min'] = df['n_max'] - df['n_min']
    df['close-n_min'] = df["close"] - df['n_min']
    df['RSV'] = df['close-n_min']/(df['nd_max-min']*1.0)
    
    # loop from n-1, cus rolling.max() results to nan at first n-1 value

    
    for j in range (n-1,len(df)-1):
    
        #print ('j=', j)
        df['K_new'].iloc[j] = w*df['K_old'].iloc[j] + (1-w)*df['RSV'].iloc[j]  # calculate K_new
        df['K_old'].iloc[j+1] = df['K_new'].iloc[j]  # replace next day K_old with toda's  K_new value
        df['D_new'].iloc[j] = w*df['D_old'].iloc[j] + (1-w)*df['K_new'].iloc[j]  # calculate D_new
        df['D_old'].iloc[j+1] = df['D_new'].iloc[j] # replace next day's D_old with today's D_new
        if df['K_new'].iloc[j] < df['K_old'].iloc[j]:
            df['K_signal'].iloc[j] = -1
        #print ('j=', j)
        #print (df[['K_new','K_old','RSV','K_signal']].iloc[j:j+2])
        #print ('\n\n')
    
    df['K-RSV']  = df['K_new'] - df['RSV']
    df['K-D'] = df['K_new'] - df['D_new']
    
    #df_return = df[['RSV','K_new','D_new','K-RSV','K-D']]
    end_time = time()
    
    print ('needed_time = ', end_time - start_time)
    
    return df
 
   

In [6]:
df= KD_RSV(etf,5,0.8)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self._setitem_with_indexer(indexer, value)


needed_time =  154.22963500022888


In [7]:
df.columns

Index(['code', 'date', 'name', 'open', 'high', 'low', 'close', 'volume',
       'K_old', 'K_new', 'D_old', 'D_new', 'K_signal', 'n_max', 'n_min',
       'nd_max-min', 'close-n_min', 'RSV', 'K-RSV', 'K-D'],
      dtype='object')