In [1]:
import pandas as pd
import numpy as np
import datetime as dt
import os
import time
import matplotlib.pyplot as plt

In [2]:
def macd_func(stock_price,slow=26,fast=12,signal_span=9):
    '''
    macd (moving average convergence divergence)

    dif = ema_fast - ema_slow
    dea = ema(dif, signal_span)
    histogram = dif - dea   (not returned here; compute it as macd - signal)

    When the histogram is > 0, the current gap between the fast and slow
    lines is larger than its recent average (dea), i.e. the two lines are
    diverging, which signifies an upward trend in the stock price; the
    opposite holds when the histogram is < 0.

    Parameters
    ----------
    stock_price : pandas object exposing ``.ewm`` (e.g. a Series of prices)
    slow        : span of the slow EMA (default 26)
    fast        : span of the fast EMA (default 12)
    signal_span : span of the EMA applied to dif to get the signal line
                  (default 9, which was previously hard-coded)

    Returns
    -------
    (emaSlow, emaFast, macd, signal) where macd is dif and signal is dea.
    '''
    # adjust=True weights every observation seen so far, so the first values
    # are defined from the very first row instead of starting as NaN.
    emaSlow=stock_price.ewm(span=slow,adjust=True).mean()
    emaFast=stock_price.ewm(span=fast,adjust=True).mean()
    macd=emaFast-emaSlow
    signal=macd.ewm(span=signal_span,adjust=True).mean()

    return emaSlow,emaFast,macd,signal
            

def mtm_func(stock_price,time_frame=10):
    '''
    mtm (momentum)

    Relative price change versus the price `time_frame` rows earlier:

        current price / price n rows ago - 1

    The result is a fraction (0.05 means +5%); it is not multiplied by 100.
    The first `time_frame` rows have no earlier price and come out as NaN.
    '''
    price_n_ago=stock_price.shift(time_frame)
    return stock_price/price_n_ago-1


def boll_func(stock_price,time_frame=10):
    '''
    boll (bollinger bands)

    mid line   : n-row simple moving average (SMA)
    upper line : SMA + 2 * rolling standard deviation over the same n rows
    lower line : SMA - 2 * rolling standard deviation over the same n rows

    A price that breaks above the upper line (below the lower line) and then
    turns back down (up) is read as a sell (buy) signal.

    Returns
    -------
    (bolli, sma, upper, lower) where bolli is the price position inside the
    band: (price - SMA) / (2 * std), so -1 is the lower line and +1 the
    upper line.
    '''
    window=stock_price.rolling(time_frame)
    mid=window.mean()
    half_width=2*window.std()

    position=(stock_price-mid)/half_width

    return position,mid,mid+half_width,mid-half_width



In [None]:
def simulate_positions(close,bolli,mtm,entry_bolli=-1.0,exit_band=0.08):
    '''
    Walk through the rows once and decide, for every row, whether we hold.

    All decisions for row i use only information from row i-1:

    * flat, and bolli[i-1] <= entry_bolli and mtm[i-1] >= 0  -> buy;
      the entry cost is close[i-1] and row i already earns close[i]-close[i-1]
    * holding, and the price has moved at least `exit_band` (8% by default)
      away from the entry cost in EITHER direction                -> sell
      (stop loss on the way down, take profit on the way up)
    * otherwise the previous state and cost are carried forward

    NaN indicators never satisfy the entry test, so warm-up rows cannot buy.

    Returns
    -------
    (action, cost, hold, profit) as numpy arrays with one entry per row:
    action is 1 on a buy row, -1 on a sell row, else 0; hold is 1 while in
    a position; cost is the entry price while holding, else 0; profit is the
    row's price change while holding, else 0.
    '''
    close=np.asarray(close,dtype=float)
    bolli=np.asarray(bolli,dtype=float)
    mtm=np.asarray(mtm,dtype=float)

    n=len(close)
    action=np.zeros(n,dtype=int)
    hold=np.zeros(n,dtype=int)
    # float from the start: these receive prices, and writing floats into an
    # integer column row by row is deprecated in recent pandas versions
    cost=np.zeros(n,dtype=float)
    profit=np.zeros(n,dtype=float)

    for i in range(1,n):
        prev=i-1

        if hold[prev]==0:
            # not holding: open a position only if both indicators agree
            if bolli[prev]<=entry_bolli and mtm[prev]>=0:
                hold[i]=1
                action[i]=1
                cost[i]=close[prev]
                profit[i]=close[i]-close[prev]
            continue

        # holding: close the position once the move from cost reaches the band
        change=(close[prev]-cost[prev])/cost[prev]
        if change<=-exit_band or change>=exit_band:
            action[i]=-1
            continue

        # neither of the above: carry the position and its cost forward
        hold[i]=1
        cost[i]=cost[prev]
        profit[i]=close[i]-close[prev]

    return action,cost,hold,profit


data_path='stock_data/'
mtm_window=5

# Only the top-level files of data_path are used.  next(..., default) keeps a
# missing directory from raising, the suffix filter skips stray non-CSV files,
# and sorting makes the ticker order reproducible across file systems.
ticker_list=sorted(name for name in next(os.walk(data_path),(data_path,[],[]))[2]
                   if name.lower().endswith('.csv'))
ret_list=[]
for each_ticker in ticker_list:

    df=pd.read_csv(os.path.join(data_path,each_ticker))
    df['Adj Close T-1']=df['Adj Close'].shift(1)
    df['bolli']=boll_func(df['Adj Close'])[0]
    df['mtm']=mtm_func(df['Adj Close'],mtm_window)

    df['action'],df['cost'],df['hold'],df['profit']=simulate_positions(
        df['Adj Close'],df['bolli'],df['mtm'])

    # daily return is measured against the entry cost and is 0 when flat
    holding=df['hold']==1
    df['daily_ret']=(df['profit']/df['cost']).where(holding,0)

    # average over holding rows only; NaN when the ticker never triggered a buy
    daily_ret_avg=df.loc[holding,'daily_ret'].mean()

    ret_list.append(daily_ret_avg)


In [124]:
# One line per ticker file: file name followed by its average daily return
# over holding days (nan appears when that average had no rows to use).
for ticker_ret_pair in zip(ticker_list,ret_list):
    print(*ticker_ret_pair)

600000_浦发银行.csv nan
600004_白云机场.csv nan
600006_东风汽车.csv 0.0033163067206722113
600007_中国国贸.csv nan
600008_首创股份.csv nan
600009_上海机场.csv nan
600010_包钢股份.csv nan
600011_华能国际.csv -0.003073091546497565
600012_皖通高速.csv 0.0027777759870063474
600015_华夏银行.csv nan
600016_民生银行.csv nan
600017_日照港.csv nan
600018_上港集团.csv nan
600019_宝钢股份.csv nan
600020_中原高速.csv nan
600021_上海电力.csv nan
600022_济南钢铁.csv nan
600026_中海发展.csv nan
600027_华电国际.csv nan
600028_中国石化.csv nan
600029_南方航空.csv nan
600030_中信证券.csv nan
600031_三一重工.csv nan
600033_福建高速.csv nan
600035_楚天高速.csv nan
600036_招商银行.csv nan
600037_歌华有线.csv nan
600038_哈飞股份.csv nan
600039_四川路桥.csv nan
600048_保利地产.csv nan
600050_中国联通.csv nan
600051_宁波联合.csv nan
600052_浙江广厦.csv nan
600053_中江地产.csv nan
600054_黄山旅游.csv nan
600055_万东医疗.csv nan
600056_中国医药.csv 0.005333667163790745
600058_五矿发展.csv nan
600059_古越龙山.csv nan
600060_海信电器.csv nan
600061_中纺投资.csv nan
600062_双鹤药业.csv nan
600063_皖维高新.csv nan
600064_南京高科.csv nan
600066_宇通客车.csv nan
600067_冠城大通.csv -0.00282268510

In [132]:
# Drop the tickers whose average is nan, then average the remaining ones
# with equal weight per ticker.
ret_list_new=[ret for ret in ret_list if not np.isnan(ret)]
np.mean(ret_list_new)

0.00480612855655113

Comparison with the Alipay fund:
1. Its 7-day annualized return is 2.0050%, which means that on average the Alipay fund's daily return is about 2.0050%/365 ≈ 0.0000549.
2. Our strategy's average daily return of 0.004806 (measured only on days when a position is held) is much higher.

Limitations:
1. Survivorship bias.
2. Equal weights are used when computing our average daily return; we should probably give more weight to closer (more recent) dates.

