# 土炮股票分析系統

這個範例只是將上課所講過的所有例子串在一起，提供一個簡單的概念，讓大家知道一個簡單的分析系統大概可以怎麼做。

例子裡面有很多不足的地方，請自行思考如何修改。

In [1]:
# 載入需要的模組
import ffn
import pandas as pd
import pandas_datareader.data as web

from datetime import datetime
import numpy as np

import talib

## 前面定義好的一些函式

In [2]:
# 取得公司資料
def get_companies(ex = "NASDAQ"):
    template = "http://www.nasdaq.com/screening/companies-by-industry.aspx?exchange={}&render=download"
    url = template.format(ex)
    return pd.read_csv(url)

In [3]:
# 計算波動率
def volatility(symbol, startdate):
    try:
        df = web.DataReader(symbol, 'yahoo', startdate)
        dailyRet = df['Close'].pct_change()
        return dailyRet.std()
    except Exception as e:
        print("Symbol: ", symbol, "==> Error: ", e.args)
        return 10000.0

In [4]:
# 計算 MaxDD
def DrawDownAnalysis(cumRet):
    dd_series = ffn.core.to_drawdown_series(cumRet)
    dd_details = ffn.core.drawdown_details(dd_series)
    return dd_details['drawdown'].min(), dd_details['days'].max()

## 抓取公司資料

In [5]:
data = get_companies()

In [6]:
data.head()

Unnamed: 0,Symbol,Name,LastSale,MarketCap,ADR TSO,IPOyear,Sector,Industry,Summary Quote,Unnamed: 9
0,PIH,"1347 Property Insurance Holdings, Inc.",7.35,43988030.0,,2014.0,Finance,Property-Casualty Insurers,http://www.nasdaq.com/symbol/pih,
1,TURN,180 Degree Capital Corp.,2.02,62865560.0,,,Finance,Finance/Investors Services,http://www.nasdaq.com/symbol/turn,
2,FLWS,"1-800 FLOWERS.COM, Inc.",10.25,662400700.0,,1999.0,Consumer Services,Other Specialty Stores,http://www.nasdaq.com/symbol/flws,
3,FCCY,1st Constitution Bancorp (NJ),18.25,147335100.0,,,Finance,Savings Institutions,http://www.nasdaq.com/symbol/fccy,
4,SRCE,1st Source Corporation,51.64,1339342000.0,,,Finance,Major Banks,http://www.nasdaq.com/symbol/srce,


In [7]:
# 看一下有幾筆資料
len(data.index)

3270

In [8]:
# 也可以這樣看
data.shape

(3270, 10)

In [9]:
# 作為範例取出前十筆來做就好了，因為太多筆沒辦法在課堂上示範...
# 另外連續頻繁抓資料，會被 Yahoo Finance 視為攻擊，之後會抓不到資料。
companylist = data['Symbol'][0:10].tolist()

## 波動率選股

我們通常只抓一次股市資料，然後存到資料庫裏面去，之後就從自己的資料庫裏面撈數據出來分析。

這個地方因為為了搭配先前講過的例子，所以只是把例子裡面的做法放到 Function 裡面來。

須注意這樣的作法只是上課 Demo 用。

In [10]:
results = []

for symbol in companylist:
    vo = volatility(symbol, datetime(2016, 1, 1))
    results.append((vo, symbol))
    
results.sort()

Symbol:  FLWS ==> Error:  ('Unable to read URL: https://query1.finance.yahoo.com/v7/finance/download/FLWS?period1=1451577600&period2=1512403199&interval=1d&events=history&crumb=LDj%5Cu002F2xCHNQf',)
Symbol:  FCCY ==> Error:  ('Unable to read URL: https://query1.finance.yahoo.com/v7/finance/download/FCCY?period1=1451577600&period2=1512403199&interval=1d&events=history&crumb=exI7QqfV%5Cu002FS0',)


In [11]:
# 選出波動率最小的前五檔股票
computer_selected = np.array(results)[:, 1][:5].tolist()
computer_selected

['SRCE', 'CAFD', 'JOBS', 'PIH', 'TWOU']

# 加入自選股

In [12]:
# 上面是電腦幫忙選股...
# 底下是自己因為看新聞、聽小道消息等等的靈機一閃後，想看看看回測狀況好不好的股票
# 它只是一個簡單的 list 會被一起放進去回測看看
self_selected = ['TSLA', 'GOOG', 'YHOO', 'MSFT', 'AAPL']

In [13]:
candidates = computer_selected + self_selected

In [14]:
# 放到 set 中的重複性資料只會被保留一份
candidates = set(candidates)

In [15]:
# 這是刪掉重複資料後的候選名單
candidates = list(candidates)

# 回測

In [16]:
# 利用策略產生的持有部位資訊，計算底下四個指標來判斷投資績效
# sharpe ratio: 判斷報酬的好壞跟穩定度，數值越大越好
# maxdd: maximum drawdown, 最糟糕的狀況會賠幾 %
# maxddd: maximum drawdown duration, 低於上一次最高報酬的天數
# cumRet[-1]: 最後賺的 % 數

def indicators(df):
    dailyRet = df['Close'].pct_change()
    excessRet = (dailyRet - 0.04/252)[df['positions'] == 1]
    SharpeRatio = np.sqrt(252.0)*np.mean(excessRet)/np.std(excessRet)
    
    cumRet = np.cumprod(1+excessRet)
    
    maxdd, maxddd = DrawDownAnalysis(cumRet)
    
    return SharpeRatio, maxdd, maxddd, cumRet[-1]

In [17]:
# 這是我們的策略的部分
# 主要只是要算出進出的訊號 signals 跟何時持有部位 positions
# 底下是一個突破系統的範例

def Breakout_strategy(df):
    # Donchian Channel
    df['20d_high'] = np.round(pd.Series.rolling(df['Close'], window=20).max(), 2)
    df['10d_low'] = np.round(pd.Series.rolling(df['Close'], window=10).min(), 2)

    has_position = False
    df['signals'] = np.zeros(df['Close'].shape)
    for t in range(2, df['signals'].size):
        if df['Close'][t] > df['20d_high'][t-1]:
            if not has_position:
                df.loc[df.index[t], 'signals'] = 1
                has_position = True
        elif df['Close'][t] < df['10d_low'][t-1]:
            if has_position:
                df.loc[df.index[t], 'signals'] = -1
                has_position = False

    df['positions'] = df['signals'].cumsum()
    return df

In [18]:
def RSI_7030_strategy(df):
    df['RSI'] = talib.RSI(df['Close'].values)
    
    has_position = False
    df['signals'] = np.zeros(np.size(df['Close']))
    for t in range(2, df['signals'].size):
        if df['RSI'][t-1] < 30:
            if not has_position:
                df.loc[df.index[t], 'signals'] = 1
                has_position = True
        elif df['RSI'][t-1] > 70:
            if has_position:
                df.loc[df.index[t], 'signals'] = -1
                has_position = False

    df['positions'] = df['signals'].cumsum()
    return df

In [19]:
def BBands_strategy(df):
    
    from talib import MA_Type
    df['UBB'], df['MBB'], df['LBB'] = talib.BBANDS(df['Close'].values, matype=MA_Type.T3)

    has_position = False
    df['signals'] = np.zeros(df['Close'].shape)
    for t in range(2, df['signals'].size):
        if df['Close'][t] < df['LBB'][t-1]:
            if not has_position:
                df.loc[df.index[t], 'signals'] = 1
                has_position = True
        elif df['Close'][t] > df['UBB'][t-1]:
            if has_position:
                df.loc[df.index[t], 'signals'] = -1
                has_position = False

    df['positions'] = df['signals'].cumsum().shift()
    return df

In [20]:
def apply_strategy(strategy, df):
    return strategy(df)

In [21]:
# 先把所有股票資料抓下來，放到字典上面去
# 因為我們在這裡沒有使用資料庫，所以用字典來做存放
all_data = {}

for symbol in candidates:
    try:
        all_data[symbol] = web.DataReader(symbol, 'yahoo', datetime(2016,1,1))
    except Exception as e:
        all_data[symbol] = pd.DataFrame()
        print("Error accessing symbol: ", symbol, "==>", e.args)

Error accessing symbol:  MSFT ==> ('Unable to read URL: https://query1.finance.yahoo.com/v7/finance/download/MSFT?period1=1451577600&period2=1512403199&interval=1d&events=history&crumb=44VOEVG%5Cu002FQuC',)
Error accessing symbol:  YHOO ==> ('Unable to read URL: https://query1.finance.yahoo.com/v7/finance/download/YHOO?period1=1451577600&period2=1512403199&interval=1d&events=history&crumb=jeUKbiQC5zr',)


In [30]:
# 計算各支股票的回測結果
results = []

strategies = [Breakout_strategy, RSI_7030_strategy, BBands_strategy]

for symbol in candidates:
    if all_data[symbol].empty:
            continue
    for strategy in strategies:
        apply_strategy(strategy, all_data[symbol])
        if np.all(all_data[symbol]['positions']==0):
            print("Symbol:", symbol, "使用", strategy.__name__, "策略沒有出現買賣訊號。")
            continue
        SharpeRatio, maxdd, maxddd, finalRet = indicators(all_data[symbol])
        results.append((SharpeRatio, maxdd, maxddd, finalRet, all_data[symbol][all_data[symbol] > 0]['signals'].sum(), symbol, strategy.__name__))

Symbol: JOBS 使用 RSI_7030_strategy 策略沒有出現買賣訊號。


In [31]:
results_df = pd.DataFrame(results, columns=['sharpe','MaxDrawDown','MaxDrawDownDuration','returns','entries','symbol','strategy'])

In [32]:
results_df

Unnamed: 0,sharpe,MaxDrawDown,MaxDrawDownDuration,returns,entries,symbol,strategy
0,4.085368,-0.041394,59,2.252879,11.0,AAPL,Breakout_strategy
1,1.855072,-0.086422,61,1.164934,2.0,AAPL,RSI_7030_strategy
2,0.734279,-0.18215,470,1.105947,21.0,AAPL,BBands_strategy
3,4.518138,-0.08867,77,2.881151,10.0,TSLA,Breakout_strategy
4,0.534857,-0.271337,540,1.116542,3.0,TSLA,RSI_7030_strategy
5,-0.140505,-0.306918,585,0.914236,19.0,TSLA,BBands_strategy
6,2.743272,-0.074379,118,1.452572,15.0,GOOG,Breakout_strategy
7,10.805006,-0.006251,3,1.146268,1.0,GOOG,RSI_7030_strategy
8,1.82573,-0.08963,287,1.28318,25.0,GOOG,BBands_strategy
9,2.611089,-0.085871,129,2.244621,14.0,JOBS,Breakout_strategy


In [33]:
results_df.sort_values('MaxDrawDown',ascending=False)

Unnamed: 0,sharpe,MaxDrawDown,MaxDrawDownDuration,returns,entries,symbol,strategy
7,10.805006,-0.006251,3,1.146268,1.0,GOOG,RSI_7030_strategy
0,4.085368,-0.041394,59,2.252879,11.0,AAPL,Breakout_strategy
14,3.14974,-0.059913,98,1.796215,12.0,SRCE,Breakout_strategy
6,2.743272,-0.074379,118,1.452572,15.0,GOOG,Breakout_strategy
15,2.612412,-0.075238,47,1.384797,2.0,SRCE,RSI_7030_strategy
17,3.737727,-0.082732,103,3.2283,15.0,TWOU,Breakout_strategy
9,2.611089,-0.085871,129,2.244621,14.0,JOBS,Breakout_strategy
1,1.855072,-0.086422,61,1.164934,2.0,AAPL,RSI_7030_strategy
3,4.518138,-0.08867,77,2.881151,10.0,TSLA,Breakout_strategy
20,2.64174,-0.089303,102,1.433005,11.0,CAFD,Breakout_strategy


In [34]:
results_df.sort_values('returns',ascending=False)

Unnamed: 0,sharpe,MaxDrawDown,MaxDrawDownDuration,returns,entries,symbol,strategy
17,3.737727,-0.082732,103,3.2283,15.0,TWOU,Breakout_strategy
3,4.518138,-0.08867,77,2.881151,10.0,TSLA,Breakout_strategy
0,4.085368,-0.041394,59,2.252879,11.0,AAPL,Breakout_strategy
9,2.611089,-0.085871,129,2.244621,14.0,JOBS,Breakout_strategy
10,3.290522,-0.106143,140,2.033664,26.0,JOBS,BBands_strategy
14,3.14974,-0.059913,98,1.796215,12.0,SRCE,Breakout_strategy
19,2.108572,-0.143511,252,1.693171,21.0,TWOU,BBands_strategy
11,2.284505,-0.103552,158,1.608814,9.0,PIH,Breakout_strategy
16,2.335527,-0.089935,162,1.542943,23.0,SRCE,BBands_strategy
6,2.743272,-0.074379,118,1.452572,15.0,GOOG,Breakout_strategy
