# 土炮股票分析系統

這個範例只是將上課所講過的所有例子串在一起，提供一個簡單的概念，讓大家知道一個簡單的分析系統大概可以怎麼做。

例子裡面有很多不足的地方，請自行思考如何修改。

這個範例主要做的事是下方方塊圖的藍色區塊部分。

<img src="system.png" />

## 抓取公司資料

In [1]:
import pandas as pd

In [2]:
url = "http://www.nasdaq.com/screening/companies-by-industry.aspx?exchange=NASDAQ&render=download"
data = pd.read_csv(url)

In [3]:
data.head()

Unnamed: 0,Symbol,Name,LastSale,MarketCap,ADR TSO,IPOyear,Sector,Industry,Summary Quote,Unnamed: 9
0,PIH,"1347 Property Insurance Holdings, Inc.",7.75,46164940.0,,2014.0,Finance,Property-Casualty Insurers,http://www.nasdaq.com/symbol/pih,
1,FLWS,"1-800 FLOWERS.COM, Inc.",10.0,655690600.0,,1999.0,Consumer Services,Other Specialty Stores,http://www.nasdaq.com/symbol/flws,
2,FCCY,1st Constitution Bancorp (NJ),18.35,147301700.0,,,Finance,Savings Institutions,http://www.nasdaq.com/symbol/fccy,
3,SRCE,1st Source Corporation,46.0,1237065000.0,,,Finance,Major Banks,http://www.nasdaq.com/symbol/srce,
4,VNET,"21Vianet Group, Inc.",5.45,315906300.0,57964466.0,2011.0,Technology,"Computer Software: Programming, Data Processing",http://www.nasdaq.com/symbol/vnet,


In [30]:
# 看一下有幾筆資料
len(data.index)

3194

In [31]:
# 也可以這樣看
data.shape

(3194, 10)

In [4]:
# 作為範例取出前十筆來做就好了，因為太多筆沒辦法在課堂上示範...
# 另外連續頻繁抓資料，會被 Yahoo Finance 視為攻擊，之後會抓不到資料。
companylist = data['Symbol'][0:10].tolist()

## 波動率選股

我們通常只抓一次股市資料，然後存到資料庫裏面去，之後就從自己的資料庫裏面撈數據出來分析。

這個地方因為為了搭配先前講過的例子，所以只是把例子裡面的做法放到 Function 裡面來。

須注意這樣的作法只是上課 Demo 用。

In [5]:
import pandas_datareader.data as web

from datetime import datetime
import numpy as np

In [6]:
def calculate(symbol):
    df=web.DataReader(symbol, 'yahoo', datetime(2016,1,1))
    dailyRet = df['Close'].pct_change()
    return dailyRet.std() * np.sqrt(dailyRet.count())

In [7]:
results = []
for symbol in companylist:
    vo = calculate(symbol)
    results.append((vo, symbol))
results.sort()
results

[(0.25794035156814438, 'SRCE'),
 (0.31272750011364076, 'FCCY'),
 (0.330983681309094, 'JOBS'),
 (0.34290380530118231, 'CAFD'),
 (0.39479008245348252, 'AVHI'),
 (0.40207272370844577, 'PIH'),
 (0.43052125071921138, 'FLWS'),
 (0.43309126135239229, 'TWOU'),
 (0.43834253033351334, 'EGHT'),
 (0.61780762479051854, 'VNET')]

In [8]:
# 選出波動率最小的前五檔股票
computer_selected = np.array(results)[:, 1][:5].tolist()
computer_selected

['SRCE', 'FCCY', 'JOBS', 'CAFD', 'AVHI']

# 加入自選股

In [9]:
# 上面是電腦幫忙選股...
# 底下是自己因為看新聞、聽小道消息等等的靈機一閃後，想看看看回測狀況好不好的股票
# 它只是一個簡單的 list 會被一起放進去回測看看
self_selected = ['TSLA', 'GOOG', 'YHOO', 'MSFT', 'AAPL']

In [10]:
candidates = computer_selected + self_selected

In [11]:
# 放到 set 中的重複性資料只會被保留一份
candidates = set(candidates)

In [12]:
# 這是刪掉重複資料後的候選名單
candidates = list(candidates)

# 回測

In [13]:
# maxDD: maximum drawdown
# maxDDD: maximum drawdown duration

def calculateMaxDD(cumRet):
    highwatermark = np.zeros(np.size(cumRet))
    drawdownduration = np.zeros(np.size(cumRet))
    drawdown = np.zeros(np.size(cumRet))
    for t in range(2, cumRet.size):
        highwatermark[t] = max(highwatermark[t-1], cumRet[t])
        drawdown[t] = (1 + highwatermark[t]) / (1 + cumRet[t]) - 1
        if (drawdown[t] == 0):
            drawdownduration[t] = 0
        else:
            # 從日期來計算 MaxDDD 的天數
            drawdownduration[t] = drawdownduration[t-1] + (cumRet.index[t]-cumRet.index[t-1]).days
            #drawdownduration[t] = drawdownduration[t-1] + 1
    maxDD = max(drawdown)
    maxDDD = max(drawdownduration)
    #Series(drawdownduration, index=cumRet.index).plot()
    return maxDD, maxDDD

In [26]:
# 利用策略產生的持有部位資訊，計算底下四個指標來判斷投資績效
# sharpe ratio: 判斷報酬的好壞跟穩定度，數值越大越好
# maxdd: maximum drawdown, 最糟糕的狀況會賠幾 %
# maxddd: maximum drawdown duration, 低於上一次最高報酬的天數
# cumRet[-1]: 最後賺的 % 數

def indicators(df):
    dailyRet = df['Close'].pct_change()
    #假設無風險利率為 4%
    #假設一年有252個交易日
    excessRet = (dailyRet - 0.04/252)[df['positions']==1.0]
    SharpeRatio = np.sqrt(252.0)*np.mean(excessRet)/np.std(excessRet)
    
    cumRet = np.cumprod(1+excessRet) - 1
    
    maxdd, maxddd = calculateMaxDD(cumRet)
    
    return SharpeRatio, maxdd, maxddd, cumRet[-1]

In [27]:
# 這是我們的策略的部分
# 主要只是要算出進出的訊號 signals 跟何時持有部位 positions
# 底下是一個突破系統的範例

def strategy(df):
    # Donchian Channel
    df['20d_high'] = np.round(pd.Series.rolling(df['Close'], window=20).max(), 2)
    df['10d_low'] = np.round(pd.Series.rolling(df['Close'], window=10).min(), 2)

    has_position = False
    df['signals'] = np.zeros(df['Close'].shape)
    for t in range(2, df['signals'].size):
        if df['Close'][t] > df['20d_high'][t-1]:
            if not has_position:
                df.loc[df.index[t], 'signals'] = 1.0
                has_position = True
        elif df['Close'][t] < df['10d_low'][t-1]:
            if has_position:
                df.loc[df.index[t], 'signals'] = -1.0
                has_position = False

    df['positions'] = df['signals'].cumsum()
    return df

In [16]:
# 先把所有股票資料抓下來，放到字典上面去
# 因為我們在這裡沒有使用資料庫，所以用字典來做存放
all_data = {}

for symbol in candidates:
    all_data[symbol] = web.DataReader(symbol, 'yahoo', datetime(2016,1,1))

In [28]:
# 計算各支股票的回測結果
results = []
for symbol in candidates:
    strategy(all_data[symbol])
    SharpeRatio, maxdd, maxddd, finalRet = indicators(all_data[symbol])
    results.append((SharpeRatio, maxdd, maxddd, finalRet, symbol))

results

[(5.4564476007438261, 0.07171142504927519, 77.0, 1.1044224304255064, 'TSLA'),
 (2.7822634509541655, 0.098059598858270203, 93.0, 0.22138536512882911, 'CAFD'),
 (3.1486331538755912,
  0.061363744885344795,
  140.0,
  0.56309993390290836,
  'YHOO'),
 (4.708499546920649, 0.028184773453298506, 41.0, 0.73790667871997262, 'AAPL'),
 (1.6168150645512951, 0.062648284422384126, 88.0, 0.14137640240921101, 'GOOG'),
 (3.2011008420921683, 0.070477704348795189, 145.0, 0.6192054325574301, 'FCCY'),
 (3.0388235738433536, 0.042810886085439037, 60.0, 0.32747044600240693, 'MSFT'),
 (3.0951853865829002, 0.063731057069693176, 80.0, 0.5344158833476147, 'SRCE'),
 (2.0064367851757168,
  0.093937685286554329,
  127.0,
  0.41459738917689437,
  'JOBS'),
 (3.3518546962135547, 0.080871012332741632, 77.0, 0.74975492541502708, 'AVHI')]

In [29]:
# 排序股票，取 Sharpe Ratio 高的前幾名當標的
sorted(results, reverse=True)

[(5.4564476007438261, 0.07171142504927519, 77.0, 1.1044224304255064, 'TSLA'),
 (4.708499546920649, 0.028184773453298506, 41.0, 0.73790667871997262, 'AAPL'),
 (3.3518546962135547, 0.080871012332741632, 77.0, 0.74975492541502708, 'AVHI'),
 (3.2011008420921683, 0.070477704348795189, 145.0, 0.6192054325574301, 'FCCY'),
 (3.1486331538755912,
  0.061363744885344795,
  140.0,
  0.56309993390290836,
  'YHOO'),
 (3.0951853865829002, 0.063731057069693176, 80.0, 0.5344158833476147, 'SRCE'),
 (3.0388235738433536, 0.042810886085439037, 60.0, 0.32747044600240693, 'MSFT'),
 (2.7822634509541655, 0.098059598858270203, 93.0, 0.22138536512882911, 'CAFD'),
 (2.0064367851757168,
  0.093937685286554329,
  127.0,
  0.41459738917689437,
  'JOBS'),
 (1.6168150645512951, 0.062648284422384126, 88.0, 0.14137640240921101, 'GOOG')]