# 月營收選股

參考資料：

* [超簡單用python抓取每月營收](https://goo.gl/3bqLou)
* [Python-簡單的月營收選股！](https://bit.ly/2Ly3nLo)

In [2]:
import pandas as pd
import requests
from io import StringIO
import time

In [3]:
def monthly_report(year, month):
    """Download and parse one monthly revenue report from mops.twse.com.tw.

    Args:
        year: Report year. Values above 1911 are treated as Gregorian years
            and converted to the ROC calendar; other values are used as-is.
        month: Month number, inserted verbatim into the report URL.

    Returns:
        pandas.DataFrame whose columns are taken from the report's header
        row (the row whose first cell is '公司代號'). Rows whose '當月營收'
        is not numeric and rows whose '公司代號' is '合計' are dropped.

    Raises:
        requests.HTTPError: if the server responds with an error status.
        IndexError: if no header row is found in the downloaded tables.
        Other exceptions raised by requests or pandas.read_html propagate
        unchanged.
    """

    # Convert a Gregorian year to the ROC calendar year used in the URL
    if year > 1911:
        year -= 1911

    # Reports up to ROC year 98 use a URL without the trailing "_0"
    url = 'http://mops.twse.com.tw/nas/t21/sii/t21sc03_'+str(year)+'_'+str(month)+'_0.html'
    if year <= 98:
        url = 'http://mops.twse.com.tw/nas/t21/sii/t21sc03_'+str(year)+'_'+str(month)+'.html'

    # Present a browser-like User-Agent
    headers = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36'}

    # Download the page. `headers` must be passed by keyword: the second
    # positional parameter of requests.get is `params` (the query string).
    r = requests.get(url, headers=headers, timeout=30)
    # Fail early on an error status instead of parsing an error page
    r.raise_for_status()
    r.encoding = 'big5'
    html_df = pd.read_html(StringIO(r.text))

    # Either the first table already holds everything (more than 500 rows),
    # or the report is split into several tables of at most 11 columns each
    if html_df[0].shape[0] > 500:
        df = html_df[0].copy()
    else:
        df = pd.concat([table for table in html_df if table.shape[1] <= 11])
    # Keep the first ten columns; copy so the assignments below act on an
    # independent frame rather than on a slice
    df = df[list(range(0,10))].copy()

    # Locate the header row by position: after concat the index labels may
    # repeat, so a label is not a reliable argument for .iloc.
    # Indexing with [0] raises IndexError when no header row exists.
    header_positions = (df[0] == '公司代號').values.nonzero()[0]
    df.columns = df.iloc[header_positions[0]]

    # Non-numeric revenue cells (header and separator rows) become NaN and are dropped
    df['當月營收'] = pd.to_numeric(df['當月營收'], 'coerce')
    df = df[~df['當月營收'].isnull()]
    # Drop the subtotal rows
    df = df[df['公司代號'] != '合計']

    return df

In [4]:
# Fetch the January 2017 report as a quick check of monthly_report
df = monthly_report(2017, 1)

In [5]:
# Preview the first rows of the parsed report
df.head()

4,公司代號,公司名稱,當月營收,上月營收,去年當月營收,上月比較增減(%),去年同月增減(%),當月累計營收,去年累計營收,前期比較增減(%)
5,1101,台泥,6131670.0,9418581,6575590,-34.89,-6.75,6131670,6575590,-6.75
6,1102,亞泥,4162130.0,6023954,5066062,-30.9,-17.84,4162130,5066062,-17.84
7,1103,嘉泥,232657.0,382981,197328,-39.25,17.9,232657,197328,17.9
8,1104,環球水泥,363865.0,463842,373311,-21.55,-2.53,363865,373311,-2.53
9,1108,幸福水泥,316773.0,345573,348266,-8.33,-9.04,316773,348266,-9.04


In [6]:
# Fetch the 12 most recent monthly reports, walking backwards from the current month
import datetime

data = {}
n_days = 12  # number of monthly reports to collect (a count of months, despite the name)
max_attempts = n_days * 2  # cap on requests so a persistent failure cannot loop forever
attempts = 0
now = datetime.datetime.now()
year = now.year
month = now.month
while len(data) < n_days and attempts < max_attempts:
    attempts += 1

    print('parsing', year, month)

    # Download the report with monthly_report; a failed month is reported and skipped
    try:
        data['%d-%d-01'%(year, month)] = monthly_report(year, month)
    except Exception as e:
        print('get 404, please check if the revenues are not revealed')
        # Show the real cause, since failures other than a 404 end up here too
        print('  error:', repr(e))

    # Step back one month, wrapping from January to December of the previous year
    month -= 1
    if month == 0:
        month = 12
        year -= 1
    # Pause between requests
    time.sleep(10)

if len(data) < n_days:
    print('stopped after', attempts, 'attempts with only', len(data), 'of', n_days, 'reports')

parsing 2018 5
get 404, please check if the revenues are not revealed
parsing 2018 4
parsing 2018 3
parsing 2018 2
parsing 2018 1
parsing 2017 12
parsing 2017 11
parsing 2017 10
parsing 2017 9
parsing 2017 8
parsing 2017 7
parsing 2017 6
parsing 2017 5


In [7]:
# Combine the monthly reports into one table: one row per month, one column per company code
for report in data.values():
    # Index each report by company code so the revenue series line up across months
    report.index = report['公司代號']

revenue_by_month = {}
for month_key, report in data.items():
    revenue_by_month[month_key] = report['當月營收']

df = pd.DataFrame(revenue_by_month).T
# Turn the 'YYYY-M-01' keys into timestamps and order the rows chronologically
df.index = pd.to_datetime(df.index)
df = df.sort_index()

In [8]:
df.head()

Unnamed: 0,1101,1102,1103,1104,1108,1109,1110,1201,1203,1210,...,9939,9940,9941,9942,9943,9944,9945,9946,9955,9958
2017-05-01,8485032.0,5741517.0,183810.0,381948.0,332953.0,447188.0,110025.0,1476996.0,501143.0,5986895.0,...,1553333.0,726469.0,1609240.0,263416.0,232262.0,305817.0,814084.0,17298.0,144120.0,211894.0
2017-06-01,7858569.0,5331442.0,143629.0,318948.0,243699.0,418868.0,107913.0,1536126.0,501186.0,5738622.0,...,1680590.0,743766.0,1595482.0,281120.0,246261.0,300886.0,814937.0,60200.0,118521.0,212550.0
2017-07-01,8034396.0,5252149.0,153905.0,357742.0,270234.0,417078.0,113787.0,1745969.0,514755.0,6158273.0,...,1797089.0,674981.0,1639991.0,256026.0,253048.0,267836.0,825103.0,82237.0,159704.0,189038.0
2017-08-01,6952639.0,5478165.0,174704.0,367333.0,293423.0,487484.0,110747.0,1785571.0,561578.0,6554870.0,...,1757672.0,710351.0,1606952.0,274935.0,243109.0,277898.0,737665.0,88308.0,174798.0,145720.0
2017-09-01,8071192.0,5806573.0,177734.0,403016.0,267312.0,521034.0,232565.0,1685062.0,543886.0,7052418.0,...,1506725.0,3851321.0,1648181.0,272291.0,227517.0,268175.0,1655423.0,80710.0,222351.0,104617.0


In [9]:
# Moving-average screen: the mean of the last 3 rows exceeds the mean of the last 12 rows
recent_avg = df.tail(3).mean()
yearly_avg = df.tail(12).mean()
method1 = recent_avg > yearly_avg
# Company codes that pass the screen
method1[method1].index

Index(['1219', '1301', '1303', '1310', '1313', '1314', '1316', '1323', '1326',
       '1409',
       ...
       '9926', '9930', '9931', '9937', '9939', '9941', '9943', '9944', '9945',
       '9955'],
      dtype='object', length=221)

In [12]:
# Growth screen: the rolling mean (window 4, at least 2 values) rose in each of the last 5 rows
rolling_avg = df.rolling(4, min_periods=2).mean()
is_rising = rolling_avg.gt(rolling_avg.shift())
# Count, per company, how many of the last 5 rows show a rise
method2 = is_rising.tail(5).sum()
# Keep only the companies that rose in all 5 rows
method2[method2 == 5]

1468    5
1723    5
2327    5
2408    5
2443    5
2701    5
2881    5
3055    5
3532    5
4119    5
4725    5
4739    5
6225    5
6582    5
8131    5
9908    5
9918    5
9941    5
dtype: int64

In [13]:
# New-high screen: the latest row equals the maximum over the last 12 rows
latest = df.iloc[-1]
trailing_max = df.tail(12).max()
method3 = latest == trailing_max
# Company codes that pass the screen
method3[method3].index

Index(['1101', '1102', '1313', '1323', '1468', '1583', '1710', '1714', '1773',
       '1806', '2006', '2010', '2013', '2014', '2015', '2033', '2049', '2069',
       '2104', '2106', '2327', '2338', '2342', '2359', '2408', '2431', '2456',
       '2465', '2478', '2492', '2493', '2633', '2731', '3003', '3010', '3011',
       '3016', '3018', '3023', '3047', '3266', '3450', '3528', '3532', '3596',
       '3711', '4526', '4555', '4725', '4764', '4919', '4994', '5259', '5434',
       '6115', '6164', '6191', '6224', '6225', '6405', '6464', '6581', '8131',
       '9904'],
      dtype='object')