上市公司经常会将年报与一季度报告同时发布，此时最新一期财报已是一季报，`pipeline`取最新值时就无法有效获取上一年度的年报数据。为解决此问题，在`Fundamentals`中新增年报数据集，即只筛选出报告期为年末的报告。数据集名称与原财报类似，仅添加后缀`_yearly`（例如`profit_statement_yearly`）。

In [1]:
from zipline.pipeline.factors import CustomFactor

In [2]:
from zipline.pipeline.fundamentals import Fundamentals

In [3]:
from zipline.pipeline import CustomFactor, Pipeline

In [4]:
from zipline.utils.numpy_utils import changed_locations

In [5]:
import pandas as pd
import numpy as np

## 辅助区

In [6]:
# Look up the column code of the '营业利润' (operating profit) item;
# the cell output below shows it is 'A033'.
Fundamentals.profit_col_code('营业利润')

{'A033': '营业利润'}

In [7]:
from zipline.pipeline.filters import StaticSids

In [8]:
from zipline.pipeline.builtin import QTradableStocks

## 使用年报数据

In [9]:
def _select_annual_indices(dates):
    """Find, for every column of ``dates``, where the value changes.

    Helper used to locate the n-th most recent year-end report.

    Parameters
    ----------
    dates : 2-D array
        Indexed as ``dates[row, column]``; every column is scanned on
        its own.

    Returns
    -------
    list
        One entry per column, each being the result of
        ``changed_locations(column, True)``, i.e. the row positions
        whose value differs from the previous row, with the first row
        always included.
    """
    n_columns = dates.shape[1]
    return [
        changed_locations(dates[:, idx], True)
        for idx in range(n_columns)
    ]

In [10]:
class AnnualFinancalData(CustomFactor):
    """Annual line-item value taken from the ``t_n``-th latest report.

    With the current time as ``t``, returns the value of the first input
    at the ``t_n``-th most recent change of the report end date found
    inside the look-back window.

    Inputs must be exactly two columns: the value column first and a
    ``report_end_date`` column last.  The defaults read operating profit
    (``A033``) from the yearly profit statement.

    Parameters
    ----------
    t_n : int, default 1
        How many annual reports to go back (1 = most recent).  The
        window must be at least ``t_n * 245`` rows long.
    """
    inputs = [Fundamentals.profit_statement_yearly.A033,
              Fundamentals.profit_statement_yearly.report_end_date]
    window_length = 250
    params = {'t_n': 1}
    window_safe = True

    def _validate(self):
        """Check inputs, ``t_n`` and ``window_length`` for consistency.

        Raises
        ------
        ValueError
            If there are not exactly two inputs, ``t_n`` is smaller
            than 1, the window is shorter than ``t_n * 245``, or the
            last input is not named ``report_end_date``.
        """
        super(AnnualFinancalData, self)._validate()
        if len(self.inputs) != 2:
            raise ValueError('inputs列表长度只能为2')
        t_n = self.params.get('t_n')
        # t_n == 0 would index row_locs[-0], i.e. the *oldest* change
        # point, and negative values count from the wrong end.
        if t_n < 1:
            raise ValueError('t_n必须为不小于1的整数')
        at_least = t_n * 245
        if self.window_length < at_least:
            raise ValueError(
                'window_length值至少应为t_n*245，即{}'.format(at_least))
        last_col = self.inputs[-1]
        if last_col.name != 'report_end_date':
            raise ValueError('inputs列表最后一项必须为"report_end_date"')

    def compute(self, today, assets, out, values, dates, t_n):
        """Fill ``out`` with each asset's value at its ``t_n``-th latest
        report-date change point.

        Assets with fewer than ``t_n`` change points in the window
        receive the factor's missing value instead of raising
        ``IndexError`` and aborting the whole pipeline run.
        """
        locs = _select_annual_indices(dates)
        for col_loc, row_locs in enumerate(locs):
            if len(row_locs) < t_n:
                # Not enough report history inside the window.
                out[col_loc] = self.missing_value
                continue
            out[col_loc] = values[row_locs[-t_n], col_loc]

## `t-1`期

In [11]:
from zipline.research import run_pipeline
from zipline.research import select_output_by

In [12]:
def make_pipeline():
    """Build a pipeline comparing the t-1 annual figure with the latest report.

    Columns: operating profit from the most recent annual report
    (``AnnualFinancalData`` with its defaults), plus the latest operating
    profit, announcement date and report end date from the regular
    profit statement.
    """
    statement = Fundamentals.profit_statement
    columns = {
        't-1营业利润': AnnualFinancalData(),
        '最近一期营业利润': statement.A033.latest,
        '公告日期': statement.asof_date.latest,
        '报告截至日期': statement.report_end_date.latest,
    }
    return Pipeline(columns=columns)


In [13]:
# Run the pipeline for the single session 2018-5-11 (start == end) and
# display the rows for three sample stock codes.
df = run_pipeline(make_pipeline(), '2018-5-11','2018-5-11')
select_output_by(df, stock_codes=['000001','600000','000036'])

Unnamed: 0,Unnamed: 1,t-1营业利润,公告日期,报告截至日期,最近一期营业利润
2018-05-11 00:00:00+00:00,平安银行(000001),3022300.0,2018-04-19,2018-03-31,858400.0
2018-05-11 00:00:00+00:00,华联控股(000036),161003.0,2018-04-27,2018-03-31,16203.0
2018-05-11 00:00:00+00:00,浦发银行(600000),7027500.0,2018-04-27,2018-03-31,1710500.0


## `t-2`期

In [14]:
def make_pipeline():
    """Build a pipeline comparing the t-2 annual figure with the latest report.

    Same layout as the t-1 variant, but ``AnnualFinancalData`` goes back
    two annual reports and therefore uses a 500-row window.
    """
    statement = Fundamentals.profit_statement
    # An optional restriction could be passed to the factor, e.g.
    # mask=StaticSids([1, 333, 600000]).
    two_years_back = AnnualFinancalData(t_n=2, window_length=500)
    columns = {
        't-2营业利润': two_years_back,
        '最近一期营业利润': statement.A033.latest,
        '公告日期': statement.asof_date.latest,
        '报告截至日期': statement.report_end_date.latest,
    }
    return Pipeline(columns=columns)

In [15]:
# Run the t-2 pipeline for the single session 2018-5-11 (start == end)
# and display the rows for the same three sample stock codes.
df = run_pipeline(make_pipeline(), '2018-5-11','2018-5-11')
select_output_by(df, stock_codes=['000001','600000','000036'])

Unnamed: 0,Unnamed: 1,t-2营业利润,公告日期,报告截至日期,最近一期营业利润
2018-05-11 00:00:00+00:00,平安银行(000001),2977900.0,2018-04-19,2018-03-31,858400.0
2018-05-11 00:00:00+00:00,华联控股(000036),179037.0,2018-04-27,2018-03-31,16203.0
2018-05-11 00:00:00+00:00,浦发银行(600000),6966000.0,2018-04-27,2018-03-31,1710500.0


## 原始数据

In [16]:
from odo import odo
from zipline.pipeline.fundamentals.base import STOCK_DB

In [17]:
# Load the raw profit-statement table into a pandas DataFrame so the
# pipeline results can be cross-checked against the source data.
profit = odo(STOCK_DB.profit_statements, pd.DataFrame)

In [18]:
# Raw rows for stock code 600000.  In the output below, the 2017-12-31
# and 2016-12-31 operating profits equal the t-1 and t-2 pipeline values
# shown earlier for this stock.
pf = profit[profit.股票代码=='600000']

pf[['报告日期','营业利润']].tail(10)

Unnamed: 0,报告日期,营业利润
63114,2015-12-31,6606700.0
63115,2016-03-31,1843300.0
63116,2016-06-30,3523900.0
63117,2016-09-30,5351200.0
63118,2016-12-31,6966000.0
63119,2017-03-31,1839700.0
63120,2017-06-30,3598000.0
63121,2017-09-30,5391800.0
63122,2017-12-31,7027500.0
63123,2018-03-31,1710500.0


In [19]:
# Raw rows for stock code 000001.  In the output below, the 2017-12-31
# and 2016-12-31 operating profits equal the t-1 and t-2 pipeline values
# shown earlier for this stock.
pa = profit[profit.股票代码=='000001']

pa[['报告日期','营业利润']].tail(10)

Unnamed: 0,报告日期,营业利润
43,2015-12-31,2889500.0
44,2016-03-31,801400.0
45,2016-06-30,1615600.0
46,2016-09-30,2454500.0
47,2016-12-31,2977900.0
48,2017-03-31,822800.0
49,2017-06-30,1646800.0
50,2017-09-30,2500000.0
51,2017-12-31,3022300.0
52,2018-03-31,858400.0
