In [40]:
import os
import datetime
import xlrd
import collections
import math

In [50]:
def add_years(s, p):
    '''Return the calendar date of ``s`` shifted by ``p`` years.

    Only year/month/day are carried over: the result is always a
    ``datetime.datetime`` at midnight, whatever time of day ``s`` has.
    If ``s`` is 29 February and the target year is not a leap year, the
    result rolls forward to 1 March of the target year.

    Raises ValueError if the target year is outside the range supported
    by ``datetime``.
    '''
    target_year = s.year + p
    try:
        return datetime.datetime(year=target_year, month=s.month, day=s.day)
    except ValueError:
        # The only day that can stop existing when just the year changes
        # is 29 February; anything else is a genuine error (e.g. year out
        # of range) and must propagate.
        if s.month == 2 and s.day == 29:
            return datetime.datetime(year=target_year, month=3, day=1)
        raise

def add_months(s, p):
    '''Return the calendar date of ``s`` shifted by ``p`` months.

    ``p`` must be an integer and may be negative. Only year/month/day are
    carried over: the result is a ``datetime.datetime`` at midnight.
    If the day of ``s`` does not exist in the target month (e.g. 31 January
    plus one month), the result rolls forward to the 1st of the month
    after the target month.
    '''
    # Count months from year 0 so that a single floor-division handles both
    # year carries and negative offsets, on Python 2 and Python 3 alike.
    year, month_index = divmod(s.year * 12 + (s.month - 1) + p, 12)
    month = month_index + 1

    try:
        return datetime.datetime(year, month, s.day)
    except ValueError:
        # Day does not exist in the target month: use the 1st of the next one.
        if month == 12:
            return datetime.datetime(year + 1, 1, 1)
        return datetime.datetime(year, month + 1, 1)

In [41]:
# Loaders keep only rows dated on or after 1 January BEGIN_YEAR and strictly
# before 1 January END_YEAR.
BEGIN_YEAR, END_YEAR = 2004, 2018

# Raw inputs, filled in by the loaders as {identifier: {date: value}}.
Rt_data = collections.defaultdict(dict)  # adjusted unit net asset value per fund
Rm_data = collections.defaultdict(dict)  # index price per index

# Placeholders for derived series. calc() computes values named Rt and Rm;
# nothing in the visible code assigns Rf.
Rt = Rf = Rm = None

In [42]:
def Rt_load_data(filename = 'Fund_NAV1.txt'):
    '''Load adjusted fund unit-NAV records from a tab-separated file into ``Rt_data``.

    The first line is skipped (presumably a header row), as are lines of
    five characters or fewer. NUL bytes are removed from each line before
    parsing. Every remaining line must hold exactly four tab-separated
    fields: fund class id, trading date (``YYYY-MM-DD``), symbol, NAV.

    Rows dated within [1 Jan BEGIN_YEAR, 1 Jan END_YEAR) are stored as
    ``Rt_data[symbol][trading_date] = float(nav)``. Lines that cannot be
    parsed are printed (file name, zero-based line number, cleaned text)
    and otherwise ignored.
    '''
    # Loop-invariant range bounds, built once instead of once per line.
    range_start = datetime.datetime(BEGIN_YEAR, 1, 1)
    range_end = datetime.datetime(END_YEAR, 1, 1)

    with open(filename, "r") as ifid:
        for line_num, line in enumerate(ifid):
            if line_num == 0 or len(line) <= 5:
                continue
            text = line.replace('\x00', '').strip()
            try:
                _class_id, date_text, symbol, nav_text = text.split('\t')
                trading_date = datetime.datetime.strptime(date_text, "%Y-%m-%d")
                # The NAV is only converted for in-range rows, so a bad
                # value on an out-of-range row is silently skipped.
                if range_start <= trading_date < range_end:
                    Rt_data[symbol][trading_date] = float(nav_text)
            except ValueError:
                # Wrong field count, bad date or bad number: report and move on.
                print("%s %s %s" % (filename, line_num, text))
            
def Rm_load_data(filename="IDX_Idxtrd1.txt"):
    '''Load index price records from a tab-separated file into ``Rm_data``.

    The first line is skipped (presumably a header row), as are lines of
    five characters or fewer. NUL bytes are removed from each line before
    parsing. Every remaining line must hold exactly three tab-separated
    fields: index code, trading date (``YYYY-MM-DD``), price.

    Rows dated within [1 Jan BEGIN_YEAR, 1 Jan END_YEAR) are stored as
    ``Rm_data[index_code][trading_date] = float(price)``. Lines that cannot
    be parsed are printed (file name, zero-based line number, cleaned text)
    and otherwise ignored.
    '''
    # Loop-invariant range bounds, built once instead of once per line.
    range_start = datetime.datetime(BEGIN_YEAR, 1, 1)
    range_end = datetime.datetime(END_YEAR, 1, 1)

    with open(filename, "r") as ifid:
        for line_num, line in enumerate(ifid):
            if line_num == 0 or len(line) <= 5:
                continue
            text = line.replace('\x00', '').strip()
            try:
                index_code, date_text, price_text = text.split('\t')
                trading_date = datetime.datetime.strptime(date_text, "%Y-%m-%d")
                # The price is only converted for in-range rows, so a bad
                # value on an out-of-range row is silently skipped.
                if range_start <= trading_date < range_end:
                    Rm_data[index_code][trading_date] = float(price_text)
            except ValueError:
                # Wrong field count, bad date or bad number: report and move on.
                print("%s %s %s" % (filename, line_num, text))

In [43]:
def calc_earning_ratio(data):
    '''Compute log returns between consecutive observations of each series.

    ``data`` maps an identifier to a ``{datetime: value}`` dict. For every
    identifier the dates are visited in ascending order and, from the
    second observation on, ``log(value) - log(previous value)`` is stored
    under the current date. Identifiers with fewer than two observations do
    not appear in the result.

    Whenever two consecutive observations are more than 10 days apart, a
    warning line is appended to ``log_calc_earning_ratio.txt``.

    Returns a ``defaultdict(dict)`` shaped ``{identifier: {date: log_return}}``.
    Raises ValueError (from ``math.log``) if a value is not positive; the
    log file is closed even in that case.
    '''
    rtn = collections.defaultdict(dict)
    max_gap = datetime.timedelta(days=10)
    # "with" guarantees the log is flushed and closed if math.log raises.
    with open("log_calc_earning_ratio.txt", "a") as log:
        for key in data:
            series = data[key]
            pre_date = None
            pre_value = None
            for cur_date in sorted(series):
                value = series[cur_date]
                if pre_date is not None:
                    rtn[key][cur_date] = math.log(value) - math.log(pre_value)
                    if cur_date - pre_date > max_gap:
                        log.write("%s %d %s %f 的前一条数据是 %s %f，疑似有数据丢失，请注意\n" % (
                            key, (cur_date - pre_date).days,
                            cur_date.strftime("%Y-%m-%d"), value,
                            pre_date.strftime("%Y-%m-%d"), pre_value))
                pre_date = cur_date
                pre_value = value
    return rtn

def calc_quater_sigma(data, dump_filename="quater_sigma.txt"):
    '''Compute the volatility of each series within each calendar quarter.

    NOTE(review): the original function stopped at an empty ``for`` loop
    and could not even be defined. The body below is a completion under
    these assumptions, to be confirmed with the author:
      * ``data`` is ``{identifier: {datetime: return}}``, i.e. the output
        shape of ``calc_earning_ratio``;
      * "sigma" is the sample standard deviation (n - 1 denominator) of
        the returns dated within the quarter;
      * the dump file holds one ``identifier<TAB>quarter start<TAB>sigma``
        line per result.

    Quarters start on 1 January, April, July and October. Only dates within
    [1 Jan BEGIN_YEAR, 1 Jan END_YEAR) are considered, and quarters with
    fewer than two observations are skipped.

    Returns a ``defaultdict(dict)`` shaped
    ``{identifier: {quarter_start: sigma}}``.
    '''
    rtn = collections.defaultdict(dict)
    range_start = datetime.datetime(BEGIN_YEAR, 1, 1)
    # The last quarter ends three months after 1 October, i.e. on
    # 1 January END_YEAR (the original draft added four months).
    range_end = datetime.datetime(END_YEAR, 1, 1)

    with open(dump_filename, "w") as ofid:
        for key in data:
            # Group this series' values by the first day of their quarter.
            buckets = collections.defaultdict(list)
            for cur_date, value in data[key].items():
                if range_start <= cur_date < range_end:
                    quarter_month = 3 * ((cur_date.month - 1) // 3) + 1
                    quarter_start = datetime.datetime(cur_date.year, quarter_month, 1)
                    buckets[quarter_start].append(value)

            for quarter_start in sorted(buckets):
                values = buckets[quarter_start]
                count = len(values)
                if count < 2:
                    continue  # a sample standard deviation needs two points
                mean = sum(values) / float(count)
                variance = sum((v - mean) ** 2 for v in values) / float(count - 1)
                sigma = math.sqrt(variance)
                rtn[key][quarter_start] = sigma
                ofid.write("%s\t%s\t%f\n" % (key, quarter_start.strftime("%Y-%m-%d"), sigma))
    return rtn

In [47]:
def load_data():
    '''Load every index price file and every fund NAV file, then print totals.

    Reads ``IDX_Idxtrd1.txt`` .. ``IDX_Idxtrd4.txt`` through ``Rm_load_data``
    and ``Fund_NAV1.txt`` .. ``Fund_NAV6.txt`` through ``Rt_load_data``, in
    that order, filling the module-level ``Rm_data`` and ``Rt_data``.
    Afterwards prints how many funds/indices and how many records are held.
    '''
    for part in range(1, 5):
        Rm_load_data("IDX_Idxtrd%d.txt" % part)

    for part in range(1, 7):
        Rt_load_data('Fund_NAV%d.txt' % part)

    # Single-argument print(...) and .values() behave the same on Python 2 and 3.
    print("共加载%d只基金的%d条累计收益记录" % (len(Rt_data), sum(len(var) for var in Rt_data.values())))
    print("共加载%d只指数的%d条价格记录" % (len(Rm_data), sum(len(var) for var in Rm_data.values())))

def calc():
    '''Derive the fund and index return series from the loaded raw data.

    Removes any existing ``log_calc_earning_ratio.txt`` so the log only
    reflects this run, runs ``calc_earning_ratio`` on ``Rt_data`` and
    ``Rm_data``, stores the results in the module-level ``Rt`` and ``Rm``,
    and prints how many series and return observations were produced.
    '''
    # Without this declaration the assignments below create locals and the
    # module-level Rt / Rm stay None after the call.
    global Rt, Rm

    if os.path.exists("log_calc_earning_ratio.txt"):
        os.remove("log_calc_earning_ratio.txt")
    Rt = calc_earning_ratio(Rt_data)
    Rm = calc_earning_ratio(Rm_data)

    # Single-argument print(...) and .values() behave the same on Python 2 and 3.
    print("共加载%d只基金的%d天收益率" % (len(Rt), sum(len(var) for var in Rt.values())))
    print("共加载%d只指数的%d天收益率" % (len(Rm), sum(len(var) for var in Rm.values())))
    
def main():
    '''Run the pipeline end to end: load the raw files, then compute returns.'''
    load_data()
    calc()

共加载1178只基金的1382860条累计收益记录
共加载3只指数的10338条价格记录
共加载1178只基金的1381682天收益率
共加载3只指数的10335天收益率


In [39]:
# Read all index and fund files into Rm_data / Rt_data and print record counts.
load_data()

In [49]:
# Compute log returns from the loaded data and print how many were produced.
calc()

共加载1178只基金的1381682天收益率
共加载3只指数的10335天收益率
