In [1]:
import collections
import datetime
import xlrd
import xlsxwriter
import numpy as np 

In [2]:
def add_months(s, p):
    """Return the date ``p`` months after ``s``.

    The day of month of ``s`` is kept. If that day does not exist in the
    target month (for example Jan 31 plus one month), the result rolls
    forward to the 1st of the following month.

    Args:
        s: object exposing ``year``, ``month`` and ``day`` (e.g. a
            ``datetime.datetime``). Only those three attributes are read,
            so any time-of-day on ``s`` is not carried over.
        p: integer number of months to add; negative values go backwards.

    Returns:
        A ``datetime.datetime`` at midnight.

    Raises:
        ValueError: if the resulting year is outside the range supported
            by ``datetime``.
    """
    # Use a zero-based month index so the year carry falls out of one divmod
    # (floor semantics keep this correct for negative ``p`` as well).
    carry_years, month_index = divmod(s.month - 1 + p, 12)
    year = s.year + carry_years
    month = month_index + 1

    try:
        return datetime.datetime(year, month, s.day)
    except ValueError:
        # The day does not exist in the target month: move to the 1st of the
        # next month, wrapping into January of the next year after December.
        if month == 12:
            return datetime.datetime(year + 1, 1, 1)
        return datetime.datetime(year, month + 1, 1)

def next_quater(cur_quater):
    """Return the date three months after ``cur_quater`` via ``add_months``."""
    months_per_quarter = 3
    return add_months(cur_quater, months_per_quarter)

def next_half(cur_half):
    """Return the date six months after ``cur_half`` via ``add_months``."""
    months_per_half = 6
    return add_months(cur_half, months_per_half)

class SharesInfo(object):
    """Plain record of three values: shares, market value and proportion.

    The values are stored unchanged on ``shares_``, ``marketValue_`` and
    ``proportion_``. Both ``str()`` and ``repr()`` render them with ``%f``,
    so they must be numeric for the text form to work.
    """

    def __init__(self, shares, marketValue, proportion):
        self.shares_, self.marketValue_, self.proportion_ = (
            shares,
            marketValue,
            proportion,
        )

    def __repr__(self):
        # Space-separated fields, each with the default six decimals of %f.
        fields = (self.shares_, self.marketValue_, self.proportion_)
        return "%f %f %f" % fields

    # str() and repr() produce the same text.
    __str__ = __repr__


# Start date (Jan 1 and Jul 1) of every half-year period from 2003 through
# 2017, in chronological order.
half_date = [
    datetime.datetime(year, month, 1)
    for year in range(2003, 2018)
    for month in (1, 7)
]

In [3]:
# raw_data[fund][half_start] -> value read from the sheet for that fund/period.
raw_data = collections.defaultdict(dict)
# Every fund name found in column 0 of the sheet.
funds_set = set()
# funds_num[half_start] -> number of funds with a recorded value in that period.
funds_num = collections.defaultdict(int)

with xlrd.open_workbook("half_year_perf.xlsx") as book:
    sheet = book.sheet_by_index(0)
    # Row 0 is skipped -- presumably a header row; confirm against the workbook.
    for i in range(1, sheet.nrows):
        funds_name = sheet.cell(i, 0).value
        funds_set.add(funds_name)
        # Column 1 is treated as the half-year starting 2003-01-01 and every
        # further column as the next half-year.
        cur_half = datetime.datetime(2003, 1, 1)
        for j in range(1, sheet.ncols):
            value = sheet.cell(i, j).value
            # 0 is treated as "no data". Blank cells are returned by xlrd as
            # '' and are skipped as well: they would otherwise be counted in
            # funds_num and later fail when stored into a float array.
            if value != 0 and value != "":
                raw_data[funds_name][cur_half] = value
                funds_num[cur_half] += 1

            cur_half = next_half(cur_half)

In [4]:
# Sorted fund names; a fund's position in this list is its slot in every
# per-period array below.
sorted_funds = sorted(funds_set)
funds_map = dict((fund, pos) for pos, fund in enumerate(sorted_funds))

# funds_data[half] -> float array with one slot per fund. Slots start at -1e9,
# a sentinel far below any stored value, so funds without data for a period
# end up last when the array is ranked in descending order.
funds_data = collections.defaultdict(
    lambda: np.full(len(sorted_funds), -1000000000.0)
)

for fund, per_half in raw_data.items():
    slot = funds_map[fund]
    for half, value in per_half.items():
        funds_data[half][slot] = value

In [5]:
# funds_rank[half] -> fund slots ordered from the highest value to the lowest
# for that period (ascending argsort, then reversed).
funds_rank = collections.defaultdict(dict)
for half, values in funds_data.items():
    ascending = np.argsort(values)
    funds_rank[half] = ascending[::-1]

In [7]:
# Write one row per (period, fund): the fund name, a period label such as
# "2003H1", and the fund's rank score within that period.
with xlsxwriter.Workbook(u"half_pref_order_result.xlsx") as book:
    sheet = book.add_worksheet()
    for col, title in enumerate(("funds", "half", "order")):
        sheet.write(0, col, title)

    wt_idx = 1  # next worksheet row to write; row 0 holds the header
    # Sort the periods so rows come out chronologically instead of in
    # arbitrary dict order.
    for half in sorted(funds_rank):
        total = funds_num[half]
        # Floor division keeps the label an integer half (month 1 -> H1,
        # month 7 -> H2) on Python 3 as well as Python 2.
        half_label = "%sH%s" % (half.year, half.month // 6 + 1)
        # Only the first `total` ranked entries carry data; the remainder of
        # the ranking are sentinel-filled slots of funds without a value.
        for i in range(total):
            sheet.write(wt_idx, 0, sorted_funds[funds_rank[half][i]])
            sheet.write(wt_idx, 1, half_label)
            # Best fund scores 1.0, the worst scores 1/total.
            sheet.write(wt_idx, 2, (total - i) / float(total))
            wt_idx += 1

In [34]:
# Ad-hoc sanity check: dump one fund's time series and one period's values.
debug_funds = u'000001'
debug_half = datetime.datetime(2003,7,1)

# raw_data, funds_num and funds_data are defaultdicts, so indexing a missing
# key would silently insert it into the analysis state. Read through .get()
# and membership tests so this debug cell never changes the data.
debug_series = raw_data.get(debug_funds, {})
for half in sorted(debug_series):
    # Single-argument print with %-formatting gives the same text on
    # Python 2 and Python 3.
    print("%s %s" % (half, debug_series[half]))

print("%s %s" % (len(funds_data), debug_half in funds_data))
print(funds_num.get(debug_half, 0))
if debug_half in funds_data:
    # Values for the period, highest first (sentinel-filled slots come last).
    print(np.sort(funds_data[debug_half])[::-1])

2003-01-01 00:00:00 6.41600630228
2003-07-01 00:00:00 6.26927029805
2004-01-01 00:00:00 1.07818558959
2004-07-01 00:00:00 2.80003290741
2005-01-01 00:00:00 -8.88450148075
2005-07-01 00:00:00 3.90032502709
2006-01-01 00:00:00 60.3857588501
2006-07-01 00:00:00 35.9549460748
2007-01-01 00:00:00 66.8923397957
2007-07-01 00:00:00 38.25
2008-01-01 00:00:00 -27.5783877668
2008-07-01 00:00:00 -22.7049180328
2009-01-01 00:00:00 50.4772004242
2009-07-01 00:00:00 11.2252942774
2010-01-01 00:00:00 -13.8888888889
2010-07-01 00:00:00 20.2617554404
2011-01-01 00:00:00 -5.48060708263
2011-07-01 00:00:00 -20.0713648528
2012-01-01 00:00:00 5.24553571429
2012-07-01 00:00:00 1.90880169671
2013-01-01 00:00:00 5.30697190427
2013-07-01 00:00:00 9.8814229249
2014-01-01 00:00:00 -2.87769784173
2014-07-01 00:00:00 18.6841349413
2015-01-01 00:00:00 45.246179966
2015-07-01 00:00:00 -13.345672561
2016-01-01 00:00:00 -19.3921852388
2016-07-01 00:00:00 -4.12406556179
2017-01-01 00:00:00 10.7899807322
2017-07-01 00:0