In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from scipy.optimize import minimize
from WindPy import w
from sklearn.linear_model import LinearRegression
import datetime
%matplotlib inline

In [2]:
# Start the WindPy session. The call's result is echoed as the cell output;
# the captured run shows ErrorCode=0 and Data=[OK!].
w.start()

Welcome to use Wind Quant API for Python (WindPy)!

COPYRIGHT (C) 2017 WIND INFORMATION CO., LTD. ALL RIGHTS RESERVED.
IN NO CIRCUMSTANCE SHALL WIND BE RESPONSIBLE FOR ANY DAMAGES OR LOSSES CAUSED BY USING WIND QUANT API FOR Python.


.ErrorCode=0
.Data=[OK!]

In [3]:
# Sampling window: the 200 calendar days ending at the moment this cell runs.
# NOTE: because the end point is "now", results change from run to run.
end_date = datetime.datetime.today()
start_date = end_date - datetime.timedelta(days=200)

# 获取股票指数历史收益率

In [4]:
# Sector indices used as regressors, passed to Wind as one comma-separated string.
# Labels below are translated from the original inline comment.
# Earlier alternative (labelled financials/real estate, consumer, pharma, tech):
#   'CI005917.WI,CI005919.WI,CI005018.WI,CI005916.WI'
codes = ','.join([
    '000934.SH',   # CSI financials & real estate
    '000931.CSI',  # consumer discretionary
    '000933.SH',   # pharma (original label: "必选医药")
    '000935.SH',   # information technology
])

In [5]:
# Closing levels of the sector indices over [start_date, end_date].
data = w.wsd(codes, 'close', start_date, end_date)
# Wind results carry an ErrorCode field (0 on success, as in the w.start() output).
# Stop here on failure so an error payload is never reshaped into a price table.
if data.ErrorCode != 0:
    raise RuntimeError("w.wsd('close') failed with ErrorCode=%s" % data.ErrorCode)

In [6]:
# data.Data is indexed [code][date]; transpose so that rows are dates and
# columns are index codes.
df = pd.DataFrame(
    np.transpose(data.Data),
    index=data.Times,
    columns=data.Codes,
)

In [7]:
# Period-over-period returns of the sector indices; the first row (no previous
# value) and any other row containing NaN are removed.
rdf = (
    df
    .pct_change()
    .dropna()
)

# 获取股票基金历史收益率

In [8]:
# Fund pool workbook; the columns used later are u'代码', u'名称' and,
# optionally, u'市值占净值'.
# Alternative pool file used previously: u'../data/嘉实股票基金池.xlsx'
fund_pool_path = u'../data/股票基金池.xlsx'
fund_df = pd.read_excel(fund_pool_path)

In [9]:
# Display the loaded fund pool for a visual check of codes, names and weights.
fund_df

Unnamed: 0,代码,名称,市值占净值
0,070002.OF,嘉实增长,0.2
1,000595.OF,嘉实泰和,0.140396
2,070021.OF,嘉实主题新动力,0.2
3,003634.OF,嘉实农业产业,0.199999
4,004477.OF,嘉实沪港深回报,0.199999
5,005267.OF,嘉实价值精选,0.017521
6,005303.OF,嘉实医药健康A,0.042069


In [10]:
# From here on `codes` is the list of fund codes (it previously held the
# comma-separated index-code string).
codes = fund_df.loc[:, u'代码'].tolist()

In [11]:
# Display the fund codes that will be requested from Wind.
codes

[u'070002.OF',
 u'000595.OF',
 u'070021.OF',
 u'003634.OF',
 u'004477.OF',
 u'005267.OF',
 u'005303.OF']

In [12]:
# NAV series of every fund in the pool over [start_date, end_date].
data = w.wsd(codes, 'nav', start_date, end_date)
# Wind results carry an ErrorCode field (0 on success, as in the w.start() output).
# Stop here on failure so an error payload is never reshaped into a NAV table.
if data.ErrorCode != 0:
    raise RuntimeError("w.wsd('nav') failed with ErrorCode=%s" % data.ErrorCode)

In [13]:
# data.Data is indexed [fund][date]; transpose so that rows are dates and
# columns are fund codes.
fdf = pd.DataFrame(
    np.transpose(data.Data),
    index=data.Times,
    columns=data.Codes,
)

In [14]:
# Remove every fund (column) that has at least one missing NAV in the window.
fdf = fdf.dropna(axis=1, how='any')

In [15]:
# Period-over-period fund returns; the first row (no previous value) and any
# other row containing NaN are removed.
rfdf = (
    fdf
    .pct_change()
    .dropna()
)

In [16]:
# Restrict `codes` to the funds that survived the missing-data filter.
codes = list(rfdf.columns)

In [17]:
# Sanity check: both return tables should have the same number of rows (dates).
# Parenthesised form prints the same text under Python 2 and Python 3.
print(rfdf.shape)
print(rdf.shape)

(132, 7)
(132, 4)


# 非负线性优化求解

In [18]:
# Empty result table: one row per fund code, one column per sector index.
# An earlier label set was: u'金融地产', u'消费', u'医药', u'科技'.
sector_labels = [u'金融地产', u'可选消费', u'必选医药', u'科技']
decomp = pd.DataFrame(
    index=pd.Index(codes, name=u'基金代码'),
    columns=sector_labels,
)

In [19]:
def objective(coef, args):
    """Penalised squared replication error for a weight vector.

    Parameters
    ----------
    coef : numpy.ndarray
        Candidate weights, one per column of ``X``.
    args : sequence
        ``args[0]`` is the regressor matrix ``X``; ``args[1]`` is the target
        vector ``y``.

    Returns
    -------
    float
        ``(sum((X.coef - y)**2) + sum(|coef|)) / coef.size``.

    Notes
    -----
    The divisor is the number of coefficients, not the number of observations.
    When the caller constrains ``coef`` to be non-negative and to sum to 1
    (as ``decomposition`` does), the L1 term is constant and does not move
    the minimiser.
    """
    X, y = args[0], args[1]
    residual = np.dot(X, coef) - y
    squared_error = np.sum(residual * residual)
    # L1 ("lasso") penalty; the unpenalised variant would use squared_error alone.
    l1_penalty = np.abs(coef).sum()
    return (squared_error + l1_penalty) / coef.size

def decomposition(code):
    """Estimate non-negative sector weights that best replicate one fund.

    Minimises ``objective`` over weights on the columns of the module-level
    ``rdf`` (sector-index returns) against ``rfdf[code]`` (the fund's returns),
    subject to the weights being non-negative and summing to 1.

    Parameters
    ----------
    code : label
        Column of ``rfdf`` identifying the fund.

    Returns
    -------
    numpy.ndarray
        One weight per column of ``rdf``, in column order. Values that should
        be zero may come back as tiny positive or negative numbers.

    Raises
    ------
    ValueError
        If ``rdf`` and ``rfdf`` have no dates in common.
    """
    import warnings

    TOLERANCE = 1e-10

    # Pair observations by date instead of by row position: the two frames are
    # built from separate requests and filtered independently, so equal length
    # alone does not guarantee that row i refers to the same day in both.
    common_dates = rdf.index.intersection(rfdf.index)
    if len(common_dates) == 0:
        raise ValueError('rdf and rfdf share no dates; cannot decompose %s' % code)
    X = rdf.loc[common_dates].values
    y = rfdf.loc[common_dates, code].values

    # Start from equal weights across the sector indices.
    n = rdf.shape[1]
    init_weights = np.array([1. / n] * n)

    constraints = ({'type': 'eq', 'fun': lambda x: np.sum(x) - 1.0},  # weights sum to 1
                   {'type': 'ineq', 'fun': lambda x: x})               # each weight >= 0

    opt_res = minimize(fun=objective,
                       x0=init_weights,
                       # objective takes (coef, args): pass [X, y] as ONE extra argument.
                       args=([X, y],),
                       constraints=constraints,
                       tol=TOLERANCE,
                       options={'disp': True})

    # Surface solver failures instead of silently returning the last iterate.
    if not opt_res.success:
        warnings.warn('decomposition(%s) did not converge: %s' % (code, opt_res.message))

    return opt_res.x

In [20]:
# Try the solver on the first fund only, to inspect convergence output and weights.
decomposition(codes[0])

Optimization terminated successfully.    (Exit mode 0)
            Current function value: 0.251040255523
            Iterations: 16
            Function evaluations: 96
            Gradient evaluations: 16


array([2.25033098e-01, 7.67663507e-02, 6.98200551e-01, 2.03626721e-18])

In [21]:
# Solve for each fund in turn and store its weight vector as that fund's row.
for fund_code in codes:
    decomp.loc[fund_code] = decomposition(fund_code)

Optimization terminated successfully.    (Exit mode 0)
            Current function value: 0.251040255523
            Iterations: 16
            Function evaluations: 96
            Gradient evaluations: 16
Optimization terminated successfully.    (Exit mode 0)
            Current function value: 0.256507057505
            Iterations: 17
            Function evaluations: 102
            Gradient evaluations: 17
Optimization terminated successfully.    (Exit mode 0)
            Current function value: 0.251444686933
            Iterations: 16
            Function evaluations: 96
            Gradient evaluations: 16
Optimization terminated successfully.    (Exit mode 0)
            Current function value: 0.256044702564
            Iterations: 23
            Function evaluations: 138
            Gradient evaluations: 23
Optimization terminated successfully.    (Exit mode 0)
            Current function value: 0.250940234638
            Iterations: 11
            Function evaluations: 66


In [22]:
# Look each name up by fund code. A positional list assignment would silently
# attach names to the wrong rows if fund_df's row order ever differed from
# decomp's row order.
decomp[u'基金名称'] = fund_df.set_index(u'代码')[u'名称'].reindex(decomp.index).values

In [23]:
# Optional column: each fund's holding weight. Looked up by fund code so the
# values stay attached to the right rows whatever the row order of fund_df.
if u'市值占净值' in fund_df.columns:
    decomp[u'市值占净值'] = (
        fund_df.set_index(u'代码')[u'市值占净值'].reindex(decomp.index).values
    )

In [24]:
# Put the descriptive columns first, followed by the sector-weight columns.
# Selecting by name (rather than slicing at position 4) gives the same layout
# on the first run and leaves it unchanged if the cell is executed again.
meta_cols = [col for col in (u'基金名称', u'市值占净值') if col in decomp.columns]
sector_cols = [col for col in decomp.columns if col not in meta_cols]
decomp = decomp[meta_cols + sector_cols]

In [25]:
# Display the per-fund sector weights.
decomp

Unnamed: 0_level_0,基金名称,市值占净值,金融地产,可选消费,必选医药,科技
基金代码,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
070002.OF,嘉实增长,0.2,0.225033,0.0767664,0.698201,2.03627e-18
000595.OF,嘉实泰和,0.140396,-1.0349e-17,5.25476e-18,0.720364,0.279636
070021.OF,嘉实主题新动力,0.2,0.0409531,0.0737921,0.193036,0.692218
003634.OF,嘉实农业产业,0.199999,0.105178,0.335179,0.467355,0.092288
004477.OF,嘉实沪港深回报,0.199999,0.46926,0.0716834,0.459057,-3.72694e-18
005267.OF,嘉实价值精选,0.017521,0.446564,0.291024,0.262411,-5.60736e-18
005303.OF,嘉实医药健康A,0.042069,8.89046e-18,0.0,1.0,-2.71051e-18


In [26]:
# Append a portfolio row (u'组合'): the holding-weighted average of the funds'
# sector weights.
if u'市值占净值' in fund_df.columns:
    portfolio_label = u'组合'
    sector_cols = [u'金融地产', u'可选消费', u'必选医药', u'科技']

    # Start from the fund rows only, so re-running the cell replaces the
    # portfolio row instead of stacking a second one.
    fund_rows = decomp.loc[decomp.index != portfolio_label]

    # Take holding weights by fund code. Funds dropped for missing NAVs are not
    # rows of decomp, so they are excluded from both the dot product and the
    # normaliser; a positional dot against fund_df would fail or misalign.
    holding = (fund_df.set_index(u'代码')[u'市值占净值']
               .reindex(fund_rows.index)
               .astype(float))
    total_holding = holding.sum()

    # Non-sector cells (name, weight) stay blank on the portfolio row.
    portfolio = dict((col, '') for col in decomp.columns)
    for col in sector_cols:
        portfolio[col] = np.dot(fund_rows[col].astype(float), holding) / total_holding

    # pd.concat works on old and new pandas; DataFrame.append was removed in 2.0.
    decomp = pd.concat([
        fund_rows,
        pd.DataFrame([portfolio], index=[portfolio_label], columns=decomp.columns),
    ])

In [27]:
# Write the decomposition table to Excel.
# Alternative output file used previously: u'../data/嘉实股票基金收益率分解.xlsx'
output_path = u'../data/股票基金收益率分解.xlsx'
decomp.to_excel(output_path)