# PPP Replication Code (Version 0.92)

In [1]:
import pandas as pd
import numpy as np

In [2]:
from UtilityFunctionModule import UtilityFunction
from WeightFunctionModule import WeightFunction
from SupportFunction import (portfolio_return_calculation,
                             single_factor_reshape, 
                             panel_data_apply,
                             panel_data_re_frequency,
                             factor_data_reshape)
from DataManipulation import BasicManipulation
from test_support_function import panel_data_shift

## Data Cleaning and Manipulation Step

### Import Sample Data

In [3]:
# Load the raw sample inputs from CSV files in the working directory.
price_data, factor_data = (pd.read_csv(csv_path) for csv_path in ("Price.csv", "Factor.csv"))

In [47]:
# Parse the "date" column of each frame (year-first strings) and then order
# the rows by ticker and, within a ticker, chronologically.
price_data["date"] = pd.to_datetime(price_data["date"], yearfirst=True)
price_data = price_data.sort_values(by=["ticker", "date"])

factor_data["date"] = pd.to_datetime(factor_data["date"], yearfirst=True)
factor_data = factor_data.sort_values(by=["ticker", "date"])

In [48]:
# Use the date as the row index of both frames; the "date" column itself is
# kept, so the date is available both as index and as a regular column.
price_data.index = price_data["date"]
factor_data.index = factor_data["date"]

Obtain the ticker list. The ticker list contains the universe of stocks that you want to form a portfolio with.

The tickers are sorted in ascending string order, which fixes the ordering of the assets.

In [54]:
# Unique tickers present in the price data, in ascending string order.
ticker_list = sorted(set(price_data["ticker"]))

## Calculate Return and Change Format of Factor Data

In [55]:
# Frequency code passed to both the return calculation and the factor
# re-frequency helper so that the two data sets share the same dates.
# NOTE(review): presumably the pandas business-month-end offset alias — confirm
# against the helper modules.
frequency = "BM"

In [56]:
# Helper object whose multiple_asset_return_calculation method builds ret_df.
data_manipulation = BasicManipulation()

In [57]:
# Compute per-ticker returns from the "price" column at the chosen frequency
# (log_ret=False requests non-log returns).
# NOTE(review): later cells filter ret_df on a "ticker" column and shift its
# "price" column, so the returns presumably stay stored under "price" — confirm.
ret_df = data_manipulation.multiple_asset_return_calculation(price_data,
                                                             by="ticker",
                                                             target="price",
                                                             index_name="date",
                                                             ticker_list=ticker_list,
                                                             frequency=frequency,log_ret=False)

### Need a Step to match the factor to the return data

One way to achieve this is to convert the factor data to the same frequency as the return data. This works under the following two conditions:
1. The factor data has the same dates as the price data.
2. The dates used are reasonably close to the end of the quarter.

In [59]:
# Re-sample the factor panel to the same frequency as the returns, using the
# "ffill" method. Note this overwrites factor_data, so re-running the cell
# feeds the already re-sampled frame back into the helper.
factor_data = panel_data_re_frequency(ticker_list,dataframe=factor_data,frequency=frequency,method="ffill")

Now we can return to the main process.
All of this tedious data manipulation should be avoided in the future by building a better database and data-manipulation module.

In [60]:
# N: number of assets in the universe.
N = len(ticker_list)
# T: number of dates, measured as the number of factor rows of the first ticker.
T = len(factor_data.loc[factor_data["ticker"] == ticker_list[0], "date"])

In [61]:
# Reshape the three factor columns of the long-format factor panel.
# NOTE(review): downstream code treats factor_dic as a mapping from factor name
# to a 2-D array indexed [row, :]; presumably each array is (T, N) — confirm
# against factor_data_reshape.
factor_dic = factor_data_reshape(factor_colname_list=["factor_1","factor_2","factor_3"]
                                 ,dataframe=factor_data
                                 ,number_of_assets=N
                                 ,number_of_dates=T)

In [62]:
# Factor names in ascending order, so the factor ordering is deterministic.
factor_list = sorted(factor_dic.keys())

## Remark

1. Need a data extraction module with methods such as getting all daily data, all monthly data, all quarterly data...
2. Need a dataframe reshape module

## Formal Portfolio Optimization Step

1. weight_func

In [63]:
# Equal-weight benchmark: a T x N matrix in which every entry is 1 / (number of tickers).
wb = np.full((T, N), 1.0) / len(ticker_list)

In [64]:
# Build the weight function from the factor dictionary and the benchmark
# weights. weight_func is later called as weight_func(theta) and its result is
# multiplied element-wise with ret_mat.
weight_creation = WeightFunction(factor_dic)
weight_func = weight_creation.linear_weight_function(wb)

2. ret_mat

In [66]:
def panel_data_shift(ticker_list,dataframe,colname,shift_step):
    """Shift one column of a long-format panel separately for every ticker.

    NOTE(review): this definition shadows the ``panel_data_shift`` imported
    from ``test_support_function`` at the top of the notebook.

    :param ticker_list: iterable of ticker values to process; the output
        blocks follow this order.
    :param dataframe: DataFrame with a "ticker" column and the column
        ``colname``. It is not modified.
    :param colname: name of the column to shift.
    :param shift_step: number of rows passed to ``Series.shift`` (negative
        values pull later rows forward).
    :return: DataFrame made of the per-ticker sub-frames concatenated in
        ``ticker_list`` order, with ``colname`` shifted inside each ticker.
        An empty frame with the input's columns is returned when
        ``ticker_list`` is empty.
    """
    shifted_frames = []

    for ticker in ticker_list:
        # Work on an explicit copy: assigning into the filtered slice directly
        # raises pandas' SettingWithCopyWarning.
        sub_df = dataframe[dataframe["ticker"]==ticker].copy()
        sub_df[colname] = sub_df[colname].shift(shift_step)
        shifted_frames.append(sub_df)

    if not shifted_frames:
        # pd.concat refuses an empty list; hand back an empty frame instead.
        return dataframe.iloc[0:0].copy()

    # The original built this frame but never returned it.
    return pd.concat(shifted_frames)

In [67]:
# Build the forward-looking frame: within each ticker, shift the "price"
# column of ret_df up by one row so that every row carries the value of the
# following row (the last row of each ticker becomes NaN).
storage = []
for ticker in ticker_list:
    # Copy the filtered slice before writing to it; assigning into the
    # un-copied slice is what triggers pandas' SettingWithCopyWarning.
    sub_df = ret_df[ret_df["ticker"]==ticker].copy()
    sub_df["price"] = sub_df["price"].shift(-1)
    storage.append(sub_df)
ret_df_forward = pd.concat(storage)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  after removing the cwd from sys.path.


In [68]:
# Order rows by ticker, then date, before the column is reshaped into a matrix.
ret_df_forward = ret_df_forward.sort_values(by=["ticker","date"])

In [69]:
# Reshape the shifted "price" series into the matrix used by the objective.
# NOTE(review): ret_mat is multiplied element-wise with weight_func(theta), so
# it presumably has shape (T, N) like the benchmark weights — confirm against
# single_factor_reshape.
ret_mat = single_factor_reshape(factor_series = ret_df_forward.price,
                              number_of_assets = N,
                              number_of_dates = T)

3. util

In [70]:
# Factory object from which the utility function is obtained.
utility_func_creator = UtilityFunction()

In [71]:
# CRRA utility with risk aversion 5; util is later applied to the vector of
# portfolio returns inside objective_func.
util = utility_func_creator.crra_utility(risk_aversion=5)

4. portfolio_return_cal

In [72]:
def portfolio_return_cal(theta,weight_func,ret_mat):
    """Compute the portfolio return of every row for a given parameter vector.

    :param theta: parameter vector forwarded unchanged to ``weight_func``.
    :param weight_func: callable mapping ``theta`` to a weight matrix that can
        be multiplied element-wise with ``ret_mat``.
    :param ret_mat: 2-D matrix of asset returns.
    :return: 1-D ndarray holding, for each row, the sum over columns of
        weight * return.
    """
    w_mat = weight_func(theta)
    weighted_ret = w_mat * ret_mat
    # Vectorised row sum. The previous np.apply_along_axis(np.sum, 1, ...)
    # looped over rows in Python on every objective evaluation.
    return np.sum(np.asarray(weighted_ret), axis=1)

### Final Objective Function

In [73]:
def objective_func(theta):
    """Return the negative mean utility of the portfolio returns implied by theta.

    Relies on the notebook-level ``weight_func``, ``ret_mat`` and ``util``.
    The sign is flipped so that minimising this function maximises the
    average utility.
    """
    portfolio_returns = portfolio_return_cal(theta, weight_func, ret_mat)
    mean_utility = np.mean(util(portfolio_returns))
    return -mean_utility

## Optimizer

In [74]:
import scipy.optimize as opt

In [75]:
# Starting point for the optimizer (one entry per factor) and solver options;
# "disp" asks scipy to print a convergence summary.
x0 = [1.0, 10.0, 1.0]
myoptions = dict(disp=True)

In [76]:
# Minimise the negative average utility with the derivative-free Nelder-Mead
# method; the result object is kept so that the optimal theta can be reused.
opt_results = opt.minimize(objective_func,x0,options=myoptions,method = 'Nelder-Mead')

Optimization terminated successfully.
         Current function value: 0.244971
         Iterations: 140
         Function evaluations: 254


In [77]:
# Take the optimal parameters and turn them into a K x 1 column vector, the
# shape weight_prediction documents for theta.
theta = opt_results["x"]
theta = theta.reshape(len(theta), 1)

In [78]:
# Re-run the optimisation with Powell's method from the same starting point.
# The result is not stored: it is left as the cell's last expression so the
# full result object is displayed for comparison with Nelder-Mead.
opt.minimize(objective_func,x0,options=myoptions,method = 'Powell')

Optimization terminated successfully.
         Current function value: 0.244974
         Iterations: 3
         Function evaluations: 101


   direc: array([[ 0.        ,  0.        ,  1.        ],
       [ 0.        ,  1.        ,  0.        ],
       [ 2.06998113, -0.86347338, -2.07754822]])
     fun: 0.24497426302800646
 message: 'Optimization terminated successfully.'
    nfev: 101
     nit: 3
  status: 0
 success: True
       x: array([-4.43730369,  4.91596928, -9.51004765])

## Predict Future Weight

In [79]:
def weight_prediction(wb,theta,factor):
    '''
    Combine benchmark weights with the factor-driven active weights.

    :pars:wb(array) N x 1 benchmark weights
    :pars:theta(array) K x 1 factor coefficients
    :pars:factor(array) K x N factor values, one row per factor
    :return: (array) N x 1, wb + factor' * theta / N
    '''
    asset_count = wb.shape[0]
    tilt = (factor.T @ theta) / asset_count
    return wb + tilt

def get_new_factor_value(factor_dic,factor_list,new_factor_row=-1):
    '''
    Collect one row from every factor matrix.

    :pars:factor_dic(dict) factor name -> 2-D array
    :pars:factor_list(list) factor names; fixes the row order of the output
    :pars:new_factor_row(int) row to take from each matrix (default: last row)
    :return: (array) one row per factor, in factor_list order
    '''
    return np.array([factor_dic[name][new_factor_row, :] for name in factor_list])

In [80]:
# Equal-weight benchmark as an N x 1 column vector for the prediction step.
# Note: this rebinds wb, which earlier held the T x N benchmark matrix.
wb = np.full((N, 1), 1.0) / N

In [81]:
# Take the last row of every factor matrix, stacked in factor_list order.
factor = get_new_factor_value(factor_dic,factor_list,new_factor_row=-1)

In [82]:
# Predicted weights from the optimal theta and the selected factor values;
# left as the cell's last expression so the N x 1 result is displayed.
weight_prediction(wb,theta,factor)

array([[0.2030573 ],
       [0.31436766],
       [0.26265535],
       [0.30058318],
       [0.12068953]])