# Attribution Demo Case 2: AddingFactors

## Environment setup

In [6]:
import os
import shutil
import sys
sys.path.append("../src/")

## Basic configuration

In [7]:
from attribution.common.constants import wq
from attribution.common.utils import ParamRFunc
from attribution.attribution_core import (
    data_query, filtered_by_in, portfolio_weights_regularization
)

from attribution.attribution_class import Attribution

2020-07-21 11:41:19,473 - lquantPy.LQuant - INFO - Initial LQuant. This may take some time...
2020-07-21 11:41:19,474 - lquantPy.LQuant - INFO - Initialized LQuant environment
2020-07-21 11:41:19,475 - lquantPy.LQuant - INFO - Initializing LQuant, This will take some time....


Library Path -Djava.library.path=/usr/local/lib/R/site-library/rJava/jri


In [9]:
# Single place for every setting the rest of this notebook reads.
sample_config = {
    # Universe handed to Attribution.set_fmp_universe (as an id, universe_id=True)
    # together with its display name.
    "factor_universe": "CIQ_INDEX_2668794",
    "factor_universe_name": "Russell 1000",
    # Portfolio id: used to fetch the portfolio dates, to build the factor names
    # "IN_<port_id>" and "<port_id>_<weight_tag>", and as the output file name prefix.
    "port_id": "FS_Event_US",
    # Display name interpolated into the PDF titles.
    "port_name": "FS Event",
    "weight_tag": "WEIGHT",
    # Factor ids and their display names (11 entries each).
    # NOTE(review): presumably matched by position -- keep both lists in the same order.
    "style_factors": [
        "EPSYLD_LTM_B", "RTN_12M1M", "ES_EPS_NTM_R3M", "ROE",
        "REAL_VOL", "MKTCAP", "DIVYLD_TRL",
        "BOOKP", "GR_EINTR_EPS", "SMART_CROWDING", "HF_CROWDING",
    ],
    "style_factor_names": [
        "Earnings Yld", "Momentum (12M-1M)", "Revision", "Profitability",
        "Volatility", "Size (MktCap)", "Dividend Yld",
        "Book To Market", "FY1/FY0 Exp Growth",
        "Smart Crowding", "Hedge Fund Crowding",
    ],
    # The first nine style factors only. SMART_CROWDING and HF_CROWDING are left out;
    # those two names are the ones returned by the exterior factors query function
    # registered later in this notebook.
    "style_factor_from_lquant": [
        "EPSYLD_LTM_B", "RTN_12M1M", "ES_EPS_NTM_R3M", "ROE",
        "REAL_VOL", "MKTCAP", "DIVYLD_TRL",
        "BOOKP", "GR_EINTR_EPS",
    ],
    # Output directory. It is deleted and recreated before the attribution run.
    "base_dir": "/mnt/ebs1/data/hwei/attribution/test/case2",
    # R functions passed to set_sector_split_function. Each '\\\\1' reaches R as '\\1',
    # i.e. a back-reference to the first capture group.
    #   1st: 8-digit codes NOT starting with 35 are replaced by their first 2 digits.
    #   2nd: any remaining 8-digit code is replaced by its first 4 digits.
    # NOTE(review): presumably GICS codes, with sector 35 split one level deeper -- confirm.
    "sector_spliting_functions": [
        ParamRFunc("function(x) { str_replace(x, '^(?!35)([0-9]{2})[0-9]{6}$', '\\\\1') }"),
        ParamRFunc("function(x) { str_replace(x, '^([0-9]{4})[0-9]{4}$', '\\\\1') }"),
    ],
}

## Get Portfolio Weight Matrix
- Raw data from the data_query function
- Market value from the filtered_by_in function
- Portfolio weight matrix from the portfolio_weights_regularization function

In [10]:
def get_portfolio_date_list(port_id):
    """Return the dates of portfolio ``port_id`` as reported by the R session.

    An R command is run through ``wq.env()`` that assigns
    ``wq.port.get(id = '<port_id>')$dates()`` to a temporary R variable. The
    variable is read back with ``as_string_array()`` and then removed.

    Args:
        port_id: Portfolio id, interpolated verbatim between single quotes in
            the R command (it must therefore not contain a single quote).

    Returns:
        Whatever ``as_string_array()`` returns for the R ``dates()`` result.

    Raises:
        Any error raised by the R bridge. The temporary R variable is removed
        even when reading it back fails.
    """
    var_name = "tmp"
    r_cmd = "{} <- wq.port.get(id = '{}')$dates()".format(var_name, port_id)
    wq.env().run(r_cmd)
    try:
        date_list = wq.env().get(var_name).as_string_array()
    finally:
        # Always clean up: the R session is shared, so a failed read must not
        # leave a stray `tmp` behind for later commands to pick up.
        wq.env().run("rm({})".format(var_name))
    return date_list

In [11]:
# Fetch every date of the configured portfolio, then report the first and last entry.
portfolio_date_list = get_portfolio_date_list(port_id=sample_config["port_id"])
print(
    "Portfolio[{}], Start[{}], End[{}]".format(
        sample_config["port_id"],
        portfolio_date_list[0],
        portfolio_date_list[-1],
    )
)

Portfolio[FS_Event_US], Start[2009-12-31], End[2019-04-02]


In [12]:
# Query two factors for the portfolio on its own date list:
#   "IN_<port_id>"            -- shown in the next cell as a 0/1 matrix
#   "<port_id>_<weight_tag>"  -- the stored weights
# The result is indexed by factor name in the cells below.
raw_data = data_query(
    universe_name=sample_config["port_id"],
    date_list=portfolio_date_list,
    factor_list=[
        "IN_{}".format(sample_config["port_id"]),
        "{}_{}".format(sample_config["port_id"], sample_config["weight_tag"]),
    ],
    s_date=None,
    e_date=None,
    freq=None,
    weekdays_only=False,
    local_mode=False,
    stocks=False,
    region=False,
)

In [13]:
# Preview the "IN_<port_id>" matrix (one row per security id, one column per date).
raw_data["IN_{}".format(sample_config["port_id"])].head()

Unnamed: 0,2009-12-31,2010-01-31,2010-02-28,2010-03-31,2010-04-30,2010-05-31,2010-06-30,2010-07-31,2010-08-31,2010-09-30,...,2018-06-30,2018-07-31,2018-08-31,2018-09-30,2018-10-31,2018-11-30,2018-12-31,2019-02-01,2019-03-04,2019-04-02
61302.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,0.0
163946.01,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,...,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0
29649.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
105365.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
164494.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,0.0,0.0


In [14]:
# Preview the "<port_id>_<weight_tag>" matrix; the saved output has NaN wherever
# the "IN_<port_id>" preview above shows 0.0.
raw_data["{}_{}".format(sample_config["port_id"], sample_config["weight_tag"])].head()

Unnamed: 0,2009-12-31,2010-01-31,2010-02-28,2010-03-31,2010-04-30,2010-05-31,2010-06-30,2010-07-31,2010-08-31,2010-09-30,...,2018-06-30,2018-07-31,2018-08-31,2018-09-30,2018-10-31,2018-11-30,2018-12-31,2019-02-01,2019-03-04,2019-04-02
61302.01,,,,,,,,,,,...,-0.008496,-0.007895,-0.005339,-0.005153,-0.005146,-0.004664,-0.004488,-0.005303,,
163946.01,0.014763,0.014938,0.016899,0.019331,0.020677,0.021296,0.018412,0.016882,0.018243,0.015199,...,,,,0.005023,0.006676,,,,,
29649.01,,,,,,,,,,,...,,,,,,,,,,
105365.01,,,,,,,,,,,...,,,,,,,,,,0.005186
164494.01,,,,,,,,,,,...,,,,,,-0.005455,-0.005514,-0.005807,,


In [15]:
# Filter the stored weight matrix by the "IN_<port_id>" matrix.
# NOTE(review): in the saved previews the result is identical to the unfiltered
# weights for the rows shown; the exact filtering rule lives in filtered_by_in.
market_value = filtered_by_in(
    signal_matrix=raw_data["{}_{}".format(sample_config["port_id"], sample_config["weight_tag"])],
    in_matrix=raw_data["IN_{}".format(sample_config["port_id"])]
)

In [16]:
# Preview the filtered matrix.
market_value.head()

Unnamed: 0,2009-12-31,2010-01-31,2010-02-28,2010-03-31,2010-04-30,2010-05-31,2010-06-30,2010-07-31,2010-08-31,2010-09-30,...,2018-06-30,2018-07-31,2018-08-31,2018-09-30,2018-10-31,2018-11-30,2018-12-31,2019-02-01,2019-03-04,2019-04-02
61302.01,,,,,,,,,,,...,-0.008496,-0.007895,-0.005339,-0.005153,-0.005146,-0.004664,-0.004488,-0.005303,,
163946.01,0.014763,0.014938,0.016899,0.019331,0.020677,0.021296,0.018412,0.016882,0.018243,0.015199,...,,,,0.005023,0.006676,,,,,
29649.01,,,,,,,,,,,...,,,,,,,,,,
105365.01,,,,,,,,,,,...,,,,,,,,,,0.005186
164494.01,,,,,,,,,,,...,,,,,,-0.005455,-0.005514,-0.005807,,


In [17]:
# Turn the filtered matrix into the portfolio weight matrix fed to Attribution.
# NOTE(review): with method="long_short" the saved preview shows NaN replaced by 0.0
# and the other displayed values unchanged; confirm whether any rescaling is applied.
portfolio_weight = portfolio_weights_regularization(
    portfolio=market_value, method="long_short"
)

In [18]:
# Preview the final weight matrix.
portfolio_weight.head()

Unnamed: 0,2009-12-31,2010-01-31,2010-02-28,2010-03-31,2010-04-30,2010-05-31,2010-06-30,2010-07-31,2010-08-31,2010-09-30,...,2018-06-30,2018-07-31,2018-08-31,2018-09-30,2018-10-31,2018-11-30,2018-12-31,2019-02-01,2019-03-04,2019-04-02
61302.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,-0.008496,-0.007895,-0.005339,-0.005153,-0.005146,-0.004664,-0.004488,-0.005303,0.0,0.0
163946.01,0.014763,0.014938,0.016899,0.019331,0.020677,0.021296,0.018412,0.016882,0.018243,0.015199,...,0.0,0.0,0.0,0.005023,0.006676,0.0,0.0,0.0,0.0,0.0
29649.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
105365.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.005186
164494.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,-0.005455,-0.005514,-0.005807,0.0,0.0


## Attribution class, CASE2, AddingFactors
- Set up inputs and configurations, including add_exterior_factors_query_func to set up external factors
- Calculate
- Get raw output data from calculation results
- Generate figures and PDF files

In [19]:
# Start from an empty output directory: remove any previous run, then recreate it.
# Done in Python rather than via `!rm -rf {path}` so the path is never parsed by a
# shell (no word splitting or glob expansion on spaces / special characters).
# WARNING: this permanently deletes everything under sample_config["base_dir"].
shutil.rmtree(sample_config["base_dir"], ignore_errors=True)  # a missing directory is fine
os.makedirs(sample_config["base_dir"], exist_ok=True)

In [24]:
# Load external factors and define query function
r_cmd = "lqtool.load(file = '/mnt/ebs1/data/Client/woodline/own_13f/data/own_usjp_neu_factor_mats.rData')"
wq.env().run(r_cmd)
str_exterior_factors_query_func = """function(fData, configs) {
  SMART_CROWDING_reshape <- lqtool.reshape.factorMatReshape(SMART_CROWDING, targetMat = fData[['PRCCD']], default = as.numeric(NA))
  HF_OWNERSHIP_reshape <- lqtool.reshape.factorMatReshape(HF_OWNERSHIP, targetMat = fData[['PRCCD']], default = as.numeric(NA))
  return(list('HF_CROWDING' = HF_OWNERSHIP_reshape, 'SMART_CROWDING' = SMART_CROWDING_reshape))
}"""
r_exterior_factors_query_func = ParamRFunc(str_exterior_factors_query_func)

In [25]:
# Configure the attribution job as one fluent chain. Every setter's return value is
# the receiver of the next call, in the same order as separate rebinding statements.
attr_cls = (
    Attribution()
    # where results go and how output files are prefixed
    .set_output_dir(output_dir=sample_config["base_dir"])
    .set_output_file_name_prefix(file_name_prefix=sample_config["port_id"])
    # portfolio weights computed above
    .set_portfolio_matrix(portfolio_matrix=portfolio_weight)
    .run_with_daily_performance(run_daily=True)
    .set_sector_split_function(
        digits=0,
        sector_split_function=sample_config["sector_spliting_functions"],
    )
    .load_axioma_idio_risk(load=True)
    # external factors (the point of this demo case)
    .add_exterior_factors_query_func(func=r_exterior_factors_query_func)
    .set_fmp_universe(
        fmp_universe=sample_config["factor_universe"],
        universe_id=True,
        universe_name=sample_config["factor_universe_name"],
    )
    .set_style_factors(
        style_factors=sample_config["style_factors"],
        style_factor_names=sample_config["style_factor_names"],
        style_factor_from_lquant=sample_config["style_factor_from_lquant"],
    )
)

rAttribution <- Attribution$new()
rAttribution <- rAttribution$setOutputDir(outputDir = '/mnt/ebs1/data/hwei/attribution/test/case2')
rAttribution <- rAttribution$setOutputFileNamePrefix(fileNamePrefix = 'FS_Event_US', timeStamp = 'u%Y%m%d')
rAttribution <- rAttribution$setPortfolioMatrix(portfolioMatrix = pMat)
rm(pMat)
rAttribution <- rAttribution$runWithDailyPerformance(runDaily = TRUE)
rAttribution <- rAttribution$setSectorSplitFunction(digits = 0, sectorSplitFunction = c(function(x) { str_replace(x, '^(?!35)([0-9]{2})[0-9]{6}$', '\\1') }, function(x) { str_replace(x, '^([0-9]{4})[0-9]{4}$', '\\1') }))
rAttribution <- rAttribution$loadAxiomaIdioRisk(load = TRUE)
rAttribution <- rAttribution$addExteriorFactorsQueryFunc(func = function(fData, configs) {
  SMART_CROWDING_reshape <- lqtool.reshape.factorMatReshape(SMART_CROWDING, targetMat = fData[['PRCCD']], default = as.numeric(NA))
  HF_OWNERSHIP_reshape <- lqtool.reshape.factorMatReshape(HF_OWNERSHIP, targetMat = fData[['PRCCD']], 

In [26]:
# Run the attribution calculation (echoed as rAttribution$calculate() on the R side).
attr_cls = attr_cls.calculate()

rAttribution <- rAttribution$calculate()


In [27]:
# Pull the raw calculation results back into Python.
# NOTE(review): the saved output of this cell is
#   AttributeError: 'NoneType' object has no attribute 'toGenericData'
# so in the saved run this assignment did not complete. Re-run from a fresh kernel
# and confirm the call succeeds before relying on the cells below.
attr_data = attr_cls.get_output_raw()

res <- rAttribution$getOutputRaw()


AttributeError: 'NoneType' object has no attribute 'toGenericData'

In [15]:
# List the names of the available result tables.
# NOTE(review): this cell's saved execution count (15) is lower than that of the
# cell assigning attr_data (27), so the saved output comes from an earlier run.
attr_data.keys()

dict_keys(['ICs', 'Low Weight CutRETURN', 'Sectors Turnover', 'Cum Residual Return Contribution', 'Cum SecRetAlloc Contribution', 'Post Cut ResidualCUM_RETURN', 'Low Weight Cut_PERC', 'portfolio_smooth_RETURN', 'Port Adj ADV Perc', 'Correlation', 'FMP Cum Return', 'SecBr_PIT', 'Cum Return Contribution', 'LSreturns', 'Resi Low Weight CutRETURN', 'Bias Stat', 'Concentration Analysis', 'Resi Risk Contribution', 'Sector Decomp Port Return', 'PostSmoothResi_CUM_RETURN', 'Factor Score', 'Cum Return after Adv Cut', 'Post Cut Residual_PERC', 'Residual Risk', 'AttributionSummary_PIT', 'dailyCumReturnContri', 'Exposure (Vol Adj)', 'Portfolio Turnover', 'Cum Portfolio Return Contribution', 'dailyReturnContri_plain', 'fmpDailyReturns', 'Risk Contribution', 'Resi Low Weight CutCUM_RETURN', 'wealth', 'SCs', 'Low Weight CutCUM_RETURN', 'fmpDailyCumReturns', 'portfolio_smooth_CUM_RETURN', 'Rev Concentration Analysis', 'Factor Decile', 'residualSummary_PIT', 'Return after Adv Cut', 'Risk Contribution(%

In [16]:
# Preview the "Summary" table of the raw results.
attr_data["Summary"].head()

Unnamed: 0,Portfolio,Portfolio_COPY
Coverage,305,305
StartDate,2009-12-31,2009-12-31
EndDate,2019-04-04,2019-04-04
Last month return (%),-0.05,-0.05
3 Month return (%),-1.4,-1.4


In [17]:
# Export the results to Excel (echoed as rAttribution$outputExcel() on the R side).
attr_cls = attr_cls.output_excel()

rAttribution <- rAttribution$outputExcel()


In [18]:
# Check which files are present in the "data" sub-directory of the output directory.
!ls {sample_config["base_dir"]}/data

FS_Event_US_u20200719_SummaryAndTS_20200719211408.xlsx
FS_Event_US_u20200720_SummaryAndTS_20200720191525.xlsx


In [19]:
# Render the historical charts first, then the point-in-time (PIT) charts.
attr_cls = attr_cls.output_charts().output_pit_charts()

rAttribution <- rAttribution$outputCharts()
rAttribution <- rAttribution$outputPITCharts()


In [20]:
# Recursively list the generated chart files.
# NOTE(review): the saved listing shows /mnt/ebs1/data/hwei/attribution/test/20200719,
# not the base_dir configured in this notebook (.../test/case2), so it comes from
# an earlier run with a different configuration.
!ls -R {sample_config["base_dir"]}/charts

/mnt/ebs1/data/hwei/attribution/test/20200719/charts:
FS_Event_US_u20200719	   FS_Event_US_u20200720
FS_Event_US_u20200719_PIT  FS_Event_US_u20200720_PIT

/mnt/ebs1/data/hwei/attribution/test/20200719/charts/FS_Event_US_u20200719:
0_1_summaryTable.png
0_2_residualTable.png
10_ReturnContri_Sectors.png
11_Performance_Sectors.png
12_1_ReturnContribution_Residuals.png
12_CumReturns_Residuals.png
13_Risk_vs_Return_Styles.png
14_Risk_vs_Return_Sectors.png
17_10_Communication_Sectors_Exp_vs_Return.png
17_11_Utilities_Sectors_Exp_vs_Return.png
17_12_Real Estate_Sectors_Exp_vs_Return.png
17_1_Energy_Sectors_Exp_vs_Return.png
17_2_Materials_Sectors_Exp_vs_Return.png
17_3_Industrials_Sectors_Exp_vs_Return.png
17_4_Con Discretionary_Sectors_Exp_vs_Return.png
17_5_Con Staples_Sectors_Exp_vs_Return.png
17_6_HealthCare E&S_Sectors_Exp_vs_Return.png
17_7_Pharm&Bio_Sectors_Exp_vs_Return.png
17_8_Financials_Sectors_Exp_vs_Return.png
17_9_Info Tech_Sectors_Exp_vs_Return.png
18_1_

In [21]:
# Set both report titles from the portfolio display name, then build the
# historical PDF followed by the point-in-time PDF.
attr_cls = (
    attr_cls
    .set_pdf_title(
        title="{} Portfolio Historical Attribution".format(sample_config["port_name"])
    )
    .set_pdf_title_pit(
        title="{} Portfolio Attribution Point-In-Time".format(sample_config["port_name"])
    )
    .generate_historical_pdf()
    .generate_pit_pdf()
)

rAttribution <- rAttribution$setPDFTitle(title = 'FS Event Portfolio Historical Attribution')
rAttribution <- rAttribution$setPDFTitle_PIT(title = 'FS Event Portfolio Attribution Point-In-Time')
rAttribution <- rAttribution$generateHistoricalPDF()
rAttribution <- rAttribution$generatePITPDF()


In [22]:
# List the PDF reports written to the output directory.
# NOTE(review): the saved listing shows /mnt/ebs1/data/hwei/attribution/test/20200719,
# not the base_dir configured in this notebook (.../test/case2).
!ls {sample_config["base_dir"]}/*.pdf

/mnt/ebs1/data/hwei/attribution/test/20200719/FS_Event_US_u20200719_Historical.pdf
/mnt/ebs1/data/hwei/attribution/test/20200719/FS_Event_US_u20200719_PIT.pdf
/mnt/ebs1/data/hwei/attribution/test/20200719/FS_Event_US_u20200720_Historical.pdf
/mnt/ebs1/data/hwei/attribution/test/20200719/FS_Event_US_u20200720_PIT.pdf


In [22]:
# Drop the notebook's reference to the Attribution object now that all outputs exist.
del attr_cls

## Visualization (TODO)

### End of file