In [3]:
# Import the modules
import pandas as pd
import numpy as np
from datetime import datetime
from tqdm import tqdm_notebook as tqdm
import statsmodels.api as sm
from astropy.table import QTable, Table, Column
from astropy import units as u

In [4]:
# Import Data
df_monthly = pd.read_excel('PredictorData2019.xlsx',sheet_name="Monthly")
# Parse the dates properly
time = [str(d) for d in df_monthly.yyyymm]
df_monthly.index = pd.to_datetime(time,format="%Y%m")

In [5]:
# Variable construction
df_monthly['ExRet'] = df_monthly['CRSP_SPvw']-df_monthly['Rfree']
df_monthly['DP'] = np.log(df_monthly['D12'])-np.log(df_monthly['Index'])
df_monthly['DY'] = np.log(df_monthly['D12'])-np.log(df_monthly['Index'].shift())
df_monthly['EP'] = np.log(df_monthly['E12'])-np.log(df_monthly['Index'])
df_monthly['DE'] = np.log(df_monthly['D12'])-np.log(df_monthly['E12'])
df_monthly['tms'] = df_monthly['lty']-df_monthly['tbl']
df_monthly['dfr'] = df_monthly['corpr']-df_monthly['ltr']
df_monthly['dfy'] = df_monthly['BAA']-df_monthly['AAA']


# infl needs to be lagged one more month
df_monthly['infl'] = df_monthly['infl'].shift().copy()

# Construction of dependent and independent variables
dep_var = 'ExRet'
indep_vars = ['DE','svar','dfr','lty','ltr','infl','tms','tbl','dfy','DP','DY','EP','b/m','ntis']

# Use the data from 1926/12 to 2019/12
subperiod = df_monthly.index>='1926-12-01'
df = df_monthly[subperiod]
M = 240 # Initial length of estimation window
gam = 3 # risk aversion coefficient

# Create the benchmark using historical average
Hist_Mean = np.asarray(df[dep_var].expanding().mean().shift())
Hist_Variance = np.asarray(df[dep_var].expanding().var().shift())

# Benchmark SSE (Historical Average)
OOS_SSE_Hist = np.sum((df[dep_var][M+1:]-Hist_Mean[M+1:])**2)

# Benchmark Certainty Equivalence
w0 = ((1/gam)*(Hist_Mean/Hist_Variance)).clip(None,1.5);
r0 = df[dep_var]*w0
CE_Hist = np.mean(r0[M+1:])-gam/2*np.var(r0[M+1:],ddof=1)

### The following codes demonstrate how to compute OOS $R^2$ and CEV for one predictive regression (using DY)

In [101]:
Y = np.asarray(df[dep_var])
X = np.asarray(df['DY'])
Y_Hat = np.full(len(Y), np.nan)
X = sm.add_constant(X)
# Note that we start the index at M+1 because the first element of predicted return is at t=M+2.
for i in range(M+1,len(Y)):
    Y1 = Y[1:i]
    X1 = X[0:i-1,:] 
    reg = sm.OLS(Y1, X1, missing='drop').fit()
    Y_Hat[i] = reg.predict(X[i-1,:]) 

    # The predicted value is based on the observation before

In [50]:
OOS_SSE = np.sum((Y[M+1:]-Y_Hat[M+1:])**2)
OOS_R2 = 1-OOS_SSE/OOS_SSE_Hist
w1 = ((1/gam)*(Y_Hat/Hist_Variance)).clip(None,1.5);
r1 = Y*w1
CE = np.mean(r1[M+1:])-gam/2*np.var(r1[M+1:],ddof=1)



### In-sample $R^2$ and out-of-sample $R^2$

In [51]:
reg1 = sm.OLS(Y[1:],X[0:len(Y)-1,:],missing='drop').fit()
IS_R2 = reg1.rsquared
print("IS R^2 = %6.3f"%(100.0*IS_R2))
print("OOS R^2 = %6.3f"%(100.0*OOS_R2))

IS R^2 =  0.435
OOS R^2 = -0.920


### $\Delta CEV$

In [52]:
print('Difference in Certainty Equivalence = %7.4f'%(100*(CE-CE_Hist)))

Difference in Certainty Equivalence = -0.1489


In [23]:
#function for finding the three main deliverables
def r2(var):
    Y = np.asarray(df[dep_var])
    X = np.asarray(df[var])
    Y_Hat = np.full(len(Y), np.nan)
    X = sm.add_constant(X)
    for i in range(M+1,len(Y)):
        Y1 = Y[1:i]
        X1 = X[0:i-1,:] 
        reg = sm.OLS(Y1, X1, missing='drop').fit()
        Y_Hat[i] = reg.predict(X[i-1,:])       # The predicted value is based on the observation before
    OOS_SSE = np.sum((Y[M+1:]-Y_Hat[M+1:])**2)
    OOS_R2 = 1-OOS_SSE/OOS_SSE_Hist
    w1 = ((1/gam)*(Y_Hat/Hist_Variance)).clip(None,1.5);
    r1 = Y*w1
    CE = np.mean(r1[M+1:])-gam/2*np.var(r1[M+1:],ddof=1)
    reg1 = sm.OLS(Y[1:],X[0:len(Y)-1,:],missing='drop').fit()
    IS_R2 = reg1.rsquared
    return((100.0*IS_R2))

def oos(var):
    Y = np.asarray(df[dep_var])
    X = np.asarray(df[var])
    Y_Hat = np.full(len(Y), np.nan)
    X = sm.add_constant(X)
    for i in range(M+1,len(Y)):
        Y1 = Y[1:i]
        X1 = X[0:i-1,:] 
        reg = sm.OLS(Y1, X1, missing='drop').fit()
        Y_Hat[i] = reg.predict(X[i-1,:])       # The predicted value is based on the observation before
    OOS_SSE = np.sum((Y[M+1:]-Y_Hat[M+1:])**2)
    OOS_R2 = 1-OOS_SSE/OOS_SSE_Hist
    w1 = ((1/gam)*(Y_Hat/Hist_Variance)).clip(None,1.5);
    r1 = Y*w1
    CE = np.mean(r1[M+1:])-gam/2*np.var(r1[M+1:],ddof=1)
    reg1 = sm.OLS(Y[1:],X[0:len(Y)-1,:],missing='drop').fit()
    IS_R2 = reg1.rsquared
    return ((100.0*OOS_R2))

def cev(var):
    Y = np.asarray(df[dep_var])
    X = np.asarray(df[var])
    Y_Hat = np.full(len(Y), np.nan)
    X = sm.add_constant(X)
    for i in range(M+1,len(Y)):
        Y1 = Y[1:i]
        X1 = X[0:i-1,:] 
        reg = sm.OLS(Y1, X1, missing='drop').fit()
        Y_Hat[i] = reg.predict(X[i-1,:])       # The predicted value is based on the observation before
    OOS_SSE = np.sum((Y[M+1:]-Y_Hat[M+1:])**2)
    OOS_R2 = 1-OOS_SSE/OOS_SSE_Hist
    w1 = ((1/gam)*(Y_Hat/Hist_Variance)).clip(None,1.5);
    r1 = Y*w1
    CE = np.mean(r1[M+1:])-gam/2*np.var(r1[M+1:],ddof=1)
    reg1 = sm.OLS(Y[1:],X[0:len(Y)-1,:],missing='drop').fit()
    IS_R2 = reg1.rsquared
    return ((100*(CE-CE_Hist)))

In [58]:
t = Table()

t = Table(names=("Variable",'In Sample R^2', 'Out of sample R^2', 'CEV'), dtype=("S2",'f4','f4','f4'))
#t.add_row((r2("svar"), oos("svar"), cev("svar")))

for variable in indep_vars:
    t.add_row((variable, r2(variable), oos(variable), cev(variable)))
    
print(t)

#Q1- finding report from predictive regression


Variable In Sample R^2 Out of sample R^2     CEV     
-------- ------------- ----------------- ------------
      DE  0.0006394605        -0.7178052  0.003935941
    svar   0.009658814        -0.5716513  -0.04712237
     dfr   0.114465244       -0.20323741  0.001140755
     lty     0.1896013       -0.32233942  -0.07532967
     ltr     0.2963518        -0.1928927   0.02597267
    infl    0.13845016         -0.035171 -0.011508059
     tms    0.15968142       0.119112745    0.0493356
     tbl    0.31838322        0.24510683 -0.013011963
     dfy    0.24592756       -0.35886228 -0.061638128
      DP    0.32735413       -0.27166054   -0.1182665
      DY    0.43484208       -0.92020184   -0.1488644
      EP    0.38387606        -1.5207888 -0.089466274
     b/m    0.63807136        -2.3932865  -0.21761905
    ntis     0.4635411        -0.8107004  0.046325497


In [None]:
#The in sample r^2 are all all close to 0 and positive
#Out of sample r^2 is negative and have large numbers
#this demonstrates the data is not linear and has a lot of outliers
#CEV: predicts risk tolreance- these low numbers indicate low appetite for risk

In [None]:
#kitchen sink regression

In [20]:
#function for finding the three main deliverables
def r21(varlist):
    Y = np.asarray(df[dep_var])
    X = np.asarray(df[varlist])
    Y_Hat = np.full(len(Y), np.nan)
    X = sm.add_constant(X)
    for i in range(M+1,len(Y)):
        Y1 = Y[1:i]
        X1 = X[0:i-1,:] 
        reg = sm.OLS(Y1, X1, missing='drop').fit()
        Y_Hat[i] = reg.predict(X[i-1,:])       # The predicted value is based on the observation before
    OOS_SSE = np.sum((Y[M+1:]-Y_Hat[M+1:])**2)
    OOS_R2 = 1-OOS_SSE/OOS_SSE_Hist
    w1 = ((1/gam)*(Y_Hat/Hist_Variance)).clip(None,1.5);
    r1 = Y*w1
    CE = np.mean(r1[M+1:])-gam/2*np.var(r1[M+1:],ddof=1)
    reg1 = sm.OLS(Y[1:],X[0:len(Y)-1,:],missing='drop').fit()
    IS_R2 = reg1.rsquared
    return((100.0*IS_R2))

def oos1(varlist):
    Y = np.asarray(df[dep_var])
    X = np.asarray(df[varlist])
    Y_Hat = np.full(len(Y), np.nan)
    X = sm.add_constant(X)
    for i in range(M+1,len(Y)):
        Y1 = Y[1:i]
        X1 = X[0:i-1,:] 
        reg = sm.OLS(Y1, X1, missing='drop').fit()
        Y_Hat[i] = reg.predict(X[i-1,:])       # The predicted value is based on the observation before
    OOS_SSE = np.sum((Y[M+1:]-Y_Hat[M+1:])**2)
    OOS_R2 = 1-OOS_SSE/OOS_SSE_Hist
    w1 = ((1/gam)*(Y_Hat/Hist_Variance)).clip(None,1.5);
    r1 = Y*w1
    CE = np.mean(r1[M+1:])-gam/2*np.var(r1[M+1:],ddof=1)
    reg1 = sm.OLS(Y[1:],X[0:len(Y)-1,:],missing='drop').fit()
    IS_R2 = reg1.rsquared
    return ((100.0*OOS_R2))

def cev1(varlist):
    Y = np.asarray(df[dep_var])
    X = np.asarray(df[varlist])
    Y_Hat = np.full(len(Y), np.nan)
    X = sm.add_constant(X)
    for i in range(M+1,len(Y)):
        Y1 = Y[1:i]
        X1 = X[0:i-1,:] 
        reg = sm.OLS(Y1, X1, missing='drop').fit()
        Y_Hat[i] = reg.predict(X[i-1,:])       # The predicted value is based on the observation before
    OOS_SSE = np.sum((Y[M+1:]-Y_Hat[M+1:])**2)
    OOS_R2 = 1-OOS_SSE/OOS_SSE_Hist
    w1 = ((1/gam)*(Y_Hat/Hist_Variance)).clip(None,1.5);
    r1 = Y*w1
    CE = np.mean(r1[M+1:])-gam/2*np.var(r1[M+1:],ddof=1)
    reg1 = sm.OLS(Y[1:],X[0:len(Y)-1,:],missing='drop').fit()
    IS_R2 = reg1.rsquared
    return ((100*(CE-CE_Hist)))

In [70]:
k_vars = ['svar','dfr','lty','ltr','infl','tbl','dfy','DP','DY','EP','b/m','ntis']

kitchensink = Table()

kitchensink = Table(names=('In Sample R^2', 'Out of sample R^2', 'CEV'))
    
kitchensink.add_row((r21(k_vars), oos1(k_vars), cev1(k_vars)))
 
#result for kitchen sink regression
print(kitchensink)


  In Sample R^2     Out of sample R^2          CEV        
------------------ ------------------- -------------------
3.1219264453077655 -12.468861821616306 -0.4585651279479323


In [None]:
#running a multiple regression does not improve the linearity of the results
#The in sample R^2 and out of sample R^2 become more extreme
# it is better to predict results using simple regression

In [91]:
#question 2- market risk premium non-negative

#redefining functions under this new rule 

def oos2(var):
    Y = np.asarray(df[dep_var])
    X = np.asarray(df[var])
    Y_Hat = np.full(len(Y), np.nan)
    X = sm.add_constant(X)
    for i in range(M+1,len(Y)):
        Y1 = Y[1:i]
        X1 = X[0:i-1,:] 
        reg = sm.OLS(Y1, X1, missing='drop').fit()
        if reg.predict(X[i-1,:])<0:
            Y_Hat[i]=0
        else:
            Y_Hat[i] = reg.predict(X[i-1,:])     #the change is made here- all negative values are now 0
    OOS_SSE = np.sum((Y[M+1:]-Y_Hat[M+1:])**2)
    OOS_R2 = 1-OOS_SSE/OOS_SSE_Hist
    w1 = ((1/gam)*(Y_Hat/Hist_Variance)).clip(None,1.5);
    r1 = Y*w1
    CE = np.mean(r1[M+1:])-gam/2*np.var(r1[M+1:],ddof=1)
    reg1 = sm.OLS(Y[1:],X[0:len(Y)-1,:],missing='drop').fit()
    IS_R2 = reg1.rsquared
    return ((100.0*OOS_R2))

def cev2(var):
    Y = np.asarray(df[dep_var])
    X = np.asarray(df[var])
    Y_Hat = np.full(len(Y), np.nan)
    X = sm.add_constant(X)
    for i in range(M+1,len(Y)):
        Y1 = Y[1:i]
        X1 = X[0:i-1,:] 
        reg = sm.OLS(Y1, X1, missing='drop').fit()
        if reg.predict(X[i-1,:])<0:
            Y_Hat[i]=0
        else:
            Y_Hat[i] = reg.predict(X[i-1,:]) 
    OOS_SSE = np.sum((Y[M+1:]-Y_Hat[M+1:])**2)
    OOS_R2 = 1-OOS_SSE/OOS_SSE_Hist
    w1 = ((1/gam)*(Y_Hat/Hist_Variance)).clip(None,1.5);
    r1 = Y*w1
    CE = np.mean(r1[M+1:])-gam/2*np.var(r1[M+1:],ddof=1)
    reg1 = sm.OLS(Y[1:],X[0:len(Y)-1,:],missing='drop').fit()
    IS_R2 = reg1.rsquared
    return ((100*(CE-CE_Hist)))


In [94]:
#single variable regression
t1 = Table()

t1 = Table(names=("Variable", 'Out of sample R^2', 'CEV'), dtype=("S2",'f4','f4'))

for variable in indep_vars:
    t1.add_row((variable, oos2(variable), cev2(variable)))
    
print(t1)

#table that gives out of sample R^2 and CEV using only positive market risk premiums (Y^)

Variable Out of sample R^2     CEV     
-------- ----------------- ------------
      DE       -0.69191146 0.0063462476
    svar        -0.5716513  -0.04712237
     dfr       -0.27880135 -0.004435554
     lty        0.51333714 -0.013300285
     ltr       -0.17034924  0.026736056
    infl      -0.006068724  -0.00967761
     tms        0.13921714    0.0496991
     tbl         0.4560654 -0.002042573
     dfy       -0.34492996 -0.061112046
      DP        0.12286735  -0.08646371
      DY       -0.09960274 -0.089261435
      EP        -0.7773259  -0.01412735
     b/m        -1.7703474   -0.1690757
    ntis        -0.8107004  0.046325497


In [None]:
#Making the market risk premium non-negative imrpoved linearity of results 
# This function is a better way of predicting expected returns- there are less outliers

In [16]:
#multivariable regression

#redefining functions 

def oos3(varlist):
    Y = np.asarray(df[dep_var])
    X = np.asarray(df[varlist])
    Y_Hat = np.full(len(Y), np.nan)
    X = sm.add_constant(X)
    for i in range(M+1,len(Y)):
        Y1 = Y[1:i]
        X1 = X[0:i-1,:] 
        reg = sm.OLS(Y1, X1, missing='drop').fit()
        if reg.predict(X[i-1,:])<0:
            Y_Hat[i]=0
        else:
            Y_Hat[i] = reg.predict(X[i-1,:])     #the change is made here- all negative values are now 0
    OOS_SSE = np.sum((Y[M+1:]-Y_Hat[M+1:])**2)
    OOS_R2 = 1-OOS_SSE/OOS_SSE_Hist
    w1 = ((1/gam)*(Y_Hat/Hist_Variance)).clip(None,1.5);
    r1 = Y*w1
    CE = np.mean(r1[M+1:])-gam/2*np.var(r1[M+1:],ddof=1)
    reg1 = sm.OLS(Y[1:],X[0:len(Y)-1,:],missing='drop').fit()
    IS_R2 = reg1.rsquared
    return ((100.0*OOS_R2))

def cev3(varlist):
    Y = np.asarray(df[dep_var])
    X = np.asarray(df[varlist])
    Y_Hat = np.full(len(Y), np.nan)
    X = sm.add_constant(X)
    for i in range(M+1,len(Y)):
        Y1 = Y[1:i]
        X1 = X[0:i-1,:] 
        reg = sm.OLS(Y1, X1, missing='drop').fit()
        if reg.predict(X[i-1,:])<0:
            Y_Hat[i]=0
        else:
            Y_Hat[i] = reg.predict(X[i-1,:]) 
    OOS_SSE = np.sum((Y[M+1:]-Y_Hat[M+1:])**2)
    OOS_R2 = 1-OOS_SSE/OOS_SSE_Hist
    w1 = ((1/gam)*(Y_Hat/Hist_Variance)).clip(None,1.5);
    r1 = Y*w1
    CE = np.mean(r1[M+1:])-gam/2*np.var(r1[M+1:],ddof=1)
    reg1 = sm.OLS(Y[1:],X[0:len(Y)-1,:],missing='drop').fit()
    IS_R2 = reg1.rsquared
    return ((100*(CE-CE_Hist)))

In [17]:
k_vars = ['svar','dfr','lty','ltr','infl','tbl','dfy','DP','DY','EP','b/m','ntis']

kitchensink = Table()

kitchensink = Table(names=('Out of sample R^2', 'CEV'))
    
kitchensink.add_row((oos3(k_vars), cev3(k_vars)))
 
#result for kitchen sink regression
print(kitchensink)

Out of sample R^2           CEV         
------------------ ---------------------
-6.803982814387743 -0.002086763174287973


In [None]:
#while the out of sample R^2 is less extreme, it is still less accurate compared to the simple regression analysis

In [None]:
#question 3- two combination forecasts

In [6]:
#find the combination forcast using mean and median
#reinitialzing a new database using the values from the old database
df1= pd.DataFrame(columns=["B/M", "TBL", "LTY", "NTIS", "INFL", "LTR", "SVAR", "dy", "dp", "ep", "de", "TMS", "DFR", "DFY"])
df1["B/M"]= df['b/m']
df1["TBL"]= df['tbl']
df1["LTY"]= df["lty"]
df1["NTIS"]= df["ntis"]
df1["INFL"]= df["infl"]
df1["LTR"]= df["ltr"]
df1["SVAR"]= df["svar"]
df1["dy"]=df["DY"]
df1["dp"]= df["DP"]
df1["ep"]= df["EP"]
df1["de"]= df["DE"]
df1["TMS"]= df["tms"]
df1["DFR"]= df["dfr"]
df1["DFY"]= df["dfy"]


df["mean_var"]= df1.mean(axis=1)
df1

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["mean_var"]= df1.mean(axis=1)


Unnamed: 0,B/M,TBL,LTY,NTIS,INFL,LTR,SVAR,dy,dp,ep,de,TMS,DFR,DFY
1926-12-01,0.441476,0.0307,0.0354,0.050885,0.005682,0.0078,0.000465,-2.956570,-2.973012,-2.386837,-0.586175,0.0047,-0.0022,0.0100
1927-01-01,0.443706,0.0323,0.0351,0.050833,0.000000,0.0075,0.000470,-2.963349,-2.942374,-2.374773,-0.567601,0.0028,-0.0019,0.0095
1927-02-01,0.428501,0.0329,0.0347,0.051681,-0.011299,0.0088,0.000287,-2.932946,-2.979535,-2.430353,-0.549182,0.0018,-0.0019,0.0092
1927-03-01,0.469765,0.0320,0.0331,0.046370,-0.005714,0.0253,0.000924,-2.970053,-2.976535,-2.445079,-0.531456,0.0011,-0.0170,0.0092
1927-04-01,0.456754,0.0339,0.0333,0.050518,-0.005747,-0.0005,0.000603,-2.967143,-2.984225,-2.471309,-0.512916,-0.0006,0.0060,0.0090
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2019-08-01,0.237917,0.0195,0.0163,-0.010244,0.001671,0.0797,0.004318,-3.959588,-3.941330,-3.086025,-0.855305,-0.0032,-0.0059,0.0089
2019-09-01,0.233377,0.0189,0.0170,-0.010959,-0.000051,-0.0192,0.000605,-3.934654,-3.951689,-3.108987,-0.842702,-0.0019,0.0002,0.0088
2019-10-01,0.232261,0.0165,0.0171,-0.013267,0.000783,-0.0052,0.001510,-3.945758,-3.965984,-3.112869,-0.853115,0.0006,0.0058,0.0091
2019-11-01,0.223938,0.0154,0.0181,-0.007907,0.002286,-0.0059,0.000306,-3.960088,-3.993568,-3.130267,-0.863301,0.0027,0.0073,0.0088


In [10]:
#finding out of sample r^2 and cev for mean 

Y= np.asarray(df[dep_var])
X= np.asarray(df['mean_var'])
Y_Hat= np.full(len(Y), np.nan)
X= sm.add_constant(X)

for a in range(M+1, len(Y)):
    Y1= Y[1:a]
    X1=X[0:a-1,:]
    reg= sm.OLS(Y1,X1, missing="drop").fit()
    Y_Hat[a]= reg.predict(X[a-1,:])
    
OOS_SSE= np.sum((Y[M+1:]-Y_Hat[M+1:])**2)
OOS_R2= 1-OOS_SSE/OOS_SSE_Hist
w1= ((1/gam)*(Y_Hat/Hist_Variance)).clip(None,1.5);
r1= Y*w1
CE = np.mean(r1[M+1:])-gam/2*np.var(r1[M+1:],ddof=1)

arr=np.array(Y_Hat).tolist()
df1["Y_m_pred"]=arr

print("OOS R^2 = %6.3f"%(100.0*OOS_R2))
print('Difference in Certainty Equivalence = %7.4f'%(100*(CE-CE_Hist)))



OOS R^2 = -0.942
Difference in Certainty Equivalence = -0.1530


In [11]:
# repeat same process for median
df["median_var"]= df1.median(axis=1)

Y= np.asarray(df[dep_var])
X= np.asarray(df['median_var'])
Y_Hat= np.full(len(Y), np.nan)
X= sm.add_constant(X)

for a in range(M+1, len(Y)):
    Y1= Y[1:a]
    X1=X[0:a-1,:]
    reg= sm.OLS(Y1,X1, missing="drop").fit()
    Y_Hat[a]= reg.predict(X[a-1,:])
    
OOS_SSE= np.sum((Y[M+1:]-Y_Hat[M+1:])**2)
OOS_R2= 1-OOS_SSE/OOS_SSE_Hist
w1= ((1/gam)*(Y_Hat/Hist_Variance)).clip(None,1.5);
r1= Y*w1
CE = np.mean(r1[M+1:])-gam/2*np.var(r1[M+1:],ddof=1)

arr=np.array(Y_Hat).tolist()
df1["Y_m_pred"]=arr

print("OOS R^2 = %6.3f"%(100.0*OOS_R2))
print('Difference in Certainty Equivalence = %7.4f'%(100*(CE-CE_Hist)))


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["median_var"]= df1.median(axis=1)


OOS R^2 = -0.592
Difference in Certainty Equivalence = -0.0165


In [None]:
#Interpretation: values for mean are not a good predictor as the numbers are high and away from 0- however they are a better predictor compared to regression
#for median- it is a good predictor, the out of sample r^2 and CEV is close to 0 