# Analyze correlators

Perform model averaging on 4pt function for free theory
Nov 28, 2022

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from ipywidgets import *
import glob 
import sys
import pickle 
import scipy.special as sc
# from scipy.special import logsumexp as sc.logsumexp

## Import fitting modules
import gvar as gv
import lsqfit


In [2]:
%matplotlib widget

In [3]:
from modules import *

$$ \frac{\langle \phi_4 \rangle }{ \langle \phi_2^2 \rangle} = \sum_{l,\Delta} A_{l,\Delta} \cosh\left[c \ (\Delta + l ) (t-T/2) \right] $$

In [4]:
### Perform model averaging : done in separate .py file as well

# if __name__=="__main__":
    
#     ### Set values for 2pt function and speed of light #####
#     dict_global={}
#     dict_global['4']=  {'2pt':1.126500594310e-02, 'c':2.758810226094e-01}
#     dict_global['8']=  {'2pt':5.591911340388e-03, 'c':1.395714549742e-01}
#     dict_global['16']= {'2pt':2.790831203460e-03, 'c':6.999296068404e-02}
#     print(dict_global)
    
#     ### Load entire data ####
#     data_dict={}

#     s_list=[4,8,16]
#     data_dir='/projectnb/qfe/vayyar/qfe_rs2/qfe_code/qfe_3d/analysis_code/14_free_theory_fits/data/'
#     for s in s_list:
        
#         print(data_dir+'s2xr_free_q5k{0}t*_4pt_pl.dat'.format(s))
#         fname=glob.glob(data_dir+'s2xr_free_q5k{0}t*_4pt_pl.dat'.format(s))[0]
#         print(fname,s)
#         df,Lt=f_get_data_df(fname,dict_global)
#         data_dict[str(s)]={'df':df,'Lt':Lt}
    
#     ### Perform all fits and save results ###
    
#     save_loc='/projectnb/qfe/vayyar/qfe_rs2/qfe_data/fit_results/free_theory_fit_results_rxs2/'

#     cols=['s','l','E0','a0','E1','a1','const']
#     df_results=pd.DataFrame(columns=cols)
#     count=0 
#     # fits_list=[]

#     for s in [str(i) for i in s_list][:]:
#         for l in [0,2,4,6][:]:
#             print(s,l)
#             df_temp=data_dict[s]['df']
#             df_temp=df_temp[df_temp.l==l][['t','coeff']]
#             Lt=data_dict[s]['Lt']

#             tmax=int(np.max(df_temp.t.values))
#             print(df_temp.shape)        
#             df_fits,fits_list=f_fit_all_models(df_temp,dict_global,l,s,Lt,tmin_max=int(Lt//4),tmax=tmax,num_exp_min=2,num_exp_max=2)
#     #         print(df_fits.shape,len(fits_list))

#             ##### Save fits ########
#             ## Save fit results in dataframe
#             fname=save_loc+'l{0}_s{1}.df_fits'.format(l,s)
#             print(fname)
#             df_fits.to_pickle(fname)

#     #         print(fits_list)
#             ## Save fit objects as a list. To compare fit with dataframe, you need to get the correct row.
#     #         fname2=save_loc+'l{0}_s{1}.fits'.format(l,s)
#     #         with open(fname2,'wb') as f: 
#     #             pickle.dump(fits_list,f)

#     #         Read fits from file
#     #         df_fits2=pd.read_pickle(fname)
#     #         with open(fname2,'rb') as f: 
#     #             new_list=pickle.load(f)

### Load data for troubleshooting fits

In [5]:
if __name__=="__main__":
    
    ## Read global data from files and store in dictionary ######

    data_dir='../data/free_theory/'

    a1=np.loadtxt(data_dir+'anti_2pt.dat')
    a2=np.loadtxt(data_dir+'lattice_spacing.dat')
    assert a1.shape==a2.shape, "Error in 2pt or lattice spacing files"

    dict_global={}
    # format : dict_global['4']=  {'2pt':1.126500594310e-02, 'c':2.758810226094e-01}
    
    for i,j in zip(a1,a2):
        key=i[0]
        val1=i[1]
        val2=j[1]
        assert i[0]==j[0], "Error in (%d,%d) "%(i[0],j[0]) ## Ensure both files have same 's' values

        dict_global[str(int(key))]={'2pt':val1, 'c': val2}

    print(dict_global)
    
    ### Load entire data ####
    data_dict={}

    s_list=[4,8,16]
    for s in s_list:
        
        print(data_dir+'s2xr_free_q5k{0}t*_4pt.dat'.format(s))
        fname=glob.glob(data_dir+'s2xr_free_q5k{0}t*_4pt.dat'.format(s))[0]
        print(fname,s)
        df,Lt=f_get_data_df(fname,dict_global)
        data_dict[str(s)]={'df':df,'Lt':Lt}


{'2': {'2pt': 0.02317760518557, 'c': 0.5270296682649}, '4': {'2pt': 0.0112650059431, 'c': 0.2758719374171}, '6': {'2pt': 0.007469977974209, 'c': 0.1855242165075}, '8': {'2pt': 0.005591911340388, 'c': 0.1395708555955}, '12': {'2pt': 0.003722883903237, 'c': 0.09325201809255}, '16': {'2pt': 0.00279083120346, 'c': 0.06999293504594}, '24': {'2pt': 0.001859918908819, 'c': 0.04668766899803}, '32': {'2pt': 0.001394772259558, 'c': 0.03502250646692}}
../../data/free_theory/data_2/s2xr_free_q5k4t*_4pt.dat
../../data/free_theory/data_2/s2xr_free_q5k4t64_4pt.dat 4
4 64
../../data/free_theory/data_2/s2xr_free_q5k8t*_4pt.dat
../../data/free_theory/data_2/s2xr_free_q5k8t128_4pt.dat 8
8 128
../../data/free_theory/data_2/s2xr_free_q5k16t*_4pt.dat
../../data/free_theory/data_2/s2xr_free_q5k16t256_4pt.dat 16
16 256


In [6]:

def f_avg_all_models(df_fits, num_exp_list):
    ''' Module to implement model averaging for different nexp and tmin values for fixed tmax'''
    
    
    
    ## Dataframe storing model averaged parameters and other info
    num_exp_max=np.max(np.unique(df_fits.num_exp.values))
    model_params=["a{0}".format(i) for i in range(num_exp_max)]+["E{0}".format(i) for i in range(num_exp_max)] + ['const']
    df_avg=pd.DataFrame(columns=model_params)
    
    # Cut data
    ## Only select fits with given number of exponenets
    df_fits=df_fits[df_fits.num_exp.isin(num_exp_list)] 
    ## Select fits with small chi2
    df_fits=df_fits[(df_fits.chi2_dof<2.0)]
    
    if df_fits.shape[0]<1: ## Fitting procedure didn't work well.
        for par in model_params:
            df_avg.loc['num_avg',par]=0
            df_avg.loc['value',par]=np.nan
        return df_fits,df_avg
    
    # values of nexp that are present in df_fits after cuts
    
    print("Exp values after cut ",np.unique(df_fits.num_exp.values))
        
    ### Normed prob for entire fit
    S=sc.logsumexp(df_fits.fit_wt.values)
#     print(S)
    df_fits['normed_prob']=df_fits.apply(lambda row : np.exp(row.fit_wt-S),axis=1)
    
    ## Select most probable fits and repeat normalization
    df_fits=df_fits[df_fits.normed_prob>1e-2]
    S=sc.logsumexp(df_fits.fit_wt.values)
    df_fits['normed_prob']=df_fits.apply(lambda row : np.exp(row.fit_wt-S),axis=1)
    
    
    # Compute model average parameters
    dict1={'name':'model_avg','fit_IC':None,'Q':None,'prob_AIC':None}
    
    for par in model_params:
        df_1=df_fits[['fit_wt',par]]
        # Normed prob for each parameter
        S=sc.logsumexp(df_1.fit_wt.values)
        df_1['normed_prob']=df_1.apply(lambda row : np.exp(row.fit_wt-S),axis=1)
#         print(par,df_1.shape)
        df_avg.loc['num_in_avg',par]=df_1.shape[0]
#         display(df_1)
        df_1=df_1[df_1.normed_prob>1e-2]
        
        dict1[par]=model_avg(df_1[par].values, df_1['normed_prob'].values)
        df_avg.loc['value',par]=dict1[par]
    
    dict1['normed_prob']=sum(df_fits.normed_prob)
#     print(dict1)
    df_fits=pd.concat([df_fits,pd.DataFrame(dict1,index=[df_fits.shape[0]+1])])

    return df_fits,df_avg

### Read stored data and average

In [7]:
save_loc='../../data/stored_results/'

s_list=[4,8,16,32]
# fits_list=[]

all_pars=["a{0}".format(i) for i in range(4)]+["E{0}".format(i) for i in range(4)]+['const']
cols=['s','l']+all_pars
df_results=pd.DataFrame(columns=cols)
    
    
for s in [str(i) for i in s_list][:]:
    for l in [0,2,4,6][:]:
        print(s,l)
#         Read fits from file
        fname=save_loc+'l{0}_s{1}.df_fits'.format(l,s)
        print(fname)
        df_fits=pd.read_pickle(fname)
#         print(df_fits.shape)
#         display(df_fits)
#         ## Read stored fit objects in sequence
#         fname2=save_loc+'l{0}_s{1}.fits'.format(l,s)
#         print(fname2)
#         with open(fname2,'rb') as f: 
#             fits_list=pickle.load(f)
        
#         print(np.unique(df_fits.num_exp.values))
        df_fits,df_avg=f_avg_all_models(df_fits,np.arange(1,5))
        print(df_fits.shape)
        # Store result
        dict2=dict.fromkeys(all_pars,np.nan)
        dict2['s']=int(s)
        dict2['l']=l

        count=0
        pars=df_avg.columns
        for key in pars:    dict2[key]=df_avg.loc['value',key]
#         print(dict2)
        df_results=pd.concat([df_results,pd.DataFrame(dict2,index=[count])])
        count+=1

4 0
../../data/stored_results/l0_s4.df_fits
Exp values after cut  [2 3 4]
(14, 24)
4 2
../../data/stored_results/l2_s4.df_fits
Exp values after cut  [2 3 4]
(8, 24)
4 4
../../data/stored_results/l4_s4.df_fits
Exp values after cut  [2 3 4]
(7, 24)
4 6
../../data/stored_results/l6_s4.df_fits
Exp values after cut  [2 3 4]
(10, 24)
8 0
../../data/stored_results/l0_s8.df_fits


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_1['normed_prob']=df_1.apply(lambda row : np.exp(row.fit_wt-S),axis=1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_1['normed_prob']=df_1.apply(lambda row : np.exp(row.fit_wt-S),axis=1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_1['normed_prob']=df_1.apply(lambda row : np.exp(row.fit_

Exp values after cut  [2 3 4]
(10, 24)
8 2
../../data/stored_results/l2_s8.df_fits
Exp values after cut  [2 3 4]
(6, 24)
8 4
../../data/stored_results/l4_s8.df_fits
Exp values after cut  [2 3 4]
(6, 24)
8 6
../../data/stored_results/l6_s8.df_fits
Exp values after cut  [2 3 4]
(6, 24)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_1['normed_prob']=df_1.apply(lambda row : np.exp(row.fit_wt-S),axis=1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_1['normed_prob']=df_1.apply(lambda row : np.exp(row.fit_wt-S),axis=1)


### View results

In [8]:
df_results.sort_values(by=['l','s'])


Unnamed: 0,s,l,a0,a1,a2,a3,E0,E1,E2,E3,const
0,4,0,8(166),3(166),nan +- nan,nan +- nan,1.0(3.3),2.95(43),nan +- nan,nan +- nan,0.0023185(28)
0,8,0,7.9589(11),2.78(23),2.8(1.3),nan +- nan,0.999811(28),3.023(44),5.9(1.1),nan +- nan,0.00210524(36)
0,4,2,0.000489(30),3.89(66),1.41(80),nan +- nan,1.9381(76),2.935(16),4.59(40),nan +- nan,7.91(30)e-12
0,8,2,0.000426(12),4.1970(16),1.91(14),2.32(39),1.9834(35),2.98402(11),4.981(45),7.92(61),5.12(11)e-12
0,4,4,0.00059(66),0.0034(14),2.822(64),nan +- nan,2.75(23),3.21(26),4.644(10),nan +- nan,5.1(4.5)e-14
0,8,4,0.0001540(77),2.64(86),1.07(67),nan +- nan,2.9711(91),4.881(37),6.02(61),nan +- nan,1.5(2.2)e-14
0,4,6,0.00118790(50),0.0016(10),2.2(4.7),nan +- nan,0.999268(79),3.07(25),6.01(93),nan +- nan,3.5058(13)e-07
0,8,6,0.000091618(21),0.000115(26),2.616(90),2.02(26),0.999823(44),3.080(92),6.730(16),9.16(37),2.42333(54)e-08


## Troubleshooting 

### View fits dataframe

In [11]:
s=16;l=0
print(s,l)
fname=save_loc+'l{0}_s{1}.df_fits'.format(l,s)
df_fits=pd.read_pickle(fname)

16 0


In [12]:
sub_cols=['name','chi2_dof','fit_wt','num_exp','a0','a1','a2','a3','E0','E1','E2','E3']
df_fits[sub_cols].sort_values(by=['fit_wt'],ascending=False).head(15)

Unnamed: 0,name,chi2_dof,fit_wt,num_exp,a0,a1,a2,a3,E0,E1,E2,E3
131,trange_3-128,0.005102794,-12.295962,4,7.98531(69),2.799(88),2.77(21),3.60(15),0.999957(18),3.024(20),5.83(28),12.9(1.1)
132,trange_4-128,0.0009160847,-13.052675,4,7.98514(76),2.74(14),2.52(43),3.27(30),0.999953(19),3.013(28),5.59(49),11.5(1.9)
130,trange_2-128,0.03569327,-13.088057,4,7.98574(62),2.896(52),3.123(97),4.092(73),0.999967(17),3.044(13),6.23(16),15.30(57)
133,trange_5-128,0.000189358,-14.010793,4,7.98507(82),2.71(20),2.33(81),3.02(59),0.999951(21),3.008(38),5.43(80),10.5(3.2)
71,trange_7-128,0.01700214,-14.969122,3,7.98559(64),2.874(59),3.18(11),,0.999963(17),3.039(14),6.19(17),
134,trange_6-128,4.308638e-05,-15.002434,4,7.98503(88),2.69(28),2.2(1.4),2.8(1.1),0.999950(22),3.004(50),5.3(1.3),9.9(5.4)
72,trange_8-128,0.00610535,-15.344952,3,7.98538(68),2.824(77),3.00(16),,0.999958(18),3.028(18),5.99(24),
70,trange_6-128,0.04937213,-15.838897,3,7.98594(60),2.939(43),3.385(69),,0.999972(16),3.053(11),6.46(12),
135,trange_7-128,1.05143e-05,-16.000589,4,7.98501(94),2.68(38),2.1(2.5),2.7(2.1),0.999950(23),3.002(65),5.2(2.0),9.4(9.0)
73,trange_9-128,0.002264383,-16.126805,3,7.98524(71),2.79(10),2.85(25),,0.999955(19),3.021(22),5.83(33),


In [13]:
# df_fits.sort_values(by=['chi2_dof'],ascending=True)

### Perform fit for specific trange

In [None]:
s='4';l=0
df_temp=data_dict[s]['df']
Lt=data_dict[s]['Lt']
df_temp=df_temp[df_temp.l==l][['t','coeff']]

Corr=corr(df=df_temp,l=l,c=dict_global[str(s)]['c'],s=s,Lt=Lt)

In [None]:
tmax=int(np.max(df_temp.t.values)) # Max value of t obtained from dataframe

func_w=Dropdown(options=[f_multi_exp,f_multi_exp2,f_cosh],value=f_multi_exp2,description='fit func',disabled=False)
tr=IntRangeSlider(value=[20,tmax],min=0,max=tmax,step=1,description='trange')
pr=IntRangeSlider(value=[0,tmax],min=0,max=tmax,step=1,description='plot range')
pr_w=widgets.Checkbox(value=False,description='use_prior',indent=False,disabled=False)
plt_w=Checkbox(value=True,description='plot',indent=False,disabled=False)
numexp_w=IntSlider(value=2,min=1,max=8,step=1,description='num_exp')
verbose_w=IntSlider(value=0,min=-1,max=2,step=1,description='verbose_w')

# (self,f_make_pars,func,fit_range,plt_range,num_exp=3,verbose=0,use_prior=False,plot=True)

v1=VBox([tr,pr])
v2=VBox([numexp_w,verbose_w])
v3=VBox([pr_w,plt_w])

ui=HBox(children=[v1,func_w,v2,v3])
        
out=interactive_output(Corr.f_perform_fit_exp, {'f_make_pars':fixed(f_make_pars),
         'func':func_w,'fit_range':tr, 'plt_range':pr,'num_exp':numexp_w, 
         'verbose':verbose_w,'use_prior':pr_w,'plot':plt_w})

display(out,ui)

In [None]:
# Corr.f_perform_fit_exp(f_make_pars,f_multi_exp2,np.arange(6,24),[0,32],1,0,False,True) # l=0,1,2,3

In [None]:
pwd

In [None]:
df_fits[df_fits.num_exp.isin([4])]

In [None]:
df_fits[df_fits.num_exp==2]

In [None]:
df_fits.num_exp.values

In [None]:
ls /projectnb/qfe/vayyar/qfe_rs2/qfe_data/fit_results/free_theory_fit_results_rxs2_2/

In [21]:
8**4 * 4 * 16 * 2 * 8  / 1e6, 8**4 * 4 * 9 * 2 * 8  / 1e6

(4.194304, 2.359296)

In [24]:
8**4 * 4 * 4 * 2 * 4 / 1e6

0.524288