# Analysis of all results with a Python notebook

Steps:

1. Collect all simulation results into a Python-readable file, after reblocking them

2. Analyze molecules, QMC versus DFT

3. Analyze dimer binding energies \
    a. Do extrapolations with and without Jopt on the molecules \
    b. Define gold, silver, bronze schemes, and their cost

4. Compare binding energies with other references 

5. Tool to predict QMC cost \
    a. Multilinear fit of the variance \
    b. Use of Student's t distribution to estimate cost 

6. Plots for some relevant cases

7. Plots for the paper


In [1]:
import math
import os
import re
import subprocess

# --- Settings shared by the analysis cells ---------------------------------

# Time in a.u. (per the name); the DMC-collection cell divides it by the time
# step to obtain the number of initial steps handed to reblock.
equil_time_au = 10.0

# Energy unit used for reporting; it must be a key of both `units` and
# `conv2au`.  Alternative kept from the original notebook: 'au'
pickunits = 'kcal/mol'

# Integer code of each energy unit, numbered from 1 in the order listed.
# The code of `pickunits` is what gets typed into reblock's input.
units = {
    label: code
    for code, label in enumerate(
        ( 'au', 'eV', 'kcal/mol', 'au/atom', 'eV/atom', 'kcal/mol/atom' ),
        start=1,
    )
}

# Size of one atomic unit of energy (hartree) expressed in each unit.
# Despite the name, a value given in one of these units is converted to a.u.
# by DIVIDING by the factor (the variance is scaled with factor**-2).
conv2au = {
    'au': 1.0,
    'eV': 27.21138505,
    'kcal/mol': 627.503,
    'kJ/mol': 2625.5,
    'cm^-1': 219474.63,
    'K': 315777.0,
    'J': 43.60E-19,
    'Hz': 6.57966E+15,
}

# Container filled by the collection cells.
results = dict()

# Machine-specific absolute locations.
dir_analysis = '/Users/zen/Dropbox/WORK/2021_S66/ANALYSIS/'

# Path of the reblock executable; the build for the other machine was
#   /Users/zen/CASINO/bin_qmc//utils/macos-gnu-parallel.Andreas-iMac/reblock
reblockexe = '/Users/zen/CASINO/bin_qmc/utils/macos-gnu-parallel.Andreas-MacBook-Pro-2021/reblock'

# Molecule labels (where they are consumed is not visible in this part of
# the notebook).
mol_names = tuple( """
    AcNH2 AcOH Benzene Cyclopentane Ethene Ethyne MeNH2
    MeOH Neopentane Pentane Peptide Pyridine Uracil Water
""".split() )

In [2]:
### THIS IS A TEST
# Reblock every DMC run found in one dimer directory and gather, per time
# step, the total energy and the run diagnostics into `results`.

#basedir = os.getcwd() +'/../DATA_CAM/Mol_Water_01_2/DIM'
basedir = dir_analysis +'/../DATA_CAM/01_Water-Water/DIM'
dmcdirs = [ s for s in os.listdir( basedir ) if s.startswith('DMC') ]

print(dmcdirs)

results = {}


def _run_reblock( dmcpath, equil ):
    """Run reblock inside `dmcpath` and return the path of its saved output.

    The program is started directly (no shell) with `dmcpath` as its working
    directory, so the notebook's own working directory is left untouched.
    Its standard output is written to `<dmcpath>/_reblock`, as before.

    The answers fed on stdin are, in order: `equil`, the integer code of
    `pickunits`, and the literal '-1' (presumably reblock's "choose the block
    length automatically" answer -- confirm against the CASINO manual).
    """
    answers = '{eq}\n{u}\n{b}\n'.format( eq=equil, u=units[pickunits], b='-1' )
    outfile = os.path.join( dmcpath, '_reblock' )
    with open( outfile, 'w' ) as out:
        proc = subprocess.run( [ reblockexe ], input=answers, text=True,
                               stdout=out, cwd=dmcpath )
    if proc.returncode != 0:
        # Do not abort: the output may still be usable, and an incomplete
        # one is rejected by the parser anyway.
        print( f'Warning: reblock exited with status {proc.returncode} in {dmcpath}' )
    return outfile


def _parse_reblock( outfile ):
    """Return the quantities found in a reblock output file as a fresh dict.

    A new dict is built for every file, so nothing can leak in from a
    previously parsed run.  'ene' and 'err' are kept as the strings printed
    by reblock; every other entry is converted to a number.

    Raises:
        ValueError: if any field needed for the summary is absent.
    """
    parsed = {}
    # 'Std error' lines are generic: each flag ties the next such line to
    # the quantity whose mean has just been read.
    want_poperr = False
    want_tauefferr = False
    with open( outfile, 'r' ) as fh:
        for line in fh:
            words = line.split()
            if '   Total energy : ' in line:
                parsed['ene'] = words[-2]
                parsed['err'] = words[-1]
            if 'Mean population :' in line:
                parsed['pop'] = float(words[-1])
                want_poperr = True
            if 'Std error :' in line and want_poperr:
                parsed['poperr'] = float(words[-1])
                want_poperr = False
            if 'Mean time step' in line:
                parsed['taueff'] = float(words[-1])
                want_tauefferr = True
            if 'Std error' in line and want_tauefferr:
                parsed['tauefferr'] = float(words[-1])
                want_tauefferr = False
            if '   Variance ' in line:
                parsed['var'] = float(words[-1])
            if 'Correlation time' in line:
                parsed['autocorrtime'] = float(words[-1])
            if 'lines of data in total' in line:
                parsed['Nsteps'] = int(words[2])
            if 'Effective population size :' in line:
                parsed['popeff'] = float(words[-2])
                parsed['popefferr'] = float(words[-1])

    needed = ( 'ene', 'err', 'pop', 'poperr', 'var', 'taueff',
               'autocorrtime', 'Nsteps' )
    missing = [ key for key in needed if key not in parsed ]
    if missing:
        raise ValueError( 'reblock output lacks: ' + ', '.join( missing ) )
    return parsed


for dmcdir in sorted(dmcdirs):
    print('{:^40}'.format(dmcdir))
    # Directory names look like <label>_<timestep>; split on the LAST
    # underscore so that labels containing underscores are accepted too.
    try:
        name, tau = dmcdir.rsplit( '_', 1 )
        ftau = float(tau)
    except ValueError:
        print(f'Skipping {dmcdir}: name is not of the form <label>_<timestep>')
        continue
    # Number of initial steps covering `equil_time_au` at this time step.
    equil = int(math.ceil( equil_time_au / ftau ))

    results[dmcdir] = {
            'name' : name,
            'tau' : ftau,
            }

    for conf in [ 'DIM' ]:
        if conf == 'DIM':
            dmcpath = os.path.join( basedir, dmcdir )
        else:
            # Not reached while the list above only holds 'DIM'.
            #l = dmcdir.split('_')
            #dmcdirmol = l[0]+'_Jopt_'+l[1]
            #dmcpath = os.path.join( basedir, conf, dmcdirmol )
            dmcpath = os.path.join( basedir, conf, dmcdir )
        try:
            if not os.path.isdir( dmcpath ):
                raise FileNotFoundError( f'no such directory: {dmcpath}' )
            if not os.path.isfile( os.path.join( dmcpath, 'dmc.hist' ) ):
                # Runs without a dmc.hist file are skipped silently.
                continue

            parsed = _parse_reblock( _run_reblock( dmcpath, equil ) )
            ene, err = parsed['ene'], parsed['err']
            pop, poperr = parsed['pop'], parsed['poperr']
            taueff = parsed['taueff']
            tsim = parsed['Nsteps']*ftau
            # The variance is multiplied by the mean population and scaled
            # with the square of the unit factor to express it in a.u.
            var_au = parsed['var']*pop*conv2au[pickunits]**-2
            corrtime = parsed['autocorrtime']*ftau

            print( f'{conf:<6}: {ene} {err} {pickunits} Tsim {tsim:3.1f} Pop {pop:7.1f} {poperr:3.1f} Var {var_au:5.3f} tau_eff {taueff:5.4f} corr-time {corrtime:3.2f}' )
            results[dmcdir][conf] = {
                    'ene' : ene,
                    'err' : err,
                    'units' : pickunits,
                    'TotTimeSim[au]' : tsim,
                    'Pop' : pop,
                    'Pop-err' : poperr,
                    'Var[au]' : var_au,
                    'tau_eff[au]' : taueff,
                    'corr-time[au]' : corrtime,
                    }
        except Exception as e:
            # Best effort: report the failing run and move on to the next.
            print(f'Exception reading {dmcdir}: {e}')

#def gettau(r):
#    return r['tau']
#results.sort( key=gettau )
#print(results)

results

['DMCdla5_0.01', 'DMCdla5_0.003', 'DMCdla5_0.03', 'DMCdla5_0.10', 'DMCdla5_0.001']
             DMCdla5_0.001              
DIM   : -2.16040342674093E+04 2.46862587657743E-02 kcal/mol Tsim 1000.5 Pop 63991.4 0.1 Var 0.531 tau_eff 0.0010 corr-time 0.17
             DMCdla5_0.003              
DIM   : -2.16039745711407E+04 2.52596564983912E-02 kcal/mol Tsim 1201.5 Pop 63994.6 0.2 Var 0.541 tau_eff 0.0030 corr-time 0.18
              DMCdla5_0.01              
DIM   : -2.16039201237714E+04 2.61183708410063E-02 kcal/mol Tsim 1005.0 Pop 63980.9 0.4 Var 0.538 tau_eff 0.0099 corr-time 0.20
              DMCdla5_0.03              
DIM   : -2.16044009308860E+04 2.31465077786707E-02 kcal/mol Tsim 1215.0 Pop 63981.0 0.7 Var 0.524 tau_eff 0.0289 corr-time 0.18
              DMCdla5_0.10              
DIM   : -2.16093203117517E+04 3.04674666484188E-02 kcal/mol Tsim 1050.0 Pop 63898.5 1.2 Var 0.524 tau_eff 0.0856 corr-time 0.25


{'DMCdla5_0.001': {'name': 'DMCdla5',
  'tau': 0.001,
  'DIM': {'ene': '-2.16040342674093E+04',
   'err': '2.46862587657743E-02',
   'units': 'kcal/mol',
   'TotTimeSim[au]': 1000.5,
   'Pop': 63991.396458354364,
   'Pop-err': 0.14156891628379983,
   'Var[au]': 0.5312942210873576,
   'tau_eff[au]': 0.0009995971965992537,
   'corr-time[au]': 0.16801089394027072}},
 'DMCdla5_0.003': {'name': 'DMCdla5',
  'tau': 0.003,
  'DIM': {'ene': '-2.16039745711407E+04',
   'err': '2.52596564983912E-02',
   'units': 'kcal/mol',
   'TotTimeSim[au]': 1201.5,
   'Pop': 63994.58279157833,
   'Pop-err': 0.2198454567181318,
   'Var[au]': 0.5408061717374198,
   'tau_eff[au]': 0.0029945123338876835,
   'corr-time[au]': 0.17831660093009108}},
 'DMCdla5_0.01': {'name': 'DMCdla5',
  'tau': 0.01,
  'DIM': {'ene': '-2.16039201237714E+04',
   'err': '2.61183708410063E-02',
   'units': 'kcal/mol',
   'TotTimeSim[au]': 1005.0,
   'Pop': 63980.92201005025,
   'Pop-err': 0.43308853950056975,
   'Var[au]': 0.537513947