# Analysis of all results with a Python notebook

Steps:

1. Collect all simulation results into a Python-readable file, after reblocking the raw data

2. Analyze molecules, QMC versus DFT

3. Analyze dimer binding energies \
    a. Do extrapolations with and without Jopt on molecules \
    b. Define gold, silver, bronze schemes, and their cost

4. Compare binding energies with other references 

5. Tool to predict QMC cost \
    a. Multilinear fit of the variance \
    b. Use of Student's t distribution to estimate cost 

6. Plots for some relevant cases

7. Plots for the paper


In [1]:
import os
import re
import math

# ---------------------------------------------------------------------------
# Analysis settings
# ---------------------------------------------------------------------------

# Simulated time (a.u.) converted into a number of steps (time / tau) and
# passed to reblock as its first input when the DMC data are reblocked.
equil_time_au = 10.

# Energy unit used for the reblocked energies; must be a key of both
# `units` and `conv2au`.  ('au' is the other choice that has been used.)
pickunits = 'kcal/mol'

# Integer code passed to the reblock executable for each unit label.
units = {
    label: code
    for code, label in enumerate(
        ('au', 'eV', 'kcal/mol', 'au/atom', 'eV/atom', 'kcal/mol/atom'),
        start=1,
    )
}

# Value of one atomic unit of energy expressed in each unit
# (divide a quantity by the factor to bring it back to a.u.).
conv2au = dict([
    ('au', 1.0),
    ('eV', 27.21138505),
    ('kcal/mol', 627.503),
    ('kJ/mol', 2625.5),
    ('cm^-1', 219474.63),
    ('K', 315777.),
    ('J', 43.60E-19),
    ('Hz', 6.57966E+15),
])

# Generic results container.
results = {}

# ---------------------------------------------------------------------------
# Locations on disk
# ---------------------------------------------------------------------------
dir_analysis = '/Users/zen/Dropbox/WORK/2021_S66/ANALYSIS/'
datadir = f'{dir_analysis}/../DATA_CAM/'

# CASINO reblock utility used to post-process the dmc.hist files.
reblockexe = '/Users/zen/CASINO/bin_qmc/utils/macos-gnu-parallel.Andreas-MacBook-Pro-2021/reblock'

# ---------------------------------------------------------------------------
# Monomers appearing in the data set
# ---------------------------------------------------------------------------
mol_names = tuple(
    'AcNH2 AcOH Benzene Cyclopentane Ethene Ethyne MeNH2 '
    'MeOH Neopentane Pentane Peptide Pyridine Uracil Water'.split()
)

In [2]:
# Everything found in the data directory (files and directories alike).
l = os.listdir( datadir )

# Keep only the entries that are directories.
ldir = list( filter( lambda entry: os.path.isdir( datadir + entry ), l ) )

# Monomer calculations live in directories named 'Mol_...'.
dir_molecules = [ entry for entry in ldir if entry[:4] == 'Mol_' ]

# Dimer calculations live in directories whose first '_'-separated field
# is purely numeric.
dir_dimers = [ entry for entry in ldir if entry.partition('_')[0].isdigit() ]

In [3]:
# Display the list of 'Mol_*' directories found in the data directory.
dir_molecules

['Mol_MeOH_55_2',
 'Mol_Ethyne_32_2',
 'Mol_Cyclopentane_42_2',
 'Mol_Uracil_42_1',
 'Mol_Peptide_14_1',
 'Mol_AcOH_20_1',
 'Mol_Benzene_24_1',
 'Mol_Pyridine_25_2',
 'Mol_Cyclopentane_39_2',
 'Mol_Peptide_11_2',
 'Mol_Uracil_26_2',
 'Mol_Peptide_13_1',
 'Mol_Pentane_34_1',
 'Mol_AcOH_61_2',
 'Mol_Uracil_22_2',
 'Mol_Uracil_41_1',
 'Mol_Uracil_28_2',
 'Mol_Peptide_15_2',
 'Mol_Uracil_17_2',
 'Mol_AcNH2_21_1',
 'Mol_Cyclopentane_38_1',
 'Mol_Water_01_2',
 'Mol_AcOH_60_2',
 'Mol_AcOH_22_1',
 'Mol_Uracil_29_2',
 'Mol_Peptide_16_1',
 'Mol_MeNH2_56_2',
 'Mol_AcOH_20_2',
 'Mol_Uracil_23_2',
 'Mol_AcNH2_53_2',
 'Mol_Pyridine_25_1',
 'Mol_Peptide_04_2',
 'Mol_Uracil_31_1',
 'Mol_Benzene_24_2',
 'Mol_Cyclopentane_37_1',
 'Mol_AcNH2_62_2',
 'Mol_Peptide_46_1',
 'Mol_Peptide_57_2',
 'Mol_AcOH_63_2',
 'Mol_Uracil_26_1',
 'Mol_Peptide_15_1',
 'Mol_Uracil_43_1',
 'Mol_Peptide_07_2',
 'Mol_Peptide_64_1',
 'Mol_AcNH2_21_2',
 'Mol_Uracil_32_1',
 'Mol_AcOH_52_2',
 'Mol_AcNH2_23_1',
 'Mol_Ethene_30_2',
 

In [6]:
# Quantities that must be present in the reblock output for a run to be stored.
_REQUIRED_FIELDS = ('ene', 'err', 'Nsteps', 'pop', 'poperr',
                    'var', 'taueff', 'autocorrtime')


def _split_dmc_dirname(dmcdir):
    """Split a DMC directory name into (dmc_type, dmc_Jas, tau).

    Two layouts are accepted:
      '<type>_<tau>'           -> the Jastrow label defaults to 'Jdimer'
      '<type>_<Jastrow>_<tau>'
    Returns None for any other number of '_'-separated fields.
    `tau` is returned as the original string.
    """
    fields = dmcdir.split('_')
    if len(fields) == 2:
        return fields[0], 'Jdimer', fields[1]
    if len(fields) == 3:
        return fields[0], fields[1], fields[2]
    return None


def _parse_reblock_output(lines):
    """Extract the quantities of interest from the lines of a reblock output.

    Returns a dict holding only the quantities actually found, so that the
    caller can detect an incomplete output instead of reusing values from a
    previously parsed file.
    """
    parsed = {}
    # A 'Std error' line is attributed to the mean population or to the mean
    # time step depending on which 'Mean ...' line was seen before it.
    flag_poperr = False
    flag_taueff = False
    for line in lines:
        if re.search( '   Total energy : ', line ):
            lsplit = line.split()
            parsed['ene'] = lsplit[-2]
            parsed['err'] = lsplit[-1]
        if re.search( 'Mean population :', line ):
            parsed['pop'] = float(line.split()[-1])
            flag_poperr = True
        if re.search( 'Std error :', line ) and flag_poperr:
            parsed['poperr'] = float(line.split()[-1])
            flag_poperr = False
        if re.search( 'Mean time step', line ):
            parsed['taueff'] = float(line.split()[-1])
            flag_taueff = True
        if re.search( 'Std error', line ) and flag_taueff:
            parsed['tauefferr'] = float(line.split()[-1])
            flag_taueff = False
        if re.search( '   Variance ', line ):
            parsed['var'] = float(line.split()[-1])
        if re.search( 'Correlation time', line ):
            parsed['autocorrtime'] = float(line.split()[-1])
        if re.search( 'lines of data in total', line ):
            parsed['Nsteps'] = int(line.split()[2])
        if re.search( 'Effective population size :', line ):
            lsplit = line.split()
            parsed['popeff'] = float(lsplit[-2])
            parsed['popefferr'] = float(lsplit[-1])
    return parsed


# results_mol[name][mol_id][dmc_type][dmc_Jas][tau] -> dict of reblocked data
results_mol = {}
for mol in mol_names:
    results_mol[mol] = {}

for d in dir_molecules:
    print('*** Reading data in {} ***'.format(d) )
    # Directory names are expected to look like 'Mol_<name>_<ndim>_<nmol>'.
    d_fields = d.split('_')
    if len(d_fields) != 4:
        print('!!! Problem in directory',d,' !!!')
        continue
    prefix, name, ndim, nmol = d_fields
    mol_id = '{ndim}_{nmol}'.format(ndim=ndim,nmol=nmol)
    print('\tname={}\tndim={}\tnmol={}\tmol_in={}'.format(name, ndim, nmol,mol_id))

    # setdefault: a molecule missing from mol_names must not abort the loop.
    results_mol.setdefault(name, {})[mol_id] = {}

    dimdir = datadir+d+'/DIM/'
    if not os.path.isdir( dimdir ):
        print('!!! Problem in directory',d,' !!!')
        continue
    dmcdirs = [s for s in os.listdir( dimdir ) if s.startswith('DMC') ]

    for dmcdir in dmcdirs:
        print('> Reading data in',dmcdir)
        labels = _split_dmc_dirname( dmcdir )
        if labels is None:
            # Skip only this directory; the other DMC runs are still read.
            print('!!! Problem in directory',dmcdir,' !!!')
            continue
        dmc_type, dmc_Jas, tau = labels

        try:
            ftau = float(tau)
        except ValueError:
            ftau = 0.0
        if not ftau > 0.0:
            # The time step must be a positive number to compute `equil`.
            print('!!! Problem in directory',dmcdir,' !!!')
            continue

        # Number of initial steps handed to reblock as equilibration.
        equil = int(math.ceil( equil_time_au / ftau ))
        # Answers fed to reblock on stdin; its output goes to '_reblock'.
        command = """
{reblock} << EOF > _reblock
{eq}
{u}
{b}
EOF
""".format( eq=equil, u=units[pickunits], b='-1', reblock=reblockexe )

        dmcpath = datadir+d+'/DIM/'+dmcdir

        # Create the nested entries on demand; an existing entry is kept.
        by_tau = ( results_mol[name][mol_id]
                   .setdefault(dmc_type, {})
                   .setdefault(dmc_Jas, {}) )
        by_tau.setdefault(tau, {})

        try:
            os.chdir( dmcpath )
            if (os.path.isfile( 'dmc.hist' )):
                # run reblocking
                os.system( command )
                # read output (the file is closed as soon as it is parsed)
                with open( '_reblock', 'r' ) as reblock_out:
                    parsed = _parse_reblock_output( reblock_out )

                missing = [ key for key in _REQUIRED_FIELDS if key not in parsed ]
                if missing:
                    # Never store numbers left over from a previous directory.
                    raise ValueError(
                        'incomplete reblock output, missing {}'.format(missing) )

                by_tau[tau] = {
                        'ene' : parsed['ene'],
                        'err' : parsed['err'],
                        'units' : pickunits,
                        'TotTimeSim[au]' : parsed['Nsteps']*ftau,
                        'Pop' : parsed['pop'],
                        'Pop-err' : parsed['poperr'],
                        # variance * population, converted from pickunits^2 to au^2
                        'Var[au]' : parsed['var']*parsed['pop']*conv2au[pickunits]**-2,
                        'tau_eff[au]' : parsed['taueff'],
                        'corr-time[au]' : parsed['autocorrtime']*ftau,
                        }
        except Exception as e:
                print(f'Exception reading {dmcdir}: {e}')

# Go back to the analysis directory after all the chdir calls above.
os.chdir( dir_analysis )

*** Reading data in Mol_MeOH_55_2 ***
	name=MeOH	ndim=55	nmol=2	mol_in=55_2
> Reading data in DMCdla5_Jopt_0.006
> Reading data in DMCdla5_Jopt_0.10
> Reading data in DMCdla5_0.01
> Reading data in DMCdla5_Jopt_0.02
> Reading data in DMCdla5_Jopt_0.03
> Reading data in DMCdla5_0.003
> Reading data in DMCdla5_Jopt_0.003
> Reading data in DMCdla5_0.03
> Reading data in DMCdla5_Jopt_0.01
> Reading data in DMCdla5_0.10
*** Reading data in Mol_Ethyne_32_2 ***
	name=Ethyne	ndim=32	nmol=2	mol_in=32_2
> Reading data in DMCdla5_Jopt_0.10
> Reading data in DMCdla5_0.01
> Reading data in DMCdla5_Jopt_0.03
> Reading data in DMCdla5_0.03
> Reading data in DMCdla5_Jopt_0.01
> Reading data in DMCdla5_0.10
*** Reading data in Mol_Cyclopentane_42_2 ***
	name=Cyclopentane	ndim=42	nmol=2	mol_in=42_2
> Reading data in DMCdla5_Jopt_0.10
> Reading data in DMCdla5_Jopt_0.03
> Reading data in DMCdla5_Jopt_0.003
> Reading data in DMCdla5_Jopt_0.01
*** Reading data in Mol_Uracil_42_1 ***
	name=Uracil	ndim=42	nm

In [11]:
import pickle

# Store the collected monomer results next to the analysis notebook.
results_mol_pkl = dir_analysis+'results_mol.pkl'
with open( results_mol_pkl, 'wb' ) as pkl_file:
    pickle.dump( results_mol, pkl_file, protocol=pickle.HIGHEST_PROTOCOL )

# To load the results back in a later session:
#   import pickle
#   with open( dir_analysis+'results_mol.pkl', 'rb' ) as f:
#       results_mol = pickle.load( f )

In [7]:
# Display the nested results dictionary:
# molecule name -> mol_id -> DMC type -> Jastrow label -> time step -> data
results_mol


{'AcNH2': {'21_1': {'DMCdla5': {'Jopt': {'0.10': {'ene': '-2.52929921443583E+04',
      'err': '3.07536309887326E-02',
      'units': 'kcal/mol',
      'TotTimeSim[au]': 1050.0,
      'Pop': 63920.817019230766,
      'Pop-err': 1.3154712761750742,
      'Var[au]': 0.5961076213072163,
      'tau_eff[au]': 0.08884902953994137,
      'corr-time[au]': 0.3431103334672527},
     '0.03': {'ene': '-2.52899713744994E+04',
      'err': '3.14834930284663E-02',
      'units': 'kcal/mol',
      'TotTimeSim[au]': 1215.0,
      'Pop': 63964.995991634714,
      'Pop-err': 0.7210601811144244,
      'Var[au]': 0.6166911550480881,
      'tau_eff[au]': 0.029180580046736782,
      'corr-time[au]': 0.3073690968359111},
     '0.003': {'ene': '-2.52902111336110E+04',
      'err': '2.98113865371793E-02',
      'units': 'kcal/mol',
      'TotTimeSim[au]': 1201.5,
      'Pop': 63992.27442429614,
      'Pop-err': 0.2264317169551177,
      'Var[au]': 0.6186569596759108,
      'tau_eff[au]': 0.002996175208002912,
 