# Analysis with python notebook of all results

Steps:

1. Collect all simulation results into a Python-readable file, after reblocking the raw data\
    a. Monomers\
    b. Dimers (writes *results_dim.pkl*)\
    c. Refs

2. Analyze molecules, QMC versus DFT

3. Analyze dimers binding energies \
    a. Do extrapolations with and without Jopt on molecules \
    b. Define gold, silver, bronze schemes, and their cost

4. Compare binding energies with other references 

5. Tool to predict QMC cost \
    a. Multilinear fit of the variance \
    b. Use of Student's t distribution to estimate cost 

6. Plots for some relevant cases

7. Plots for the paper


OUTPUT:\
results_dim.pkl\
results_dim.csv

In [1]:
import os
import re
import math
import numpy as np
import pandas as pd
import pickle

# --- Analysis settings ---------------------------------------------------
# Simulated time (a.u.) discarded as equilibration before reblocking.
equil_time_au = 10.
# Energy unit used for the reblocked energies; must be a key of both
# `units` and `conv2au` (use 'au' to work in Hartree instead).
pickunits = 'kcal/mol'


# Integer code written to the reblock utility's stdin to select the unit.
units = { 'au':1, 'eV':2, 'kcal/mol':3,
        'au/atom':4, 'eV/atom':5, 'kcal/mol/atom':6 }

# Value of one atomic unit of energy (Hartree) expressed in each unit;
# divide a quantity by the factor to bring it back to a.u.
# NOTE(review): 'kcal/mol' is 627.503 here, while the commonly quoted value
# is about 627.51 -- confirm which one the rest of the analysis expects.
conv2au = {
        'au':1.0,
        'eV':27.21138505,
        'kcal/mol':627.503,
        'kJ/mol':2625.5,
        'cm^-1':219474.63,
        'K': 315777.,
        'J': 43.60E-19,
        'Hz':6.57966E+15,
        }

# Fail early: an unsupported unit would otherwise only surface later as a
# KeyError swallowed by the per-directory exception handler.
if pickunits not in units or pickunits not in conv2au:
    raise ValueError(
        "pickunits={!r} must be a key of both 'units' and 'conv2au'".format(pickunits) )

results = {}

# --- Locations -------------------------------------------------------------
# The defaults are the original author's paths; set the environment variables
# S66_ANALYSIS_DIR / CASINO_REBLOCK to run the notebook on another machine.
# os.path.join(..., '') guarantees the trailing separator that the
# `dir_analysis + 'file'` concatenations used throughout the notebook rely on.
dir_analysis = os.path.join(
    os.environ.get( 'S66_ANALYSIS_DIR', '/Users/zen/Dropbox/WORK/2021_S66/ANALYSIS/' ), '' )
datadir = dir_analysis +'/../DATA_CAM/'

reblockexe = os.environ.get(
    'CASINO_REBLOCK',
    '/Users/zen/APPS/CASINO/bin_qmc/utils/macos-gnu-parallel.Andreas-MacBook-Pro-2021/reblock' )


# Monomers appearing in the dimer directory names.
mol_names = ( 
'AcNH2',
'AcOH',
'Benzene',
'Cyclopentane',
'Ethene',
'Ethyne',
'MeNH2',
'MeOH',
'Neopentane',
'Pentane',
'Peptide',
'Pyridine',
'Uracil',
'Water'
)

# Read the dimer directory names, one per line.
# Blank lines (e.g. a trailing newline at the end of the file) are skipped:
# an empty entry would otherwise break the `int(name.split('_')[0])` parsing
# done by the cells that iterate over dir_dimers.
with open( dir_analysis+'dir_dimers.txt','r') as fp:
    dir_dimers = [ line.strip() for line in fp if line.strip() ]
dir_dimers

['01_Water-Water',
 '02_Water-MeOH',
 '03_Water-MeNH2',
 '04_Water-Peptide',
 '05_MeOH-MeOH',
 '06_MeOH-MeNH2',
 '07_MeOH-Peptide',
 '08_MeOH-Water',
 '09_MeNH2-MeOH',
 '10_MeNH2-MeNH2',
 '11_MeNH2-Peptide',
 '12_MeNH2-Water',
 '13_Peptide-MeOH',
 '14_Peptide-MeNH2',
 '15_Peptide-Peptide',
 '16_Peptide-Water',
 '17_Uracil-Uracil_BP',
 '18_Water-Pyridine',
 '19_MeOH-Pyridine',
 '20_AcOH-AcOH',
 '21_AcNH2-AcNH2',
 '22_AcOH-Uracil',
 '23_AcNH2-Uracil',
 '24_Benzene-Benzene_pi-pi',
 '25_Pyridine-Pyridine_pi-pi',
 '26_Uracil-Uracil_pi-pi',
 '27_Benzene-Pyridine_pi-pi',
 '28_Benzene-Uracil_pi-pi',
 '29_Pyridine-Uracil_pi-pi',
 '30_Benzene-Ethene',
 '31_Uracil-Ethene',
 '32_Uracil-Ethyne',
 '33_Pyridine-Ethene',
 '34_Pentane-Pentane',
 '35_Neopentane-Pentane',
 '36_Neopentane-Neopentane',
 '37_Cyclopentane-Neopentane',
 '38_Cyclopentane-Cyclopentane',
 '39_Benzene-Cyclopentane',
 '40_Benzene-Neopentane',
 '41_Uracil-Pentane',
 '42_Uracil-Cyclopentane',
 '43_Uracil-Neopentane',
 '44_Ethene-Pen

In [2]:
# Read info monomers in mol_info.
# All files are addressed through dir_analysis (like results_dim.pkl) rather
# than relative to the current directory, because the collection cell calls
# os.chdir() and an interrupted run would otherwise leave this cell reading
# and writing inside a DMC directory.
# NOTE: pickle.load can execute arbitrary code; only load pickles produced by
# this project.
with open( dir_analysis+'mol_info.pkl', 'rb' ) as f:
    mol_info = pickle.load( f )

# Info dimers: directory names look like '<ID>_<mol1>-<mol2>[_<tag>]'.
dim_info = {}
for s in dir_dimers:
    fields = s.split('_')
    dim_id = int( fields[0] )
    mol1, mol2 = fields[1].split('-')[:2]
    # Electron and atom counts of the dimer are the sums over the two monomers.
    dim_info[dim_id] = {
        'name' : s,
        'mol1' : mol1,
        'mol2' : mol2,
        'Nel'  : mol_info[mol1]['Nel'] + mol_info[mol2]['Nel'],
        'Nelv' : mol_info[mol1]['Nelv'] + mol_info[mol2]['Nelv'],
        'atoms': { atom: mol_info[mol1]['atoms'][atom] + mol_info[mol2]['atoms'][atom]
                   for atom in ['H','C','N','O'] },
        }

# Write in file
with open( dir_analysis+'dim_info.pkl', 'wb' ) as f:
    pickle.dump( dim_info, f, pickle.HIGHEST_PROTOCOL )

# Write in csv file (one row per dimer, indexed by its numeric ID)
df_dim_info = pd.DataFrame(dim_info).transpose()
df_dim_info.index.name = 'ID'

df_dim_info.to_csv( dir_analysis+'dim_info.csv' )

df_dim_info

Unnamed: 0_level_0,name,mol1,mol2,Nel,Nelv,atoms
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
1,01_Water-Water,Water,Water,20,16,"{'H': 4, 'C': 0, 'N': 0, 'O': 2}"
2,02_Water-MeOH,Water,MeOH,28,22,"{'H': 6, 'C': 1, 'N': 0, 'O': 2}"
3,03_Water-MeNH2,Water,MeNH2,28,22,"{'H': 7, 'C': 1, 'N': 1, 'O': 1}"
4,04_Water-Peptide,Water,Peptide,50,38,"{'H': 9, 'C': 3, 'N': 1, 'O': 2}"
5,05_MeOH-MeOH,MeOH,MeOH,36,28,"{'H': 8, 'C': 2, 'N': 0, 'O': 2}"
...,...,...,...,...,...,...
62,62_Pentane-AcNH2,Pentane,AcNH2,74,56,"{'H': 17, 'C': 7, 'N': 1, 'O': 1}"
63,63_Benzene-AcOH,Benzene,AcOH,74,54,"{'H': 10, 'C': 8, 'N': 0, 'O': 2}"
64,64_Peptide-Ethene,Peptide,Ethene,56,42,"{'H': 11, 'C': 5, 'N': 1, 'O': 1}"
65,65_Pyridine-Ethyne,Pyridine,Ethyne,56,40,"{'H': 7, 'C': 7, 'N': 1, 'O': 0}"


In [3]:
# Quantities that must be present in the reblock output for a run to be stored.
_REBLOCK_REQUIRED = ( 'ene', 'err', 'pop', 'poperr', 'var',
                      'taueff', 'autocorrtime', 'Nsteps' )


def _count_lines( path ):
    """Return the number of lines in the text file `path` (0 for an empty file)."""
    with open( path, 'r' ) as fp:
        return sum( 1 for _ in fp )


def _parse_reblock_output( path ):
    """Parse the text written by the reblock utility.

    Returns a dict with the keys listed in _REBLOCK_REQUIRED plus, when the
    corresponding lines exist, 'tauefferr', 'popeff' and 'popefferr' (parsed
    but not stored in results_dim).

    Raises ValueError if a required quantity is missing, so that values from
    a previously parsed directory can never leak into this one.
    """
    found = {}
    # A 'Std error' line refers to whichever 'Mean ...' line preceded it.
    expect_pop_err = False
    expect_tau_err = False
    with open( path, 'r' ) as fp:
        for line in fp:
            if re.search( '   Total energy : ', line ):
                lsplit = line.split()
                found['ene'] = float( lsplit[-2] )
                found['err'] = float( lsplit[-1] )
            if re.search( 'Mean population :', line ):
                found['pop'] = float( line.split()[-1] )
                expect_pop_err = True
            if re.search( 'Std error :', line ) and expect_pop_err:
                found['poperr'] = float( line.split()[-1] )
                expect_pop_err = False
            if re.search( 'Mean time step', line ):
                found['taueff'] = float( line.split()[-1] )
                expect_tau_err = True
            if re.search( 'Std error', line ) and expect_tau_err:
                found['tauefferr'] = float( line.split()[-1] )
                expect_tau_err = False
            if re.search( '   Variance ', line ):
                found['var'] = float( line.split()[-1] )
            if re.search( 'Correlation time', line ):
                found['autocorrtime'] = float( line.split()[-1] )
            if re.search( 'lines of data in total', line ):
                found['Nsteps'] = int( line.split()[2] )
            if re.search( 'Effective population size :', line ):
                lsplit = line.split()
                found['popeff'] = float( lsplit[-2] )
                found['popefferr'] = float( lsplit[-1] )

    missing = [ key for key in _REBLOCK_REQUIRED if key not in found ]
    if missing:
        raise ValueError( 'reblock output {} lacks: {}'.format( path, ', '.join(missing) ) )
    return found


def _parse_casino_out( path ):
    """Parse machine, MPI size, target weight and cost per step from an 'out' file.

    The cost is the total time of the blocks following the
    'DMC STATISTICS-ACCUMULATION CALCULATION' banner divided by the total
    number of moves in those blocks.  Keys whose line is absent are simply
    omitted.  Raises ValueError when no block statistics are found.
    """
    info = {}
    in_dmc_stats = False
    block_times = []
    block_steps = []
    with open( path, 'r' ) as fp:
        for line in fp:
            if re.search( 'Running on login', line ):
                info['machine'] = line.split()[-1].strip('[]')
            if re.search( ' Running in parallel using ', line ):
                info['N_MPI'] = int( line.split()[4] )
            if re.search( ' DMC_TARGET_WEIGHT   ', line ):
                info['target_weight'] = float( line.split()[-1] )
            if re.search( ' DMC STATISTICS-ACCUMULATION CALCULATION.', line ):
                in_dmc_stats = True
            if re.search( 'Time taken in block', line ) and in_dmc_stats:
                block_times.append( float( line.split()[-1] ) )
            if re.search( 'Number of moves in block', line ) and in_dmc_stats:
                block_steps.append( int( line.split()[-1] ) )

    total_steps = sum( block_steps )
    if total_steps == 0:
        raise ValueError( 'no DMC statistics-accumulation blocks found in {}'.format(path) )
    info['cost'] = sum( block_times ) / total_steps

    # Fixed key order keeps the column order of the downstream DataFrame stable.
    return { key: info[key]
             for key in ( 'machine', 'N_MPI', 'cost', 'target_weight' )
             if key in info }


# results_dim[dim_id][dmc_type][dmc_Jas][tau] -> dict of reblocked quantities
results_dim = {}
try:
    for d in dir_dimers:
        print('*** Reading data in {} ***'.format(d) )
        dim_id = int( d.split('_')[0] )
        results_dim[dim_id] = {}

        dmcdirs = [s for s in os.listdir( datadir+d+'/DIM/' ) if s.startswith('DMC') ]
        for dmcdir in dmcdirs:
            print('> Reading data in',dmcdir)
            # Directory names are '<type>_<tau>' or '<type>_<Jas>_<tau>'.
            i_dmcdir = dmcdir.split('_')
            dmc_type = i_dmcdir[0]
            if len(i_dmcdir)==2:
                dmc_Jas = 'Jopt'
                tau = i_dmcdir[1]
            elif len(i_dmcdir)==3:
                dmc_Jas = i_dmcdir[1]
                tau = i_dmcdir[2]
            else:
                # Skip only the offending directory; the remaining runs of
                # this dimer are still processed.
                print('!!! Problem in directory',dmcdir,' !!!')
                continue

            try:
                ftau = float(tau)
                # Number of initial steps discarded as equilibration.
                equil = int(math.ceil( equil_time_au / ftau ))
                # Answers fed to reblock: equilibration steps, unit code, block length.
                command = '\n{reblock} << EOF > _reblock\n{eq}\n{u}\n{b}\nEOF\n'.format(
                    eq=equil, u=units[pickunits], b='-1', reblock=reblockexe )

                dmcpath = datadir+d+'/DIM/'+dmcdir
                # reblock is run from inside the run directory and writes _reblock there.
                os.chdir( dmcpath )

                if not os.path.isfile( 'dmc.hist' ):
                    print(f'> No dmc.hist in {dmcdir}: skipped')
                    continue
                # Require enough data beyond the equilibration part.
                if _count_lines( 'dmc.hist' ) <= 5*equil:
                    continue

                os.system( command )
                rb = _parse_reblock_output( '_reblock' )

                res = {
                        'ene' : rb['ene'],
                        'err' : rb['err'],
                        'units' : pickunits,
                        'TotTimeSim[au]' : rb['Nsteps']*ftau,
                        'Pop' : rb['pop'],
                        'Pop-err' : rb['poperr'],
                        'Var[au]' : rb['var']*rb['pop']*conv2au[pickunits]**-2,
                        'tau_eff[au]' : rb['taueff'],
                        'corr-time[au]' : rb['autocorrtime']*ftau,
                        }
                results_dim[dim_id].setdefault( dmc_type, {} ).setdefault( dmc_Jas, {} )[tau] = res

                if os.path.isfile( 'out' ):
                    res.update( _parse_casino_out( 'out' ) )

            except Exception as e:
                print(f'Exception reading {dmcdir}: {e}')
finally:
    # Always return to the analysis directory, even if the loop is interrupted.
    os.chdir( dir_analysis )

*** Reading data in 01_Water-Water ***
> Reading data in DMCtm5_JoptLA_0.10
> Reading data in DMCdla5_0.08
> Reading data in DMCdla5_0.06
> Reading data in DMCdla5_0.01
> Reading data in DMCdla5_0.13
> Reading data in DMCdla5_0.25
> Reading data in DMCtm5_JoptLA_0.006
> Reading data in DMCtm5_JoptLA_0.03
> Reading data in DMCdla5_0.003
> Reading data in DMCtm5_0.006
> Reading data in DMCtm5_JoptLA_0.02
> Reading data in DMCtm5_0.01
> Reading data in DMCtm5_0.06
> Reading data in DMCdla5_0.2
> Reading data in DMCtm5_0.10
> Reading data in DMCdla5_0.3
> Reading data in DMCdla5_0.04
> Reading data in DMCdla5_0.03
> Reading data in DMCdla5_0.02
> Reading data in DMCdla5_0.05
> Reading data in DMCtm5_JoptLA_0.003
> Reading data in DMCdla5_0.16
> Reading data in DMCdla5_0.10
> Reading data in DMCtm5_0.03
> Reading data in DMCtm5_0.003
> Reading data in DMCdla5_0.001
> Reading data in DMCtm5_JoptLA_0.01
> Reading data in DMCtm5_JoptLA_0.06
*** Reading data in 02_Water-MeOH ***
> Reading data 

> Reading data in DMCdla5_0.13
> Reading data in DMCdla5_0.25
> Reading data in DMCdla5_0.003
> Reading data in DMCdla5_0.2
> Reading data in DMCdla5_0.3
> Reading data in DMCdla5_0.04
> Reading data in DMCdla5_0.03
> Reading data in DMCdla5_0.02
> Reading data in DMCdla5_0.05
> Reading data in DMCdla5_0.16
> Reading data in DMCdla5_0.10
*** Reading data in 18_Water-Pyridine ***
> Reading data in DMCdla5_0.08
> Reading data in DMCdla5_0.06
> Reading data in DMCdla5_0.01
> Reading data in DMCdla5_0.13
> Reading data in DMCdla5_0.25
> Reading data in DMCdla5_0.003
> Reading data in DMCdla5_0.2
> Reading data in DMCdla5_0.3
> Reading data in DMCdla5_0.04
> Reading data in DMCdla5_0.03
> Reading data in DMCdla5_0.02
> Reading data in DMCdla5_0.05
> Reading data in DMCdla5_0.16
> Reading data in DMCdla5_0.10
*** Reading data in 19_MeOH-Pyridine ***
> Reading data in DMCdla5_0.08
> Reading data in DMCdla5_0.06
> Reading data in DMCdla5_0.01
> Reading data in DMCdla5_0.13
> Reading data in DM

> Reading data in DMCdla5_0.2
> Reading data in DMCdla5_0.3
> Reading data in DMCdla5_0.04
> Reading data in DMCdla5_0.03
> Reading data in DMCdla5_0.02
> Reading data in DMCdla5_0.05
> Reading data in DMCdla5_0.16
> Reading data in DMCdla5_0.10
*** Reading data in 33_Pyridine-Ethene ***
> Reading data in DMCdla5_0.08
> Reading data in DMCdla5_0.06
> Reading data in DMCdla5_0.01
> Reading data in DMCdla5_0.13
> Reading data in DMCdla5_0.25
> Reading data in DMCdla5_0.003
> Reading data in DMCdla5_0.2
> Reading data in DMCdla5_0.3
> Reading data in DMCdla5_0.04
> Reading data in DMCdla5_0.03
> Reading data in DMCdla5_0.02
> Reading data in DMCdla5_0.05
> Reading data in DMCdla5_0.16
> Reading data in DMCdla5_0.10
*** Reading data in 34_Pentane-Pentane ***
> Reading data in DMCdla5_0.08
> Reading data in DMCdla5_0.06
> Reading data in DMCdla5_0.01
> Reading data in DMCdla5_0.13
> Reading data in DMCdla5_0.25
> Reading data in DMCdla5_0.003
> Reading data in DMCdla5_0.2
> Reading data in 

> Reading data in DMCdla5_0.13
> Reading data in DMCdla5_0.25
> Reading data in DMCdla5_0.003
> Reading data in DMCdla5_0.2
> Reading data in DMCdla5_0.3
> Reading data in DMCdla5_0.04
> Reading data in DMCdla5_0.03
> Reading data in DMCdla5_0.02
> Reading data in DMCdla5_0.05
> Reading data in DMCdla5_0.16
> Reading data in DMCdla5_0.10
*** Reading data in 50_Benzene-Ethyne_CH-pi ***
> Reading data in DMCdla5_0.08
> Reading data in DMCdla5_0.06
> Reading data in DMCdla5_0.01
> Reading data in DMCdla5_0.13
> Reading data in DMCdla5_0.25
> Reading data in DMCdla5_0.003
> Reading data in DMCdla5_0.2
> Reading data in DMCdla5_0.3
> Reading data in DMCdla5_0.04
> Reading data in DMCdla5_0.03
> Reading data in DMCdla5_0.02
> Reading data in DMCdla5_0.05
> Reading data in DMCdla5_0.16
> Reading data in DMCdla5_0.10
*** Reading data in 51_Ethyne-Ethyne_TS ***
> Reading data in DMCdla5_0.08
> Reading data in DMCdla5_0.06
> Reading data in DMCdla5_0.01
> Reading data in DMCdla5_0.13
> Reading d

> Reading data in DMCdla5_0.003
> Reading data in DMCdla5_0.2
> Reading data in DMCdla5_0.3
> Reading data in DMCdla5_0.04
> Reading data in DMCdla5_0.03
> Reading data in DMCdla5_0.02
> Reading data in DMCdla5_0.05
> Reading data in DMCdla5_0.16
> Reading data in DMCdla5_0.10


In [4]:
import pickle
# Persist the nested results dictionary in the analysis directory.
results_pkl = dir_analysis + 'results_dim.pkl'
with open( results_pkl, 'wb' ) as pkl_out:
    pickle.dump( results_dim, pkl_out, protocol=pickle.HIGHEST_PROTOCOL )

In [5]:
# Inspect the nested results (dmc_type -> dmc_Jas -> tau) collected for dimer ID 1.
results_dim[1]

{'DMCtm5': {'JoptLA': {'0.10': {'ene': -21599.485845655,
    'err': 0.0267893546073787,
    'units': 'kcal/mol',
    'TotTimeSim[au]': 1050.0,
    'Pop': 63628.20942307692,
    'Pop-err': 1.33290985370179,
    'Var[au]': 0.5963515873463652,
    'tau_eff[au]': 0.08542461155574686,
    'corr-time[au]': 0.1902801224394427,
    'machine': 'linuxpc-intel-slurm-parallel.peta4',
    'N_MPI': 64,
    'cost': 0.9221106300000002,
    'target_weight': 64000.0},
   '0.006': {'ene': -21594.8537505891,
    'err': 0.0209711570675259,
    'units': 'kcal/mol',
    'TotTimeSim[au]': 1203.0,
    'Pop': 63979.908782747334,
    'Pop-err': 0.28992449467742687,
    'Var[au]': 0.5484351138183677,
    'tau_eff[au]': 0.005971623109908103,
    'corr-time[au]': 0.11683421764651994,
    'machine': 'linuxpc-intel-slurm-parallel.peta4',
    'N_MPI': 64,
    'cost': 0.8979901549295771,
    'target_weight': 64000.0},
   '0.03': {'ene': -21594.7426612819,
    'err': 0.021171561456522,
    'units': 'kcal/mol',
    'TotT

In [6]:
# Write csv file with results for the dimers

# Runs with a total simulated time (a.u.) not exceeding this are left out.
min_sim_time_au = 300
# Identification columns placed first in the table; every other column keeps
# its order of appearance (so 'ID' stays after the measured quantities).
lead_cols = ['mol1', 'mol2', 'dmc_type', 'dmc_Jas', 'tau']

with open( dir_analysis+'results_dim.pkl', 'rb' ) as f:
    results_dim = pickle.load(f)

# Flatten results_dim[dim_id][dmc_type][dmc_Jas][tau] into one record per run.
# The identification keys are written into the stored dictionaries themselves,
# so results_dim entries carry them afterwards as well.
records = []
for dim_id, by_type in results_dim.items():
    for dmc_type, by_jas in by_type.items():
        for dmc_Jas, by_tau in by_jas.items():
            for tau, data in by_tau.items():
                data['ID']       = dim_id
                data['mol1']     = dim_info[dim_id]['mol1']
                data['mol2']     = dim_info[dim_id]['mol2']
                data['dmc_type'] = dmc_type
                data['dmc_Jas']  = dmc_Jas
                data['tau']      = tau
                # Runs labelled exactly '0.30' are deliberately excluded.
                # NOTE(review): the directory listing shows '0.3', not '0.30',
                # so this filter may never trigger -- confirm the intent.
                if tau == '0.30':
                    continue
                if data['TotTimeSim[au]'] > min_sim_time_au:
                    records.append(data)

if not records:
    raise ValueError('No run passed the selection: results table would be empty')

df_results_dim = pd.DataFrame(records)
# Order columns by name instead of by position: slicing the last five columns
# picks the wrong ones whenever the first record lacks the keys read from 'out'.
cols = lead_cols + [ c for c in df_results_dim.columns if c not in lead_cols ]
df_results_dim = df_results_dim[cols]
# Numeric copy of the time step (the 'tau' column holds the directory-name string).
df_results_dim['tau[au]'] = df_results_dim['tau'].to_numpy().astype(np.float64)

# Written alongside results_dim.pkl rather than in the current directory.
df_results_dim.to_csv( dir_analysis+'results_dim.csv' )

  values = values.astype(str)


In [7]:
# Show every tabulated run for dimer ID 24.
df_results_dim[ df_results_dim.ID==24 ]

Unnamed: 0,mol1,mol2,dmc_type,dmc_Jas,tau,ene,err,units,TotTimeSim[au],Pop,Pop-err,Var[au],tau_eff[au],corr-time[au],machine,N_MPI,cost,target_weight,ID,tau[au]
352,Benzene,Benzene,DMCdla5,Jopt,0.08,-47250.281119,0.047531,kcal/mol,1208.0,63969.776962,1.522385,1.037642,0.074904,0.470157,linuxpc-intel-slurm-parallel.peta4-cclake,224.0,1.884792,64000.0,24,0.08
353,Benzene,Benzene,DMCdla5,Jopt,0.06,-47249.79281,0.040536,kcal/mol,2430.0,63962.899487,0.898487,1.080475,0.057301,0.480125,linuxpc-intel-slurm-parallel.peta4-icelake-zen,76.0,3.772226,64000.0,24,0.06
354,Benzene,Benzene,DMCdla5,Jopt,0.01,-47250.491556,0.038532,kcal/mol,1965.0,63992.847688,0.424242,1.081065,0.009958,0.412673,linuxpc-intel-slurm-parallel.peta4,320.0,1.375802,64000.0,24,0.01
355,Benzene,Benzene,DMCdla5,Jopt,0.7,-47287.405907,0.036167,kcal/mol,7350.0,63463.640439,1.805369,1.028348,0.336258,1.31116,linuxpc-intel-slurm-parallel.peta4-knl,320.0,7.623159,64000.0,24,0.7
356,Benzene,Benzene,DMCdla5,Jopt,0.13,-47252.745678,0.052768,kcal/mol,1313.0,63924.856031,1.799285,1.03985,0.11553,0.491686,linuxpc-intel-slurm-parallel.peta4-cclake,224.0,1.898267,64000.0,24,0.13
357,Benzene,Benzene,DMCdla5,Jopt,0.25,-47260.524984,0.047671,kcal/mol,1275.0,63812.28004,2.462223,1.032931,0.19494,0.54626,linuxpc-intel-slurm-parallel.peta4-cclake,224.0,1.852303,64000.0,24,0.25
358,Benzene,Benzene,DMCdla5,Jopt,0.003,-47250.972791,0.047744,kcal/mol,1327.5,63987.69379,0.28537,1.094101,0.002998,0.4403,linuxpc-intel-slurm-parallel.peta4-cclake,224.0,1.856408,64000.0,24,0.003
359,Benzene,Benzene,DMCdla5,Jopt,0.2,-47257.149224,0.057873,kcal/mol,1100.0,63867.824587,2.373011,1.052984,0.164751,0.565432,linuxpc-intel-slurm-parallel.peta4-cclake,224.0,1.86223,64000.0,24,0.2
360,Benzene,Benzene,DMCdla5,Jopt,0.3,-47264.08655,0.032982,kcal/mol,3150.0,63776.799541,1.664414,1.04016,0.221404,0.620292,linuxpc-intel-slurm-parallel.peta4-knl,640.0,3.877563,64000.0,24,0.3
361,Benzene,Benzene,DMCdla5,Jopt,0.04,-47249.688062,0.053338,kcal/mol,1220.0,63984.287107,1.04655,1.064678,0.038918,0.470777,linuxpc-intel-slurm-parallel.peta4-icelake-zen,304.0,0.956626,64000.0,24,0.04


In [8]:
# Distinct values of the dmc_Jas label present in the results table.
set( df_results_dim['dmc_Jas'] )

{'Jopt', 'JoptLA'}

In [9]:
# List the columns of the results table.
df_results_dim.columns

Index(['mol1', 'mol2', 'dmc_type', 'dmc_Jas', 'tau', 'ene', 'err', 'units',
       'TotTimeSim[au]', 'Pop', 'Pop-err', 'Var[au]', 'tau_eff[au]',
       'corr-time[au]', 'machine', 'N_MPI', 'cost', 'target_weight', 'ID',
       'tau[au]'],
      dtype='object')

In [10]:
# Distinct dmc_Jas labels, read through attribute access on the DataFrame.
set(df_results_dim.dmc_Jas)

{'Jopt', 'JoptLA'}

In [11]:
# Distinct dmc_type labels (first field of the DMC directory names) in the table.
set(df_results_dim.dmc_type)

{'DMC', 'DMCdla', 'DMCdla5', 'DMCtm5'}

In [12]:
# Distinct time-step labels; 'tau' holds the strings taken from the directory
# names, so e.g. '0.10' and '0.1' would count as different values.
set(df_results_dim['tau'])

{'0.001',
 '0.003',
 '0.006',
 '0.01',
 '0.02',
 '0.03',
 '0.04',
 '0.05',
 '0.06',
 '0.08',
 '0.10',
 '0.13',
 '0.16',
 '0.2',
 '0.25',
 '0.3',
 '0.70'}

In [13]:
# Progress report for the DMCdla5 / Jopt runs at tau = 0.16: which dimers have
# been simulated and how much simulated time (a.u.) each has accumulated.
n_dimers = 66            # dimer IDs run from 1 to n_dimers
target_time_au = 1000    # simulated time at which a run counts as finished
bin_width_au = 100

dfx = df_results_dim.loc[(df_results_dim.dmc_type=='DMCdla5') & 
                   (df_results_dim.dmc_Jas=='Jopt' ) & 
                   (df_results_dim.tau=='0.16' ) ]
sim_time = dfx['TotTimeSim[au]']

print(len(dfx))
print('Simulated: ',set(dfx.ID))
print('Not simulated: ', (set(range(1,n_dimers+1)) - set(dfx.ID)) )
for i in range(bin_width_au, target_time_au+1, bin_width_au):
    # Cumulative count below i, and the IDs falling in the bin [i-100, i).
    # The lower edge is inclusive so that a run sitting exactly on a bin
    # boundary is reported once instead of being dropped from every bin.
    xx = dfx.loc[ sim_time < i ]
    xy = dfx.loc[ (sim_time < i) & (sim_time >= i-bin_width_au) ]
    print(f'{i}: ',len(xx), ' -> ',set(xy.ID))
# Inclusive threshold: a run with exactly the target time is finished.
finished = dfx.loc[ sim_time >= target_time_au ]
print('Simulation finished: ',len(finished),' -> ',set( finished.ID ) )
# Runs that are close to, but still below, the target time.
dfx.loc[ (sim_time > 800) & (sim_time < target_time_au) ]

66
Simulated:  {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66}
Not simulated:  set()
100:  0  ->  set()
200:  0  ->  set()
300:  0  ->  set()
400:  0  ->  set()
500:  0  ->  set()
600:  0  ->  set()
700:  0  ->  set()
800:  0  ->  set()
900:  0  ->  set()
1000:  0  ->  set()
Simulation finished:  66  ->  {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66}


Unnamed: 0,mol1,mol2,dmc_type,dmc_Jas,tau,ene,err,units,TotTimeSim[au],Pop,Pop-err,Var[au],tau_eff[au],corr-time[au],machine,N_MPI,cost,target_weight,ID,tau[au]


In [14]:
# Coverage check for the DMCtm5 / JoptLA runs at tau = 0.01: number of runs,
# the dimer IDs covered, the IDs still missing, and the runs with less than
# 1000 a.u. of simulated time.
is_tm5    = df_results_dim.dmc_type == 'DMCtm5'
is_joptla = df_results_dim.dmc_Jas == 'JoptLA'
is_tau001 = df_results_dim.tau == '0.01'
dfx = df_results_dim.loc[ is_tm5 & is_joptla & is_tau001 ]

covered_ids = set(dfx.ID)
short_runs = dfx.loc[ dfx['TotTimeSim[au]'] < 1000 ]

print(len(dfx))
print(covered_ids)
print( set(range(1,67)) - covered_ids )
print(len(short_runs))
short_runs

2
{1, 20}
{2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66}
0


Unnamed: 0,mol1,mol2,dmc_type,dmc_Jas,tau,ene,err,units,TotTimeSim[au],Pop,Pop-err,Var[au],tau_eff[au],corr-time[au],machine,N_MPI,cost,target_weight,ID,tau[au]


In [15]:
# Check the current working directory (the collection cell changes it with
# os.chdir and is expected to end back in dir_analysis).
os.getcwd()

'/Users/zen/Dropbox/WORK/2021_S66/ANALYSIS'