In [1]:
import os
from pathlib import Path
from typing import Any, Dict, List, Tuple, Union
#from tqdm.notebook import tqdm
from tqdm import tqdm


In [2]:
# Change into the parent of the current working directory.
# NOTE(review): the target is relative to the current cwd, so every re-run of
# this cell moves up one more level; run it exactly once per kernel session.
os.chdir(os.path.split(os.getcwd())[0]) # set working directory to Molli top level

# Project-local helper modules; presumably resolved from the new working
# directory (confirm). Only `GU` and `ut` are used in the cells visible here.
import ase_utils as au
import gaussian_utils as GU
import metrics as ms
import utils as ut


### Case 1: Generate results inside each experiment directory (subfolder: molli_results)

In [3]:
def process_inside_experiments_dirs():
    """Process the Gaussian log files of every experiment directory in place.

    Reads the notebook-level names ``experiment_dirs``, ``gaussian_logs_folder``
    and ``output_folder_in_experiment_dir``. For each experiment directory the
    ``.log`` files found in its log subfolder are handed to
    ``GU.process_many_log_files``, with the experiment's own output subfolder
    passed as ``output_dir`` and the aggregate file names prefixed with the
    directory stem.
    """
    print("Processing gaussian log files...")

    with_progress = tqdm(experiment_dirs)
    for exp_dir in with_progress:
        exp_name = exp_dir.stem
        # Show the experiment currently being processed on the progress bar.
        with_progress.set_description(exp_name)

        log_paths = ut.get_file_paths_in_dir(exp_dir / gaussian_logs_folder, ".log")
        results_dir = exp_dir / output_folder_in_experiment_dir

        GU.process_many_log_files(
            input_paths=log_paths,
            output_dir=results_dir,
            aggregate_log_file_name=f"{exp_name}_log.txt",
            write_last_opt_steps_file_path=results_dir / f"{exp_name}_confs.xyz",
        )


In [4]:
# Subfolder names inside each experiment directory: Gaussian logs and results.
gaussian_logs_folder = "gaussian_results"
output_folder_in_experiment_dir = "molli_results"

# Every directory directly below parent_dir is treated as one experiment.
parent_dir = Path("C:/tmp/gaussian/optimize")
experiment_dirs = [entry for entry in parent_dir.iterdir() if entry.is_dir()]


In [5]:
# Display the experiment directories found under parent_dir.
experiment_dirs

[WindowsPath('C:/tmp/gaussian/optimize/crest_ex0a_BP86_Def2SVPP_SVPFit'),
 WindowsPath('C:/tmp/gaussian/optimize/crest_ex0a_BP86_STO3G'),
 WindowsPath('C:/tmp/gaussian/optimize/crest_ex0a_PBE1PBE_Def2SVPP_SVPFit'),
 WindowsPath('C:/tmp/gaussian/optimize/crest_ex0a_PBE1PBE_STO3G'),
 WindowsPath('C:/tmp/gaussian/optimize/crest_ex0a_s1_BP86_Def2SVPP_SVPFit'),
 WindowsPath('C:/tmp/gaussian/optimize/crest_ex0b_BP86_Def2SVPP_SVPFit'),
 WindowsPath('C:/tmp/gaussian/optimize/crest_ex0b_BP86_STO3G'),
 WindowsPath('C:/tmp/gaussian/optimize/crest_ex0ff_BP86_Def2SVPP_SVPFit'),
 WindowsPath('C:/tmp/gaussian/optimize/crest_ex15_BP86_Def2SVPP_SVPFit'),
 WindowsPath('C:/tmp/gaussian/optimize/crest_ex15_BP86_STO3G'),
 WindowsPath('C:/tmp/gaussian/optimize/crest_ex16_BP86_Def2SVPP_SVPFit'),
 WindowsPath('C:/tmp/gaussian/optimize/crest_ex16_crest3_BP86_Def2SVPP_SVPFit'),
 WindowsPath('C:/tmp/gaussian/optimize/crest_ex16_crest3_PBE1PBE_Def2SVPP_SVPFit'),
 WindowsPath('C:/tmp/gaussian/optimize/crest_ex16_c

In [6]:
# Run the in-place processing for all experiment directories and time the call.
%time process_inside_experiments_dirs()

Processing gaussian log files...


pbe1pbe_cc_pvtz_tzvpfit: 100%|█████████████████| 37/37 [00:47<00:00,  1.28s/it]

Wall time: 47.4 s





### Case 2: Process Gaussian log files and write aggregated results into single output directory

In [4]:
# Directories whose ".log" files are collected for this case
# (a single experiment's log folder here).
log_files_dirs = [
        Path("C:/tmp/gaussian/optimize/crest_ex0a_PBE1PBE_STO3G/gaussian_results"),
    ]

# Base name reused for the output directory and for the aggregate file names.
aggregate_name = "crest_ex0a_pbe1pbe_sto3g"
output_dir = Path(f"C:/tmp/gaussian/optimize/{aggregate_name}/molli_results")
# Display the resulting output directory.
output_dir


WindowsPath('C:/tmp/gaussian/optimize/crest_ex0a_pbe1pbe_sto3g/molli_results')

In [6]:
# Collect the ".log" files from all selected directories.
gaussian_log_files = ut.get_file_paths_in_many_dirs(log_files_dirs, ".log")

# Aggregate file names are derived from aggregate_name; the xyz path lies in output_dir.
aggregate_xyz_file = output_dir / f"{aggregate_name}_confs.xyz"
aggregate_log_file_name = f"{aggregate_name}_log.txt"

GU.process_many_log_files(
    input_paths=gaussian_log_files,
    output_dir=output_dir,
    aggregate_log_file_name=aggregate_log_file_name,
    do_only_summary=True,
    write_last_opt_steps_file_path=aggregate_xyz_file,
)



### Case 3: Process Gaussian log files, aggregate results by DFT and write into separate directories

In [3]:
def _aggregate_experiment_logs(
        aggregate_name: str,
        dft_names: List[str],
        output_dir: Path,
        extract_summary_step_nr: int,
        ignore_shorter_runs: bool,
        do_only_summary: bool,
        return_converged_only: bool,
        show_errors_in_output: bool
    ) -> None:
    """Shared implementation of the two public aggregation functions.

    Selects the sub directories of the notebook-level ``experiments_parent_dir``
    whose lower-cased stem contains one of ``dft_names``, keeps those that have
    an existing ``gaussian_logs_folder`` sub directory, collects their ``.log``
    files via ``ut.get_file_paths_in_many_dirs`` and forwards everything to
    ``GU.process_many_log_files``. Finally prints the aggregate log and xyz paths.

    The match is a plain substring test against the lower-cased directory stem,
    so the entries of ``dft_names`` must themselves be lower case to match.
    """
    # List the parent directory once instead of once per DFT name.
    candidate_dirs = [x for x in experiments_parent_dir.iterdir() if x.is_dir()]

    # A directory matching several names is kept only once (first match wins),
    # otherwise its log files would be processed repeatedly.
    experiment_dirs = []
    already_selected = set()
    for dft_name in dft_names:
        for candidate in candidate_dirs:
            if dft_name in candidate.stem.lower() and candidate not in already_selected:
                already_selected.add(candidate)
                experiment_dirs.append(candidate)

    # Experiments without a log sub directory are skipped silently.
    log_files_dirs = [
            logs_dir
                for logs_dir in (x.joinpath(gaussian_logs_folder) for x in experiment_dirs)
                if logs_dir.exists()
        ]

    gaussian_log_files = ut.get_file_paths_in_many_dirs(log_files_dirs, ".log")
    aggregate_log_file_name = f"{aggregate_name}_log.txt"
    aggregate_xyz_file = output_dir.joinpath(f"{aggregate_name}_confs.xyz")

    GU.process_many_log_files(
                            input_paths=gaussian_log_files,
                            output_dir=output_dir,
                            aggregate_log_file_name=aggregate_log_file_name,
                            extract_summary_step_nr=extract_summary_step_nr,
                            ignore_shorter_runs=ignore_shorter_runs,
                            do_only_summary=do_only_summary,
                            return_converged_only=return_converged_only,
                            write_last_opt_steps_file_path=aggregate_xyz_file,
                            show_errors_in_output=show_errors_in_output
                        )

    print(f"aggregate_log_file_name: {output_dir.joinpath(aggregate_log_file_name)}")
    print(f"aggregate_xyz_file: {aggregate_xyz_file}")


def process_log_files(
        aggregate_name: str,
        dft_names: List[str],
        extract_summary_step_nr: int=0,
        ignore_shorter_runs:bool=False,
        do_only_summary: bool=True,
        return_converged_only: bool=False,
        show_errors_in_output: bool=True
    ) -> None:
    """Aggregate the logs of all experiments matching any of ``dft_names``.

    Reads the notebook-level names ``experiments_parent_dir``,
    ``gaussian_logs_folder`` and ``output_parent_dir``; ``output_parent_dir``
    itself is passed on as the output directory.

    Args:
        aggregate_name: Prefix of the aggregate log (``<name>_log.txt``) and
            xyz (``<name>_confs.xyz``) file names.
        dft_names: Lower-case substrings; a directory is selected when its
            lower-cased stem contains at least one of them.
        extract_summary_step_nr: Forwarded unchanged to ``GU.process_many_log_files``.
        ignore_shorter_runs: Forwarded unchanged to ``GU.process_many_log_files``.
        do_only_summary: Forwarded unchanged to ``GU.process_many_log_files``.
        return_converged_only: Forwarded unchanged to ``GU.process_many_log_files``.
        show_errors_in_output: Forwarded unchanged to ``GU.process_many_log_files``.
    """
    _aggregate_experiment_logs(
        aggregate_name=aggregate_name,
        dft_names=dft_names,
        output_dir=output_parent_dir,
        extract_summary_step_nr=extract_summary_step_nr,
        ignore_shorter_runs=ignore_shorter_runs,
        do_only_summary=do_only_summary,
        return_converged_only=return_converged_only,
        show_errors_in_output=show_errors_in_output
    )


def process_log_files_by_dft(
        dft_name: str,
        extract_summary_step_nr: int=0,
        ignore_shorter_runs:bool=False,
        do_only_summary: bool=True,
        return_converged_only: bool=False,
        show_errors_in_output: bool=True
    ) -> None:
    """Aggregate the logs of all experiments matching a single ``dft_name``.

    Reads the same notebook-level names as ``process_log_files``. ``dft_name``
    doubles as the aggregate name, and the output directory passed on is
    ``output_parent_dir / dft_name``.

    Args:
        dft_name: Lower-case substring matched against the lower-cased stem of
            each experiment directory; also used as the aggregate file prefix.
        extract_summary_step_nr: Forwarded unchanged to ``GU.process_many_log_files``.
        ignore_shorter_runs: Forwarded unchanged to ``GU.process_many_log_files``.
        do_only_summary: Forwarded unchanged to ``GU.process_many_log_files``.
        return_converged_only: Forwarded unchanged to ``GU.process_many_log_files``.
        show_errors_in_output: Forwarded unchanged to ``GU.process_many_log_files``.
    """
    _aggregate_experiment_logs(
        aggregate_name=dft_name,
        dft_names=[dft_name],
        output_dir=output_parent_dir.joinpath(dft_name),
        extract_summary_step_nr=extract_summary_step_nr,
        ignore_shorter_runs=ignore_shorter_runs,
        do_only_summary=do_only_summary,
        return_converged_only=return_converged_only,
        show_errors_in_output=show_errors_in_output
    )



In [4]:
###############################################################################
# DFT default run: all_steps, show_errors_in_output=True
###############################################################################
# One of several alternative run configurations in this notebook. Each of them
# assigns the same five names, so only the configuration cell executed last is
# seen by the processing cells that pass these values on.

output_parent_dir = Path("C:/tmp/gaussian/aggregated_results/all_runs_by_dft")
extract_summary_step_nr = 0
ignore_shorter_runs = False
return_converged_only = False
show_errors_in_output = True


In [4]:
###############################################################################
# DFT result at specific step nr 1: step_nr, show_errors=False
###############################################################################
# Alternative run configuration: overwrites the same five names as the other
# configuration cells, so execute only the one needed before processing.

output_parent_dir = Path("C:/tmp/gaussian/aggregated_results/1_step")
extract_summary_step_nr = 1
ignore_shorter_runs = True
return_converged_only = False
show_errors_in_output = False

In [7]:
###############################################################################
# DFT result at specific step nr 10: step_nr, show_errors=False
###############################################################################
# Alternative run configuration: overwrites the same five names as the other
# configuration cells, so execute only the one needed before processing.

output_parent_dir = Path("C:/tmp/gaussian/aggregated_results/10_steps")
extract_summary_step_nr = 10
ignore_shorter_runs = True
return_converged_only = False
show_errors_in_output = False


In [9]:
###############################################################################
# DFT result at specific step nr 25: step_nr, show_errors=False
###############################################################################
# Alternative run configuration: overwrites the same five names as the other
# configuration cells, so execute only the one needed before processing.

output_parent_dir = Path("C:/tmp/gaussian/aggregated_results/25_steps")
extract_summary_step_nr = 25
ignore_shorter_runs = True
return_converged_only = False
show_errors_in_output = False


In [11]:
###############################################################################
# Converged results: return_converged_only=True, show_errors_in_output=False
###############################################################################
# Alternative run configuration: overwrites the same five names as the other
# configuration cells, so execute only the one needed before processing.

output_parent_dir = Path("C:/tmp/gaussian/aggregated_results/converged")
extract_summary_step_nr = 0
ignore_shorter_runs = False
return_converged_only = True
show_errors_in_output = False


In [13]:
###############################################################################
# Starting geometries of converged results: step_nr=1 return_converged_only=True, show_errors_in_output=False
###############################################################################
# Alternative run configuration: overwrites the same five names as the other
# configuration cells, so execute only the one needed before processing.

output_parent_dir = Path("C:/tmp/gaussian/aggregated_results/converged_start_pos")
extract_summary_step_nr = 1
ignore_shorter_runs = True
return_converged_only = True
show_errors_in_output = False


In [5]:
# Lower-case identifiers of the DFT setups to aggregate. An experiment directory
# is selected when its lower-cased name contains one of these strings, so the
# entries must stay lower case.
dft_names = [
            "pbe1pbe_cc_pvtz_tzvpfit",
            "bp86_sto3g",
            "bp86_def2svpp_svpfit",
            "pbe1pbe_sto3g",
            "pbe1pbe_def2svpp_svpfit",
            "b3p86_def2svpp_svpfit",
            "tpssh_gd3_def2svpp_svpfit",
            "wb97xd_def2svpp_svpfit",
        ]

In [14]:
# Name of the log subfolder and the root of all experiments; both are read as
# globals by process_log_files_by_dft.
gaussian_logs_folder = "gaussian_results"
experiments_parent_dir = Path("C:/tmp/gaussian/optimize")

# One aggregation per DFT setup, using the run configuration executed last.
for dft_name in tqdm(dft_names):
    process_log_files_by_dft(
        dft_name=dft_name,
        extract_summary_step_nr=extract_summary_step_nr,
        ignore_shorter_runs=ignore_shorter_runs,
        do_only_summary=True,
        return_converged_only=return_converged_only,
        show_errors_in_output=show_errors_in_output,
    )


 12%|█████▌                                      | 1/8 [00:00<00:06,  1.10it/s]

aggregate_log_file_name: C:\tmp\gaussian\aggregated_results\converged_start_pos\pbe1pbe_cc_pvtz_tzvpfit\pbe1pbe_cc_pvtz_tzvpfit_log.txt
aggregate_xyz_file: C:\tmp\gaussian\aggregated_results\converged_start_pos\pbe1pbe_cc_pvtz_tzvpfit\pbe1pbe_cc_pvtz_tzvpfit_confs.xyz


 25%|███████████                                 | 2/8 [00:04<00:16,  2.70s/it]

aggregate_log_file_name: C:\tmp\gaussian\aggregated_results\converged_start_pos\bp86_sto3g\bp86_sto3g_log.txt
aggregate_xyz_file: C:\tmp\gaussian\aggregated_results\converged_start_pos\bp86_sto3g\bp86_sto3g_confs.xyz


 38%|████████████████▌                           | 3/8 [00:13<00:27,  5.56s/it]

aggregate_log_file_name: C:\tmp\gaussian\aggregated_results\converged_start_pos\bp86_def2svpp_svpfit\bp86_def2svpp_svpfit_log.txt
aggregate_xyz_file: C:\tmp\gaussian\aggregated_results\converged_start_pos\bp86_def2svpp_svpfit\bp86_def2svpp_svpfit_confs.xyz


 50%|██████████████████████                      | 4/8 [00:14<00:14,  3.70s/it]

aggregate_log_file_name: C:\tmp\gaussian\aggregated_results\converged_start_pos\pbe1pbe_sto3g\pbe1pbe_sto3g_log.txt
aggregate_xyz_file: C:\tmp\gaussian\aggregated_results\converged_start_pos\pbe1pbe_sto3g\pbe1pbe_sto3g_confs.xyz


 62%|███████████████████████████▌                | 5/8 [00:19<00:12,  4.01s/it]

aggregate_log_file_name: C:\tmp\gaussian\aggregated_results\converged_start_pos\pbe1pbe_def2svpp_svpfit\pbe1pbe_def2svpp_svpfit_log.txt
aggregate_xyz_file: C:\tmp\gaussian\aggregated_results\converged_start_pos\pbe1pbe_def2svpp_svpfit\pbe1pbe_def2svpp_svpfit_confs.xyz


 75%|█████████████████████████████████           | 6/8 [00:19<00:05,  2.82s/it]

aggregate_log_file_name: C:\tmp\gaussian\aggregated_results\converged_start_pos\b3p86_def2svpp_svpfit\b3p86_def2svpp_svpfit_log.txt
aggregate_xyz_file: C:\tmp\gaussian\aggregated_results\converged_start_pos\b3p86_def2svpp_svpfit\b3p86_def2svpp_svpfit_confs.xyz


 88%|██████████████████████████████████████▌     | 7/8 [00:20<00:02,  2.01s/it]

aggregate_log_file_name: C:\tmp\gaussian\aggregated_results\converged_start_pos\tpssh_gd3_def2svpp_svpfit\tpssh_gd3_def2svpp_svpfit_log.txt
aggregate_xyz_file: C:\tmp\gaussian\aggregated_results\converged_start_pos\tpssh_gd3_def2svpp_svpfit\tpssh_gd3_def2svpp_svpfit_confs.xyz


100%|████████████████████████████████████████████| 8/8 [00:20<00:00,  2.56s/it]

aggregate_log_file_name: C:\tmp\gaussian\aggregated_results\converged_start_pos\wb97xd_def2svpp_svpfit\wb97xd_def2svpp_svpfit_log.txt
aggregate_xyz_file: C:\tmp\gaussian\aggregated_results\converged_start_pos\wb97xd_def2svpp_svpfit\wb97xd_def2svpp_svpfit_confs.xyz





In [6]:
# Name of the log subfolder and the root of all experiments; both are read as
# globals by process_log_files.
gaussian_logs_folder = "gaussian_results"
experiments_parent_dir = Path("C:/tmp/gaussian/optimize")

# Single aggregation over every DFT setup in dft_names, using the run
# configuration executed last.
process_log_files(
    aggregate_name="all_aggregate",
    dft_names=dft_names,
    extract_summary_step_nr=extract_summary_step_nr,
    ignore_shorter_runs=ignore_shorter_runs,
    do_only_summary=True,
    return_converged_only=return_converged_only,
    show_errors_in_output=show_errors_in_output,
)

aggregate_log_file_name: C:\tmp\gaussian\aggregated_results\1_step\all_aggregate_log.txt
aggregate_xyz_file: C:\tmp\gaussian\aggregated_results\1_step\all_aggregate_confs.xyz


### Case 4: Collect all Gaussian log files and process, write aggregated results into single output directory (for error checking)

In [15]:
# Name of the log subfolder inside each experiment and the root of all experiments.
gaussian_logs_folder = "gaussian_results"
parent_dir = Path("C:/tmp/gaussian/optimize")

# Everything is aggregated under one name and one output directory.
aggregate_name = "all_aggregate"
output_dir = Path(f"C:/tmp/gaussian/aggregated_results/{aggregate_name}")

# Every sub directory of parent_dir counts as an experiment; its log folder is
# listed without checking that it exists.
experiment_dirs = [child for child in parent_dir.iterdir() if child.is_dir()]
log_files_dirs = [exp_dir / gaussian_logs_folder for exp_dir in experiment_dirs]

log_files_dirs

[WindowsPath('C:/tmp/gaussian/optimize/crest_ex0a_BP86_Def2SVPP_SVPFit/gaussian_results'),
 WindowsPath('C:/tmp/gaussian/optimize/crest_ex0a_BP86_STO3G/gaussian_results'),
 WindowsPath('C:/tmp/gaussian/optimize/crest_ex0a_PBE1PBE_Def2SVPP_SVPFit/gaussian_results'),
 WindowsPath('C:/tmp/gaussian/optimize/crest_ex0a_PBE1PBE_STO3G/gaussian_results'),
 WindowsPath('C:/tmp/gaussian/optimize/crest_ex0a_s1_BP86_Def2SVPP_SVPFit/gaussian_results'),
 WindowsPath('C:/tmp/gaussian/optimize/crest_ex0a_s1_pbe1pbe_def2svpp_svpfit/gaussian_results'),
 WindowsPath('C:/tmp/gaussian/optimize/crest_ex0b_BP86_Def2SVPP_SVPFit/gaussian_results'),
 WindowsPath('C:/tmp/gaussian/optimize/crest_ex0b_BP86_STO3G/gaussian_results'),
 WindowsPath('C:/tmp/gaussian/optimize/crest_ex0ff_BP86_Def2SVPP_SVPFit/gaussian_results'),
 WindowsPath('C:/tmp/gaussian/optimize/crest_ex15_BP86_Def2SVPP_SVPFit/gaussian_results'),
 WindowsPath('C:/tmp/gaussian/optimize/crest_ex15_BP86_STO3G/gaussian_results'),
 WindowsPath('C:/tmp/ga

In [16]:
# Show the output directory that is passed to the processing call.
print("output_dir", output_dir)

output_dir C:\tmp\gaussian\aggregated_results\all_aggregate


In [17]:
# Collect the ".log" files from every experiment's log directory.
gaussian_log_files = ut.get_file_paths_in_many_dirs(log_files_dirs, ".log")

# Aggregate file names are derived from aggregate_name; the xyz path lies in output_dir.
aggregate_xyz_file = output_dir / f"{aggregate_name}_confs.xyz"
aggregate_log_file_name = f"{aggregate_name}_log.txt"

GU.process_many_log_files(
    input_paths=gaussian_log_files,
    output_dir=output_dir,
    aggregate_log_file_name=aggregate_log_file_name,
    do_only_summary=True,
    write_last_opt_steps_file_path=aggregate_xyz_file,
)
