# Merge Thermo Libraries
This notebook merges multiple thermo libraries generated by ARC into a single library.

### 1. Necessary packages

In [None]:
import logging
import os
import shutil
import glob

from rmgpy import settings
from rmgpy.data.thermo import ThermoLibrary

from easy_rmg_model.rmg2arc.thermo_db import (find_thermo_libs,
                                              load_thermo_database,
                                              load_thermo_lib_by_path,
                                              merge_thermo_lib,
                                              )

%load_ext autoreload
%autoreload 2
%matplotlib inline

### 2. Input
Merge the libraries found in the working directory and save the new library to the save path. If the save path was originally a library, then everything is merged onto that library.
- `save_path` (str): the **full path (to file) to save the merged library**. If the path exists, the file will be overwritten.
- `work_dir` (str): indicates **where your ARC projects are**. The script will search for and find all of the ARC projects and their thermo libraries.
- `complementary_ARC_project_paths` (list): a list of **complementary ARC project paths** that are not in the `work_dir`
- `complementary_lib_paths` (list): a list of **complementary thermo library file paths** to be merged, so that you can also merge files that are not generated by ARC
- `rmg_builtin_libraries` (list): a list of thermo **library names**. These are libraries placed in `RMG-database/input/thermo/libraries/`

In [None]:
# User inputs for the merge. Edit these before running the notebook.

# Extra sources to merge in addition to whatever is found under ``work_dir``.
complementary_ARC_project_paths = []  # additional ARC project directories
complementary_lib_paths = []          # additional thermo library files
rmg_builtin_libraries = []            # names of libraries shipped with RMG-database

# Both paths below share the same project folder, so spell it out only once.
_project_root = '/Users/xiaorui/Dropbox/RMG/Co-OPTIMA shared/relax-rotor'

# Directory that is searched for ARC projects and their thermo libraries.
work_dir = f'{_project_root}/Arkane_Species'
# Full path of the file the merged library is written to.
save_path = f'{_project_root}/Thermo/thermo.py'

One can also use `glob` to quickly find all `thermo.py` files under a certain path

In [None]:
# complementary_lib_paths = glob.glob('/Users/xiaorui/Dropbox/RMG/Co-OPTIMA shared/relax-rotor/Arkane_Species/**/thermo.py', recursive=True)

### 3. Load a RMG database instance

In [None]:
# Create the thermo database, passing the requested built-in library names.
thermo_db = load_thermo_database(libraries=rmg_builtin_libraries)

# Gather every library to merge: those found under ``work_dir``, the
# user-supplied library files, then those found in each extra ARC project.
lib_list = find_thermo_libs(work_dir) + complementary_lib_paths
for project_path in complementary_ARC_project_paths:
    lib_list.extend(find_thermo_libs(project_path))

# Load each collected library into the database.
# NOTE(review): the merge cell looks libraries up in ``thermo_db.libraries``
# by these same entries, so the loader presumably keys them by path - confirm.
for lib_path in lib_list:
    try:
        load_thermo_lib_by_path(path=lib_path, thermo_db=thermo_db, reload=False)
    except TypeError:
        # Name the offending library before letting the error propagate.
        print(f'Find a problematic library: {lib_path}')
        raise

# Built-in libraries were passed to the database constructor above; append
# their names so the merge step iterates over them as well.
lib_list += rmg_builtin_libraries

### 4. Initialize the library
Initialize the library to be saved and a library that contains the thermo entries to be reconsidered

In [None]:
# Destination library: the merge step below adds entries to this one.
base_lib = ThermoLibrary(
    label='The merging library',
    name='The merged library',
)

# Holding library for conflicting thermo entries that still need a manual
# decision on whether they should be added.
tbd_lib = ThermoLibrary(
    label='TBD thermos',
    name='TBD thermos',
)

### 5. Combine the thermo libraries 

In [None]:
# Merge every loaded library into ``base_lib``, in the order given by
# ``lib_list``. ``tbd_lib`` is passed along on every call; presumably it
# collects the conflicting entries - confirm against ``merge_thermo_lib``.
for lib_label in lib_list:
    merge_thermo_lib(base_lib, thermo_db.libraries[lib_label], tbd_lib)

### 6. Save the libraries

In [None]:
# Write the merged library to ``save_path`` and the undecided entries to a
# sibling file with a ``.tbd`` suffix.
tbd_path = f'{save_path}.tbd'
base_lib.save(save_path)
tbd_lib.save(tbd_path)

### 7. Generate Parity plot

In [None]:
import matplotlib.pyplot as plt
from ipywidgets import *

from rmgpy.data.thermo import ThermoDatabase
from rmgpy.species import Species

In [None]:
# Reference thermo libraries from RMG-database used as the comparison baseline.
reference_libraries = [
    'primaryThermoLibrary',
    'Klippenstein_Glarborg2016',
    'thermo_DFT_CCSDTF12_BAC',
    'BurkeH2O2',
    'DFT_QCI_thermo',
    'Narayanaswamy',
    'BurcatNS',
    'CHO',
]
thermo_dir = os.path.join(settings['database.directory'], 'thermo')

# NOTE: this rebinds ``thermo_db``; the database built for the merge step is
# no longer reachable through this name afterwards.
thermo_db = ThermoDatabase()
thermo_db.load(thermo_dir, reference_libraries)

In [None]:
temp = 1000  # Kelvin
# Scale factor applied to the free energies; presumably J/mol -> kJ/mol,
# since the plot axes are labelled in kJ/mol - confirm the units.
per_kilo = 1000

rmg = []   # free energies taken from the reference RMG database
libs = []  # free energies taken from the merged library

for entry in base_lib.entries.values():
    # The entry label is parsed as a SMILES string to build the species.
    molecule = Species().from_smiles(entry.label)
    # Use the first thermo result returned by the reference database.
    reference_thermo = thermo_db.get_all_thermo_data(molecule)[0][0]
    rmg.append(reference_thermo.get_free_energy(temp) / per_kilo)
    libs.append(entry.data.get_free_energy(temp) / per_kilo)

In [None]:
# Parity plot: reference RMG free energy (x) against merged-library value (y).
fig = plt.figure(figsize=(5, 5))
ax = fig.add_subplot(1, 1, 1)

ax.plot(rmg, libs, 'b.')

# Dashed guide lines through the origin with slopes 1.05 and 0.95 (+/-5 %).
for slope in (1.05, 0.95):
    ax.plot([-1e4, 1e4], [-1e4*slope, 1e4*slope], 'r--')

# Use the same range on both axes so the plot stays square around the data.
combined = rmg + libs
lims = [min(combined), max(combined)]
ax.set_xlim(lims)
ax.set_ylim(lims)

ax.set_xlabel('RMG dG [kJ/mol]')
ax.set_ylabel('Lib dG [kJ/mol]')