In [None]:
import rmgpy
import numpy as np
from rmgpy.molecule.molecule import *
from rmgpy.species import *
from rmgpy.chemkin import *
from rmgpy.data.rmg import RMGDatabase
from IPython.display import display
from rmgpy.data.thermo import ThermoLibrary
from rmgpy.rmg.react import react
from rmgpy.species import Species
from rmgpy.reaction import Reaction
from rmgpy.data.rmg import get_db
from rmgpy.molecule.group import Group
from rmgpy.kinetics.arrhenius import ArrheniusBM
from rmgpy import settings  
import time
import matplotlib.pyplot as plt
import matplotlib

In [None]:
# Thermo libraries passed to RMGDatabase.load() below.
thermo_libs = [
    'primaryThermoLibrary',
    'Fluorine',
    'FFCM1(-)',
    'halogens',
    'CHOF_G4',
    'CHOCl_G4',
    'CHOBr_G4',
    'CHOFCl_G4',
    'CHOFBr_G4',
    'CHOFClBr_G4',
    'DFT_QCI_thermo',
    '2-BTP_G4',
    'thermo_DFT_CCSDTF12_BAC',
    'SulfurHaynes',
]

# The kinetics family whose tree this notebook works on. It is written once and
# reused for both loading and lookup, so the family that gets loaded and the
# family that gets fetched from the database can never disagree.
family_to_train = "Retroene"
kin_families = [family_to_train]

database = RMGDatabase()
database.load(
    path=settings['database.directory'],
    thermo_libraries=thermo_libs,
    transport_libraries=[],
    reaction_libraries=[],
    seed_mechanisms=[],  # none used here; e.g. ['BurkeH2O2inN2', 'ERC-FoundationFuelv0.9']
    kinetics_families=kin_families,
    kinetics_depositories=['training'],
    depository=False,  # Don't bother loading the depository information, as we don't use it
)

# Family object used by every later cell.
family = database.kinetics.families[family_to_train]

In [None]:
# clean_tree() runs before generation.
# NOTE(review): presumably this clears any previously loaded tree so that
# generate_tree() starts fresh - confirm against the RMG-Py documentation.
family.clean_tree()

# perf_counter() is a monotonic clock intended for measuring intervals, so the
# reported duration cannot be skewed by a system clock adjustment mid-run.
start = time.perf_counter()
family.generate_tree(
    thermo_database=database.thermo,
    nprocs=1,
    new_fraction_threshold_to_reopt_node=0.25,
    max_batch_size=800,
    extension_iter_max=2,
    extension_iter_item_cap=100,
)
end = time.perf_counter()

print(f"Execution time: {end-start:.4f} s")
print(f"{len(family.groups.entries)} entries in the group")
print(family.groups.entries)


In [None]:

# These are some nodes that are not regularized correctly. Print their group
# definitions now so they can be compared with the output after regularization.
print('Before regularization:')
for node_label, node in family.groups.entries.items():
    if node_label == 'Root_4R!H->O':
        # The extension node.
        print('extension node', node_label, sep='\n')
        noncompliment = node
    elif node_label == "Root_N-4R!H->O":
        # Its complement node; the node and its parent are kept for later cells.
        print('compliment node', node_label, sep='\n')
        compliment_node_to_look_at = node
        node_parent = node.parent
    else:
        # Every other entry in the tree is ignored.
        continue
    print(node.item.to_adjacency_list())
    


In [None]:

# Time the regularization step. perf_counter() is a monotonic clock intended
# for measuring intervals, unlike the wall-clock time.time().
start = time.perf_counter()
family.regularize(thermo_database=database.thermo)
end = time.perf_counter()
print(f"Execution time: {end-start:.4f} s")


# Print the same two nodes again, now that regularization has run.
print('After regularization:')
for node_label, node in family.groups.entries.items():
    if node_label == 'Root_4R!H->O':
        # The extension node.
        print('extension node', node_label, sep='\n')
        noncompliment = node
    elif node_label == "Root_N-4R!H->O":
        # Its complement node; the node and its parent are kept for later cells.
        print('compliment node', node_label, sep='\n')
        compliment_node_to_look_at = node
        node_parent = node.parent
    else:
        # Every other entry in the tree is ignored.
        continue
    print(node.item.to_adjacency_list())

# If regularization was successful, the complement node will show atom types
# that have been narrowed down for *5.
# If it was not, the complement node will look the same as it did before.

In [None]:
# Lastly, make sure that the entire tree passes its isomorphism check.
# perf_counter() is a monotonic clock intended for measuring intervals,
# unlike the wall-clock time.time().
start = time.perf_counter()
family.check_tree()
end = time.perf_counter()
print(f"Execution time: {end-start:.4f} s")

In [None]:
# For further debugging, the templateRxnMap is very useful.
# perf_counter() is a monotonic clock intended for measuring intervals,
# unlike the wall-clock time.time().
start = time.perf_counter()
templateRxnMap = family.get_reaction_matches(
    thermo_database=database.thermo,
    remove_degeneracy=True,
    get_reverse=True,
    exact_matches_only=False,
    fix_labels=True,
)
end = time.perf_counter()
print(f"Execution time: {end-start:.4f} s")

# To debug simple_regularization in isolation you need the templateRxnMap and
# the node to regularize. The node is looked up by label here instead of relying
# on a variable that is only bound when an earlier cell's loop finds a match;
# a missing node now fails with a KeyError naming the label, not a NameError.
compliment_node_to_look_at = family.groups.entries["Root_N-4R!H->O"]
family.simple_regularization(compliment_node_to_look_at, templateRxnMap, test=True)