In [3]:
import json
import pandas as pd
from typing import List
from rxntools import reaction

In [4]:
# Load the table of reported reactions from the interim parquet file.
reported_rxns_df = pd.read_parquet("../data/interim/enzymemap_MetaCyc_JN_mapped.parquet")

# Read the cofactor mapping from JSON and keep only its values (in file order).
with open('../data/raw/cofactors.json') as cofactors_file:
    cofactors_dict = json.load(cofactors_file)
cofactors_list: List[str] = list(cofactors_dict.values())


In [5]:
# Keep only the reported reactions whose top mapped operator is the rule of interest.
query_rule = 'rule0002'
matches_query_rule = reported_rxns_df['top_mapped_operator'] == query_rule
query_df = reported_rxns_df.loc[matches_query_rule]

# Pull the 'mapped' column of the selected rows out as a plain Python list.
atom_mapped_rxns_list: List[str] = query_df['mapped'].tolist()

In [None]:
radius = 1
include_stereo = True
max_templates = 1  # stop after this many extracted templates; set to None to process every reaction
all_rxn_templates: List[str] = []  # one reaction template per qualifying reaction

# Defined up front so that later cells see None (instead of raising a NameError
# or silently showing a stale value left in the kernel) when no reaction in the
# list has exactly one substrate and one product.
substrate_template = None
product_template = None

# for each fully atom-mapped reaction
for rxn_SMARTS in atom_mapped_rxns_list:

    # create an instance of the reaction.mapped_reaction class then extract atoms undergoing bond changes
    # NOTE(review): consider_stereo is hard-coded here (True) and in get_substrates /
    # get_products below (False), independently of `include_stereo` — confirm this is intended.
    mapped_rxn = reaction.mapped_reaction(rxn_SMARTS)
    changed_atoms, broken_bonds, formed_bonds = mapped_rxn.get_all_changed_atoms(
        include_cofactors=False,  # set to False since we don't want changed cofactor atoms
        consider_stereo=True,
        cofactors_list=cofactors_list,
    )

    substrates_list = mapped_rxn.get_substrates(cofactors_list=cofactors_list, consider_stereo=False)
    products_list = mapped_rxn.get_products(cofactors_list=cofactors_list, consider_stereo=False)

    # reset the template string for this reaction; it stays empty if the reaction is skipped
    rxn_template = ''

    # only reactions involving a single substrate and a single product are handled
    if len(substrates_list) == 1 and len(products_list) == 1:

        # extract a template around the substrate
        substrate_template = mapped_rxn.get_template_around_rxn_site(
            atom_mapped_substrate_smarts=substrates_list[0],
            reactive_atom_indices=list(changed_atoms),
            radius=radius,
            include_stereo=include_stereo,
        )

        # extract a template around the product
        # (the keyword is named for substrates, but the product is what gets passed here)
        product_template = mapped_rxn.get_template_around_rxn_site(
            atom_mapped_substrate_smarts=products_list[0],
            reactive_atom_indices=list(changed_atoms),
            radius=radius,
            include_stereo=include_stereo,
        )

        # assumes the conventional 'reactants>>products' reaction-SMARTS layout —
        # TODO confirm against whatever consumes all_rxn_templates
        rxn_template = f"{substrate_template}>>{product_template}"
        all_rxn_templates.append(rxn_template)

        # Stop only once enough templates were actually extracted. The previous
        # unconditional `break` ended the loop after the first reaction even when
        # that reaction had been skipped.
        if max_templates is not None and len(all_rxn_templates) >= max_templates:
            break



In [11]:
# display the template extracted around the substrate's reaction site
substrate_template

'[C&H3:1][C&H2:2][O&H1:3]'

In [12]:
# display the template extracted around the product's reaction site
product_template

'[C&H3:1][C&H1:2]=[O:3]'