In [68]:
from rxntools import utils
import pandas as pd
from rdkit import Chem
from rdkit.Chem import AllChem
from ergochemics import mapping
from typing import List

In [87]:
def make_rule_id(n: int, prefix: str = "rule", width: int = 4) -> str:
    """
    Build a rule identifier by appending a zero-padded number to a prefix.

    Args:
        n (int): Rule number; must be 1 or greater.
        prefix (str): Text placed in front of the number. Defaults to "rule".
        width (int): Minimum number of digits; shorter numbers are
            left-padded with zeros. Defaults to 4.

    Returns:
        str: The identifier, e.g. make_rule_id(4) -> 'rule0004'.

    Raises:
        ValueError: If n is smaller than 1.
    """
    if n < 1:
        raise ValueError("Input must be >= 1.")

    # Numbers with more digits than `width` are kept whole, never truncated.
    padded_number = format(n, f"0{width}d")
    return f"{prefix}{padded_number}"


In [80]:
# Load the minimal operator table and collect the SMARTS string of every operator.
gen_rxn_operators_df = pd.read_csv("../data/raw/JN1224MIN_rules.tsv", sep="\t")
gen_rxn_operators_list: List[str] = gen_rxn_operators_df["SMARTS"].to_list()

# Load the processed MetaCyc reactions and collect their unmapped reaction strings.
EnzymeMap_MetaCyc_rxns_df = pd.read_csv("../data/raw/enzymemap_MetaCyc_processed.csv")
EnzymeMap_MetaCyc_rxns_unmapped: List[str] = EnzymeMap_MetaCyc_rxns_df["unmapped"].to_list()

# Strip hydrogen ions from each reaction string so that ergochemics can map it:
# first every ".[H+]" substring is deleted, then every remaining "[H+]." substring.
EnzymeMap_MetaCyc_rxns_cleaned: List[str] = [
    unmapped_rxn.replace(".[H+]", "").replace("[H+].", "")
    for unmapped_rxn in EnzymeMap_MetaCyc_rxns_unmapped
]

In [91]:
# Run mapping with the generalized reaction rules.

# Only the first MAX_RXNS_TO_MAP cleaned reactions are processed;
# set it to None to process every reaction.
MAX_RXNS_TO_MAP = 5

# One inner list per processed reaction, holding the IDs of the rules that mapped it.
all_mapped_operators: List[List[str]] = []

# (reaction index, rule ID, error description) for every operator whose mapping
# attempt raised, so that failures stay inspectable instead of vanishing silently.
failed_mappings: List[tuple] = []

for rxn_idx, rxn in enumerate(EnzymeMap_MetaCyc_rxns_cleaned[:MAX_RXNS_TO_MAP]):

    # rule IDs of the operators this reaction can be mapped with
    mapped_operators: List[str] = []

    for i, operator in enumerate(gen_rxn_operators_list):
        # rule IDs are the 1-based position of the operator in gen_rxn_operators_list
        rule_id = make_rule_id(i + 1)
        try:
            mapped_rxn: mapping.OperatorMapResult = mapping.operator_map_reaction(rxn=rxn, operator=operator)
            if mapped_rxn.did_map:
                mapped_operators.append(rule_id)
        except Exception as err:
            # Best effort: an operator that errors is treated as "did not map".
            # Catching Exception (not a bare except) lets KeyboardInterrupt and
            # SystemExit propagate, so the loop can still be interrupted.
            failed_mappings.append((rxn_idx, rule_id, repr(err)))
    all_mapped_operators.append(mapped_operators)

In [92]:
# Display the matched rule IDs: one inner list per processed reaction.
all_mapped_operators

[['rule0002', 'rule0754'],
 ['rule0003', 'rule0753'],
 ['rule0003'],
 ['rule0003', 'rule0348'],
 ['rule0002', 'rule0347']]