In [None]:
from pickaxe_generic.engine import create_engine
from rdkit import Chem
from rdkit.Chem.Draw import IPythonConsole
from rdkit.Chem import Draw
from IPython.display import SVG

In [None]:
# The engine acts as the factory for every network object used below and
# also carries the configuration, so it has to exist before anything else.
engine = create_engine()

# Libs() hands back the libraries in a fixed order; unpack them in exactly
# this order so that each one ends up with the correct initializer.
(mol_lib, op_lib, rxn_lib) = engine.Libs()

# Initial molecule set, written as SMILES strings. Further down in this cell
# every entry is wrapped with engine.Mol(...) and added to mol_lib.

mol_smiles = (  # first entry is the reactant
    "CCOC(=O)C(C)O",
    # helper molecules (everything below this line)
    "O",
    "OO",
    "I",
    "C=C",
    "[C-]#[O+]",
    "BrCCBr",
    "OC=O",
    "COC=O",  # known -- NOTE(review): what "known" refers to is not evident here; clarify
    "CO",
    "Br",
    "[Br-]",
    "N",
    "C(=O)=O",
    "CC=O",
    "C1C(O1)CCl",
    "C=CCBr",  # lauren -- NOTE(review): presumably an attribution/source tag; confirm
    "OCCO",
    "CC(O)=O",
    "OC(CCC)=O",
    "CC(O)C=C",
)

# Reaction operators, one reaction-SMARTS string per entry, written as
# "reactants>>products". Further down in this cell each string is wrapped with
# engine.Op(...) and added to op_lib.
#
# Notation used throughout (assuming standard RDKit reaction-SMARTS semantics
# -- TODO confirm pickaxe_generic does not alter them):
#   [X+0:n]  atom X with formal charge 0, carrying atom-map number n
#   [*:n]    on the product side, the atom that was mapped as n in the reactants
#   !@ / @   bond that is not in a ring / bond that is in a ring
#   (a.b)    parenthesized product fragments are grouped into a single component;
#            used by the ring ("@") variants, where cleaving a ring bond leaves
#            both ends in the same molecule
#
# NOTE(review): several operators take [H][H] as a reactant, but mol_smiles
# contains no "[H][H]" entry -- confirm whether those operators are expected
# to fire in this notebook.
op_smarts = (
    # hydrogenation of alkene
    "[C+0:1]=[C+0:2].[H][H]>>[*:1][*:2]",
    # hydrogenation of ketone
    "[C+0:1]=[O+0:2].[H][H]>>[*:1][*:2]",
    # hydrolysis of ether (acyclic bond, then ring-bond variant)
    "[*+0:1][O+0:2]!@[*+0:3].[O+0H2:4]>>[*:1][*:2].[*:3][*:4]",
    "[*+0:1][O+0:2]@[*+0:3].[O+0H2:4]>>([*:1][*:2].[*:3][*:4])",
    # keto-enol tautomerization (keto -> enol, then enol -> keto)
    "[C+0H:1][C+0:2]=[O+0:3]>>[*:1]=[*:2][*:3]",
    "[C+0:1]=[C+0:2][O+0H:3]>>[*:1][*:2]=[*:3]",
    # epoxidation of alkene
    "[C+0:1]=[C+0:2].[O+0H:3][O+0H:4]>>[*:1]1[*:2][*:3]1.[*:4]",
    # hydrogenolysis of ether (acyclic bond, then ring-bond variant)
    "[C+0:1]!@[O+0:2].[H][H]>>[*:1].[*:2]",
    "[C+0:1]@[O+0:2].[H][H]>>([*:1].[*:2])",
    # hydrodeoxygenation of alcohol
    "[C,c;+0:1][O+0H:2].[H][H]>>[*:1].[*:2]",
    # hydrolysis of ester (acyclic bond, then ring-bond variant)
    "[C+0:1](=[O+0:2])[O+0:3]!@[*+0:4].[O+0H2:5]>>[*:1](=[*:2])[*:3].[*:4][*:5]",
    "[C+0:1](=[O+0:2])[O+0:3]@[*+0:4].[O+0H2:5]>>([*:1](=[*:2])[*:3].[*:4][*:5])",
    # dehydration of alcohol
    "[C+0H:1][C+0:2][O+0H:3]>>[*:1]=[*:2].[*:3]",
    # hydration of alkene
    "[C+0:1]=[C+0:2].[O+0H2:3]>>[*:1][*:2][*:3]",
    # decarbonylation of aldehyde
    "[*+0:1][C+0H:2]=[O+0:3]>>[*:1].[*-:2]#[*+:3]",
    # decarboxylation
    "[*+0:1][C+0:2](=[O+0:3])[O+0H:4]>>[*:1].[*:3]=[*:2]=[*:4]",
    # ketonization
    "[*+0:1][C+0:2](=[O+0:3])[O+0H:4].[*+0:5][C+0:6](=[O+0:7])[O+0H:8]>>[*:1][*:2](=[*:3])[*:5].[*:4]=[*:6]=[*:7].[*:8]",
    # esterification
    "[*+0:1][C+0:2](=[O+0:3])[O+0H:4].[*+0:5][O+0H:6]>>[*:1][*:2](=[*:3])[*:6][*:5].[*:4]",
    # aldehyde+alcohol <-> ester equilibration
    # NOTE(review): the forward template joins O-C-O with single bonds, which
    # looks like a hemiacetal rather than an ester -- confirm the label.
    "[O+0H:1].[O+0:2]=[C+0H:3]>>[*:1][*:3][*:2]",
    "[O+0H0:1]!@[C+0:2][O+0H:3]>>[*:1].[*:2]=[*:3]",
    "[O+0H0:1]@[C+0:2][O+0H:3]>>([*:1].[*:2]=[*:3])",
    # aldol condensation (addition product, then dehydrated product)
    "[*+0:1][C+0:2](=[O+0:3])[C+0H3:4].[O+0:5]=[C+0H:6][*+0:7]>>[*:1][*:2](=[*:3])[*:4][*:6]([*:5])[*:7]",
    "[*+0:1][C+0:2](=[O+0:3])[C+0H3:4].[O+0:5]=[C+0H:6][*+0:7]>>[*:1][*:2](=[*:3])[*:4]=[*:6][*:7].[*:5]",
    # ketone amination
    "[*+0:1][C+0:2](=[O+0:3])[*+0:4].[*+0:5][N+0H:6][*+0:7]>>[*:1][*:2]([*:6]([*:5])[*:7])[*:4].[*:3]",
    # selective oxidation of alcohols/aldehydes
    "[C+0H:1][O+0H:2]>>[*:1]=[*:2].[H][H]",
    "[C+0H:1]=[O+0:2].[O+0H:3][O+0H:4]>>[*:1](=[*:2])[*:3].[*:4]",
    # Williamson ether synthesis
    "[*+0:1][O+0H:2].[*+0:3][F,Cl,Br,I;+0:4]>>[*:1][*:2][*:3].[*:4]",
    # diol carboxylation
    "[O+0H:1][C+0:2][C+0:3][O+0H:4].[O+0:5]=[C+0:6]=[O+0:7]>>[*:1].[*:2]1[*:3][*:4][*:6](=[*:7])[*:5]1",
    # McMurry reaction
    "[*+0:1][C+0:2](=[O+0:3])[*+0:4].[*+0:5][C+0:6](=[O+0:7])[*+0:8]>>[*:1][*:2]([*:4])=[*:6]([*:5])[*:8].[*:3]=[*:7]",
)

# add initial molecules and operators to libraries
for smiles in mol_smiles:
    mol_lib.add(engine.Mol(smiles))
for smarts in op_smarts:
    op_lib.add(engine.Op(smarts))

# snapshot the library sizes before expanding, so the summary below counts
# only what the expansion itself produced
num_initial_mols = len(mol_lib)
num_initial_rxns = len(rxn_lib)

# attach a "strategy" or "expansion procedure" to the library objects using the engine
strat = engine.CartesianStrategy(mol_lib, op_lib, rxn_lib)

# run the requested number of "generations"; per the original note, one
# generation here means a single cartesian product step
num_gens = 2
strat.expand(num_gens=num_gens)

# the message text must sit inside the f-string quotes, and the reaction
# count has to come from rxn_lib rather than mol_lib
print(f"{len(mol_lib) - num_initial_mols} new molecules after {num_gens} generations")
print(f"{len(rxn_lib) - num_initial_rxns} reactions after {num_gens} generations")

In [None]:
# Parse a SMILES string into an RDKit molecule; as the final expression of the
# cell, the resulting object is what the notebook displays. The same SMILES is
# the molecule looked up through engine.Mol(...) in the RxnTrackerDepthFirst
# cell of this notebook.
Chem.MolFromSmiles("COC(CCC)=C(C)OC(O)C")

In [None]:
# Walk the molecule library once: echo each SMILES string and gather the
# matching RDKit molecule objects for the grid drawing at the end of the cell.
mol_list = []
for entry in mol_lib:
    print(entry.smiles)
    mol_list += [entry.rdkitmol]

# Dump every reaction (hyperedge) that links molecules in the network.
for reaction in rxn_lib:
    print(reaction)

# Lay out all collected molecules in one SVG grid; as the last expression of
# the cell, the SVG object is what the notebook renders.
grid_svg = Draw._MolsToGridSVG(mol_list)
SVG(grid_svg)

In [None]:
def mol_lib_to_file(mol_lib, filepath):
    """Write the SMILES string of every molecule in ``mol_lib`` to a text file.

    The file starts with the header line ``SMILES,`` and then holds one line
    per molecule, each consisting of the SMILES followed by a trailing comma.
    An existing file at ``filepath`` is overwritten.

    Args:
        mol_lib: Iterable of objects exposing a string ``smiles`` attribute.
        filepath: Destination path, opened in text write mode.
    """
    with open(filepath, "w") as out:
        out.write("SMILES,\n")
        # Generator keeps the writing lazy: rows are emitted as the library
        # is iterated, just like a write call per molecule would.
        out.writelines(entry.smiles + ",\n" for entry in mol_lib)


# Export the current contents of mol_lib. The path is relative, so the file is
# created (or overwritten) in the notebook's current working directory.
mol_lib_to_file(mol_lib, "test.csv")

In [None]:
from pickaxe_generic.utils import RxnTrackerDepthFirst

# Build a tracker over the reaction library and report how many reactions it holds.
doubletracker = RxnTrackerDepthFirst(rxn_lib)
print(len(rxn_lib))

# uid of the molecule whose parent chains are requested
target_uid = engine.Mol("COC(CCC)=C(C)OC(O)C").uid
# uids of the initial molecule set, handed to the tracker as its reagent table
reagent_uids = [engine.Mol(smiles).uid for smiles in mol_smiles]

for chain in doubletracker.getParentChains(
    target_uid,
    reagent_table=reagent_uids,
    max_depth=2,
):
    # A chain is iterated generation by generation; each generation yields
    # reaction ids that are used to index rxn_lib.
    for g, gen in enumerate(chain):
        print(f"Generation {g}:")
        for rxnid in gen:
            print(rxn_lib[rxnid])
    # blank line separates consecutive chains in the output
    print()