In [None]:
from network import create_engine
from rdkit import Chem
from rdkit.Chem.Draw import IPythonConsole
from rdkit.Chem import Draw
from IPython.display import SVG

## Adipic acid

In [None]:
# Adipic acid (PubChem CID: 196), kept as a SMILES string and as an RDKit molecule.
ADPA_SMILES = 'O=C(O)CCCCC(=O)O'
ADPA = Chem.MolFromSmiles(ADPA_SMILES)

# Bare name as the final expression so the cell displays the molecule.
ADPA

### C3 Chemistry

### Ethyl lactate to Adipic acid

In [None]:
# Build the engine first: every network object is obtained from it, and it also
# handles configuration.
engine = create_engine()

# Fetch the library objects from the engine and unpack them as
# (molecules, operators, reactions); this order must be kept so the
# initializers are correct.
libs = engine.Libs()
mol_lib, op_lib, rxn_lib = libs

# Seed molecules for the expansion, as SMILES strings: the reactant first,
# followed by the helper molecules. The order of the entries is preserved.
mol_smiles = (
    # reactant
    "CCOC(=O)C(C)O",

    # helper molecules -- batch tagged "known" in the original notebook
    "O", "OO", "I", "C=C",
    "[C-]#[O+]", "BrCCBr", "OC=O", "COC=O",

    # helper molecules -- batch tagged "lauren" in the original notebook
    "CO", "Br", "[Br-]", "N",
    "C(=O)=O", "CC=O", "C1C(O1)CCl", "C=CCBr",

    # helper molecules -- untagged batch
    "OCCO", "CC(O)=O", "OC(CCC)=O", "CC(O)C=C",
)

# Reaction operators for the expansion, written as reaction SMARTS
# ("reactants>>products"). Each entry is preceded by the label the notebook
# gives it; the order of the entries is preserved.
op_smarts = (
    # ---- known pathway (disabled, kept for reference) ----
    # "[C:1](=[O:2])[O:3].[OH2:4]>>[C:1](=[O:2])[OH:4].[OH:3]",  # hydrolysis
    # "[CH3:1][CH:2]([OH:3])>>[C:1]=[C:2].[OH2:3]",  # dehydration
    # "[C:1]=[C:2].[HI:3]>>[I:3][C:1][C:2]",  # addition
    # reductive coupling of alkyl halides:
    # "[I:1][CH2:2][CH2:3][C!H:7].[I:6][CH2:4][CH2:5][C!H:8]>>[C:8][C:5][C:4][C:2][C:3][C:7].[I:1][I:6]",

    # ---- all reaction operators ----

    # hydrogenation (C=C)
    "[C:1]=[C:2]>>[C:1][C:2]",
    # hydrogenation (C=O)
    "[CH0:1]=[O:2]>>[C:1][O:2]",

    # hydrogenolysis
    "[C:1][O:2][C:3]>>[C:1].[C:3][O:2]",
    # hydrodeoxygenation
    "[C:1][OH:2]>>[C:1].[OH2:2]",

    # hydrolysis 2
    "[C:1][O:2][C:3].[OH2:4]>>[C:1][O:2].[C:3][O:4]",

    # dehydration
    "[CH3:1][C:2][OH:3]>>[C:1]=[C:2].[OH2:3]",
    # hydration
    "[C:1]=[C:2].[OH2:3]>>[C:1][C:2][O:3]",

    # decarbonylation
    "[C:1][C:2](=[O:3])[H]>>[C:1].[C:2]#[O+1:3]",

    # decarboxylation
    "[C:1][C:2](=[O:3])([OH:4])>>[C:1].[C:2](=[O:3])(=[O:4])",

    # ketonization
    "[C:7][C:1](=[O:2])[OH:3].[C:8][C:4](=[O:5])[OH:6]>>[C:7][C:1](=[O:2])[C:8].[C:4](=[O:5])(=[O:6]).[OH2:3]",

    # esterification
    "[C:1](=[O:2])[OH:3].[C:4][OH:5]>>[C:1](=[O:2])[O:5][C:4].[OH2:3]",
    # keto-enol tautomerization
    "[C!H0:1][C:2]=[O:3]>>[C:1]=[C:2][OH:3]",
    # aldol condensation 1
    "[C:1](=[O:2])[CH3:3].[CH:4](=[O:5])>>[C:1](=[O:2])[C:3][C:4]([O:5])",
    # aldol condensation 2
    "[C:1](=[O:2])[CH3:3].[CH:4](=[O:5])>>[C:1](=[O:2])[C:3]=[C:4].[OH2:5]",
    # diels-alder
    "[C:1]=[C:2][C:3]=[C:4].[C:5]=[C:6]>>[C:1]1[C:2]=[C:3][C:4][C:5][C:6]1",
    # ketone amination
    "[C:1](=[O:2]).[N!H0:3]>>[C:1][N:3].[OH2:2]",
    # epoxidation
    "[C:1]=[C:2]>>[C:1]1[O][C:2]1",

    # selective oxidation 1
    "[CH2:1][OH:2]>>[C:1](=[O:2])",
    # selective oxidation 2
    "[*:3][CH:1]([OH:2])([*:4])>>[*:3][C:1](=[O:2])([*:4])",
    # selective oxidation 3
    "[C:1](=[O:2])[H]>>[C:1](=[O:2])([OH])",

    # hydroformylation
    "[C:1]=[C:2].[C:3]#[O:4].[H2]>>[O+0:4]=[C+0H:3][C:1][CH:2].[CH:1][C:2]([C+0H:3]=[O+0:4])",

    # baeyer-villiger oxidation 1
    "[C:1](=[O:2])[*;!O].[OH:5][OH:6]>>[C:1](=[O:2])[O:5].[O:6]",
    # baeyer-villiger oxidation 2
    "[C:1](=[O:2])@[C:3].[OH:5][OH:6]>>[C:1](=[O:2])[O:5][C:3].[O:6]",
    # deoxydehydration
    # NOTE(review): the product side uses map number 3 on both a carbon and an
    # oxygen and never emits map number 4 -- confirm this is intended.
    "[*:1][C:2]([OH:3])[C:4]([OH:5])[*:6]>>[*:1][C:2]=[C:3][*:6].[O:3].[OH2:5]",
    # crossed claisen condensation
    "[*:1][C!H:2](=[O:3])[*:4].[*:5][O:6][CH:7](=[O:8])[*:9]>>[*:1][C:2](=[O:3])[*:4][C:7](=[O:8])[*:9].[*:5][O:6]",
    # transmetalation (iodine)
    "[C:1][C:2].[I:3][I:4]>>[C:1][I:3].[C:2][I:4]",
    # transmetalation (bromine)
    "[C:1][C:2].[Br:3][Br:4]>>[C:1][Br:3].[C:2][Br:4]",
    # reductive coupling of alkyl halides (iodides)
    "[C:1][I:2].[C:3][I:4]>>[C:1][C:3].[I:2][I:4]",
    # reductive coupling of alkyl halides (bromides)
    "[C:1][Br:2].[C:3][Br:4]>>[C:1][C:3].[Br:2][Br:4]",

    # oxidative cleavage 1
    # NOTE(review): map number 4 from the reactant side does not appear in the
    # products of cleavage 1 and 2 -- confirm this is intended.
    "[*:1][C:2]([*:3])=[C:4][C:5]>>[*:1][C:2](=[O])[*:3].[*:5]=[O]",
    # oxidative cleavage 2
    "[*:1][C:2]([*:3])=[C:4][C:5]>>[*:1][C:2](=[O])[*:3].[*:5](=[O])[O]",
    # oxidative cleavage 3
    "[*:1][C:2]#[C:3][*:4]>>[*:1][C:2](=[O])[O].[*:4][C:3](=[O])[O]",
    # oxidative cleavage 4
    "[*:1][C:2]([*:3])([O:4])[C:5]([*:6])([O:7])>>[*:1][C:2](=[O:4])[*:3].[*:6][C:5](=[O:7])",

    # hydroxycarbonylation 1
    "[C:1]=[C:2].[C:3]#[O:4].[OH2:5]>>[C:1][C:2][C+0:3](=[O+0:4])[O:5]",
    # hydroxycarbonylation 2
    "[C:1]#[C:2].[C:3]#[O:4].[OH2:5]>>[C:1]=[C:2][C+0:3](=[O+0:4])[O:5]",
    # olefin metathesis
    "[*:1][C:2]=[C:3][*:4].[*:5][C:6]=[C:7][*:8]>>[*:1][C:2]=[C:6][*:5].[*:4][C:3]=[C:7][*:8]",
)

# Seed the libraries: wrap every SMILES / SMARTS string with the engine and add
# the resulting object to the matching library.
for seed_mol in map(engine.Mol, mol_smiles):
    mol_lib.add(seed_mol)
for seed_op in map(engine.Op, op_smarts):
    op_lib.add(seed_op)

# Bind a "strategy" (expansion procedure) to the three libraries via the engine.
strat = engine.CartesianStrategy(mol_lib, op_lib, rxn_lib)

# Expand the network for two generations (num_gens=2).
strat.expand(num_gens=2)

# Report the size of mol_lib beyond the seed list, then the number of reactions.
n_new_molecules = len(mol_lib) - len(mol_smiles)
print(n_new_molecules)
n_reactions = len(rxn_lib)
print(n_reactions)

90 products and 120 reactions after 1st generation

2511 products and 4672 reactions after 2nd generation

In [None]:
# Parse the SMILES into an RDKit molecule and show it as the cell output.
# NOTE(review): the notebook does not say why this structure is inspected --
# presumably a product of the expansion; confirm.
probe_mol = Chem.MolFromSmiles('O=CCC(O)O')
probe_mol

In [None]:
# Print every molecule in mol_lib and gather the matching RDKit objects for drawing.
mol_list = []
for mol in mol_lib:
    print(mol)
    mol_list += [mol.rdkitmol]

# Print every reaction in rxn_lib (the hyperedges connecting molecules in the network).
for rxn in rxn_lib:
    print(rxn)

# Draw all collected molecules in one SVG grid and display it as the cell output.
# NOTE(review): _MolsToGridSVG is underscore-prefixed (private by convention);
# Draw.MolsToGridImage(..., useSVG=True) may be the public route -- confirm
# equivalence before switching.
grid_svg = Draw._MolsToGridSVG(mol_list)
SVG(grid_svg)

In [None]:
def mol_lib_to_file(mol_lib,filepath):
    """Write the SMILES of every molecule in ``mol_lib`` to a text file.

    The file is opened in write mode, so an existing file at ``filepath`` is
    overwritten. The first line is the header ``SMILES,``; each following line
    is one molecule's ``smiles`` attribute followed by a trailing comma.

    Args:
        mol_lib: Iterable of objects exposing a string ``smiles`` attribute.
        filepath: Path of the file to create or overwrite.
    """
    with open(filepath,'w') as out:
        out.write('SMILES,\n')
        # One "<smiles>," row per molecule, in iteration order.
        out.writelines(entry.smiles + ',\n' for entry in mol_lib)

# Export the SMILES of the current molecule library to "test.csv"
# (a relative path, so it is resolved against the working directory).
mol_lib_to_file(mol_lib,"test.csv")

In [None]:
from network import RxnTrackerSingle
# Build a single-reaction tracker over rxn_lib and report the library size.
tracker = RxnTrackerSingle(rxn_lib)
print(len(rxn_lib))

# Walk the parent chains reported for adipic acid. Each chain is printed as its
# reactions (looked up by id in rxn_lib) followed by a blank line, and the walk
# stops once max_paths chains have been shown.
max_paths = 25
target_smiles = "O=C(O)CCCCC(=O)O"
n = 0  # number of chains printed; stays 0 when no chain is produced
for n, chain in enumerate(tracker.getParentChains(target_smiles), start=1):
    for rxnid in chain:
        print(rxn_lib[rxnid])
    print()
    if n >= max_paths:
        print("Path limit reached!")
        break

In [None]:
from network import RxnTrackerDepthFirst
# Build a depth-first tracker over rxn_lib and report the library size.
doubletracker = RxnTrackerDepthFirst(rxn_lib)
print(len(rxn_lib))

# Walk the parent chains reported for "C=CCCCBr". Each chain is iterated as a
# sequence of generations; every generation is printed under a numbered heading
# with its reactions (looked up by id in rxn_lib). A blank line ends each chain,
# and the walk stops once max_paths chains have been shown.
max_paths = 25
n = 0  # number of chains printed; stays 0 when no chain is produced
for n, chain in enumerate(doubletracker.getParentChains("C=CCCCBr"), start=1):
    for g, gen in enumerate(chain):
        print(f"Generation {g}:")
        for rxnid in gen:
            print(rxn_lib[rxnid])
    print()
    if n >= max_paths:
        print("Path limit reached!")
        break