In [1]:
# pip install pickle5 pandas biopython matplotlib drawSvg bs4
import io, os, re, pickle5, time, math, urllib, Bio.PDB, sys, pandas as pd
from rdkit import Chem, RDConfig
from rdkit.Chem import AllChem, PandasTools, Draw, rdDepictor
from rdkit.Chem.Draw import IPythonConsole, rdMolDraw2D, MolDraw2D
from rdkit.Geometry.rdGeometry import Point3D
# Ask PandasTools to render molecule columns as images in every DataFrame.
PandasTools.RenderImagesInAllDataFrames(images=True)
# Uncomment to lift pandas' column / row display limits while inspecting `df`.
#pd.set_option('display.max_columns', None)
#pd.set_option('display.max_rows', None)
# `IPython.display` is the public home of these names; importing them from
# `IPython.core.display` is deprecated.
from IPython.display import display, HTML, SVG
# Let the notebook's `.container` element use the full browser width.
display(HTML("<style>.container { width:100% !important; }</style>"))
# SECURITY NOTE: unpickling can execute arbitrary code -- only load a
# `df_cov_sanitized.pkl` that comes from a trusted source.
df = pd.read_pickle('df_cov_sanitized.pkl')

# In every adduct, replace each [At] atom by `<E>[At]`, where <E> is the first
# character of that row's `aa_bond_atom_name`.  The [At] query does not depend
# on the row, so it is parsed once instead of once per row.
_at_query = Chem.MolFromSmarts('[At]')
df['adduct_pdb'] = df.apply(
    lambda x: Chem.ReplaceSubstructs(
        x.adduct_pdb,
        _at_query,
        Chem.MolFromSmiles(f'{x.aa_bond_atom_name[0]}[At]'),
        replaceAll=True,
    )[0],
    axis=1,
)

from matplotlib.colors import ColorConverter 
def get_shape(root):
    """Return ``(width, height)`` of an SVG element, read from its ``viewBox``.

    Parameters
    ----------
    root : xml.etree.ElementTree.Element
        Element carrying a ``viewBox`` attribute of four numbers
        (``min-x min-y width height``) separated by whitespace and/or commas.

    Returns
    -------
    tuple[int, int]
        Width and height as integers.  Fractional values are rounded up so a
        canvas sized from the result never clips the drawing.

    Raises
    ------
    KeyError
        If the element has no ``viewBox`` attribute.
    ValueError
        If the attribute does not hold exactly four numeric fields.
    """
    # Commas are legal separators in a viewBox; normalise them to spaces.
    _min_x, _min_y, width, height = root.attrib['viewBox'].replace(',', ' ').split()
    # Go through float() so values such as "100.0" or "54.5" are accepted.
    return math.ceil(float(width)), math.ceil(float(height))

import xml.etree.ElementTree as ET
def drawReactionInSVG(rctn1_svg, rctn2_svg, prdct_svg):
    """Lay out three SVG drawings as ``reactant 1 + reactant 2 -> product``.

    Each argument is SVG markup given as text.  The direct children of every
    drawing's root are placed in their own ``<symbol>``; a plus sign (10 wide)
    is put between the two reactants and an arrow (40 wide) in front of the
    product.  The drawings are centred vertically on the tallest one (the
    canvas is at least 10 high).  Every ``ellipse`` that is a direct child of
    the first drawing's root is restyled as a translucent red ring of radius 16.

    Returns the combined ``<svg>`` element serialised to a string.
    """
    plus_size, arrow_width = 10, 40
    thin_black = {'stroke': 'rgb(0, 0, 0)', 'stroke-width': '1'}

    first_root, second_root, product_root = [
        ET.fromstring(markup) for markup in (rctn1_svg, rctn2_svg, prdct_svg)
    ]
    (w1, h1), (w2, h2), (w3, h3) = [
        get_shape(root) for root in (first_root, second_root, product_root)
    ]

    # Horizontal layout: [reactant 1][plus][reactant 2][arrow][product].
    canvas_height = max(h1, h2, h3, plus_size)
    midline = canvas_height / 2
    x_second = w1 + plus_size
    x_arrow = x_second + w2
    x_product = x_arrow + arrow_width

    canvas = ET.Element('svg', attrib={'viewBox': f'0 0 {x_product + w3} {canvas_height}'})

    def add_symbol(symbol_id):
        # Append an empty <symbol id=...> to the output canvas and return it.
        return ET.SubElement(canvas, 'symbol', attrib={'id': symbol_id})

    # Reactant 1: copy the children, restyling ellipses on the way.
    ring_attributes = (
        ('rx', '16'),
        ('ry', '16'),
        ('style', 'fill:none;stroke:red;stroke-width:4px;stroke-opacity:0.3'),
    )
    first_symbol = add_symbol('rctn1')
    for node in first_root:
        if node.tag == 'ellipse':
            for key, value in ring_attributes:
                node.set(key, value)
        first_symbol.append(node)

    # Plus sign: one horizontal and one vertical stroke in a 10x10 box.
    plus_symbol = add_symbol('plus')
    ET.SubElement(plus_symbol, 'line', attrib={'x1': '0', 'x2': '10', 'y1': '5', 'y2': '5', **thin_black})
    ET.SubElement(plus_symbol, 'line', attrib={'x1': '5', 'x2': '5', 'y1': '0', 'y2': '10', **thin_black})

    # Reactant 2: children copied unchanged.
    add_symbol('rctn2').extend(list(second_root))

    # Arrow: a shaft plus a head drawn as a path.
    arrow_symbol = add_symbol('arrow')
    ET.SubElement(arrow_symbol, 'line', attrib={'x1': '0', 'x2': '35', 'y1': '5', 'y2': '5', **thin_black})
    ET.SubElement(arrow_symbol, 'path', attrib={
        'd': 'M 25 1 L 40 5 L 25 9 C 30 5 30 5 25 1',
        'stroke-miterlimit': '1',
        'stroke-width': '1',
    })

    # Product: children copied unchanged.
    add_symbol('prdct').extend(list(product_root))

    # Instantiate the symbols left to right.
    # NOTE(review): the arrow is offset by 4 while the plus sign uses 5, although
    # both symbols are drawn around y=5 -- confirm the 1-unit shift is intended.
    placements = (
        ('#rctn1', '0',            midline - h1 / 2),
        ('#plus',  str(w1),        midline - 5),
        ('#rctn2', str(x_second),  midline - h2 / 2),
        ('#arrow', str(x_arrow),   midline - 4),
        ('#prdct', str(x_product), midline - h3 / 2),
    )
    for href, x_pos, y_pos in placements:
        ET.SubElement(canvas, 'use', attrib={'href': href, 'x': x_pos, 'y': str(y_pos)})

    return ET.tostring(canvas, encoding="unicode")

In [2]:
# Shared RDKit drawing options; drawers receive them through SetDrawOptions(op).
# Option reference:
# https://rdkit.org/docs/source/rdkit.Chem.Draw.rdMolDraw2D.html?#rdkit.Chem.Draw.rdMolDraw2D.MolDrawOptions
op = rdMolDraw2D.MolDrawOptions()
for _option, _setting in (
    ('bondLineWidth', 2),
    # Identical min and max pin the font size to 18.
    ('maxFontSize', 18),
    ('minFontSize', 18),
    # ('legendFontSize', 18),
    ('fillHighlights', False),
    ('fixedBondLength', 24),
    ('centreMoleculesBeforeDrawing', True),
    ('atomHighlightsAreCircles', True),
    # FreeSans.ttf under RDConfig.RDCodeDir; alternatives noted by the author:
    # Amadeus.ttf | Telex-Regular.ttf
    ('fontFile', os.path.join(RDConfig.RDCodeDir, 'Chem', 'Draw', 'FreeSans.ttf')),
):
    setattr(op, _option, _setting)

def MolToSVG(res, atom):
    """Draw a labelled two-atom fragment and return it as SVG text.

    The fragment is a carbon bonded to the element named by the first
    character of ``atom`` (SMILES ``C<atom[0]>``).  Atom 0 is labelled with
    ``res`` and atom 1 with ``atom``.  The drawing uses a 100x54 canvas and the
    module-level options ``op``.

    In the returned text every ``xmlns=`` is renamed to ``this_is_garbage=``
    (presumably so ElementTree later sees un-namespaced tag names -- confirm).

    NOTE(review): a later cell defines a one-argument ``MolToSVG(m)`` that
    replaces this function once that cell has run.
    """
    fragment = rdMolDraw2D.PrepareMolForDrawing(Chem.MolFromSmiles(f'C{atom[0]}'))

    canvas = rdMolDraw2D.MolDraw2DSVG(100, 54)
    canvas.SetDrawOptions(op)
    # Per-drawer label overrides: atom index -> text to show.
    canvas_options = canvas.drawOptions()
    for atom_idx, label in enumerate((res, atom)):
        canvas_options.atomLabels[atom_idx] = label

    canvas.DrawMolecule(fragment)
    canvas.FinishDrawing()
    svg_text = canvas.GetDrawingText()
    return svg_text.replace('xmlns=', 'this_is_garbage=')

# (residue, atom name) pairs of the accepted nucleophilic atoms.  Nothing below
# reads this list; it has its own name so that it is no longer silently
# overwritten by the label table that follows.
Acceptable_Nucleophile_PDB_Atoms = [('ASP', 'OD1'), ('ASP', 'OD2'), ('CYS', 'SG'), ('GLU', 'OE1'), ('GLU', 'OE2'), ('HIS', 'ND1'), ('HIS', 'NE2'), ('LYS', 'NZ'), ('MET', 'SD'),  ('THR', 'OG1'), ('SER', 'OG'), ('TYR', 'OH')]
# (residue, label) pairs used for drawing.  MolToSVG takes the element from the
# label's first character.  GLU and HIS are listed twice; the dict below keeps
# one entry per residue.
Acceptable_Nucleophile = [('ASP', 'OH'), ('CYS', 'SH'), ('GLU', 'OH'), ('GLU', 'OH'), ('HIS', 'N'), ('HIS', 'N'), ('LYS', 'NH2'), ('MET', 'SCH3'),  ('THR', 'OH'), ('SER', 'OH'), ('TYR', 'OH')]
# Residue name -> SVG text of its labelled fragment.
Amino_Acids_SVGs = {res: MolToSVG(res, label) for res, label in Acceptable_Nucleophile}

In [3]:
def _make_draw_options():
    """Build the RDKit drawing options used by the drawers in this cell.

    Option reference:
    https://rdkit.org/docs/source/rdkit.Chem.Draw.rdMolDraw2D.html?#rdkit.Chem.Draw.rdMolDraw2D.MolDrawOptions
    """
    options = rdMolDraw2D.MolDrawOptions()

    # Line and font sizes (identical min/max pin the font size to 18).
    options.bondLineWidth = 2
    options.maxFontSize = 18
    options.minFontSize = 18
    #options.legendFontSize = 18

    # Highlighting and layout.
    options.fillHighlights = False
    options.fixedBondLength = 24
    options.centreMoleculesBeforeDrawing = True
    options.atomHighlightsAreCircles = True

    # FreeSans.ttf under RDConfig.RDCodeDir; alternatives noted by the author:
    # Amadeus.ttf | Telex-Regular.ttf
    options.fontFile = os.path.join(RDConfig.RDCodeDir, 'Chem', 'Draw', 'FreeSans.ttf')
    return options


# NOTE(review): these values repeat the `op` settings of cell In [2]; consider
# defining the options once and reusing them.
op = _make_draw_options()

def MolToSVG(m):
    """Draw molecule ``m`` on a square canvas sized to its drawing; return SVG text.

    The molecule is drawn twice with the module-level options ``op``: first on
    a 5000x5000 probe canvas to measure the drawing, then on a canvas whose
    side is the measured extent.

    Side effect: when ``m`` has no conformer, 2D coordinates are computed on
    ``m`` in place.

    In the returned text every ``xmlns=`` is renamed to ``this_is_garbage=``.

    Raises ``ValueError`` when the probe drawing contains no path coordinates.

    NOTE(review): this definition replaces the two-argument ``MolToSVG(res,
    atom)`` from cell In [2] once this cell has run.
    """
    if not m.GetNumConformers():
        rdDepictor.Compute2DCoords(m)

    probe = rdMolDraw2D.MolDraw2DSVG(5000, 5000)
    probe.SetDrawOptions(op)
    probe.DrawMolecule(m)
    probe.FinishDrawing()

    # Collect the contents of every d='...' attribute.  A raw string is used so
    # the pattern contains no invalid escape sequence such as "\=".
    path_data = re.findall(r"(?<=d=').*?(?=')", probe.GetDrawingText(), re.DOTALL)
    # Blank out path-command letters, commas and newlines; numbers remain.
    numbers_only = re.sub('[a-zA-Z,\n]', ' ', ' '.join(path_data))
    coords = [float(token) for token in numbers_only.split()]
    if not coords:
        raise ValueError('drawing contains no path coordinates; cannot size the canvas')
    # x and y values are pooled, so this is the spread between the smallest and
    # the largest coordinate on either axis.
    side = int(max(coords) - min(coords)) + 1

    drawer = rdMolDraw2D.MolDraw2DSVG(side, side)
    drawer.SetDrawOptions(op)
    drawer.DrawMolecule(m)
    drawer.FinishDrawing()
    return drawer.GetDrawingText().replace('xmlns=', 'this_is_garbage=')


# matplotlib's RGB value for 'red'.  Not referenced by the code visible in
# these cells -- NOTE(review): confirm whether it is still needed.
color_code_red = ColorConverter().to_rgb('red')
def get_proper_size(m):
    """Return a square ``(side, side)`` canvas size measured from ``m``'s drawing.

    ``m`` is prepared for drawing and rendered once on a 5000x5000 probe
    canvas with the module-level options ``op``.  ``side`` is the spread
    between the smallest and largest path coordinate in that SVG, truncated to
    an int, plus one.

    Raises ``ValueError`` when the probe drawing contains no path coordinates.
    """
    probe = rdMolDraw2D.MolDraw2DSVG(5000, 5000)
    probe.SetDrawOptions(op)
    prepared = rdMolDraw2D.PrepareMolForDrawing(m)
    probe.DrawMolecule(prepared)
    probe.FinishDrawing()

    # Collect the contents of every d='...' attribute.  A raw string is used so
    # the pattern contains no invalid escape sequence such as "\=".
    path_data = re.findall(r"(?<=d=').*?(?=')", probe.GetDrawingText(), re.DOTALL)
    # Blank out path-command letters, commas and newlines; numbers remain.
    numbers_only = re.sub('[a-zA-Z,\n]', ' ', ' '.join(path_data))
    coords = [float(token) for token in numbers_only.split()]
    if not coords:
        raise ValueError('drawing contains no path coordinates; cannot size the canvas')
    # x and y values are pooled; with the drawing centred on a square canvas
    # this presumably approximates the larger of width and height -- confirm.
    side = int(max(coords) - min(coords)) + 1
    return side, side

# For every row with a warhead pattern, write svg/<adduct_name>.html showing
# binder + amino-acid fragment -> adduct.
svg_collector = {}  # not filled by this loop

# Create the output directory up front instead of failing on the first write.
os.makedirs('svg', exist_ok=True)
# Row-independent query, parsed once for the fallback path below.
at_pattern = Chem.MolFromSmarts('[At]')

for row_idx, row in df[df['warhead_smiles'].notna()].iterrows():
    binder_pdb = row['binder_pdb']
    try:
        warhead_atom_idxs = binder_pdb.GetSubstructMatches(Chem.MolFromSmarts(row['warhead_smiles']))
    except Exception:
        # Best effort: if matching fails the binder is drawn without highlights.
        # `Exception` (not a bare except) keeps Ctrl-C / kernel interrupt working.
        warhead_atom_idxs = []
    aa_svg = Amino_Acids_SVGs[row['aa_name']]

    # --- adduct -----------------------------------------------------------
    adduct_pdb = row['adduct_pdb']
    try:
        size = get_proper_size(adduct_pdb)
    except Exception:
        # Fallback: rebuild the adduct from its SMILES and insert the bonded
        # atom in front of [At], as is done for the `adduct_pdb` column.
        adduct_pdb = Chem.MolFromSmiles(row['adduct_smiles'])
        adduct_pdb = Chem.ReplaceSubstructs(
            adduct_pdb,
            at_pattern,
            Chem.MolFromSmiles(f'{row["aa_bond_atom_name"][0]}[At]'),
            replaceAll=True,
        )[0]
        size = get_proper_size(adduct_pdb)
        # Report the index of each row that needed the fallback.
        print(row_idx)
    drawer = rdMolDraw2D.MolDraw2DSVG(*size)
    drawer.SetDrawOptions(op)
    # Show the residue name in place of the first At atom.
    At_idx = [a.GetIdx() for a in adduct_pdb.GetAtoms() if a.GetSymbol()=='At'][0]
    opts = drawer.drawOptions()
    opts.atomLabels[At_idx] = row['aa_name']
    drawer.DrawMolecule(adduct_pdb)
    drawer.FinishDrawing()
    adduct_svg = drawer.GetDrawingText().replace('xmlns=','this_is_garbage=')

    # --- binder -----------------------------------------------------------
    size = get_proper_size(binder_pdb)
    binder_pdb = rdMolDraw2D.PrepareMolForDrawing(binder_pdb)
    drawer = rdMolDraw2D.MolDraw2DSVG(*size)
    drawer.SetDrawOptions(op)
    if len(warhead_atom_idxs) != 1:
        # Zero or several matches: draw without highlighting.
        drawer.DrawMolecule(binder_pdb)
    else:
        # Exactly one match: highlight its atoms, no bonds.
        drawer.DrawMolecule(binder_pdb, highlightAtoms=warhead_atom_idxs[0], highlightBonds=[])
    drawer.FinishDrawing()
    binder_svg = drawer.GetDrawingText().replace('xmlns=','this_is_garbage=')

    # --- page ---------------------------------------------------------------
    reaction_svg = drawReactionInSVG(binder_svg, aa_svg, adduct_svg)
    with open(f"svg/{row['adduct_name']}.html", 'w', encoding='utf-8') as fw:
        fw.write('<!DOCTYPE html><html lang="en-us" dir="ltr" ><body>\n\n')
        fw.write(reaction_svg)
        fw.write('\n\n</body></html>')

4135
4136
4137
4138
4139
4140
