In [1]:
import pandas as pd
import molplotly
from rdkit import Chem
import plotly.express as px
from rdkit.Chem import rdChemReactions

# Load the results table; the path is relative to the kernel's working directory.
# Later cells read the 'Conversion %', 'Row', 'Column', 'SampleName' and '...&inchi' columns from it.
df = pd.read_csv('hte_data.csv')

In [2]:
# Missing conversion readings count as 0; astype(int) then truncates any
# fractional part so the column holds whole percentages.
df['Conversion %'] = (
    df['Conversion %']
    .fillna(0.0)
    .astype(int)
)


def convert_inchi(inchi) -> 'str | None':
    """Convert an InChI string into a SMILES string via RDKit.

    Args:
        inchi: InChI identifier. Any non-string value (for example the
            NaN/None found in empty cells) is treated as missing.

    Returns:
        The SMILES produced by ``Chem.MolToSmiles``, or ``None`` when the
        input is not a string or RDKit cannot parse it.
    """
    if not isinstance(inchi, str):
        return None
    mol = Chem.MolFromInchi(inchi)
    # MolFromInchi yields None for unparseable input; handing that to
    # MolToSmiles would raise, so report the structure as missing instead.
    if mol is None:
        return None
    return Chem.MolToSmiles(mol)

# SMILES columns that are later combined into the per-row reaction SMILES.
vis_cols = [
    'M&Starting Material&smiles',
    'M&Starting Material 2&smiles',
    'M&Product&smiles',
]

# For every column whose name contains 'inchi', add a sibling column named
# '<prefix>&smiles' holding the converted structure.
smiles_cols = []
for inchi_col in list(df.columns):
    if 'inchi' not in inchi_col:
        continue
    smiles_col = f"{inchi_col.rsplit('&', 1)[0]}&smiles"
    smiles_cols.append(smiles_col)
    df[smiles_col] = df[inchi_col].apply(convert_inchi)
df.tail()

Unnamed: 0,SampleName,M&Catalyst&Name,M&Base&Name,M&Internal Standard&inchi,M&Starting Material&inchi,M&Starting Material 2&inchi,M&Product&inchi,M&Base&inchi,M&Solvent&inchi,M&Solvent 2&inchi,...,Conversion %,M&Internal Standard&smiles,M&Starting Material&smiles,M&Starting Material 2&smiles,M&Product&smiles,M&Base&smiles,M&Solvent&smiles,M&Solvent 2&smiles,M&Solvent 3&smiles,M&Catalyst&smiles
97,KAT69-6-E2-A1,,,InChI=1S/C8H10N4O2/c1-10-4-9-6-5(10)7(13)12(3)...,InChI=1S/C13H11Cl2NO/c1-8-3-10(7-17)9(2)16(8)1...,,,,InChI=1S/C4H9NO/c1-4(6)5(2)3/h1-3H3,InChI=1S/H2O/h1H2,...,0,Cn1c(=O)c2c(ncn2C)n(C)c1=O,Cc1cc(C=O)c(C)n1-c1cc(Cl)cc(Cl)c1,,,,CC(=O)N(C)C,O,C1COCCO1,
98,KAT69-6-E3-A1,,,InChI=1S/C8H10N4O2/c1-10-4-9-6-5(10)7(13)12(3)...,InChI=1S/C5H6ClN3O/c1-10-5-3(7)4(6)8-2-9-5/h2H...,,,,InChI=1S/C4H9NO/c1-4(6)5(2)3/h1-3H3,InChI=1S/H2O/h1H2,...,0,Cn1c(=O)c2c(ncn2C)n(C)c1=O,COc1ncnc(Cl)c1N,,,,CC(=O)N(C)C,O,C1COCCO1,
99,KAT69-6-E4-A1,,,InChI=1S/C8H10N4O2/c1-10-4-9-6-5(10)7(13)12(3)...,InChI=1S/C11H12BrNO/c1-8-7-9(12)4-5-10(8)13-6-...,,,,InChI=1S/C4H9NO/c1-4(6)5(2)3/h1-3H3,InChI=1S/H2O/h1H2,...,0,Cn1c(=O)c2c(ncn2C)n(C)c1=O,Cc1cc(Br)ccc1N1CCCC1=O,,,,CC(=O)N(C)C,O,C1COCCO1,
100,KAT69-6-E5-A1,,,InChI=1S/C8H10N4O2/c1-10-4-9-6-5(10)7(13)12(3)...,InChI=1S/C12H18BNO2/c1-9-6-7-10(8-14-9)13-15-1...,,,,InChI=1S/C4H9NO/c1-4(6)5(2)3/h1-3H3,InChI=1S/H2O/h1H2,...,0,Cn1c(=O)c2c(ncn2C)n(C)c1=O,Cc1ccc(B2OC(C)(C)C(C)(C)O2)cn1,,,,CC(=O)N(C)C,O,C1COCCO1,
101,KAT69-6-E6-A1,,,InChI=1S/C8H10N4O2/c1-10-4-9-6-5(10)7(13)12(3)...,InChI=1S/C11H19BN2O2/c1-8-9(7-14(6)13-8)12-15-...,,,,InChI=1S/C4H9NO/c1-4(6)5(2)3/h1-3H3,InChI=1S/H2O/h1H2,...,0,Cn1c(=O)c2c(ncn2C)n(C)c1=O,Cc1nn(C)cc1B1OC(C)(C)C(C)(C)O1,,,,CC(=O)N(C)C,O,C1COCCO1,


In [3]:
def create_reaction_smiles(row: pd.Series) -> str:
    """Build an RDKit-parseable reaction SMILES from one row of component SMILES.

    Args:
        row: a single row of SMILES values, as passed by
            ``DataFrame.apply(..., axis=1)``. A column whose label contains
            'Product' supplies the product (the last non-missing one wins if
            several match); every other column is a reactant. Missing
            entries (None, NaN, empty string) are skipped.

    Returns:
        ``'reactant1.reactant2>>product'``. A side with no entries is left
        empty, so the result always carries a single ``'>>'`` separator
        (e.g. ``'A>>'``, ``'>>P'`` or ``'>>'``).
    """
    reactants = []
    product = ''
    for label, smiles in row.items():
        # Only non-empty strings are real structures. The isinstance check
        # matters because NaN is truthy and would otherwise break the join.
        if not (isinstance(smiles, str) and smiles):
            continue
        if 'Product' in label:
            product = smiles
        else:
            reactants.append(smiles)
    # Always emit exactly one '>>' regardless of how many reactants exist.
    return f"{'.'.join(reactants)}>>{product}"

# One reaction SMILES per row, built from the reactant/product SMILES columns.
df['reaction_smiles'] = df[vis_cols].apply(create_reaction_smiles, axis=1)
df.head()

Unnamed: 0,SampleName,M&Catalyst&Name,M&Base&Name,M&Internal Standard&inchi,M&Starting Material&inchi,M&Starting Material 2&inchi,M&Product&inchi,M&Base&inchi,M&Solvent&inchi,M&Solvent 2&inchi,...,M&Internal Standard&smiles,M&Starting Material&smiles,M&Starting Material 2&smiles,M&Product&smiles,M&Base&smiles,M&Solvent&smiles,M&Solvent 2&smiles,M&Solvent 3&smiles,M&Catalyst&smiles,reaction_smiles
0,KAT69-6-A1-A1,XPhos Pd G4,Na2CO3,InChI=1S/C8H10N4O2/c1-10-4-9-6-5(10)7(13)12(3)...,InChI=1S/C7H5BrN2/c8-6-2-1-4-10-5-3-9-7(6)10/h...,InChI=1S/C12H18BNO2/c1-9-6-7-10(8-14-9)13-15-1...,InChI=1S/C13H11N3/c1-10-4-5-11(9-15-10)12-3-2-...,"InChI=1S/CH2O3.2Na/c2-1(3)4;;/h(H2,2,3,4);;/q;...",InChI=1S/C4H8O2/c1-2-6-4-3-5-1/h1-4H2,InChI=1S/C4H9NO/c1-4(6)5(2)3/h1-3H3,...,Cn1c(=O)c2c(ncn2C)n(C)c1=O,Brc1cccn2ccnc12,Cc1ccc(B2OC(C)(C)C(C)(C)O2)cn1,Cc1ccc(-c2cccn3ccnc23)cn1,O=C([O-])[O-].[Na+].[Na+],C1COCCO1,CC(=O)N(C)C,O,CC(C)c1cc(C(C)C)c(-c2ccccc2P(C2CCCCC2)C2CCCCC2...,Brc1cccn2ccnc12.Cc1ccc(B2OC(C)(C)C(C)(C)O2)cn1...
1,KAT69-6-A2-A1,XPhos Pd G4,Na2CO3,InChI=1S/C8H10N4O2/c1-10-4-9-6-5(10)7(13)12(3)...,InChI=1S/C7H5BrN2/c8-6-2-1-4-10-5-3-9-7(6)10/h...,InChI=1S/C12H18BNO2/c1-9-6-7-10(8-14-9)13-15-1...,InChI=1S/C13H11N3/c1-10-4-5-11(9-15-10)12-3-2-...,"InChI=1S/CH2O3.2Na/c2-1(3)4;;/h(H2,2,3,4);;/q;...",InChI=1S/C4H8O2/c1-2-6-4-3-5-1/h1-4H2,InChI=1S/C4H9NO/c1-4(6)5(2)3/h1-3H3,...,Cn1c(=O)c2c(ncn2C)n(C)c1=O,Brc1cccn2ccnc12,Cc1ccc(B2OC(C)(C)C(C)(C)O2)cn1,Cc1ccc(-c2cccn3ccnc23)cn1,O=C([O-])[O-].[Na+].[Na+],C1COCCO1,CC(=O)N(C)C,O,CC(C)c1cc(C(C)C)c(-c2ccccc2P(C2CCCCC2)C2CCCCC2...,Brc1cccn2ccnc12.Cc1ccc(B2OC(C)(C)C(C)(C)O2)cn1...
2,KAT69-6-A3-A1,XPhos Pd G4,Na2CO3,InChI=1S/C8H10N4O2/c1-10-4-9-6-5(10)7(13)12(3)...,InChI=1S/C7H5BrN2/c8-6-2-1-4-10-5-3-9-7(6)10/h...,InChI=1S/C12H18BNO2/c1-9-6-7-10(8-14-9)13-15-1...,InChI=1S/C13H11N3/c1-10-4-5-11(9-15-10)12-3-2-...,"InChI=1S/CH2O3.2Na/c2-1(3)4;;/h(H2,2,3,4);;/q;...",InChI=1S/C4H8O2/c1-2-6-4-3-5-1/h1-4H2,InChI=1S/C4H9NO/c1-4(6)5(2)3/h1-3H3,...,Cn1c(=O)c2c(ncn2C)n(C)c1=O,Brc1cccn2ccnc12,Cc1ccc(B2OC(C)(C)C(C)(C)O2)cn1,Cc1ccc(-c2cccn3ccnc23)cn1,O=C([O-])[O-].[Na+].[Na+],C1COCCO1,CC(=O)N(C)C,O,CC(C)c1cc(C(C)C)c(-c2ccccc2P(C2CCCCC2)C2CCCCC2...,Brc1cccn2ccnc12.Cc1ccc(B2OC(C)(C)C(C)(C)O2)cn1...
3,KAT69-6-A4-A1,XPhos Pd G4,Na2CO3,InChI=1S/C8H10N4O2/c1-10-4-9-6-5(10)7(13)12(3)...,InChI=1S/C7H5BrN2/c8-6-2-1-4-10-5-3-9-7(6)10/h...,InChI=1S/C11H19BN2O2/c1-8-9(7-14(6)13-8)12-15-...,InChI=1S/C12H12N4/c1-9-11(8-15(2)14-9)10-4-3-6...,"InChI=1S/CH2O3.2Na/c2-1(3)4;;/h(H2,2,3,4);;/q;...",InChI=1S/C4H8O2/c1-2-6-4-3-5-1/h1-4H2,InChI=1S/C4H9NO/c1-4(6)5(2)3/h1-3H3,...,Cn1c(=O)c2c(ncn2C)n(C)c1=O,Brc1cccn2ccnc12,Cc1nn(C)cc1B1OC(C)(C)C(C)(C)O1,Cc1nn(C)cc1-c1cccn2ccnc12,O=C([O-])[O-].[Na+].[Na+],C1COCCO1,CC(=O)N(C)C,O,CC(C)c1cc(C(C)C)c(-c2ccccc2P(C2CCCCC2)C2CCCCC2...,Brc1cccn2ccnc12.Cc1nn(C)cc1B1OC(C)(C)C(C)(C)O1...
4,KAT69-6-A5-A1,XPhos Pd G4,Na2CO3,InChI=1S/C8H10N4O2/c1-10-4-9-6-5(10)7(13)12(3)...,InChI=1S/C7H5BrN2/c8-6-2-1-4-10-5-3-9-7(6)10/h...,InChI=1S/C11H19BN2O2/c1-8-9(7-14(6)13-8)12-15-...,InChI=1S/C12H12N4/c1-9-11(8-15(2)14-9)10-4-3-6...,"InChI=1S/CH2O3.2Na/c2-1(3)4;;/h(H2,2,3,4);;/q;...",InChI=1S/C4H8O2/c1-2-6-4-3-5-1/h1-4H2,InChI=1S/C4H9NO/c1-4(6)5(2)3/h1-3H3,...,Cn1c(=O)c2c(ncn2C)n(C)c1=O,Brc1cccn2ccnc12,Cc1nn(C)cc1B1OC(C)(C)C(C)(C)O1,Cc1nn(C)cc1-c1cccn2ccnc12,O=C([O-])[O-].[Na+].[Na+],C1COCCO1,CC(=O)N(C)C,O,CC(C)c1cc(C(C)C)c(-c2ccccc2P(C2CCCCC2)C2CCCCC2...,Brc1cccn2ccnc12.Cc1nn(C)cc1B1OC(C)(C)C(C)(C)O1...


In [4]:
def rows_and_columns(plate_format: int = 96) -> tuple[list, list]:
    """Return the row letters and column numbers for a plate layout.

    Args:
        plate_format: number of wells. 96 (rows A-H, columns 1-12) and
            384 (rows A-P, columns 1-24) are supported.

    Returns:
        ``(rows, columns)``: rows as single uppercase letters and columns as
        the 1-based column numbers rendered as strings.

    Raises:
        ValueError: for any other plate size.
    """
    if plate_format not in (96, 384):
        raise ValueError(f'Not supported plate size of {plate_format}')
    n_rows, n_columns = (8, 12) if plate_format == 96 else (16, 24)
    rows = [chr(ord('A') + offset) for offset in range(n_rows)]
    columns = [str(number) for number in range(1, n_columns + 1)]
    return rows, columns

def batch(iterable, n=1):
    """Yield consecutive slices of ``iterable`` holding at most ``n`` items.

    Args:
        iterable: a sized, sliceable sequence (``len()`` and slicing are used).
        n: maximum length of each slice.

    Yields:
        Slices of ``iterable`` in order; the final one may be shorter than ``n``.
    """
    total = len(iterable)
    for start in range(0, total, n):
        end = start + n
        if end > total:
            end = total
        yield iterable[start:end]


rows, columns = rows_and_columns(plate_format=384)

# Chunk the flat list of conversion values into groups of len(columns),
# i.e. one inner list per plate row (the last group may be shorter).
lol = list(batch(df['Conversion %'].astype(int).tolist(), len(columns)))
lol

[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 9, 22, 44, 39, 22, 43],
 [11,
  0,
  0,
  0,
  0,
  0,
  0,
  8,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  15,
  22,
  19,
  45,
  15,
  35],
 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 38, 48, 42, 30, 0, 0],
 [0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 6, 4, 6, 13, 14, 51],
 [0, 0, 0, 0, 0, 0]]

In [5]:
def set_conversion_vis(x, min_visible=10):
    """Raise small positive conversion values to a floor for display.

    The notebook stores the result in the 'convis' column, which is used as
    the marker colour of the plate plot.

    Args:
        x: conversion value.
        min_visible: floor applied to values strictly between 0 and this
            threshold (defaults to 10, the value previously hard-coded).

    Returns:
        ``x`` when ``x >= min_visible``; ``min_visible`` when
        ``0 < x < min_visible``; otherwise 0 (zero, negative or NaN input).
    """
    if x >= min_visible:
        return x
    if x > 0:
        return min_visible
    return 0

# Colour value per row of df: conversions clamped by set_conversion_vis.
df['convis'] = df['Conversion %'].apply(set_conversion_vis)

# Grid helpers derived from the chunked conversions.
z = lol[:-1]            # every chunk except the last one
y = rows[:len(z)]       # one row letter per kept chunk
x = list(range(1, 25))  # column numbers 1..24

In [6]:
# Scatter plot with one square marker per row of df, positioned by
# 'Column'/'Row' and coloured by the clamped conversion in 'convis'.
fig = px.scatter(
    x=df['Column'],
    y=df['Row'],
    color=df['convis'],
    width=800,
    height=700,
    color_continuous_scale='thermal',
)
# Reverse the y axis so the first row label is drawn at the top.
fig.update_yaxes(autorange="reversed")

# Force a tick for every column number 1..24.
column_ticks = list(range(1, 25))
fig.update_layout(
    xaxis={
        'tickmode': 'array',
        'tickvals': column_ticks,
        'ticktext': [str(tick) for tick in column_ticks],
    },
    margin={'l': 20, 'r': 20, 't': 200, 'b': 20},
    paper_bgcolor="LightSteelBlue",
)
fig.update_traces(
    marker={
        'size': 20,
        'symbol': 'square',
        'line': {'width': 2, 'color': 'DarkSlateGrey'},
    },
    selector={'mode': 'markers'},
)

# Attach hover cards that render each row's reaction SMILES.
# NOTE(review): color_col is passed as a one-element list here; molplotly
# appears to document it as a single column name - confirm this is intended.
app_pca = molplotly.add_molecules(
    fig=fig,
    df=df,
    smiles_col='reaction_smiles',
    title_col='SampleName',
    caption_cols=["M&Catalyst&Name", "M&Base&Name", 'Conversion %'],
    color_col=['Conversion %'],
    show_coords=True,
    svg_height=200,
    svg_width=500,
    wrap=False,
    reaction=True,
    width=500,
)
app_pca.run_server(mode='inline', port=8008, height=1000)  # will not render on jupyter server

Dash is running on http://127.0.0.1:8008/

