### Import

Import pandas for data manipulation, plotly for plotting, and molplot for visualising structures!

In [1]:
import pandas as pd
import plotly.express as px

import molplot


INFO:rdkit:Enabling RDKit 2021.09.3 jupyter extensions


Let's load the ESOL dataset from [ESOL: Estimating Aqueous Solubility Directly from Molecular Structure](https://doi.org/10.1021/ci034243x) and alias the predicted and measured log-solubility columns as `y_pred` and `y_true`.

In [2]:
# Read the ESOL table and expose the two log-solubility columns under the
# short aliases used by the plots below (the original columns are kept).
df_esol = pd.read_csv('esol.csv').assign(
    y_pred=lambda frame: frame['ESOL predicted log solubility in mols per litre'],
    y_true=lambda frame: frame['measured log solubility in mols per litre'],
)


Let's make a plotly scatter plot comparing the measured vs predicted solubilities (and configure the plot parameters)

In [3]:
# Signed prediction error (predicted - measured), used to colour the points.
df_esol['delY'] = df_esol["y_pred"] - df_esol["y_true"]
fig = px.scatter(df_esol, x="y_true", y="y_pred",
                 color='delY')

# Add a dashed reference line along y = x, spanning the range of the measured
# values. Series.min()/.max() skip missing values, whereas the builtin
# min()/max() would give order-dependent results if the column contained NaN.
y_true_min = float(df_esol['y_true'].min())
y_true_max = float(df_esol['y_true'].max())
fig.add_shape(type='line',
              x0=y_true_min,
              y0=y_true_min,
              x1=y_true_max,
              y1=y_true_max,
              line=dict(color='black', dash='dash'),
              xref='x',
              yref='y'
              )

fig.update_layout(
    title='ESOL Regression (default plotly)',
    autosize=True,
    width=1200,  # size of figure
    height=800,
    xaxis=dict(title="Measured Solubility"),
    yaxis=dict(title="Predicted Solubility")
)

# Last expression of the cell: display the figure.
fig

Now let's `add_molecules`!

In [7]:
# Retitle the scatter figure built in the previous cell before wrapping it.
fig.update_layout(title='ESOL Regression (with add_molecules!)')

# Wrap the figure with molplot, pointing it at the SMILES and title columns
# of the dataframe that was used to build the figure.
app = molplot.add_molecules(
    fig=fig,
    df=df_esol,
    smiles_col='smiles',
    title_col='Compound ID',
    wrap=True,
    wraplen=25,
    width=150,
)

# Change the arguments here to run the dash app on an external server and/or
# to change the size of the app.
app.run_server(mode='inline', port=8011, height=1000)



The 'environ['werkzeug.server.shutdown']' function is deprecated and will be removed in Werkzeug 2.1.



In this example, we color the points discretely according to a threshold value of `y_pred`. This produces two separate plotly "curves" (one per color), so the column that determines the color must be passed to `add_molecules` via `condition_col` in order for the correct SMILES to be selected for visualisation.

In [12]:
# Boolean column flagging rows whose predicted value exceeds the threshold;
# the column is named after the condition it encodes.
condition_name = 'y_pred > -3'
df_esol[condition_name] = df_esol["y_pred"].gt(-3)

# Colour the scatter by the boolean column.
fig = px.scatter(
    df_esol,
    x="y_true",
    y="y_pred",
    color=condition_name,
)

fig.update_layout(
    title='ESOL Regression (with add_molecules!)',
    autosize=True,
    width=1200,  # size of figure
    height=800,
    xaxis={"title": "Measured Solubility"},
    yaxis={"title": "Predicted Solubility"},
)

# Pass the colouring column as condition_col alongside the SMILES, title and
# caption columns.
app = molplot.add_molecules(
    fig=fig,
    df=df_esol,
    smiles_col='smiles',
    title_col='Compound ID',
    caption_cols=['Molecular Weight', 'Number of Rings'],
    condition_col=condition_name,
)

app.run_server(mode='inline', port=8009, height=1000)


In [13]:
# Colour the points by the ring count, cast to integers first.
# NOTE(review): plotly express maps a numeric colour column to a continuous
# colour scale rather than one trace per value; confirm that passing
# condition_col below still selects the right molecule in that case.
condition_name = 'Number of Rings'
df_esol[condition_name] = df_esol[condition_name].astype(int)

fig = px.scatter(
    df_esol,
    x="y_true",
    y="y_pred",
    color=condition_name,
)

fig.update_layout(
    title='ESOL Regression (with add_molecules!)',
    autosize=True,
    width=1200,  # size of figure
    height=800,
    xaxis={"title": "Measured Solubility"},
    yaxis={"title": "Predicted Solubility"},
)

# Same wrapping call as for the threshold example, with the ring-count column
# passed as condition_col.
app = molplot.add_molecules(
    fig=fig,
    df=df_esol,
    smiles_col='smiles',
    title_col='Compound ID',
    caption_cols=['Molecular Weight', 'Number of Rings'],
    condition_col=condition_name,
)

app.run_server(mode='inline', port=8008, height=1000)



The 'environ['werkzeug.server.shutdown']' function is deprecated and will be removed in Werkzeug 2.1.

