# Imports and Data Loading

Import pandas for data manipulation, plotly for plotting, and molplotly for visualising structures!

In [None]:
import pandas as pd
import plotly.express as px

import molplotly


Let's load the ESOL dataset from [ESOL: Estimating Aqueous Solubility Directly from Molecular Structure](https://doi.org/10.1021/ci034243x) - helpfully hosted by the [deepchem](https://github.com/deepchem/deepchem) team but also included as `example.csv` in the repo.

In [None]:
# Read the ESOL (Delaney) table from the deepchem repository and expose the
# two long-named solubility columns under the short aliases `y_pred` and
# `y_true`, which the plotting cell refers to.
# Offline alternative: pd.read_csv('example.csv')
df_esol = pd.read_csv(
    'https://raw.githubusercontent.com/deepchem/deepchem/master/datasets/delaney-processed.csv'
).assign(
    y_pred=lambda frame: frame['ESOL predicted log solubility in mols per litre'],
    y_true=lambda frame: frame['measured log solubility in mols per litre'],
)


In [None]:
# from io import BytesIO
# import os
# import base64
# import textwrap

# import numpy as np
# from rdkit import Chem
# from rdkit.Chem.Draw import rdMolDraw2D

# from jupyter_dash import JupyterDash

# import plotly.express as px
# from dash import dcc, html, Input, Output, no_update

# from html.parser import HTMLParser
# import copy
# from plotly.graph_objects import Figure

# import requests

# def test_add(
#     fig,
#     smiles_col="SMILES",
# ):
#     """
#     """
#     if isinstance(smiles_col, str):
#         smiles_col = [smiles_col]

#     if len(smiles_col) > 1:
#         menu = dcc.Dropdown(
#             options=smiles_col, value=smiles_col[0], multi=True, id="smiles-menu"
#         )
#         slider = dcc.Slider(
#             min=0,
#             max=len(smiles_col) - 1,
#             step=1,
#             marks={i: smiles_col[i] for i in range(len(smiles_col))},
#             value=0,
#             id="smiles-slider",
#         )
#     else:
#         menu = dcc.Store(id="smiles-slider", data=0)
#         slider = dcc.Store(id="smiles-slider", data=0)

#     fig.update_traces(hoverinfo="none", hovertemplate=None)
#     app = JupyterDash(__name__)
#     app.layout = html.Div(
#         [
#             html.Div([menu]),
#             # dcc.Graph(id="graph-basic-2", figure=fig, clear_on_unhover=True),
#             # dcc.Tooltip(
#             #     id="graph-tooltip", background_color=f"rgba(255,255,255,0.5)"
#             # ),
#             # slider,
#             # menu,
#         ]
#     )


#     return app


Let's make a scatter plot comparing the measured vs predicted solubilities using `molplotly`

In [None]:
# from dash import Dash, dcc, html, Input, Output

# app = JupyterDash(__name__)
# app.layout = html.Div([
#                 dcc.Dropdown(options=[
#                 {"label": "New York City", "value": "NYC"},
#                 {"label": "Montréal", "value": "MTL"},
#                 {"label": "San Francisco", "value": "SF"},
#                 ],
#                  value='NYC', 
#                  id='demo-dropdown'),
#                  html.Div(id='dd-output-container')
# ])


# @app.callback(
#     Output('dd-output-container', 'children'),
#     Input('demo-dropdown', 'value')
# )
# def update_output(value):
#     return f'You have selected {value}'


# app.run_server(mode='inline', port=8002, debug=True)

In [None]:
# Port shared by the molplotly wiring and the server launch at the bottom.
port = 10000

# Extra columns: the signed prediction error (used to colour the markers) and
# a constant 'CO' column that is passed below as a second SMILES column.
df_esol['delY'] = df_esol['y_pred'] - df_esol['y_true']
df_esol['test'] = 'CO'

# Measured (x) against predicted (y) solubility, coloured by the error.
fig_scatter = px.scatter(
    df_esol,
    x='y_true',
    y='y_pred',
    color='delY',
    title='ESOL Regression (with add_molecules!)',
    labels={
        'y_pred': 'Predicted Solubility',
        'y_true': 'Measured Solubility',
        'delY': 'ΔY',
    },
    width=1200,
    height=800,
)

# Dashed diagonal across the measured range: where a perfect model's
# predictions would fall.
y = df_esol['y_true'].values
fig_scatter.add_shape(
    type='line',
    line={'dash': 'dash'},
    x0=y.min(), y0=y.min(),
    x1=y.max(), y1=y.max(),
)

# Wrap the figure in a molplotly app; both 'smiles' and 'test' are supplied
# as SMILES columns, and 'Compound ID' as the title column.
app_scatter = molplotly.add_molecules(
    fig=fig_scatter,
    df=df_esol,
    smiles_col=['smiles', 'test'],
    title_col='Compound ID',
    port=port,
)

# Change the arguments here to run the dash app on an external server and/or
# change the size of the app.
app_scatter.run_server(mode='inline', port=port, height=1000)