In [None]:
!pip install git+https://github.com/samoturk/mol2vec
from pathlib import Path
from tempfile import NamedTemporaryFile
import fileinput
import os
import rdkit
import pandas as pd
import numpy as np
import mol2vec
from mol2vec import features
from mol2vec import helpers
from rdkit import Chem
from rdkit.Chem import Draw
from rdkit.Chem.Draw import IPythonConsole
import pkg_resources
# Check the pinned gensim requirement before gensim itself is imported below;
# pkg_resources.require raises if the installed distribution does not satisfy it.
pkg_resources.require("gensim==3.8.3")
import gensim
from gensim.models import word2vec
from mol2vec.features import mol2alt_sentence, mol2sentence, MolSentence, DfVec, sentences2vec
from mol2vec.helpers import depict_identifier, plot_2D_vectors, IdentifierTable, mol_to_svg
import seaborn as sns
import matplotlib.pyplot as plt
import gensim.downloader
from rdkit import RDLogger   
RDLogger.DisableLog('rdApp.*') # turn off RDKit warning message 

In [None]:
!pip install umap-learn
import umap.umap_ as umap

In [None]:
def run_umap(smiles, vectors):
    """Project feature vectors to 2-D and 3-D with UMAP and save both projections.

    Parameters
    ----------
    smiles : sequence of str
        SMILES strings, one per row of ``vectors``; written to the ``smiles``
        column of both output tables.
    vectors : array-like
        Feature vectors corresponding to ``smiles``; handed unchanged to
        ``UMAP.fit_transform`` for both the 2-D and the 3-D reducer.

    Returns
    -------
    tuple
        ``(proj_2d, proj_3d)``: the 2-component and 3-component embeddings
        returned by ``fit_transform``.

    Side effects
    ------------
    Writes ``umap2D.csv`` (columns ``smiles``, ``dim1``, ``dim2``) and
    ``umap3D.csv`` (columns ``smiles``, ``dim1``, ``dim2``, ``dim3``) into
    ``all_files/Updated_Smiles`` under the current working directory. The
    directory is created when it does not exist yet.
    """
    # Same initialisation and seed for both reducers.
    umap_2d = umap.UMAP(n_components=2, init='random', random_state=0)
    umap_3d = umap.UMAP(n_components=3, init='random', random_state=0)

    # Both projections are fitted on the `vectors` argument, so the function
    # does not depend on any variable living in the notebook namespace.
    proj_2d = umap_2d.fit_transform(vectors)
    proj_3d = umap_3d.fit_transform(vectors)

    out_dir = os.path.join(os.getcwd(), 'all_files/Updated_Smiles')
    # Make sure the target directory exists so that the (potentially slow)
    # projections above are not lost to a missing-directory error on save.
    os.makedirs(out_dir, exist_ok=True)

    for file_name, projection in (('umap2D.csv', proj_2d), ('umap3D.csv', proj_3d)):
        table = pd.DataFrame()
        table['smiles'] = smiles
        # One column per embedding axis: dim1, dim2 (and dim3 for the 3-D case).
        for axis in range(projection.shape[1]):
            table[f'dim{axis + 1}'] = projection[:, axis]
        table.to_csv(os.path.join(out_dir, file_name))

    return proj_2d, proj_3d
    

In [None]:
!pip install molplotly

In [None]:
# Load the table used by the plotting cells below. Those cells read the columns
# 'UMAP Dimension 1', 'UMAP Dimension 2', 'BR Prediction Error', 'sd' and 'smiles'.
umapPath = os.path.join(os.getcwd(), 'all_files/Updated_Smiles/detectionDataset.csv')
# Read from the path built on the line above.
umapUpload = pd.read_csv(umapPath)

In [None]:
# UMAP scatter plot in which every point is coloured by the magnitude of its
# prediction error; hovering a point shows the molecule via molplotly.

import plotly.express as px
import molplotly

# Column that drives the colour scale, the hover caption and the legend label.
error_column = 'BR Prediction Error'

fig_pca = px.scatter(
    umapUpload,
    x="UMAP Dimension 1",
    y="UMAP Dimension 2",
    color=error_column,
    title='Magnitude of Prediction Error',
    labels={error_column: error_column},
)

# The rename maps the column onto itself, so this is a copy of umapUpload
# with unchanged column labels.
error_hover_df = umapUpload.rename(columns={error_column: error_column})

app_pca = molplotly.add_molecules(
    fig=fig_pca,
    df=error_hover_df,
    smiles_col='smiles',
    caption_cols=[error_column],
    color_col=error_column,
    show_coords=True,
)

# Render the interactive app inline in the notebook output.
app_pca.run_server(mode='inline', port=8707, height=850)

In [None]:
# UMAP scatter plot in which every point is coloured by the magnitude of its
# prediction uncertainty (the 'sd' column, shown as "Standard Deviation").


fig_pca = px.scatter(umapUpload,
                     x="UMAP Dimension 1",
                     y="UMAP Dimension 2",
                     color='sd',
                     title='Magnitude of Standard Deviation',
                     labels={'sd': 'Standard Deviation'})

# The hover table uses the display name, so 'sd' is renamed to match the
# caption and colour column passed to molplotly.
app_pca = molplotly.add_molecules(fig=fig_pca,
                                  df=umapUpload.rename(columns={'sd': 'Standard Deviation'}),
                                  smiles_col='smiles',
                                  caption_cols=['Standard Deviation'],
                                  color_col='Standard Deviation',
                                  show_coords=True)

# Use a port that differs from the prediction-error plot (8707): each inline
# app needs its own port, otherwise the two plots compete for one server.
app_pca.run_server(mode='inline', port=8708, height=850)