## OLED ML Worksheet

In [70]:
#Imports

import pandas as pd
import pickle
from rdkit import Chem
from rdkit.Chem import AllChem
import numpy as np

In [71]:
#Data Import & Remove Chromophores Containing Tin
smi = pd.read_csv('./data/OLED_dataset_CSV.csv', encoding='windows-1252') # load csv file (20236, 14)

target_atom = ['Sn'] #element symbols whose presence excludes a chromophore


def _parse_smiles(smiles):
    """Return an RDKit Mol for `smiles`, or None when it is not a string or RDKit cannot parse it."""
    # Chem.MolFromSmiles signals a parse failure by returning None and rejects non-string input
    # (e.g. NaN from an empty CSV cell), so both cases are normalised to None here.
    return Chem.MolFromSmiles(smiles) if isinstance(smiles, str) else None


smi["Molecules"] = smi["Chromophore"].apply(_parse_smiles) #Add column of Molecular objects

#Rows without a molecule can be neither inspected nor fingerprinted later, so they are dropped too
unparsed = smi["Molecules"].isna()
if unparsed.any():
    print(f"Dropping {int(unparsed.sum())} row(s) with missing or unparsable chromophore SMILES")

#Flag every row whose molecule contains at least one of the target atoms
target_symbols = set(target_atom)
contains_target = pd.Series(
    [
        (not missing) and any(atom.GetSymbol() in target_symbols for atom in mol.GetAtoms())
        for mol, missing in zip(smi["Molecules"], unparsed)
    ],
    index=smi.index,
    dtype=bool,
)

#One entry per flagged row, in row order
Unnecessary_chromophores = smi.loc[contains_target, "Chromophore"].tolist()

filtered_smi = smi[~(contains_target | unparsed)]

In [49]:
#Create dictionary of SMILES: Morgan fingerprint

MORGAN_RADIUS = 3 #radius argument passed to the Morgan fingerprint
MORGAN_NBITS = 1024 #length of the fingerprint bit vector

#The dictionary is keyed by SMILES, so rows sharing a chromophore would all produce the same entry;
#fingerprint each distinct chromophore once (first occurrence kept, so key order is unchanged).
unique_chromophores = filtered_smi.drop_duplicates(subset="Chromophore")

#Each bit vector is converted to a plain list of ints so the dictionary can be serialised to JSON
Mfp_Chrom = {
    smiles: np.array(
        AllChem.GetMorganFingerprintAsBitVect(mol, MORGAN_RADIUS, nBits=MORGAN_NBITS)
    ).tolist()
    for smiles, mol in zip(unique_chromophores["Chromophore"], unique_chromophores["Molecules"])
}


In [51]:
#Write the chromophore fingerprint dictionary to a .JSON file
import json

with open("./data/Mfp_chrom.json", "w") as fp_out:
    json.dump(Mfp_Chrom, fp=fp_out)

In [54]:
#Read the fingerprints back from the saved .JSON file.
#JSON holds each fingerprint as a plain list, so every value is rebuilt as a NumPy array.
with open("./data/Mfp_chrom.json", "r") as fp_in:
    loaded_MFP = {
        smiles: np.array(bits)
        for smiles, bits in json.load(fp_in).items()
    }

In [72]:
#Get Unique Solvents
#Keep the "Solvent" entries that are not "gas", then drop repeated values (first occurrence wins)
filtered_smi_sol = np.array(
    filtered_smi.loc[filtered_smi["Solvent"] != "gas", "Solvent"].drop_duplicates()
)

#Create dictionary of SMILES: Morgan Fingerprint (Solvents)

Mfp_Sol = {}

#Parse every unique solvent SMILES into a molecule object (result is a list)
sol_molecules = list(map(Chem.MolFromSmiles, filtered_smi_sol))


In [73]:
#Display the list of solvent molecule objects
sol_molecules

[<rdkit.Chem.rdchem.Mol at 0x22e16ead9a0>,
 <rdkit.Chem.rdchem.Mol at 0x22e16eadc40>,
 <rdkit.Chem.rdchem.Mol at 0x22e16ead930>,
 <rdkit.Chem.rdchem.Mol at 0x22e16ead8c0>,
 <rdkit.Chem.rdchem.Mol at 0x22e16ead850>,
 <rdkit.Chem.rdchem.Mol at 0x22e16ead7e0>,
 <rdkit.Chem.rdchem.Mol at 0x22e16ead770>,
 <rdkit.Chem.rdchem.Mol at 0x22e16ead700>,
 <rdkit.Chem.rdchem.Mol at 0x22e16ead690>,
 <rdkit.Chem.rdchem.Mol at 0x22e16ead620>,
 <rdkit.Chem.rdchem.Mol at 0x22e16ead5b0>,
 <rdkit.Chem.rdchem.Mol at 0x22e16ead540>,
 <rdkit.Chem.rdchem.Mol at 0x22e16ead4d0>,
 <rdkit.Chem.rdchem.Mol at 0x22e16ead460>,
 <rdkit.Chem.rdchem.Mol at 0x22e16ead3f0>,
 <rdkit.Chem.rdchem.Mol at 0x22e16ead380>,
 <rdkit.Chem.rdchem.Mol at 0x22e16ead310>,
 <rdkit.Chem.rdchem.Mol at 0x22e16ead2a0>,
 <rdkit.Chem.rdchem.Mol at 0x22e16ead230>,
 <rdkit.Chem.rdchem.Mol at 0x22e16ead1c0>,
 <rdkit.Chem.rdchem.Mol at 0x22e16ead150>,
 <rdkit.Chem.rdchem.Mol at 0x22e16ead0e0>,
 <rdkit.Chem.rdchem.Mol at 0x22e16ead070>,
 <rdkit.Che