In [1]:
import os
import sys
import gains as genetic
import pandas as pd
import rdkit
import pickle
from math import exp
import numpy as np
import random
import unittest
import datetime
import sys
import sklearn
import salty

from rdkit.Chem import Draw
from rdkit import RDConfig
from rdkit.Chem import FragmentCatalog
from rdkit import RDConfig
from rdkit import DataStructs
from rdkit.Chem.Fingerprints import FingerprintMols
from rdkit import Chem
from rdkit.Chem import AllChem as Chem
from rdkit.Chem.Draw import ShowMol
from rdkit.Chem.Draw import SimilarityMaps
from rdkit.ML.Descriptors.MoleculeDescriptors import\
    MolecularDescriptorCalculator as calculator

%matplotlib inline

# "Tableau 20" colour palette, written as 8-bit (R, G, B) triples.
tableau20 = [
    (31, 119, 180), (174, 199, 232), (255, 127, 14), (255, 187, 120),
    (44, 160, 44), (152, 223, 138), (214, 39, 40), (255, 152, 150),
    (148, 103, 189), (197, 176, 213), (140, 86, 75), (196, 156, 148),
    (227, 119, 194), (247, 182, 210), (127, 127, 127), (199, 199, 199),
    (188, 189, 34), (219, 219, 141), (23, 190, 207), (158, 218, 229),
]

# matplotlib takes RGB components in the [0, 1] range, so rescale every
# channel of every entry in place.
for i, (r, g, b) in enumerate(tableau20):
    tableau20[i] = (r / 255., g / 255., b / 255.)

In [29]:
def guess_password(target):
    """Run the genetic search for a cation matching ``target`` and return the result.

    The search itself is delegated to ``genetic.get_best``; this function only
    wires up the callbacks it needs. It reads the notebook-level globals
    ``anion``, ``geneSet`` and ``parent_candidates``.

    Parameters
    ----------
    target : the value passed to ``get_fitness`` as the target and forwarded
        to ``genetic.get_best``.

    Returns
    -------
    Whatever ``genetic.get_best`` returns.
    """
    started_at = datetime.datetime.now()

    def score_genes(genes):
        # Fitness of a candidate against the currently selected global anion.
        return get_fitness(anion, genes, target)

    def report_progress(candidate, mutation):
        display(candidate, mutation, started_at)

    def report_result(genes, target, mutation_attempts, sim_score, molecular_relative):
        show_ion(genes, target, mutation_attempts, sim_score, molecular_relative)

    # 0.99 is the fitness value handed to get_best as the optimum to reach.
    return genetic.get_best(score_genes, 0.99, geneSet, report_progress,
                            report_result, target, parent_candidates)
    
def display(candidate, mutation, startTime):
    """Silent progress hook: measure the elapsed time and report nothing.

    ``candidate`` and ``mutation`` are accepted but not used.
    """
    # The elapsed time is computed and then discarded; nothing is printed.
    elapsed = datetime.datetime.now() - startTime
    
def get_fitness(anion, genes, target):
    """Score a candidate cation by how close the predicted property is to ``target``.

    Parameters
    ----------
    anion : RDKit molecule used for every "anion" descriptor column.
    genes : SMILES string of the candidate cation.
    target : value the prediction is compared against.

    Returns
    -------
    (fitness, prediction) where ``fitness = 1 - |prediction - target| / target``
    and ``prediction`` is ``exp`` of the model output.

    Notes
    -----
    The model and the descriptor table are re-loaded on every call.
    Rows 0 and 1 of the descriptor table are used as offset and scale
    (presumably mean and standard deviation; TODO confirm).
    """
    cation = Chem.MolFromSmiles(genes)
    model = genetic.load_data("density_model_3.sav", pickleFile=True)
    deslist = genetic.load_data("density_model_3_descriptors.csv")

    def rdkit_value(column, mol):
        # The text before the first '-' in the column name is used as the
        # RDKit descriptor name.
        with genetic.suppress_stdout_stderr():
            return calculator([column.partition('-')[0]]).CalcDescriptors(mol)[0]

    feature_vector = []
    for item in deslist:
        if "anion" in item:
            feature_vector.append(rdkit_value(item, anion))
        elif "cation" in item:
            feature_vector.append(rdkit_value(item, cation))
        elif "Temperature, K" in item:
            feature_vector.append(298.15)
        elif "Pressure, kPa" in item:
            feature_vector.append(101.325)
        else:
            print("unknown descriptor in list: %s" % item)

    offsets = deslist.iloc[0].values
    scales = deslist.iloc[1].values
    features_normalized = (feature_vector - offsets) / scales
    model_input = np.array(features_normalized).reshape(1, -1)
    prediction = exp(model.predict(model_input)[0])
    error = abs((prediction - target) / target)

    return 1 - error, prediction

def show_ion(genes, target, mutation_attempts, sim_score, molecular_relative):
    """Print a short report for a finished candidate cation.

    Parameters
    ----------
    genes : SMILES string of the candidate cation.
    target : target value forwarded to ``get_fitness``.
    mutation_attempts : number printed as "mutation attempts".
    sim_score : similarity score, printed with three decimals.
    molecular_relative : identifier passed through ``salty.check_name``
        before printing.

    Reads the notebook-level global ``anion``.
    """
    mol = Chem.MolFromSmiles(genes)
    # get_fitness returns (fitness, predicted density); only the prediction
    # is reported here.
    _, mol_property = get_fitness(anion, genes, target)
    print("{}\t{}".format("number of atoms: ", mol.GetNumAtoms()))
    print("{}\t{}".format("mutation attempts: ", mutation_attempts))
    # Density is a mass per volume; the previous label read "(kg/m)".
    print("with density: \t\t{0:1.2f} (kg/m^3)".format(mol_property))
    print("similarity score:  {0:10.3f}".format(sim_score))
    print("{}\t{}\n".format("molecular relative: ", salty.check_name(molecular_relative)))

    
# Design new salt candidates: pick a random anion, evolve a cation whose
# predicted density hits `target`, and keep it when it is small enough,
# similar (but not identical) to a known cation, and not already listed.
geneSet = genetic.generate_geneset()
df = genetic.load_data("cationInfo.csv")
parent_candidates = df['smiles'].unique()
df = genetic.load_data("anionInfo.csv")
df = df['smiles'].unique()  # from here on `df` is the array of anion SMILES
target = 1000
for i in range(1,2):
    salts = pd.read_csv("../../designer_molecules/pdb_files/model_3/40/salt_candidates.csv")
    cols = salts.columns
    while True:
        ohPickMe = random.sample(range(df.shape[0]),1)
        anion = Chem.MolFromSmiles(df[ohPickMe[0]])
        best = guess_password(target)
        Tanimoto_Similarity_Score, sim_index = genetic.molecular_similarity(best, parent_candidates)
        Cation_Heavy_Atoms = best.Mol.GetNumAtoms()

        # Reject and retry: cation too large.
        if Cation_Heavy_Atoms >= 20:
            continue
        # Reject and retry: similarity outside [0.4, 1).
        if not (0.4 <= Tanimoto_Similarity_Score < 1):
            continue

        Salt_Smiles = best.Genes + "." + Chem.MolToSmiles(anion)
        # `x in series` tests the Series *index*, not its values, so the
        # membership check has to run against `.values` to detect a salt that
        # is already listed.
        if Salt_Smiles in salts["Salt Smiles"].values:
            continue

        # Zero-padded two-digit identifiers, e.g. C01 / A01.
        CAT_ID = "C%02d" % i
        AN_ID = "A%02d" % i
        Salt_ID = CAT_ID + "_" + AN_ID
        Molecular_Relative = salty.check_name(parent_candidates[sim_index])
        Anion = salty.check_name(df[ohPickMe[0]])
        # The last two columns of the candidates file are left unset here.
        new_entry = pd.DataFrame([[Salt_ID, Salt_Smiles, Cation_Heavy_Atoms,
                                   Tanimoto_Similarity_Score, Molecular_Relative,
                                   Anion, target]], columns=cols[:-2])

        # Build 3-D, UFF-optimised structures for both ions.
        cation = Chem.AddHs(best.Mol)
        Chem.EmbedMolecule(cation, Chem.ETKDG())
        Chem.UFFOptimizeMolecule(cation)
#         rdkit.Chem.rdmolfiles.MolToPDBFile(cation,
#             "../../designer_molecules/pdb_files/model_3/40/%s.pdb" % CAT_ID)
        anion = Chem.AddHs(anion)
        Chem.EmbedMolecule(anion, Chem.ETKDG())
        Chem.UFFOptimizeMolecule(anion)
#         rdkit.Chem.rdmolfiles.MolToPDBFile(anion,
#             "../../designer_molecules/pdb_files/model_3/40/%s.pdb" % AN_ID)
        new = pd.DataFrame(pd.concat([salts, new_entry]), columns=cols)
        break
#     pd.DataFrame.to_csv(new, path_or_buf='pdb_files/model_3/40/salt_candidates.csv',
#                         index=False)



number of atoms: 	32
mutation attempts: 	31
with density: 		1003.68 (kg/m)
similarity score:       0.612
molecular relative: 	1-isobutyl-1-methylpyrrolidinium

number of atoms: 	27
mutation attempts: 	28
with density: 		1009.64 (kg/m)
similarity score:       0.535
molecular relative: 	1-(6-hydroxyhexyl)imidazolium

number of atoms: 	40
mutation attempts: 	633
with density: 		1002.74 (kg/m)
similarity score:       0.539
molecular relative: 	1-isobutenyl-3-methylimidazolium

number of atoms: 	20
mutation attempts: 	43
with density: 		993.17 (kg/m)
similarity score:       0.638
molecular relative: 	3-butyl-1-methyl-1H-imidazolium

number of atoms: 	17
mutation attempts: 	3
with density: 		993.42 (kg/m)
similarity score:       0.889
molecular relative: 	tributyloctylphosphonium



In [4]:
# Merge the MD-simulated densities (one .dens file per salt) into the
# candidate tables of every model_3 sub-directory and stack the results.
out = pd.DataFrame()
for j in np.arange(40,100,10):
    df = pd.read_csv("../../designer_molecules/pdb_files/model_3/%s/salt_candidates.csv" % j)
    for i in range(1,25):
        # Zero-padded two-digit identifiers, e.g. C01_A01.
        Salt_ID = "C%02d_A%02d" % (i, i)
        try:
            # Read and summarise the file once; the second column holds the
            # values that are averaged.
            dens_stats = pd.read_csv("pdb_files/model_3/%s/%s.dens" % (j, Salt_ID)).describe()
            mean = np.round(dens_stats.loc["mean"].iloc[1])
            std = np.round(dens_stats.loc["std"].iloc[1])
            value = ("{}{}{}".format(mean," +/- ", std))
            is_salt = df["Salt ID"] == Salt_ID
            true = df.loc[is_salt, "Model Density"]
            difference = (true - mean)/true*100
            df.loc[is_salt, "MD Density"] = value
            df.loc[is_salt, "% Difference"] = difference
        except (OSError, ValueError, KeyError, IndexError):
            # Best effort: a missing, empty or malformed .dens file (or a
            # missing column) just means this salt has no MD result.
            # KeyboardInterrupt and programming errors are no longer hidden.
            continue
    # Salts without an MD result still have NaN in the new columns.
    df.dropna(inplace=True)
    out = pd.concat([df, out], axis=0)
out.reset_index(drop=True, inplace=True)
out

Unnamed: 0,Salt ID,Salt Smiles,Cation Heavy Atoms,Tanimoto Similarity Score,Molecular Relative,Anion,Model Density,MD Density,% Difference
0,C01_A01,CCCCCCCC[n+]1cc[nH]c1.COP(=O)([O-])OC,13,0.900000,1-butylimidazolium,dimethylphosphate,1000,1069.0 +/- 38.0,-6.9
1,C04_A04,Cn1cc[n+](CCCCCCCCCCP)c1.N#C[S-],17,0.937853,1-methyl-3-octyl-1H-imidazolium,thiocyanate,1000,1013.0 +/- 25.0,-1.3
2,C05_A05,CCCC(CC)CCCCn1cc[n+](CCC#N)c1.N#C[S-],19,0.940299,3-(2-cyanoethyl)-1-decyl-1H-imidazolium,thiocyanate,1000,992.0 +/- 39.0,0.8
3,C07_A07,Cn1cc[n+](CCCCCCCCO)c1.CCC(=O)[O-],15,0.972067,3-(6-hydroxyhexyl)-1-methylimidazolium,propionate,1000,1008.0 +/- 31.0,-0.8
4,C08_A08,CCCCn1c(C)cc(C)[n+]1-c1cccc(F)c1.NCCCCC(N)C(=O...,18,0.905455,"1-phenyl-2-butyl-3,5-dimethylpyrazolium",L-lysinate,1000,1059.0 +/- 36.0,-5.9
5,C09_A09,CCCCCCC(C)CC[n+]1cccc(C#N)c1.N#C[N-]C#N,18,0.929515,3-cyano-1-octylpyridinium,dicyanamide,1000,986.0 +/- 34.0,1.4
6,C11_A11,CCc1c(C)cc(C)c[n+]1CCCC(C)CC.CCO[PH](=O)[O-],17,0.975664,"1-hexyl-3,5-dimethylpyridinium",ethyl phosphonate,1000,996.0 +/- 32.0,0.4
7,C12_A12,CCCC[P+](C)(CCCC)CCC(C)N.CCC(=O)[O-],15,0.928571,(3-aminopropyl)tributylphosphonium,propionate,1000,913.0 +/- 25.0,8.7
8,C13_A13,CCCCCCC[N+]1(C)CCOCC1.CC(C)CC(N)C(=O)[O-],14,0.990909,4-methyl-4-octylmorpholin-4-ium,L-leucinate,1000,989.0 +/- 37.0,1.1
9,C16_A16,CCC[n+]1ccn(CC(C)C)c1.CC(C)CC(N)C(=O)[O-],12,0.963235,1-isobutyl-3-methylimidazolium,L-leucinate,1000,965.0 +/- 27.0,3.5


In [7]:
# Remove the rows labelled 23 and 58 from `out`, modifying it in place.
# NOTE(review): the labels are hard-coded and the reason for excluding these
# two rows is not recorded here; confirm. Re-running this cell after the
# rows are gone raises KeyError, because drop() rejects missing labels by default.
out.drop([23,58], inplace=True)

Unnamed: 0,Salt ID,Salt Smiles,Cation Heavy Atoms,Tanimoto Similarity Score,Molecular Relative,Anion,Model Density,MD Density,% Difference
0,C01_A01,CCCCCCCC[n+]1cc[nH]c1.COP(=O)([O-])OC,13,0.900000,1-butylimidazolium,dimethylphosphate,1000,1069.0 +/- 38.0,-6.9
1,C04_A04,Cn1cc[n+](CCCCCCCCCCP)c1.N#C[S-],17,0.937853,1-methyl-3-octyl-1H-imidazolium,thiocyanate,1000,1013.0 +/- 25.0,-1.3
2,C05_A05,CCCC(CC)CCCCn1cc[n+](CCC#N)c1.N#C[S-],19,0.940299,3-(2-cyanoethyl)-1-decyl-1H-imidazolium,thiocyanate,1000,992.0 +/- 39.0,0.8
3,C07_A07,Cn1cc[n+](CCCCCCCCO)c1.CCC(=O)[O-],15,0.972067,3-(6-hydroxyhexyl)-1-methylimidazolium,propionate,1000,1008.0 +/- 31.0,-0.8
4,C08_A08,CCCCn1c(C)cc(C)[n+]1-c1cccc(F)c1.NCCCCC(N)C(=O...,18,0.905455,"1-phenyl-2-butyl-3,5-dimethylpyrazolium",L-lysinate,1000,1059.0 +/- 36.0,-5.9
5,C09_A09,CCCCCCC(C)CC[n+]1cccc(C#N)c1.N#C[N-]C#N,18,0.929515,3-cyano-1-octylpyridinium,dicyanamide,1000,986.0 +/- 34.0,1.4
6,C11_A11,CCc1c(C)cc(C)c[n+]1CCCC(C)CC.CCO[PH](=O)[O-],17,0.975664,"1-hexyl-3,5-dimethylpyridinium",ethyl phosphonate,1000,996.0 +/- 32.0,0.4
7,C12_A12,CCCC[P+](C)(CCCC)CCC(C)N.CCC(=O)[O-],15,0.928571,(3-aminopropyl)tributylphosphonium,propionate,1000,913.0 +/- 25.0,8.7
8,C13_A13,CCCCCCC[N+]1(C)CCOCC1.CC(C)CC(N)C(=O)[O-],14,0.990909,4-methyl-4-octylmorpholin-4-ium,L-leucinate,1000,989.0 +/- 37.0,1.1
9,C16_A16,CCC[n+]1ccn(CC(C)C)c1.CC(C)CC(N)C(=O)[O-],12,0.963235,1-isobutyl-3-methylimidazolium,L-leucinate,1000,965.0 +/- 27.0,3.5


In [28]:
# Open a Chrome session on ChemSpider and prepare the lookup state.
# `webdriver` is imported here because the only other import of it sits in a
# later cell, so on a fresh kernel this cell would fail with NameError.
from selenium import webdriver

driver = webdriver.Chrome()
driver.get("http://www.chemspider.com/")
# Renumber the rows 0..n-1 so they can be addressed by position; the previous
# labels are kept in a new "index" column.
out.reset_index(inplace=True)
results=[]

In [43]:
### This cell checks whether each designed cation is already known to ChemSpider.
from selenium import webdriver
from selenium.common.exceptions import TimeoutException
from selenium.common.exceptions import WebDriverException
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
import time

# NOTE(review): the loop starts at row 52, presumably to resume an earlier
# interrupted run; confirm before re-running from a fresh kernel.
for i in range(52,out.shape[0]):
    # "Salt Smiles" is "<cation>.<anion>"; only the cation is searched.
    ion = out["Salt Smiles"][i].split(".")[0]
    # find_element(By.XPATH, ...) replaces find_element_by_xpath, which was
    # removed in Selenium 4.
    search = driver.find_element(By.XPATH, "//*[@id=\"ctl00_ctl00_qs_query\"]")
    search.clear()
    search.send_keys(ion)
    submit = driver.find_element(By.XPATH, "//*[@id=\"ctl00_ctl00_search_btn\"]")
    submit.click()
    time.sleep(5)  # give the result page time to load
    try:
        name = driver.find_element(By.XPATH,
                                   "//*[@id=\"ctl00_ctl00_ContentSection"\
                                   "_ContentPlaceHolder1_RecordViewDetails"\
                                   "_rptDetailsView_ctl00_WrapTitle\"]")
        found_name = name.text
    except WebDriverException:
        # No record title on the page: treat the ion as unknown. Only
        # WebDriver failures are caught, so Ctrl-C still stops the loop.
        results.append([ion, "new molecule"])
        print("{}\t{}".format("name not found for:",ion))
    else:
        results.append([ion, found_name])
        print(ion, found_name)
        time.sleep(3)

name not found for:	CCc1[nH+]c(N)cn1CC(C)CC
name not found for:	CCCC(=C(F)CF)n1cc[n+](C)c1CC
name not found for:	CNC(Cl)(CC(O)C(F)(F)F)[n+]1ccccc1
name not found for:	CCCCCC([n+]1ccn(C)c1C)S(=O)(=O)OC
name not found for:	CCCCc1c(C)c[n+](CCCC)n1C
name not found for:	CCCCC(CCCC)C([NH3+])CCC
name not found for:	CCC([PH3+])CCC(C)F
CCC([NH3+])(CC)C1CC1 3-Cyclopropyl-3-pentanamine
name not found for:	CCCCC(C)C([P+]P)C(C)CC
name not found for:	CCCCCCC(CC)C([NH3+])CCCC


In [55]:
# Save the ChemSpider lookup results. `a` is rebuilt from `results` here so
# the cell does not depend on a later cell having run, nor on `a` still
# holding the results (other cells reuse the name).
a = np.array(results)
b = pd.DataFrame(a)
b.to_csv("existance_results.csv",index=False)

In [53]:
# Turn the collected [ion, name] pairs into an array and show the name column.
# The bare `results` expression below is not the cell's last statement, so it
# is evaluated but not displayed.
results
a = np.array(results)
a[:,1]

array(['octylimidazolium', 'new molecule', 'new molecule',
       '1-(8-hydroxyoctyl)-3-methyl-imidazolium', 'new molecule',
       'new molecule', 'new molecule', 'new molecule', 'new molecule',
       'new molecule', 'new molecule', 'new molecule', 'new molecule',
       'new molecule', 'new molecule', 'new molecule', 'new molecule',
       'new molecule', 'new molecule', 'new molecule', 'new molecule',
       'new molecule', 'new molecule', 'new molecule', 'new molecule',
       'N-Ethyl-N-methyl-1-pentanamine', 'new molecule', 'new molecule',
       'new molecule', 'new molecule', 'new molecule', 'new molecule',
       'new molecule', 'new molecule', "2,2'-(Pentylimino)diethanol",
       'N,N-Dimethyl-1-pentanaminium', 'new molecule', 'new molecule',
       'new molecule', 'new molecule', 'new molecule', 'new molecule',
       'new molecule', 'new molecule', 'new molecule',
       'N-Allyl-N-ethyl-2-butanamine', '(2R)-2-Methyl-1-heptanamine',
       '3-Fluoro-1-pentanamine', 'new m

In [None]:
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression as lin

# Absolute value of the "% Difference" column for every salt in `out`.
poop = np.abs(out["% Difference"].values)
with plt.style.context('seaborn-whitegrid'):
    # Single 5x5 inch axes at 300 dpi.
    fig, ax = plt.subplots(figsize=(5,5), dpi=300)
    ax.plot(out["Tanimoto Similarity Score"], poop,
            linestyle="", marker="*", label=None)
    #ax.plot([0,X0],[Y0,0], label="Best fit R-squared: 0.25")
    ax.set(xlim=(0.4, 1), ylim=(0, 11),
           ylabel="% Error", xlabel="Similarity Score")
    #ax.legend(bbox_to_anchor=(1,1))
    ax.grid()

In [None]:
# Re-score every designed cation in `out` against the parent cations and pair
# the scores with that salt's "% Difference" value.
df = genetic.load_data("cationInfo.csv")
parent_candidates = df["smiles"].unique()
to_plot=[]
for i in range(out.shape[0]):
    # Keep only the text before the first "." of the salt SMILES (the cation).
    smi = out.iloc[i]["Salt Smiles"].split(".")[0]
    mol = Chem.MolFromSmiles(smi)  # NOTE(review): `mol` is not used again in this loop
    chro = genetic.Chromosome(smi, 0)
    # NOTE(review): `molecular_simularity` is not defined or imported anywhere
    # in the visible notebook (an earlier cell calls
    # genetic.molecular_similarity); confirm where it comes from.
    score, index = molecular_simularity(chro, parent_candidates)
    # `score` must be a list here: the % difference is appended to it in place,
    # so each row of `to_plot` ends with that value.
    score.append(out.iloc[i]["% Difference"])
    to_plot.append(score)

In [None]:
# One row per salt. Every column except the last becomes its own array in
# `sim_scores`; the last column is left out.
points = np.array(to_plot)
sim_scores = [points[:, column] for column in range(points.shape[1] - 1)]

In [None]:
# Legend labels, one per entry of `sim_scores`.
# NOTE(review): the number after each name is hard-coded; its meaning is not
# recorded here. Confirm before reusing.
metrics = ["Tanimoto, 0.21", "Dice, 0.22", "Cosine, 0.21", "Sokal, 0.20", 
           "Kulczynski, 0.20", "McConnaughey, 0.20"]
# metrics = ["Tanimoto", "Dice", "Cosine", "Sokal", 
#            "Kulczynski", "McConnaughey"]

import matplotlib.pyplot as plt
with plt.style.context('seaborn-whitegrid'):
    fig=plt.figure(figsize=(5,5), dpi=300)
    ax = fig.add_subplot(111)
    # The regression target (absolute % error) is the same for every metric,
    # so it is built once instead of once per metric.
    abs_error = np.abs(out["% Difference"]).values
    Y = abs_error.reshape(-1,1)
    for i in range(len(sim_scores)):
        X=sim_scores[i].reshape(-1,1)
        model = lin()
        model.fit(X,Y)
        # coef_ and intercept_ are one-element arrays for this 1-feature,
        # 1-target fit; pull out plain floats for the arithmetic and plotting.
        slope = model.coef_.item()
        intercept = model.intercept_.item()
        if slope < 0:
            # Axis intercepts of the fitted line.
            Y0 = intercept
            X0 = abs(intercept/slope)
        else:
            # No line for this metric. Previously X0/Y0 were left undefined
            # (NameError on the first metric) or stale from an earlier metric.
            X0 = Y0 = None
            print("pos corr")
        print(model.coef_, model.intercept_, model.score(X,Y))

        # All points of one metric in a single call, labelled once.
        ax.plot(sim_scores[i], abs_error,
                linestyle="", marker="*", c=tableau20[i*2],
                label=metrics[i])
        if X0 is not None:
            ax.plot([0,X0],[Y0,0])
    ax.set_xlim(0.2,1)
    ax.set_ylim(0,11)
    ax.set_ylabel("% Error")
    ax.set_xlabel("Similarity Score")
    ax.legend(bbox_to_anchor=(1,1))
    
    ax.grid()
#fig.savefig(filename='poster_images/NN_Test_Dataset_All_Salts.eps', 
#                        bbox_inches='tight', format='eps') 

In [None]:
# salts = salty.load_data("salts_with_smiles.csv")
# new_df = pd.concat([salts["name-cation"], salts["name-anion"], salts["Temperature, K"],\
#                     salts["Pressure, kPa"], salts["Specific density, kg/m<SUP>3</SUP>"]],\
#                    axis = 1)
# cationDescriptors = salty.load_data("cationDescriptors.csv")
# cationDescriptors.columns = [str(col) + '-cation' for col in cationDescriptors.columns]
# anionDescriptors = salty.load_data("anionDescriptors.csv")
# anionDescriptors.columns = [str(col) + '-anion' for col in anionDescriptors.columns]
# new_df = pd.merge(cationDescriptors, new_df, on="name-cation", how="right")
# new_df = pd.merge(anionDescriptors, new_df, on="name-anion", how="right")
# new_df.dropna(inplace=True) 

# Inspect one stored cation/anion pair: read the PDB files back, print their
# SMILES and atom counts, and score the cation against the parent cations.
df = genetic.load_data("cationInfo.csv")
parent_candidates = df["smiles"].unique()

mol = Chem.MolFromPDBFile("pdb_files/model_3/90/C02.pdb")
Draw.MolToMPL(mol)
smi = Chem.MolToSmiles(mol)
print(smi)
best = genetic.Chromosome(smi, 0)
# NOTE(review): `molecular_simularity` is not defined or imported anywhere in
# the visible notebook; confirm where it comes from. The code below indexes
# `index` and averages `score`, so both are expected to be sequences.
score, index = molecular_simularity(best, parent_candidates)
# print(parent_candidates[index])
print(salty.check_name(parent_candidates[index[0]]))
print(np.round(score, decimals=2))
print(np.round(np.average(score), decimals=3))
print(index)

# `mol` still refers to the cation here; it is rebound to the anion next.
print("{}\t{}".format("cation heavy atoms: ", mol.GetNumAtoms()))
mol = Chem.MolFromPDBFile("pdb_files/model_3/90/A02.pdb")
smi = Chem.MolToSmiles(mol)
print("{}\t{}".format("anion heavy atoms: ", mol.GetNumAtoms()))
print(smi)
# print(salty.check_name(smi))
Draw.MolToMPL(mol)

# smi = salty.check_name("methyl sulfate")
# print(smi)
# mol = Chem.MolFromSmiles(smi)
# Draw.MolToMPL(mol)

In [None]:
# Show the parent-cation entry at position 129.
# NOTE(review): the position is hard-coded; presumably an index printed by an
# earlier cell. Confirm.
parent_candidates[129]

In [None]:
def _build_optimized_ion(ion_name):
    """Return a hydrogen-complete, embedded and UFF-optimised molecule for ``ion_name``.

    The name is passed through ``salty.check_name`` and the result is parsed
    as SMILES.
    """
    ion = Chem.AddHs(Chem.MolFromSmiles(salty.check_name(ion_name)))
    Chem.EmbedMolecule(ion, Chem.ETKDG())
    Chem.UFFOptimizeMolecule(ion)
    return ion


# Prototype cation, written out as a PDB file.
cation = _build_optimized_ion("1-butyl-1-methylpyrrolidinium")
rdkit.Chem.rdmolfiles.MolToPDBFile(cation, "pdb_files/prototype/BMP")

# Prototype anion, written out as a PDB file.
anion = _build_optimized_ion("bis[(trifluoromethyl)sulfonyl]imide")
rdkit.Chem.rdmolfiles.MolToPDBFile(anion, "pdb_files/prototype/TF2")

In [None]:
def guess_password(target):
    """Run the genetic search for a cation matching ``target`` and return the result.

    The search is delegated to ``genetic.get_best``; this function only wires
    up its callbacks. It reads the notebook-level globals ``anion``,
    ``geneSet`` and ``parent_candidates``.

    Returns whatever ``genetic.get_best`` returns.
    """
    started_at = datetime.datetime.now()

    def score_genes(genes):
        # Fitness of a candidate against the currently selected global anion.
        return get_fitness(anion, genes, target)

    def report_progress(candidate, mutation):
        display(candidate, mutation, started_at)

    def report_result(genes, target, mutation_attempts):
        show_ion(genes, target, mutation_attempts)

    # 0.99 is the fitness value handed to get_best as the optimum to reach.
    return genetic.get_best(score_genes, 0.99, geneSet, report_progress,
                            report_result, target, parent_candidates)
    
def display(candidate, mutation, startTime):
    """Print one tab-separated progress line: genes, fitness, mutation.

    The elapsed time since ``startTime`` is computed but not printed.
    """
    elapsed = datetime.datetime.now() - startTime  # computed, not reported
    line_template = "{}\t{}\t{}"
    print(line_template.format(candidate.Genes, candidate.Fitness, mutation))
    
def get_fitness(anion, genes, target):
    """Score a candidate cation by how close the predicted property is to ``target``.

    Parameters
    ----------
    anion : RDKit molecule used for every "anion" descriptor column.
    genes : SMILES string of the candidate cation.
    target : value the prediction is compared against.

    Returns
    -------
    ``1 - |prediction - target| / target``, where ``prediction`` is ``exp``
    of the model output.

    Notes
    -----
    The model and the descriptor table are re-loaded on every call.
    Rows 0 and 1 of the descriptor table are used as offset and scale
    (presumably mean and standard deviation; TODO confirm).
    """
    cation = Chem.MolFromSmiles(genes)
    model = genetic.load_data("density_nn_model.sav", pickleFile=True)
    deslist = genetic.load_data("density_nn_model_descriptors.csv")

    def rdkit_value(column, mol):
        # The text before the first '-' in the column name is used as the
        # RDKit descriptor name.
        with genetic.suppress_stdout_stderr():
            return calculator([column.partition('-')[0]]).CalcDescriptors(mol)[0]

    feature_vector = []
    for item in deslist:
        if "anion" in item:
            feature_vector.append(rdkit_value(item, anion))
        elif "cation" in item:
            feature_vector.append(rdkit_value(item, cation))
        elif "Temperature_K" in item:
            feature_vector.append(298.15)
        elif "Pressure_kPa" in item:
            feature_vector.append(101.325)
        else:
            print("unknown descriptor in list: %s" % item)

    offsets = deslist.iloc[0].values
    scales = deslist.iloc[1].values
    features_normalized = (feature_vector - offsets) / scales
    model_input = np.array(features_normalized).reshape(1, -1)
    prediction = exp(model.predict(model_input)[0])
    error = abs((prediction - target) / target)

    return 1 - error

def show_ion(genes, target, mutation_attempts):
    """Print the number of mutation attempts and the target density that was matched.

    ``genes`` is accepted for interface compatibility and not used.
    """
    print("{}\t{}".format("mutation attempts: ", mutation_attempts))
    # Density is a mass per volume; the previous label read "(kg/m)".
    print("within 1%% of target density: %s (kg/m^3) " % target)
    

# Run one search with a randomly chosen anion and a randomly chosen target
# value, then draw the resulting cation.
geneSet = genetic.generate_geneset()

df = genetic.load_data("cationInfo.csv")
# Optional name-based filters on the parent cations, currently disabled:
# df = df.loc[df["name"].str.contains("imid", case=False)]
# df = df.loc[~df["name"].str.contains("phenyl", case=False)]
# df = df.loc[~df["name"].str.contains("benzyl", case=False)]
# df = df.loc[~df["name"].str.contains("azido", case=False)]
# df = df.loc[~df["name"].str.contains("cyan", case=False)]
# df = df.loc[~df["name"].str.contains("benz", case=False)]
# df = df.loc[~df["name"].str.contains("cyclo", case=False)]
# df = df.loc[~df["name"].str.contains("sulf", case=False)]
# df = df.loc[~df["name"].str.contains("azepinium", case=False)]
parent_candidates = df['smiles'].unique()


# From here on `df` is the array of unique anion SMILES.
df = genetic.load_data("anionInfo.csv")
df = df['smiles'].unique()
# Pick one anion at random; guess_password/get_fitness read it via the global `anion`.
ohPickMe = random.sample(range(df.shape[0]),1)
anion = Chem.MolFromSmiles(df[ohPickMe[0]])
print(df[ohPickMe[0]])
img = Draw.MolToMPL(anion)
# Random integer target in [800, 1500); no seed is set, so runs are not repeatable.
target = random.sample(range(800,1500),1)[0]

print(target)
best = guess_password(target)
img = Draw.MolToMPL(best.Mol)

In [None]:
# Build a 3-D, UFF-optimised structure for one hand-written SMILES, draw it
# and write it to the file "test".
smiles = "CCCCCCc1c(SC)[n+](C)c(S)n1CCCPC"
mol = Chem.MolFromSmiles(smiles)
mol = Chem.AddHs(mol)
Chem.EmbedMolecule(mol, Chem.ETKDG())
Chem.UFFOptimizeMolecule(mol)
# NOTE(review): this rebinds the global `a`, which other cells use for the
# ChemSpider results array.
a = Draw.MolToMPL(mol)
# NOTE(review): confId=-2 is unusual; confirm which conformer this selects.
rdkit.Chem.rdmolfiles.MolToPDBFile(mol, "test", confId=-2)

In [None]:
def guess_password(target):
    """Evolve a molecule towards the SMILES ``target`` and return the result.

    The search is delegated to ``genetic.get_best``; this function only wires
    up its callbacks. It reads the notebook-level globals ``geneSet`` and
    ``parent_candidates``.

    Parameters
    ----------
    target : SMILES string every candidate is compared against.

    Returns
    -------
    Whatever ``genetic.get_best`` returns. The result used to be computed and
    then discarded, so callers always received ``None``.
    """
    startTime = datetime.datetime.now()

    def fnGetFitness(genes):
        return get_fitness(genes, target)

    def fnDisplay(candidate, mutation):
        display(candidate, mutation, startTime)

    def fnShowIon(genes, target, mutation_attempts):
        show_ion(genes, target, mutation_attempts)

    # The optimum is the score of the target compared with itself.
    optimalFitness = get_fitness(target, target)
    best = genetic.get_best(fnGetFitness,\
        optimalFitness, geneSet, fnDisplay,\
                    fnShowIon, target, parent_candidates)
    return best
    
def display(candidate, mutation, startTime):
    """Print one tab-separated progress line: genes, fitness, mutation, elapsed time."""
    elapsed = datetime.datetime.now() - startTime
    line_template = "{}\t{}\t{}\t{}"
    print(line_template.format(candidate.Genes, candidate.Fitness, mutation, elapsed))
    
def get_fitness(genes, target):
    """Return the fingerprint similarity between the SMILES ``target`` and ``genes``.

    Both strings are parsed with ``Chem.MolFromSmiles``, fingerprinted with
    ``FingerprintMols.FingerprintMol`` and compared with
    ``DataStructs.FingerprintSimilarity`` (target first).
    """
    target_fp, genes_fp = (
        FingerprintMols.FingerprintMol(Chem.MolFromSmiles(smiles))
        for smiles in (target, genes)
    )
    return DataStructs.FingerprintSimilarity(target_fp, genes_fp)

def show_ion(genes, target, mutation_attempts):
    """Print the atom count of ``target`` and the number of mutation attempts.

    NOTE(review): the atom count is taken from ``target``, not from ``genes``
    (which is unused); confirm this is intended.
    """
    mol = Chem.MolFromSmiles(target)
    report = (
        ("number of atoms: ", mol.GetNumAtoms()),
        ("mutation attempts: ", mutation_attempts),
    )
    for label, value in report:
        print("{}\t{}".format(label, value))
    

# Pick a random cation from the filtered parent set as the target structure
# and try to evolve a molecule that matches it.
geneSet = genetic.generate_geneset()

df = genetic.load_data("cationInfo.csv")
# Keep names containing "imid" (case-insensitive) ...
df = df.loc[df["name"].str.contains("imid", case=False)]
# ... then drop, one substring at a time, every name containing any of these.
for excluded in ("phenyl", "benzyl", "azido", "cyan", "benz",
                 "cyclo", "sulf", "azepinium"):
    df = df.loc[~df["name"].str.contains(excluded, case=False)]
parent_candidates = df['smiles'].unique()

# From here on `df` is the array of parent SMILES.
df = parent_candidates
ohPickMe = random.sample(range(df.shape[0]),1)
target = df[ohPickMe[0]]
guess_password(target)
img = Draw.MolToMPL(Chem.MolFromSmiles(target))

In [None]:
# Draw the current `target` SMILES.
# NOTE(review): these aliases rebind the notebook-level names `mol` and `img`
# (used elsewhere for molecule and image objects) to functions, and `a` is
# overwritten as well. Re-run the cells that need the old values.
from rdkit.Chem import MolFromSmiles as mol
from rdkit.Chem.Draw import MolToMPL as img
a = img(mol(target))

In [None]:
# Show the description of one randomly chosen entry of `geneSet`.
geneSet.GetEntryDescription(random.sample(range(geneSet.GetNumEntries()), 1)[0])

In [None]:
# Build a Chromosome from a literal SMILES. The class is reached through the
# `genetic` module, as in the other cells; the bare name `Chromosome` is never
# imported in this notebook.
genetic.Chromosome("C1[n+]ccn1",0)

In [None]:
# Build a Chromosome from the description of a randomly chosen `geneSet`
# entry. The class is reached through the `genetic` module, as in the other
# cells; the bare name `Chromosome` is never imported in this notebook.
genetic.Chromosome(geneSet.GetEntryDescription(
    random.sample(range(geneSet.GetNumEntries()), 1)[0]), 0)

In [None]:
# Scratch cell: build a fragment catalog from one molecule and bond a random
# fragment onto `childGenes`.
# NOTE(review): `oldGene` and `childGenes` are read before anything in the
# visible notebook defines them, `Chromosome` is never imported, and
# `check_name` is only imported in a later cell. This cell cannot run on a
# fresh kernel as written.
atoms = [6, 7]  # NOTE(review): not used below
fName = os.path.join(RDConfig.RDDataDir, 'FunctionalGroups.txt')
rdkitFrags = FragmentCatalog.FragCatParams(1, 5, fName)
customFrags = FragmentCatalog.FragCatalog(rdkitFrags)
fcgen = FragmentCatalog.FragCatGenerator()
m = Chem.MolFromSmiles(check_name("1-benzyl-3-methyl-1H-imidazol-3-ium"))
fcgen.AddFragsFromMol(m, customFrags)

# Replaces the notebook-level gene set with the catalog built above.
geneSet = customFrags
newGene = Chromosome(geneSet.GetEntryDescription(
    random.sample(range(geneSet.GetNumEntries()), 1)[0]), 0)
# The new fragment is listed first in CombineMols, so the old atom index is
# shifted by the fragment's atom count.
oldGene = oldGene + newGene.Mol.GetNumAtoms()
combined = Chem.EditableMol(Chem.CombineMols(newGene.Mol,
                            childGenes.Mol))
# Single bond between atom 0 and the shifted `oldGene` atom.
combined.AddBond(0, oldGene, order=Chem.rdchem.BondType.SINGLE)
childGenes = combined.GetMol()

childGenes = Chromosome(Chem.MolToSmiles(childGenes), 0)

In [None]:
# Draw every cation that survives the name filters.
# `load_data` is reached through the `genetic` module, as in the other cells;
# the bare name is never imported in this notebook.
df = genetic.load_data("cationInfo.csv")
# Keep names containing "imid" (case-insensitive) ...
df = df.loc[df["name"].str.contains("imid", case=False)]
# ... then drop every name containing any of these substrings.
for excluded in ("phenyl", "benzyl", "azido", "cyan", "benz",
                 "cyclo", "sulf", "azepinium"):
    df = df.loc[~df["name"].str.contains(excluded, case=False)]
df = df['smiles'].unique()  # from here on `df` is the array of SMILES
for smi in df:
    Draw.MolToMPL(Chem.MolFromSmiles(smi))

In [None]:
from salty import check_name

# List the SMILES of every cation that survives the name filters.
# `load_data` is reached through the `genetic` module, as in the other cells;
# the bare name is never imported in this notebook.
df = genetic.load_data("cationInfo.csv")
# Keep names containing "imid" (case-insensitive) ...
df = df.loc[df["name"].str.contains("imid", case=False)]
# ... then drop every name containing any of these substrings.
for excluded in ("phenyl", "benzyl", "azido", "cyan", "benz",
                 "cyclo", "sulf"):
    df = df.loc[~df["name"].str.contains(excluded, case=False)]

df = df['smiles'].unique()  # from here on `df` is the array of SMILES
df


In [None]:
def remove_custom_fragment(childGenes, GeneSet, oldGene):
    """Delete one randomly chosen custom fragment from ``childGenes``.

    Parameters
    ----------
    childGenes : chromosome whose ``Mol`` is truncated.
    GeneSet : object exposing the fragment catalog as ``CustomFrags``.
    oldGene : accepted for interface compatibility; not used.

    Returns
    -------
    A new chromosome built from the truncated molecule, or ``0`` when the
    truncation or the rebuild raises an ordinary exception.
    """
    geneSet = GeneSet.CustomFrags
    # The class is reached through the `genetic` module, as in the other
    # cells; the bare name `Chromosome` is never imported in this notebook.
    newGene = genetic.Chromosome(geneSet.GetEntryDescription(
        random.sample(range(geneSet.GetNumEntries()), 1)[0]), 0)
    try:
        truncate = Chem.DeleteSubstructs(childGenes.Mol, newGene.Mol)
        return genetic.Chromosome(Chem.MolToSmiles(truncate), 0)
    except Exception:
        # Best effort: a failed truncation is reported as 0. Exception (not
        # BaseException) lets KeyboardInterrupt and SystemExit propagate.
        return 0
# Draw one hand-written SMILES; runs at cell level, not inside the function above.
img = Draw.MolToMPL(Chem.MolFromSmiles("CNN(CCCCCN1C=C[NH+]=C1)CCC(F)CCCCCCN=O"))

In [None]:
import salty
# Look up a SMILES string with salty.check_name.
# NOTE(review): other cells pass ion *names* to check_name and parse the
# result as SMILES; presumably it translates in both directions. Confirm.
salty.check_name("c1cc(cc(c1)S(=O)(=O)[O-])C(=O)O")

In [None]:
# Draw a Morgan-fingerprint similarity map between two randomly chosen
# cations whose names contain "imid".
geneSet = genetic.generate_geneset()  # NOTE(review): not used in this cell
df = genetic.load_data("saltInfo.csv")
df = df.loc[df["cation_name"].str.contains("imid", case=False)]
df = df['cation_SMILES'].unique()  # from here on `df` is the array of SMILES
# Two distinct positions: the first is the reference, the second the candidate.
ohPickMe = random.sample(range(df.shape[0]),2)
target = Chem.MolFromSmiles(df[ohPickMe[0]])
candidate = Chem.MolFromSmiles(df[ohPickMe[1]])
%matplotlib inline
SimilarityMaps.GetSimilarityMapForFingerprint(candidate,\
        target,SimilarityMaps.GetMorganFingerprint)