In [1]:
from __future__ import print_function
from rdkit import Chem
from rdkit.Chem import rdmolops
import numpy as np


# Location of the raw CSV that is downloaded and parsed into `df` further down in this cell.
__urls__ = 'https://raw.githubusercontent.com/wolfiex/DSMACC-testing/master/dsmacc/datatables/smiles_mined.csv'

import pandas as pd
import requests,io
# Download the SMILES table.  A timeout keeps the notebook from hanging on a
# stalled connection, and raise_for_status() fails loudly on an HTTP error
# instead of handing an error page to the CSV parser.
response = requests.get(__urls__, timeout=30)
response.raise_for_status()
s = response.content  # raw bytes of the CSV
df = pd.read_csv(io.StringIO(s.decode('utf-8')))

df.head()


Unnamed: 0.1,Unnamed: 0,name,smiles,inchi,synonyms
0,0,C918NO3,O=CCC(=O)C1CC(ON(=O)=O)C1(C)C,InChI=1S/C9H13NO5/c1-9(2)6(7(12)3-4-11)5-8(9)1...,
1,1,C134CO,OCCC(=O)C(=O)C(=O)C(C)(C)C(=O)CCC(=O)C,"InChI=1S/C13H18O6/c1-8(15)4-5-10(17)13(2,3)12(...",
2,2,NBUTACET,CCCCOC(=O)C,"InChI=1S/C6H12O2/c1-3-4-5-8-6(2)7/h3-5H2,1-2H3","Butyl acetate;N-BUTYL ACETATE;Acetic acid, but..."
3,3,C124O2,CCCCCCC(=O)C(O[O])CC(=O)CC,InChI=1S/C12H21O4/c1-3-5-6-7-8-11(14)12(16-15)...,
4,4,PROL1MPAN,CCC(C)(O)C(=O)OON(=O)=O,"InChI=1S/C5H9NO6/c1-3-5(2,8)4(7)11-12-6(9)10/h...",


In [2]:
'''
symbols = {}
for i in range(200): 
    try:
        a= Chem.rdchem.Atom(i).GetSymbol()
        symbols[a]=[]
    except:None
'''
from collections import Counter

# `included` maps each element symbol to the largest number of times it occurs
# in any single molecule of the table, e.g. {'C': 15, 'O': 12, ...}.
included = {}
for smile in df.smiles.values:
    try:
        mol = Chem.MolFromSmiles(smile)
    except Exception:
        # Best effort: entries RDKit refuses outright (presumably the missing
        # SMILES that pandas reads in as NaN) are skipped rather than aborting.
        continue
    if mol is None:
        # The string could not be parsed into a molecule; nothing to count.
        continue

    atoms = Counter(str(atom.GetSymbol()) for atom in mol.GetAtoms())
    for symbol, count in atoms.items():
        included[symbol] = max(included.get(symbol, 0), count)

# number of repetitions
print(included)


{'C': 15, 'Cl': 4, 'O': 12, 'N': 3, 'S': 1, 'Br': 2}


In [3]:
def newgen(letter):
    '''
    Endless label generator for one element symbol.

    Yields `letter` itself first and then `letter` followed by a running
    counter starting at 1: 'C', 'C1', 'C2', ...
    '''
    # The very first label carries no numeric suffix.
    yield letter
    suffix = 1
    while True:
        yield letter + str(suffix)
        suffix += 1
    

In [4]:

# One label generator per element; drawing `included[element]` labels from each
# gives the fixed column layout shared by every fingerprint (C, C1, ..., O, O1, ...).
gen = {element: newgen(element) for element in included}
# The built-in next() works on Python 2 and 3; the .next() method is Python 2 only.
headers = [next(gen[element]) for element in included for _ in range(included[element])]
print(headers)

['C', 'C1', 'C2', 'C3', 'C4', 'C5', 'C6', 'C7', 'C8', 'C9', 'C10', 'C11', 'C12', 'C13', 'C14', 'Cl', 'Cl1', 'Cl2', 'Cl3', 'O', 'O1', 'O2', 'O3', 'O4', 'O5', 'O6', 'O7', 'O8', 'O9', 'O10', 'O11', 'N', 'N1', 'N2', 'S', 'Br', 'Br1']


In [5]:
def get_basic_matrix(smile,symmetric=True):
    '''
    Build the bond adjacency matrix of a single molecule.

    Parameters
    ----------
    smile : str
        SMILES string, parsed with Chem.MolFromSmiles.
    symmetric : bool
        If True (default) every bond is written in both directions; otherwise
        only the [begin atom, end atom] entry is filled.

    Returns
    -------
    pandas.DataFrame
        Square integer ('i4') frame whose rows and columns are per-element atom
        labels in molecule atom order (C, C1, ..., O, O1, ...).  Double bonds
        are stored as 2, every other bond type as 1.

    Raises
    ------
    ValueError
        If the SMILES string cannot be parsed into a molecule.
    '''
    mol = Chem.MolFromSmiles(smile)
    if mol is None:
        raise ValueError('Could not parse SMILES: %r' % (smile,))

    # Label generators are created on demand, so any element can be labelled,
    # not only those already recorded in the notebook-level `included` dict.
    label_gen = {}
    atoms = []
    for atom in mol.GetAtoms():
        symbol = atom.GetSymbol()
        if symbol not in label_gen:
            label_gen[symbol] = newgen(symbol)
        # Built-in next() works on Python 2 and 3; .next() is Python 2 only.
        atoms.append(next(label_gen[symbol]))

    n = len(atoms)
    matrix = np.zeros((n,n))
    for b in mol.GetBonds():
        weight = 2 if b.GetBondType() == Chem.rdchem.BondType.DOUBLE else 1
        begin, end = b.GetBeginAtomIdx(), b.GetEndAtomIdx()
        matrix[begin, end] = weight
        if symmetric:
            matrix[end, begin] = weight

    return pd.DataFrame(matrix,columns=atoms,index=atoms,dtype='i4')


# Quick check on a three-atom molecule: expect a 3x3 frame labelled C, C1, O.
print(get_basic_matrix('C=CO'))

    C  C1  O
C   0   2  0
C1  2   0  1
O   0   1  0


In [16]:

def get_full_matrix(smile,flat=False,verbose=True):
    '''
    Build the fixed-size adjacency fingerprint of a molecule.

    Every molecule is mapped onto the same atom-label layout, given by the
    notebook-level `headers` list, so results for different molecules line up.
    Double bonds are stored as 2, every other bond type as 1.

    Parameters
    ----------
    smile : str
        SMILES string, parsed with Chem.MolFromSmiles.
    flat : bool
        If falsy (default) return a DataFrame in which each bond is written
        once, at [begin-atom label, end-atom label].
        If truthy return the strict upper triangle of the symmetrised matrix,
        row by row, as a flat list of floats (the adjacency is undirected, so
        the lower triangle carries no extra information).
    verbose : bool
        If True (default) echo the SMILES string before processing it.

    Returns
    -------
    pandas.DataFrame, list, or False
        False is returned when the molecule cannot be represented: the input
        could not be parsed, or one of its atom labels is not in `headers`.

    NB - alternatively rather than representing double edges with a 2, we may add a reverse edge?
    '''
    if verbose:
        print(smile)

    try:
        mol = Chem.MolFromSmiles(smile)
    except Exception:
        # Best effort: input that RDKit rejects outright (presumably the NaN
        # rows of the table) is reported as a failure, not as a crash.
        return False
    if mol is None:
        return False

    # Per-element running labels in atom order: C, C1, ..., O, O1, ...
    label_gen = {}
    atoms = []
    for atom in mol.GetAtoms():
        symbol = atom.GetSymbol()
        if symbol not in label_gen:
            label_gen[symbol] = newgen(symbol)
        # Built-in next() works on Python 2 and 3; .next() is Python 2 only.
        atoms.append(next(label_gen[symbol]))

    # Label -> row/column position, built once instead of list.index per bond.
    position = {label: idx for idx, label in enumerate(headers)}
    if any(label not in position for label in atoms):
        # Unknown element, or more atoms of one element than the layout holds.
        return False

    n = len(headers)
    matrix = np.zeros((n,n))
    for b in mol.GetBonds():
        weight = 2 if b.GetBondType() == Chem.rdchem.BondType.DOUBLE else 1
        row = position[atoms[b.GetBeginAtomIdx()]]
        col = position[atoms[b.GetEndAtomIdx()]]
        matrix[row, col] = weight

    if flat:
        # Each bond was written once at [begin, end], which can land below the
        # diagonal (e.g. an O -> C bond, since O labels come after C labels).
        # Fold both triangles together before taking the upper one so that no
        # bond is dropped from the flattened fingerprint.
        folded = np.maximum(matrix, matrix.T)
        return folded[np.triu_indices(n, k=1)].tolist()

    return pd.DataFrame(matrix,columns=headers,index=headers,dtype='i4')



In [17]:
# Same molecule on the shared layout; only the first five rows are shown.
print(get_full_matrix('C=CO').head())

C=CO
    C  C1  C2  C3  C4  C5  C6  C7  C8  C9  ...  O8  O9  O10  O11  N  N1  N2  \
C   0   2   0   0   0   0   0   0   0   0  ...   0   0    0    0  0   0   0   
C1  0   0   0   0   0   0   0   0   0   0  ...   0   0    0    0  0   0   0   
C2  0   0   0   0   0   0   0   0   0   0  ...   0   0    0    0  0   0   0   
C3  0   0   0   0   0   0   0   0   0   0  ...   0   0    0    0  0   0   0   
C4  0   0   0   0   0   0   0   0   0   0  ...   0   0    0    0  0   0   0   

    S  Br  Br1  
C   0   0    0  
C1  0   0    0  
C2  0   0    0  
C3  0   0    0  
C4  0   0    0  

[5 rows x 37 columns]


In [18]:
# flat=1 is truthy, so the flattened list form is returned instead of a DataFrame.
print(get_full_matrix('C=CO',flat=1))

C=CO
[2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,

In [19]:
# Species name -> flattened fingerprint for every row of the table
# (entries that get_full_matrix could not process are stored as False).
flat_list = {
    name: get_full_matrix(smile, flat=1)
    for name, smile in zip(df.name.values, df.smiles.values)
}

O=CCC(=O)C1CC(ON(=O)=O)C1(C)C
OCCC(=O)C(=O)C(=O)C(C)(C)C(=O)CCC(=O)C
CCCCOC(=O)C
CCCCCCC(=O)C(O[O])CC(=O)CC
CCC(C)(O)C(=O)OON(=O)=O
OCC1(OO)CC\C=C(\C)/CCC2C1CC2(C)C
OC(=O)C(C)(C)O
CC1=CC([O])(O)C2(OOC1(C)C2O)N(=O)=O
CCC(=CC(=O)O)C=O
CC(=CC(=O)CC)C1OC1(C)C=O
OOC1(O)C(=C(N(=O)=O)C2(OOC1(C)C2O)N(=O)=O)C
O=N(=O)OC(C)(C)C(C)(C)O
[O]OC(=O)CC1CC(C(=O)CO)C1(C)C
CC(=O)C(C)(C)[O]
CC([O])C(C)O
O=CC(CCC(=O)O)C(=C)C
OOCC(O)C(C)C
[O]OC1C=CC2(C)OOC1(CC)C2O
Cc1ccc(C)c(O)c1O
CCCC(C)(CC)ON(=O)=O
CCCCCCC(=O)C(O)CC(=O)CC
CCCC(=O)OCC=O
O=C1C=CC(=O)C(=C1)C
OC(C(=O)C)C([O])C1OC1(C)C(=O)C
O=CCC(=C)C1CC(C)(C)C1CCC(=O)O
nan
nan
OCC(OO)C(C)(C)O
O=N(=O)OC(C)C(C)(C)[O]
O=CC1OC1C(=O)O
CCCC([O])OC(=O)C(=O)C
[O]CC(O)C(C)C
OOC(=O)C(O)Cl
CCCCOC(=O)C(=O)C
O=CC=CC=O
CCC(C)(O)CC(=O)OON(=O)=O
CCC12OOC(CC)(C=C(C)C2(O)OO)C1O
CCCCC(O)C(=O)OON(=O)=O
CCCC(O)CC(=O)OON(=O)=O
OCCC(O)C(C)O[O]
CCC(=O)C(OO)C(O)C1OC1C=O
[O]Oc1c(CC)cc(C)cc1N(=O)=O
OOC(C(=O)C)C(O)C(=O)C
CCC(=O)C=C(C)C(O[O])C(=O)CC
OC1C=CC2(OOC1C2O)C(C)C
CCCC(=O)C
CC(=O)

OCC(O)C(=O)O[O]
CC(=O)OC(=O)CCOO
O=CCC(OO)CC(=O)C(=O)C
O=N(=O)OCc1cccc(C)c1
OCC(=C)C=O
[O]OCC(C)C(C)CO
O=N(=O)OCC(=CC(=O)OON(=O)=O)C
OC(CC=O)CO
CCc1cc(C)cc(c1)C(C)[O]
[O-][O+]=C(O)C(=C/C=C/C(=O)O)C
O=CCC(CC=O)C(C)(O)CO
CC(OO)(C=O)C(O)C(=O)CC
O=CC(=O)CC(=O)C1CC(C)(C)C1CCC(=O)C
CCC(=O)CC(=O)O
COC(=O)CCC(=O)OO
O=CC(=O)C=C(C(=O)CO)CC
CCC1=CC(=O)C(ON(=O)=O)C(C)([O])C1=O
[O]OCCc1ccccc1
[O]CCC(=O)C1CC(C)(C)C1CC=O
O=CCCC(=O)CC(=O)O
OOC(=O)C(C)(C)CO
CCC(=O)CCC(=O)O
[O]OC(=O)CC(C)C(C)O
O=CC(C)OC(=O)C(=O)C
CCC12OOC(N(=O)=O)(C2ON(=O)=O)C(O)(OO)C(=C1C)N(=O)=O
O=CCCC(=O)C(O)C=O
O=CCC(CC=O)C(C)(CO)O[O]
O=C(OON(=O)=O)Cc1ccccc1
OOC(C)c1cc(CC)cc(C)c1
CCC(O)COC(=O)C
OCC(C)C(C)CO
OOC(C=O)C(O)C1(C)OC1(C)C=O
CCCC(=O)C(C)ON(=O)=O
CC(=C)C1CCC(C)(O)C(=O)C1
OOCC(C)(C)OC=O
[O]OC1CC(=O)CC(=O)C1(C)ON(=O)=O
CCOC(=O)CO[O]
CCC(=O)C=CC1OC1(C)C(=O)O[O]
O=CCC(CO)C(C)(CO)O[N+](=O)[O-]
CCC(O)CC(=O)C=O
O=CC1OC1C(O)C(=O)C(=O)C(C)C
OOCC(=O)C(=C)CCC=O
CCCC(=CC(=O)C=O)C(=O)OO
CC(=O)C(=O)OCO[O]
CCC(=O)COO
[O-][O+]=CC(=O)OO
[O]O

OCC(OO)C(C)C(=O)C
CCCCCC(O)CC(=O)C(C)O[O]
[O]CC(=O)C=C(C(=O)CO)CC
CCCCCCC(=O)O[O]
[O-][O+]=C(C)C
OOC(=O)C(C)(O)C(O)CON(=O)=O
[O]OC(C=O)C(C)ON(=O)=O
OOC(C(=O)C)C(=O)C
[O]OCC=C(C)CON(=O)=O
CCC12C=CC(O)([O])C(C)(OO1)C2ON(=O)=O
CCc1cc(C)cc(O)c1O
[O]OC1C=CC2(C)OOC1(C)C2O
[O-][N+](=O)OOC(=O)CC(=C)C
O=CC(O)C(C)([O])C=O
CCC(=O)C(C)C(=O)OO
CCC12OOC(C(=CC2(O)OO)C)C1ON(=O)=O
CC([O])C(O)C=O
[O]OC(=O)CCC(C)(C)O
O=CCC(=O)C(=O)O
CC([O])(CO)C=C
O=CC1(C)OC1C(=O)O
[O]CCC(=C)C1CC(C)(C)C1CC=O
Cc1c(C)cccc1[O]
O=CCOC(C)(C)C
CCCCCO
O=N(=O)OC1C2OOC1(C=C(N(=O)=O)C2([O])O)N(=O)=O
CCC(=O)C(O)C(C)(O)C1OC1(CC)C=O
OCCOC(C)OO
[O]OC(C=C)CON(=O)=O
C=CC(O)CON(=O)=O
CCCC1OC(=O)C(ON(=O)=O)C1=O
CC(C=O)C(=O)CC
Cc1cc(N(=O)=O)c([O])c(C)c1C
[O]OC(C)c1ccccc1C
CC(OO)C(=O)CC(=O)C(=O)C
O=N(=O)OC(C)C(=O)C(C)[O]
[O]OC12CC(=O)C(C)(ON(=O)=O)C(C1)C2(C)C
CCCCCOO
CCCCCCC(=O)C(=O)C(OO)C(=O)CC
CC(=O)CC(C)([O])C(C)O
OCCC(C)C(C)OO
OOCC(=CCO)C
CC(CC(=O)OC=O)O[O]
OOC1(O)C(=CC2(C)OOC1(C)C2O)C
OC(OC(=O)C(=O)C)C(=O)C
O=N(=O)OC(C)C(C)CC(C)O
CCc1c

OCC(=O)C(C)(OO)C=O
Cc1cc(C)c(O)c(C)c1
OCc1ccccc1C
Cc1c(C)cccc1O
CCC(C)(O)OC(=O)C
OCC(=C)C(O)CO
CC1=C(C)C(=O)OC1
[O]OC(=O)CC(=O)C(=O)C
OCC(=C)C(CO)O[O]
CCC(C)[O]
[O]OCc1ccccc1C
ClC(=O)C(O)(Cl)Cl
CCC(=O)CC([O])C(C)O
OCC(OO)CC(=O)C(=O)CO
[O]OCc1ccc(C)c(C)c1
O=CC(C)(C)C(=O)O
OOC1CC(CCC1(C)O)C(=O)C
OOCC1CC(C(=O)C)C1(C)C
OCC(OO)C=O
CCC(=O)C=CC1(C)OC1C(=O)OON(=O)=O
CCC(=O)CC(C)[O]
[O]CC(=O)C(C)(C)C=O
OOC(=O)CC(=O)CC=O
[O-][O+]=C(COO)C
CCC(OO)C(C)O
O=CC(O)C(C)(ON(=O)=O)C(=O)O
CCCC(=O)C([O])CC(=O)CC
O=CC(ON(=O)=O)C(C)(C)O
OCC(C)C(O)ON(=O)=O
CCC12OOC(N(=O)=O)(C2O)C(O)(OO)C(=C1C)N(=O)=O
CC(O)(C[O])C=O
CCC12C=CC(ON(=O)=O)C(OO1)C2O
O=CCC(=O)C(OO)CC=O
OCCC(C)OO
CC(COO)C(=O)C
OCCC(C(=C)C)C(=O)OO
OCC1CC(OO)(C(=O)C)C1(C)C
OOC1C(=O)C=CC(=O)C1O
CCCC1=CC(=O)C([O])C(ON(=O)=O)C1=O
CC(=O)CO[O]
OOc1ccc(C)c(C)c1
OCC=C(C)CO
OOC(=O)C=C(C)C(=O)C=O
CC(=O)OON(=O)=O
CC(=O)CC(O)C(C)(C)O
Cc1cc(N(=O)=O)c(O)c(O)c1C
CC1=C(O)C([O])(O)C2(OOC1(C)C2O)N(=O)=O
CCC(=O)C=C(C)C1OC1(CC)C(=O)OO
[O]OC(CC(=O)C)C(C)(C)O
CCC(=O)C(=O)C(

[O]Oc1ccccc1
CCC1OC(=O)C(=O)C1O
OCC(=O)C(C)(O)CON(=O)=O
OC/C=C(\C[O])/C
CCCC(C)ON(=O)=O
[O]CC(CCC(=O)C)C(=C)C
CC(OO)(C=CC=O)C(=O)CC
OOC(C)(C)C1CC(=O)CCC1=O
CC(C)(O)C(C)(C)[O]
O=COCC(=O)C
OOCC(=O)CCC(CC=O)C(=C)C
CCC12OOC(C)(C(O)C(=C1)C)C2O
COC
CCC12OOC(N(=O)=O)(C(=C(N(=O)=O)C2(O)O[O])C)C1ON(=O)=O
[O]OCCCC(C)(O)CC
CC(OO)C(=O)CCC
OOC(C)(C)C(=O)C
CCC(=O)C(O[O])C(O)C(=O)C=O
OOCCC(=O)C1CC(C)(C)C1CCC(=O)O
O=N(=O)OC1CC2CC(C1(C)[O])C2(C)C
OCC1CC(O[O])(C(=O)O)C1(C)C
OOC(C)(C)CC(=O)C
CCc1cc(C)cc(c1)C(=O)OON(=O)=O
OOC1(O)C=C(C)C2(C)OOC1C2O
O=CC(O)C(C)(O[O])C=O
O=CC([O])C(C)C
OOC1C(=CC2(CC)OOC1(C)C2O)CC
O=N(=O)OC1C2(C)C=CC([O])(O)C1(C)OO2
CC(O)CCC(C)(C)O
OCC1CC(O)(C(=O)O)C1(C)C
O=Cc1ccccc1C
CCCCC(=O)C(=O)C(O[O])C(=O)CC
CCC(=O)C(=O)CO[O]
O=CC(C)(C)C(=O)CCC(=O)O
OCCCCO
[O]Oc1c(O)ccc(C)c1C
[O]OCC1(CCC2CC1C2(C)C)ON(=O)=O
CCCC(OO)OCCO
OOc1c(O)cccc1C(C)C
O=CCC(=O)C(=O)CC(=O)OON(=O)=O
nan
COC=O
O=CCCCCC=O
CCC(OO)CC(=O)CON(=O)=O
CCC(=CC(=O)C(O)C(=O)C)C(=O)O[O]
OOC1(O)C(=C(C)C2(C)OOC1(N(=O)=O)C2(C)O)N(=O)=O

CCCC12OOC(C2O)C(O)(O)C=C1
CCC12OOC(C(=CC2(O)O)C)C1O
CCC1OC(=O)C([O])C1ON(=O)=O
O=CC(=CC(=O)O)N(=O)=O
nan
CCC(=O)C=CC1(C)OC1C(=O)OO
O=CCC1C(CC1(C)C)C(=O)CCC(=O)O
O=N(=O)OC(C)C(C)[O]
OOC1C(=O)CC(=O)C(C=O)C1(C)C
O=CCC(CC(O[O])C(=O)C)C(=O)C
CC1=CCC(CC1O)C(C)(C)[O]
O=CCC(CC(O)C(=O)C)C(=O)C
OOCC(O)CCC=O
O=C1C=C(C(C)C)C(=O)C(O)C1O
OOS(=O)(=O)C
O=CC1(C)OC1C(=O)C
O=CCC(=O)C([O])CC=O
CCC(O[O])CC(=O)CON(=O)=O
CC(CO[O])CC
CCCC12OOC(C2O)C(O)(O[O])C=C1
CC1=CCC2CC1C2(C)C
[O]OC1CCCCC1
CC(=C)C[O]
O=N(=O)OOC(=O)C(C)C(C)C
OCC(C)C(C)C(=O)OON(=O)=O
CC(CCCC)OO
CCC(C)CC(=O)OON(=O)=O
O=C(OON(=O)=O)CC(C)C(C)O
O=N(=O)OOC(=O)CC(C)C(=O)C
OCCC(C)(C)C(=O)OON(=O)=O
OCC(C)(C)CC(=O)OON(=O)=O
CCC(C)(C)C(=O)OON(=O)=O
O=N(=O)OOC(=O)C(C)(C)C(=O)C
O=N(=O)OOC(=O)CC(C)(C)C
O=CCCC(=O)C(C=O)C(C)(C)O[O]
CCCC(=O)C=O
OCc1ccccc1
CCC(O)OC(=O)C(=O)C
CC(=O)OC(=O)C(=O)C[O]
CCC(O[O])OC(=O)C(=O)C
CCC(C)(OO)C(=O)C
[O]OCc1ccccc1
O=CCCC(=O)C(C=O)C(C)(C)O
O=N(=O)OC(C)(C)C(C)[O]
OCCC(C)(C)COO
O=CCC(=O)C(CC=O)C(C)(C)OO
OOC(=O)C1OC1C=O
O=CCC(C

CC(C)CCC(C)ON(=O)=O
OOCCCl
O=CCCC(=O)C1CC(C)(C)C1CC(OO)C(=O)C
CCC(O)CCC(CC)O[O]
OOC1(O)C2OOC(C)(C2O)C(=C1C)C
CCCCCCC=O
OCC([O])C(O)C=O
CC(O)(C=O)C(=O)OO
OOc1c(O)cc(C)c(C)c1C
O=C1C=C(C)C(=O)C(=C1)C
CCC1=C(C)C(=O)C(O)C(O)C1=O
nan
CCCC(=O)O[O]
OOC(C)(C)C(C)O
[O]OC1C(O)C(=O)C(=C(C)C1=O)CC
O=N(=O)OCC(O)C(C)(ON(=O)=O)C(=O)O
CC(C(=O)CO)C(=O)C
CCCCCC(O)CCC(=O)CC
O=CC(=O)C(C)(C)O
COC(C)(C)[O]
OCC([O])C(C)C
CC(=C)C(O)CO
OCCC(C)(C)O
CCOCC(=O)O[O]
CCC(CC)ON(=O)=O
OCCC(=O)C1(OO)CC(C)(C)C1CCC(=O)O
OCCCC(=O)OON(=O)=O
OCCC(O)C(=O)OON(=O)=O
CC(O)CC(=O)OON(=O)=O
OCC(C)C(=O)OON(=O)=O
O=N(=O)OOC(=O)C(C)(C)O
O=N(=O)OOC(=O)C(O)C(=O)C
OCC=CC(=O)OON(=O)=O
C=CC(O)C(=O)OON(=O)=O
O=N(=O)OCC=CC(=O)OON(=O)=O
O=CC1C(=O)CC(=O)C(O)C1(C)C
CCOCCON(=O)=O
OOCCCC(C)(C)O
CCC(=O)C(C)(C)OO
[O]OCCC(=O)CC=O
CC(OO)C=O
O=CCC([O])C(=O)C
OCCC(=C)C(=O)CC(C)(C)C(O)CCC(=O)O
[O]CC(=O)CC=O
[O-][O+]=CC(=O)CO
[O]CC(CCC(=O)C)C(=O)C
[O]OC(C)(CC(=O)C)C(=O)C
OCC(ON(=O)=O)C(C)(ON(=O)=O)C(=O)O
CC1OC(=O)C(C)([O])C1O
[O]CCC(O)CC
OOC(Cl)C(O)(Cl)C

[O]OC(=O)C(C)(C)O
CC1OC(=O)C(C)([O])C1(C)O
[O]OC(=O)c1ccccc1
OOCC(O)C(C)(CO)ON(=O)=O
CS(=O)C
OCCC(C(=C)C)C(OO)C=O
OCC(C)CC(C)ON(=O)=O
CCCC(O)CC
O=CCCC(CO)(OO)C1CC(C)(C)C1CC=O
[O]OC(=O)C(C)(C)OC
OOC(C=O)C(C)(CO)ON(=O)=O
O=N(=O)OCC(=O)C(C)[O]
O=N(=O)OOC(=O)C(ON(=O)=O)C(=O)C
[O-][O+]=CCC
O=CC1(C)OC1C(=CC(=O)C)C
[O]OCC(=O)C(=O)CO
[O-][O+]=CCC
OOCC(=O)CC(O)C(C)C
CCC([O])C=O
CCC([O])C(C)ON(=O)=O
[O]OCC=O
O=N(=O)OC1(C)C2(C)OOC1(C)C([O])(O)C(=C2N(=O)=O)O
CC(Cl)(Cl)Cl
OCCC(=C)C(=O)CC(C)(C)C(OO)CC=O
OCC(C)CC(C)O[O]
OC(=O)C(Cl)CCl
OOC(=O)CCC(C)O
CCCC(CC)O[O]
O=COC=CC(=[O+][O-])C
OOC(C=O)C=O
[O-][O+]=Cc1ccccc1
Oc1ccccc1O
O=CC(O)C(C)(O)C=O
CC(=O)CC(C)(O)C(C)O
CCCCCCCC(O)CC(=O)OO
OOC(=O)C(ON(=O)=O)C(C)C
[O]OC1CCCCC1O
OCC(C(=O)C)C(=O)O[O]
O=C1C=C(C(C)C)C(=O)C(O)C1[O]
OOC1C(=CC2(CC)OOC1(C)C2O)C
CC(OO)C(=O)CC(=O)C
[O]OC(C)(CC(=O)C)C(C)O
[O]OC(Br)CBr
[O]OC(=O)CC1CC(C(=O)C)C1(C)C
[O]OC(=O)CC(=O)CC(O)C(=O)C
O=N(=O)OC(C)(C)C1CC=C(C)C(O)C1
CC(=O)OC(C)(C)ON(=O)=O
Cc1cccc(c1)C(=O)O
CCC(=O)C(C)(C)O[O]
OCC(O)CC

CCCCCCC(O)CC(=O)O[O]
O=CCCC(O)CC(=O)O[O]
[O]OC(C)CC(=O)CO
[O]OCC(=O)CCC(CC=O)C(=C)C
O=CC(=O)Cl
[O]CC=O
OOC(=O)C(C)C
O=N(=O)OC(C)(C)C(C)O
OCCC(=O)C(C)(CO)OO
O=CC(O)C(O)C1(C)OC1(CC)C=O
O=N(=O)OCC(C)(O)C(=O)O
CCC12OOC(C)(C=C(C)C2(O)OO)C1ON(=O)=O
CC(=O)C=CC(=O)OO
OOC(C)(C(O)CON(=O)=O)C(=O)OON(=O)=O
CCCCCC(=O)CCC(=O)CC
CCCCC([O])CC(=O)CC
OOC(=O)C(=CC(=O)C(=O)C)CC
CC(O)CC(O)CON(=O)=O
OCC(C=O)C(=O)C
OCCC(O)C(C)O
CCC1([O])C(=O)C=C(C)C(=O)C1O
CCC(C)C(C)O[O]
Cc1ccccc1C
OOCOC(=O)OC=O
OCC(ON(=O)=O)C(=O)C
C=CC(C)(C)O
[O]OC(=O)CC=O
CCCC(O[O])OC(=O)C(C)O
O=N(=O)c1ccc(C)c(C)c1O
CC(OO)C(=O)C(=O)CC
[O]OC1(O)C(=C(C)C2(C)OOC1(N(=O)=O)C2(C)ON(=O)=O)N(=O)=O
nan
COC([O])C(C)O
O=CCCC(=O)C1CC(C)(C)C1CCC(=O)O
OCC(O)C(C)(ON(=O)=O)C(=O)O
CCOC(CO)ON(=O)=O
CCC(C)C(C)O
CCC=C
O=CCC(CCO[N+](=O)[O-])C(=O)C
CCC(=O)C(C)(C)[O]
[O]OC(=O)C(O)CC(C)O
CCC(C)(C)O
O=N(=O)OC(C(O)C(=O)C)C1OC1(C)C(=O)C
CCC12OOC(CC)(C2ON(=O)=O)C(O)(O)C(=C1)C
OOC(=O)CC(CCC(=O)O)C(=O)C
CC(=O)C(=O)CO[O]
CCCC(C)(C)O
CCC(OO)C(C)ON(=O)=O
OC1CCCCC1[O]
CCCC

OOC(=O)C(C)(C)C
OC(Cl)CON(=O)=O
[O]OC1CC2CC(C1(C)ON(=O)=O)C2(C)C
[O]OC(Cl)CON(=O)=O
OOC(=O)CCl
O=CCCC(C)(C)O
OOC(=O)CC(=O)C(=O)CC
OOC(=O)COC(=O)C(=O)C
OOC(=O)C1CC(C(=O)O)C1(C)C
OOC1(CON(=O)=O)CCC2CC1C2(C)C
OCC(CO)C(=O)C
O=CC=CC([O])C=O
OCCC[O]
OCCC(=O)C(=O)C=O
O=CC(CCC(=O)C)C(=C)C
CCCCCCCCC(CC)ON(=O)=O
O=CC=CC(O[O])C=O
O=CCCC(=C)C(=O)CC(C)(C)C(O)CCO
O=CC(=CC(=O)OON(=O)=O)C
CCCCCC(O)CC
OOC(CC=O)C(=O)C(=O)CO
CCCC(O[O])OC(=O)C
OCC(C)C(C)O[O]
CCCCCCCC(O)CC(=O)C(C)O[O]
CCCCCC(CC)O[O]
O=CCC1C(CC1(C)C)C(=C)CCC(=O)OO
OOC(Cl)(Cl)Cl
O=N(=O)OCC(O)C(C)(C)O
CC([O])(CO)C(=O)CCC(=O)C
[O-][N+](=O)OC1(C)CCC2C(CC2(C)C)C(=C)CCC1O
O=CCCC(=C)C(=O)CC(C)(C)C(O[O])CCO
O=COC(C)(C)C(=O)OON(=O)=O
O=CC=CC(O)C=O
CCC(=O)C=C(C)C(=O)C(=O)C
CCCCCCCC([O])CC(=O)CC
CCC(=O)C=CC(=O)O
CCCCCC[O]
OCC(O[O])CC(=O)CC
[O]OC(=O)CC(=O)C=O
OCC1=CCC(CC1)C(C)(C)OO
CCC(ON(=O)=O)c1ccccc1
CCC(O[O])CON(=O)=O
CCCCC
OCC(=C)C([O])CO
CCC(=O)C=C(C)C([O])C(=O)CC
CCCC(=O)CC(=O)O[O]
CC(=O)C=CC(C)([O])C(=O)C
CC(=O)C(O)C(=O)OO
O=CC(C)(O)C(C)([O])C(

[O]OCCCCC=O
[O]OC/C=C(/CO)C
O=N(=O)OCC(C)(C)[O]
CC(O)(CC(=O)C)CO
O=C1C=CC2(C)OOC1C2O
OOC(=O)CC(=O)C(C)(C)O
O=CC(=O)C=CC(=O)OON(=O)=O
OCC(C)(O[O])CC(=O)C
[O]CC(O)C(C)CO
[O]CC(C)(C)C(=O)CCC(=O)C
CC1=C(C)C(=O)C(C)([O])C(O)C1=O
CCC(=O)OO
CCc1cc(C)cc(C)c1[O]
OC(=O)CC(=O)OC(=O)C
[O]Oc1c(CC)cccc1N(=O)=O
CC(C)OC(C)[O]
CCC(=O)C(=O)C([O])C(=O)CC
CC(CC(=O)CC)OO
[O]OC1(O)C=C(C)C2OOC1(C)C2O
O=CCCC(=C)C1CC(C)(C)C1CCC(=O)[O]
O=CCCC(=C)C1CC(C)(C)C1CCC(=O)O[O]
[O]OC(C=O)CC(=O)C1CC(C)(C)C1CCC(=O)C
[O]CCC(CC=O)C(=C)C
Cc1cc(O)c([O])c(C)c1C
[O]CCC(=O)CC
OOC(=O)CCC(=O)C1CC(C)(C)C1CCC(=O)C
CC(=O)OC(C)(C)O
OOC(=O)C=C
OCCC(=C)C(=O)CC(C)(C)C(CCC(=O)C)O[N+](=O)[O-]
O=CCCC(=O)C(C=O)C(C)(C)OO
O=CC(=O)C(=CC(=O)C(=O)C)C
O=CC(=O)C(=C(C)C(=O)C(=O)C)C
O=CCC(CC([O])C(=O)C)C(=O)C
CCC(=O)CC(ON(=O)=O)C(O)CC
O=CCCC(CO)(O[N+](=O)[O-])C1CC(C)(C)C1CCC(=O)OC
CC1=C(C)C(=O)C(C)(O)C(O)C1=O
OOCC(C)C(C)C
[O]CC(=O)C=C
[O]OC1(C)C(O)C(=O)C(=C(C)C1=O)C
[O]OC(=O)CC(C)C
[O]OCC(O)CCC=O
O=CCC(C)C(=O)C
CC([O])C=O
[O]OCCON(=O)=O
OCCC(=O)C(C)(

[O]C1CC2CC(C1(C)O)C2(C)C
CCc1c(C)ccc(N(=O)=O)c1O
OOC1C=CC2OOC1C2O
O=CC(=O)C(=O)CON(=O)=O
CC1=CC([O])(O)C2OOC1(C)C2O
OCC([O])CC(=O)C(=O)CO
[O]OC1(C)C=CC2(CC)OOC1C2O
CCC1=CC(=O)C(O)C(C)(OO)C1=O
O=CCC(=O)C
CCC(O[O])C(=O)C
ClCC(Cl)(Cl)OO
O=CC(O)C(C)(OO)CON(=O)=O
CCC(O)C(=O)C
CCCCCC
C=CC(=C)C
O=CCC(CO)C(C)(CO)OO
O=C(OC(C)([O])C(=O)C)C(=O)C
O=N(=O)OC1C2OOC1(C(C)C)C([O])(O)C(=C2N(=O)=O)O
OOC(C(=O)O)C(C)(CO)ON(=O)=O
O=CCC(C)(C)C(=O)CCC(=O)OC
OOC(C)C(=O)C(=O)OC(=O)C
[O]OCC(C)O
O=CC(C)(ON(=O)=O)C(=O)OON(=O)=O
CC(CCO)C(=O)C
OOC1C(=O)C=C(C(C)C)C(=O)C1O
OOCCl
[O]OC1(O)C(=C(C)C2(OOC1(C)C2(C)ON(=O)=O)N(=O)=O)O
CC(=O)OCC(=O)C
CCCCCCCC(O)CC
CC(O[O])C(=O)C(=O)C
O=CCCC(=C)C(=O)C=O
[O]OC(=O)C(ON(=O)=O)C(C)C
CCC(O[O])C(C)O
CCC(C)([O])C(=O)C
OOCC(=O)CC(=O)C=O
OOC1(O)C(=C(N(=O)=O)C2OOC1(C)C2O)O
CCC12OOC(CC)(C2O)C([O])(O)C(=C1)C
OCC(O)C(C)(ON(=O)=O)C(=O)O[O]
O=CCC(CCO[N+](=O)[O-])C(=C)C
O=CCC1C(CC1(C)C)C(=O)CCC(=O)OO
O=CC1(C)OC1C(C)(O)C(O)C(=O)C
CCCCCCCC(CC)O[O]
[O]OC1(C)C(=O)OCC1(C)ON(=O)=O
CC(O[O])C(=O)C
[O

In [21]:
import json
# Persist the name -> fingerprint mapping next to the notebook.
with open('fingerprint.json', 'w') as fingerprint_file:
    json.dump(obj=flat_list, fp=fingerprint_file)