In [1]:
import os
import json
import numpy as np
import re
import math
from IPython.display import Image

import Rmsd
import PrintMol2
import CompareStructures
import BuildTargetComplex
import ReadStereoisomerList
import FormulaHandling
from Isomers import Isomers
import CsdExtraction

In [5]:
# Take only the file names found directly inside each directory
# (first item yielded by os.walk); sub-directories are not visited.
_, _, filenames_csd = next(os.walk('G:\\!CSD-database'))
_, _, filenames_sparkle = next(os.walk('dados_sparkle'))
# Keep the text before the first dot of every file name
# (presumably the CSD refcode -- confirm against the directory contents).
filenames_sparkle = [x.split('.')[0] for x in filenames_sparkle]
csd_codes = [x.split('.')[0] for x in filenames_csd]
# `&` is a set intersection (codes present in both places), and the second
# number is the count of all distinct sparkle codes, so label them as such.
print('intersection: ', len(set(csd_codes) & set(filenames_sparkle)), ' sparkle total: ', len(set(filenames_sparkle)))

union:  786  sparkle only:  808


In [6]:
def isbidentate(chelates):
    """Tell whether no chelate in ``chelates`` has more than two entries.

    Args:
        chelates: iterable of sized items (one item per chelate).

    Returns:
        True when every item has length 2 or less (this includes an empty
        ``chelates``), False as soon as one item is longer than 2.
    """
    return all(len(chel) <= 2 for chel in chelates)

# Collect, for every sparkle code, the refined coordination polyhedron when it
# has exactly 10 coordinate rows and only chelates of at most two entries.
coord10 = {}
for file in filenames_sparkle:
    fileMol2Name = os.path.join('G:\\!CSD-database', file + '.search1.mol2')
    try:
        name, lcoords, colors, composition, chelates = CsdExtraction.refineCoordinationPolyhedron(fileMol2Name)
    except Exception:
        # Best effort: structures that cannot be read/refined are skipped.
        # `Exception` (not a bare except) so Ctrl-C still interrupts the loop.
        continue

    if len(lcoords) != 10:
        continue
    if not isbidentate(chelates):
        continue

    try:
        shape, _ = CompareStructures.findShape(lcoords)
    except Exception:
        # Do not silently reuse the shape found for the previous structure
        # (or hit a NameError on the first one): record the failure as None.
        shape = None

    coord10[fileMol2Name] = {
        'isomer': [name, lcoords.tolist(), colors, composition, chelates],
        'shape': shape,
    }

In [7]:
len(coord10)

36

In [9]:
# Persist the collected structures so later cells can reload them from disk.
with open('coord10_sparkle.json', 'w') as json_out:
    json.dump(coord10, json_out)

In [2]:
example = 'FAMKOV.search1.mol2'

In [4]:
name, lcoords, colors, composition, chelates = CsdExtraction.refineCoordinationPolyhedron(example)

In [9]:
# TODO (note left unfinished): add to the ...

[[0, 1], [2, 3], [4, 5], [6, 7], [8, 9]]

In [13]:
# Read the whole example Mol2 file as text. The context manager closes the
# handle once the content has been read (it was previously left open).
with open(example, "r") as mol2Input:
    ex_stream = mol2Input.read()

In [14]:
ex_stream
#.splitlines()

'# MOL2 file generated by ConQuest\n# Please check atom types\n\n@<TRIPOS>MOLECULE\nFAMKOV\n    75    79     1\nSMALL\nNO_CHARGES\n****\nGenerated from CSD\n\n@<TRIPOS>ATOM\n      1 Pr1        4.0660    3.8212    3.2140 Pr        1 RES1    0.0000\n      2 P1         6.9330    5.0332    5.5281 P.3       1 RES1    0.0000\n      3 P2         3.3608    4.8991    6.7596 P.3       1 RES1    0.0000\n      4 P3         6.8271    2.5742    0.8062 P.3       1 RES1    0.0000\n      5 P4         3.2745    2.9612   -0.4227 P.3       1 RES1    0.0000\n      6 N1         4.4399    6.8066    2.7767 N.pl3     1 RES1    0.0000\n      7 O1         4.7851    5.9260    1.9287 O.3       1 RES1    0.0000\n      8 O2         3.8928    6.3811    3.8147 O.3       1 RES1    0.0000\n      9 O3         4.6216    7.9948    2.5666 O.2       1 RES1    0.0000\n     10 N2         1.0577    3.7688    3.2284 N.pl3     1 RES1    0.0000\n     11 O4         1.7279    2.7191    3.4322 O.3       1 RES1    0.0000\n     12 O5  

# Identifying the isomers

In [10]:
def findisomer(allIsomers, csd_xyz, csd_colors):
    """Return the name of the candidate isomer with the smallest RMSD.

    Args:
        allIsomers: iterable yielding 4-item entries unpacked as
            (name, colors, chelates, geometry).
        csd_xyz: coordinates of the reference structure, passed unchanged to
            ``Rmsd.MarquesMethod``.
        csd_colors: colors of the reference structure, passed unchanged to
            ``Rmsd.MarquesMethod``.

    Returns:
        The name stored for the minimum RMSD value. Results are keyed by the
        RMSD value itself, so when several candidates give exactly the same
        value the one iterated last is the one kept.

    Raises:
        ValueError: if ``allIsomers`` yields nothing (``min`` of no values).
    """
    name_by_rmsd = {}
    for isoName, isoColors, isoChelates, isoGeo in allIsomers:
        target_xyz, target_colors = BuildTargetComplex.addChelation(isoGeo, isoColors, isoChelates)
        score = Rmsd.MarquesMethod(csd_xyz, csd_colors, target_xyz, target_colors)
        name_by_rmsd[score] = isoName
    # Iterating a dict yields its keys, i.e. the RMSD values.
    return name_by_rmsd[min(name_by_rmsd)]

In [11]:
# For every stored structure: find its shape, build the chelated target,
# list the candidate stereoisomers and keep the best-matching one.
isomerIdentification = {}
for sample_i in coord10:
    csd_name, csd_xyz, csd_colors, csd_composition, csd_chelates = coord10[sample_i]['isomer']
    csd_shape, _ = CompareStructures.findShape(np.array(csd_xyz))
    csd_xyz2, csd_colors2 = BuildTargetComplex.addChelation(csd_xyz, csd_colors, csd_chelates)
    isomerList = ReadStereoisomerList.defineIsomerList(csd_shape, csd_composition, 'cpp')
    allIsomers = Isomers(isomerList)
    try:
        finalIsomer = findisomer(allIsomers, csd_xyz2, csd_colors2)
    except Exception:
        # Do not silently reuse the isomer identified for the previous sample
        # (or hit a NameError on the first one): record the failure as None.
        # The key is still written so every sample has an entry.
        finalIsomer = None
    isomerIdentification[sample_i] = finalIsomer


In [14]:
isomerIdentification

{'G:\\!CSD-database\\BEQPOC.search1.mol2': '{[M(AA)3(BB)2] JBCSAPR-10 C1 c 1 A [1 5 3 2 6 7 10 4 8 9]}',
 'G:\\!CSD-database\\BOWXOA.search1.mol2': '{[M(AA)5] JBCSAPR-10 C2 c 2 A [1 2 3 4 5 6 7 8 9 10]}',
 'G:\\!CSD-database\\DODVAT.search1.mol2': '{[M(AA)3(AB)2] JSPC-10 Cs a 1 C [1 2 4 9 10 7 6 5 3 8]}',
 'G:\\!CSD-database\\DORDIX.search1.mol2': '{[Ma4(AA)3] JSPC-10 C1 c 1 A [1 4 7 9 2 10 3 6 8 5]}',
 'G:\\!CSD-database\\ECABAL.search1.mol2': '{[M(AA)3(BB)2] JBCSAPR-10 C2 c 2 B [1 2 6 7 4 10 5 8 9 3]}',
 'G:\\!CSD-database\\ECABIT.search1.mol2': '{[M(AA)3(BB)2] JBCSAPR-10 C2 c 2 B [1 2 6 7 4 10 5 8 9 3]}',
 'G:\\!CSD-database\\ECABOZ.search1.mol2': '{[M(AA)3(BB)2] JBCSAPR-10 C2 c 2 B [1 2 6 7 4 10 5 8 9 3]}',
 'G:\\!CSD-database\\FAHFID.search1.mol2': '{[Ma4(AA)3] JSPC-10 C1 c 1 A [1 4 7 9 2 10 3 6 8 5]}',
 'G:\\!CSD-database\\FIBXET.search1.mol2': '{[Ma2b2(AA)3] JBCSAPR-10 C1 c 1 A [1 2 7 8 4 10 3 5 6 9]}',
 'G:\\!CSD-database\\FICJEG.search1.mol2': '{[Ma3b(AA)3] JSPC-10 C1 c 1 A [6

In [15]:
# Persist the identification results so later cells can reload them from disk.
with open('identificados_coord10_sparkle.json', 'w') as json_out:
    json.dump(isomerIdentification, json_out)

# Chelate binding angle

In [3]:
# Reload the structures written to 'coord10_sparkle.json'.
with open('coord10_sparkle.json', 'r') as json_in:
    csd_data = json.load(json_in)

In [11]:
# Reload the identification results written to
# 'identificados_coord10_sparkle.json'.
with open('identificados_coord10_sparkle.json', 'r') as json_in:
    identified = json.load(json_in)

In [5]:
from MolFileHandling import MolFileHandling
from ExternalPrioritiesObtainer import ExternalPrioritiesObtainer
from PrioritiesObtainer import PrioritiesObtainer


def read_coords(fileMol2Name):
    """Read name, coordinates, colors, composition and chelates of a complex.

    Args:
        fileMol2Name: path handed to ``extractCoordinationPolyhedron``.

    Returns:
        Tuple ``(name, lcoords, colors, composition, chelates)``.
        ``lcoords`` is a list of 3-float lists: the first one comes from the
        atom record of the first metal, the following ones from the records
        of the atoms reported as bonded to that metal, in the order given.
    """
    def _xyz(atom_record):
        # Tokens 2..4 of the whitespace-split record are parsed as floats
        # (presumably x, y, z of a Mol2 ATOM line -- confirm).
        return [float(token) for token in atom_record.split()[2:5]]

    priorities, mol_file = extractCoordinationPolyhedron(fileMol2Name)
    name = mol_file.getBaseFileName()
    colors = priorities.getPrioritesOfMetalI()
    composition = priorities.getDirectFormula()
    chelates = priorities.getChelatesOfMetalI()

    # NOTE(review): the metal index is used as-is while the bonded-atom
    # indices below are shifted by one -- confirm both conventions.
    atom_records = mol_file.getListAtoms()
    lcoords = [_xyz(atom_records[mol_file.getMetalsInMol2FileList()[0]])]

    lcoords.extend(
        _xyz(mol_file.getListAtoms()[donor - 1])
        for donor in priorities.getLigandsBondedToMetalI()
    )
    return name, lcoords, colors, composition, chelates

def extractCoordinationPolyhedron(fileMol2Name):
    """Build the priorities object and the file handler for a Mol2 file.

    Args:
        fileMol2Name: path given to ``MolFileHandling``.

    Returns:
        Tuple ``(priorities, mol_file)``: the ``PrioritiesObtainer`` on which
        ``calculateLigandsPriorities`` was called for the first listed metal,
        and the ``MolFileHandling`` instance it was built from.

    Raises:
        IndexError: if the handler reports an empty metals list.
    """
    mol_file = MolFileHandling(fileMol2Name)
    # The temporary mol file is written before its name is requested below.
    mol_file.writeMolFile()
    external = ExternalPrioritiesObtainer(mol_file.getTemporaryMolName())
    priorities = PrioritiesObtainer(mol_file, external.getPriorities())
    # Only the first metal is handled for now.
    first_metal = mol_file.getMetalsInMol2FileList()[0]
    priorities.calculateLigandsPriorities(first_metal)
    return priorities, mol_file

def center(lcoords):
    """Translate all positions after the first so the first one is the origin.

    Args:
        lcoords: sequence of positions; ``lcoords[0]`` is the reference
            point, the remaining entries are the points to translate.

    Returns:
        ``numpy.ndarray`` with one row per entry of ``lcoords[1:]``, each
        being that entry minus ``lcoords[0]``. The reference point itself is
        not part of the result. An empty array is returned when there is
        nothing after the first entry. The input is not modified.
    """
    points = lcoords[1:]
    if len(points) == 0:
        # Nothing to translate; also avoids touching lcoords[0] when
        # lcoords itself is empty.
        return np.array([])
    origin = np.array(lcoords[0])  # built once instead of once per point
    # Plain subtraction instead of in-place `-=`: the in-place form raises a
    # casting error when a point holds integers and the origin holds floats.
    return np.array([np.array(point) - origin for point in points])

import math

def dotproduct(v1, v2):
    """Return the sum of the pairwise products of ``v1`` and ``v2``.

    Pairs are formed with ``zip``, so extra items of the longer input are
    ignored; two empty inputs give 0.
    """
    pairwise = [left * right for left, right in zip(v1, v2)]
    return sum(pairwise)

def length(v):
    """Return the square root of ``dotproduct(v, v)``."""
    squared = dotproduct(v, v)
    return math.sqrt(squared)

def angle(v1, v2):
    """Return the angle between ``v1`` and ``v2`` in degrees.

    Args:
        v1, v2: sequences of numbers accepted by ``dotproduct``/``length``.

    Returns:
        Angle in degrees, in the range [0, 180].

    Raises:
        ZeroDivisionError: if either vector has zero length.
    """
    cosine = dotproduct(v1, v2) / (length(v1) * length(v2))
    # Rounding can push the ratio marginally outside [-1, 1] for (anti)parallel
    # vectors, which would make acos raise "math domain error". Comparisons
    # (not min/max) are used so a NaN ratio is left untouched.
    if cosine > 1.0:
        cosine = 1.0
    elif cosine < -1.0:
        cosine = -1.0
    # math.degrees replaces the truncated literal 57.295779513.
    return math.degrees(math.acos(cosine))

def maxCba(lcoords, chelates):
    """Return the largest ``angle`` found over all chelates.

    Args:
        lcoords: indexable collection of position vectors.
        chelates: sequence of index collections; only the first two indices
            of each one are used to pick the two vectors from ``lcoords``.

    Returns:
        The maximum of ``angle(lcoords[c[0]], lcoords[c[1]])`` over the
        chelates, or 0 when ``chelates`` is empty.
    """
    if len(chelates) == 0:
        return 0
    return max(angle(lcoords[pair[0]], lcoords[pair[1]]) for pair in chelates)



In [7]:
# For every stored structure, re-read its coordinates, shift them so the
# first position is the origin, and keep them together with the largest
# chelate angle.
csd_coords = {}
for fileMol2Name in csd_data:
    name, lcoords, colors, composition, chelates = read_coords(fileMol2Name)
    lcoords = center(lcoords)
    record = csd_coords[fileMol2Name] = {}
    record.update(
        name=name,
        lcoords=lcoords,
        colors=colors,
        composition=composition,
        chelates=chelates,
    )
    record['maxcba'] = maxCba(lcoords, chelates)
    

In [8]:
csd_coords

{'G:\\!CSD-database\\BEQPOC.search1.mol2': {'name': 'BEQPOC.search1',
  'lcoords': array([[-2.2552,  0.1439, -1.0985],
         [ 0.4135,  0.641 , -2.4972],
         [-1.4969, -1.9116,  0.624 ],
         [-1.0776,  1.1431,  2.0013],
         [-0.8744,  2.4591, -0.5188],
         [ 1.1512,  2.3257,  0.2431],
         [ 1.0801, -0.5712,  2.1966],
         [ 1.5656, -2.1907, -0.02  ],
         [ 2.5786, -0.2983, -0.3378],
         [-0.1907, -1.4166, -2.2305]]),
  'colors': [1, 0, 1, 1, 0, 0, 1, 0, 0, 0],
  'composition': '(A2)3(B2)2',
  'chelates': [[0, 2], [1, 9], [3, 6], [4, 5], [7, 8]],
  'maxcba': 67.45254884184028},
 'G:\\!CSD-database\\BOWXOA.search1.mol2': {'name': 'BOWXOA.search1',
  'lcoords': array([[ 2.1054,  0.1879, -1.3137],
         [ 1.4304,  1.8742, -0.1764],
         [-2.1055,  0.1879,  1.3137],
         [ 0.5391, -2.1597, -0.9196],
         [-1.4305,  1.8742,  0.1764],
         [ 0.4968,  0.603 ,  2.29  ],
         [-1.9731, -0.6218, -1.343 ],
         [-0.5392, -2.1597,

In [13]:
# One line per structure: name up to the first dot ; max chelate angle ;
# identified isomer.
for file, entry in csd_coords.items():
    short_name = entry['name'].split('.')[0]
    print(short_name, ';', entry['maxcba'], ' ; ', identified[file])

BEQPOC ; 67.45254884184028  ;  {[M(AA)3(BB)2] JBCSAPR-10 C1 c 1 A [1 5 3 2 6 7 10 4 8 9]}
BOWXOA ; 52.54051924633924  ;  {[M(AA)5] JBCSAPR-10 C2 c 2 A [1 2 3 4 5 6 7 8 9 10]}
DODVAT ; 72.12531193563055  ;  {[M(AA)3(AB)2] JSPC-10 Cs a 1 C [1 2 4 9 10 7 6 5 3 8]}
DORDIX ; 49.227820637093444  ;  {[Ma4(AA)3] JSPC-10 C1 c 1 A [1 4 7 9 2 10 3 6 8 5]}
ECABAL ; 62.384841815473  ;  {[M(AA)3(BB)2] JBCSAPR-10 C2 c 2 B [1 2 6 7 4 10 5 8 9 3]}
ECABIT ; 63.589050200489666  ;  {[M(AA)3(BB)2] JBCSAPR-10 C2 c 2 B [1 2 6 7 4 10 5 8 9 3]}
ECABOZ ; 63.95842913395608  ;  {[M(AA)3(BB)2] JBCSAPR-10 C2 c 2 B [1 2 6 7 4 10 5 8 9 3]}
FAHFID ; 49.58902526013061  ;  {[Ma4(AA)3] JSPC-10 C1 c 1 A [1 4 7 9 2 10 3 6 8 5]}
FIBXET ; 49.36171148689722  ;  {[Ma2b2(AA)3] JBCSAPR-10 C1 c 1 A [1 2 7 8 4 10 3 5 6 9]}
FICJEG ; 48.49268101538666  ;  {[Ma3b(AA)3] JSPC-10 C1 c 1 A [6 1 7 3 5 2 4 10 9 8]}
GACJOJ ; 83.24612713058355  ;  {[M(AA)3(BB)2] JBCSAPR-10 C2 c 2 B [1 2 3 5 4 6 10 7 8 9]}
GIMMIY ; 69.1815780921114  ;  {[M(AA

In [10]:
csd_coords

{'G:\\!CSD-database\\BEQPOC.search1.mol2': {'name': 'BEQPOC.search1',
  'lcoords': array([[-2.2552,  0.1439, -1.0985],
         [ 0.4135,  0.641 , -2.4972],
         [-1.4969, -1.9116,  0.624 ],
         [-1.0776,  1.1431,  2.0013],
         [-0.8744,  2.4591, -0.5188],
         [ 1.1512,  2.3257,  0.2431],
         [ 1.0801, -0.5712,  2.1966],
         [ 1.5656, -2.1907, -0.02  ],
         [ 2.5786, -0.2983, -0.3378],
         [-0.1907, -1.4166, -2.2305]]),
  'colors': [1, 0, 1, 1, 0, 0, 1, 0, 0, 0],
  'composition': '(A2)3(B2)2',
  'chelates': [[0, 2], [1, 9], [3, 6], [4, 5], [7, 8]],
  'maxcba': 67.45254884184028},
 'G:\\!CSD-database\\BOWXOA.search1.mol2': {'name': 'BOWXOA.search1',
  'lcoords': array([[ 2.1054,  0.1879, -1.3137],
         [ 1.4304,  1.8742, -0.1764],
         [-2.1055,  0.1879,  1.3137],
         [ 0.5391, -2.1597, -0.9196],
         [-1.4305,  1.8742,  0.1764],
         [ 0.4968,  0.603 ,  2.29  ],
         [-1.9731, -0.6218, -1.343 ],
         [-0.5392, -2.1597,