In [None]:
import numpy as np
import matplotlib.pyplot as plt
import rdkit
from rdkit import Chem
from rdkit.Chem import rdDepictor
from rdkit.Chem import rdDistGeom
from rdkit.Chem import AllChem

In [None]:
# Integer atomic masses keyed by element symbol.  The keys double as the set
# of element symbols this module accepts.
# NOTE(review): Cl is stored as 36 while its standard atomic weight is ~35.45
# -- confirm this rounding is intended.
MASS_DATA = {
    "C": 12,
    "N": 14,
    "O": 16,
    "H": 1,
    "F": 19,
    "Cl": 36,
    "Br": 80,
    "I": 127,
    "S": 32,
    "P": 31,
    "Si": 28,
}
class AtomData:
    """Minimal per-atom record: an element symbol plus a partial charge."""

    def __init__(self, element, partial_charge):
        """Store ``partial_charge`` as ``q`` and ``element`` as ``name``."""
        self.q, self.name = partial_charge, element
        
class MultiMol2Loader:
    """Reader for Tripos MOL2 files that may hold several molecules.

    Each ``@<TRIPOS>MOLECULE`` record is stored under the molecule name (the
    line following the record header).  Two parallel dictionaries are kept:

    * ``molecules[name]`` -- the stripped text lines of the record that follow
      the name line (counts line, section headers, atom/bond lines, ...).
    * ``atoms[name]`` -- one ``AtomData`` per line of the ``@<TRIPOS>ATOM``
      section.
    """

    def __init__(self,mol2file_name=None,allow_duplicates=False):
        """Create the loader and, if a file name is given, load it at once.

        Args:
            mol2file_name: path of the MOL2 file to read, or ``None`` to start
                empty.
            allow_duplicates: ``False`` reports and skips repeated molecule
                names; ``True`` (stored as ``"first"``) skips them silently.
        """
        self.filename=mol2file_name
        self.molecules=dict()
        self.atoms=dict()
        self.allow_duplicates=allow_duplicates
        if self.allow_duplicates==True:
            self.allow_duplicates="first"
        if mol2file_name is not None:
            self.loadFile(self.filename,self.allow_duplicates)

    @staticmethod
    def _parse_charge(fields):
        """Return the partial charge of an ATOM line split into ``fields``.

        The charge is taken from the 9th whitespace-separated column; a
        missing or non-numeric column yields ``0.0``.
        """
        if len(fields) < 9:
            return 0.0
        try:
            return float(fields[8])
        except ValueError:
            return 0.0

    def loadFile(self,filename,duplicates=False):
        """Parse ``filename`` and add its molecules to this loader.

        Args:
            filename: path of the MOL2 file.
            duplicates: policy for a molecule name that is already loaded.
                ``False`` prints a notice and skips the record, ``"first"``
                (or ``True``) skips it silently; the first occurrence is kept
                in both cases.

        Raises:
            ValueError: on an unknown ``duplicates`` option (only detected
                when a duplicate is actually met), on an ATOM line with fewer
                than six columns, or on an element missing from ``MASS_DATA``.
        """
        # __init__ maps True to "first"; accept the same spelling here.
        if duplicates==True:
            duplicates="first"

        in_record=False      # inside a record that is being kept
        named=False          # the record's name line has been consumed
        in_atoms=False       # currently inside the @<TRIPOS>ATOM section
        unnamed_count=0
        molname=None

        # The context manager closes the file even when parsing raises.
        with open(filename,"r") as mfile:
            for raw in mfile:
                line=raw.strip()

                # Only a real record header starts a new molecule; a molecule
                # *name* that merely contains "MOLECULE" must not.
                if line.startswith("@<TRIPOS>MOLECULE"):
                    in_record=True
                    named=False
                    in_atoms=False   # never carry section state across records
                    continue
                if not in_record:
                    continue

                if not named:
                    # First line after the header is the molecule name.
                    if not line:
                        line="Unnamed"+str(unnamed_count)
                        unnamed_count+=1
                    if line in self.molecules:
                        if duplicates==False:
                            print("Molecule",line,"has a duplicate entry. Skipping")
                        elif duplicates!="first":
                            raise ValueError("Duplicate Filter option '"+str(duplicates)+"' is not known")
                        # Ignore everything up to the next record header.
                        in_record=False
                        continue
                    named=True
                    molname=line
                    self.molecules[molname]=[]
                    self.atoms[molname]=[]
                    continue

                self.molecules[molname].append(line)
                if "@<TRIPOS>" in line:
                    in_atoms="ATOM" in line
                    continue
                if not in_atoms or not line:
                    # Blank lines are kept in the raw text but carry no atom.
                    continue

                fields=line.split()
                if len(fields)<6:
                    raise ValueError("Malformed ATOM line in molecule '"+molname+"': "+line)
                # Column 6 is the atom type, e.g. "C.ar"; keep the element part.
                elname=fields[5].split(".")[0]
                if elname not in MASS_DATA:
                    print("Bad element '"+elname+"' encountered in following line")
                    print(fields)
                    raise ValueError("Bad element '"+elname+"' in molecule '"+molname+"'")
                self.atoms[molname].append(AtomData(elname,self._parse_charge(fields)))

    def __getitem__(self,k):
        """Return ``(lines, atoms)`` for the molecule stored under ``k``."""
        return self.molecules[k],self.atoms[k]

    def clearData(self):
        """Forget every loaded molecule and the remembered file name."""
        self.molecules=dict()
        self.atoms=dict()
        self.filename=None

    def writeMolecule(self,k,to_file=None):
        """Emit molecule ``k`` as a MOL2 record.

        Args:
            k: molecule key; it is converted with ``str`` before lookup.
            to_file: object with a ``write`` method, or ``None`` to print to
                standard output.

        Raises:
            ValueError: if no molecule is stored under ``str(k)``.
        """
        name=str(k)
        if name not in self.molecules: raise ValueError("Molecule with key "+name+" was not loaded")

        # Decide the sink once so every line goes to the same place.
        if to_file is not None:
            def emit(text): to_file.write(text+"\n")
        else:
            emit=print

        emit("@<TRIPOS>MOLECULE")
        emit(name)
        for line in self.molecules[name]:
            emit(line)

    def netCharge(self,k):
        """Return the sum of the partial charges of molecule ``k``.

        Raises:
            ValueError: if no atoms are stored under ``str(k)``.
        """
        name=str(k)
        if name not in self.atoms: raise ValueError("Molecule with key "+name+" was not loaded with atoms")
        # Look up with the same string key that was just validated.
        return sum(at.q for at in self.atoms[name])

    def netMass(self,k):
        """Return the sum of the ``MASS_DATA`` masses of molecule ``k``.

        Raises:
            ValueError: if no atoms are stored under ``str(k)``.
        """
        name=str(k)
        if name not in self.atoms: raise ValueError("Molecule with key "+name+" was not loaded with atoms")
        return sum(MASS_DATA[at.name] for at in self.atoms[name])

    def keys(self,from_atoms=False):
        """Return the molecule names, taken from ``atoms`` if ``from_atoms``."""
        if not from_atoms: return self.molecules.keys()
        else: return self.atoms.keys()

In [None]:
class Mol2Writer:
    """Serialise an RDKit molecule as Tripos MOL2 text.

    The instance wraps one molecule; calling it returns the MOL2 block for a
    single conformer and ``writeAllConformers`` concatenates the blocks of
    every conformer.  SYBYL atom types are looked up in ``rdkit2sybyl``.
    """

    # Substructure patterns used to refine nitrogen / oxygen atom types.
    amide_smarts = Chem.MolFromSmarts('[OX1]=CN')
    amide_pl_smarts = Chem.MolFromSmarts('[#7]-,=[#6]-,=[#7]')
    carboxylate_smarts = Chem.MolFromSmarts('[#6]C(=O)[O;H,-]')

    # Key = atomic number + hybridization name + aromatic flag, optionally
    # followed by a suffix ('_4', '_am', '_co2'); 'N_pl' is a stand-alone key.
    rdkit2sybyl = {
        '6SP3False': 'C.3',    '6SP2False': 'C.2',    '6SPFalse': 'C.1',      '6SP2True': 'C.ar',
        '7SP3False': 'N.3',    '7SP3False_4': 'N.4', 
        '7SP2False': 'N.2',    '7SP2False_4':'N.2',   '7SP2True': 'N.ar',    '7SP2True_4': 'N.ar',     
        '7SP2False_am': 'N.am',   'N_pl':'N.pl',
        '7SPFalse': 'N.1',     '7SPFalse_4': 'N.1',   
        '8SP3False': 'O.3',    '8SP2False': 'O.2',    '8SP2False_co2': 'O.co2',  
        '9SP3False':  'F',     '17SP3False':'Cl',     '35SP3False': 'Br',     '53SP3False': 'I',
        '15SP3False':'P.3',
        '16SP3False':'S.3',    '16SP2False':'S.2',    # '16SP2False':'S.O',     '16SP2False':'S.O2',
        '1UNSPECIFIEDFalse': 'H',
        '11SFalse': 'Na', '12SFalse': 'Mg', '13SFalse': 'Al', '19SFalse': 'K', '20SFalse': 'Ca',
        '26SFalse': 'Fe', '29SFalse': 'Cu', '30SFalse': 'Zn',
    }
    
    # Atomic number -> element symbol, used as the atom name column.
    # (Atomic number 4 is beryllium; it was previously mapped to 'P'.)
    idx2element = {1:'H', 6:'C', 7:'N', 8:'O', 9:'F', 15:'P', 16:'S',
                  11:'Na', 12:'Mg', 13:'Al', 14:'Si', 3:'Li', 4:'Be', 5:'B',
                  19:'K', 20:'Ca', 26:'Fe', 29:'Cu', 30:'Zn',
                  17:'Cl', 35:'Br', 53:'I'}
    
    # RDKit bond order (as a double) -> MOL2 bond type.
    bond2type = {1.0:'1', 1.5:'ar', 2.0:'2', 3.0:'3'}
    
    # atom_id  atom_name  x  y  z  atom_type  subst_id  subst_name  charge 
    atom_base_str = '{:>7d} {:<4s}     {:>9.4f} {:>9.4f} {:>9.4f} {:<5s}    {:>2d} {:>3s} {:>9.4f}\n'
    # bond_id  origin_atom_id  target_atom_id  bond_type  
    bond_base_str = '{:>6d} {:>4d} {:>4d} {:<2s}\n'

    def __init__(self,mol):
        """Wrap ``mol``, the RDKit molecule to be written."""
        self.mol=mol

    def addCharge(self):
        """Compute Gasteiger charges on the wrapped molecule (in place)."""
        AllChem.ComputeGasteigerCharges(self.mol)

    def __call__(self,molname="UNL",conformer=-1):
        """Return the MOL2 block of one conformer (``-1`` = RDKit default)."""
        return self._mol_to_mol2_block(self.mol, molname, use_conf=conformer)

    def writeAllConformers(self,molname="UNL"):
        """Return the MOL2 blocks of all conformers followed by a newline."""
        # GetConformer() expects a conformer *ID*, which need not equal the
        # positional index, so collect the real IDs first.
        conf_ids = [conf.GetId() for conf in self.mol.GetConformers()]
        blocks = [self._mol_to_mol2_block(self.mol, molname, cid) for cid in conf_ids]
        return "".join(blocks) + "\n"

    @staticmethod
    def _matched_atom_indices(mol, pattern):
        """Return the set of atom indices taking part in any match of ``pattern``."""
        return {idx for match in mol.GetSubstructMatches(pattern) for idx in match}

    def _mol_to_mol2_block(self, mol, mol_name="UNL", use_conf=-1):
        """Write molecule in MOL2 file as string

        Args:
            mol: rdMol, molecule
            mol_name: str, molecule name to set as name in mol2 file
            use_conf: int, ID of the conformer supplying the coordinates

        Returns:
            str with the MOLECULE, ATOM, BOND and SUBSTRUCTURE sections.

        Raises:
            KeyError: if an atom's element / type key or a bond order is not
                covered by the lookup tables of this class.
        """
        parts = ['@<TRIPOS>MOLECULE\n{}\n{:<3d} {:<3d} 1 0 0\nSMALL\nGASTEIGER\n'.format(
                    mol_name, mol.GetNumAtoms(), mol.GetNumBonds())]

        conf = mol.GetConformer(use_conf)

        parts.append('@<TRIPOS>ATOM\n')

        # processing guanidinium bonds TODO

        # Atom indices hit by each pattern; an empty set means "no match".
        amide_pl_atoms = Mol2Writer._matched_atom_indices(mol, Mol2Writer.amide_pl_smarts)
        amide_atoms = Mol2Writer._matched_atom_indices(mol, Mol2Writer.amide_smarts)
        carboxy_atoms = Mol2Writer._matched_atom_indices(mol, Mol2Writer.carboxylate_smarts)

        type_table = Mol2Writer.rdkit2sybyl

        # traverse all atoms and set atom type
        for atom in mol.GetAtoms():
            idx = atom.GetIdx()
            atomic_num = atom.GetAtomicNum()
            base_key = str(atomic_num) + str(atom.GetHybridization()) + str(atom.GetIsAromatic())
            key = base_key

            if atomic_num == 7:
                # Each test is per atom, so an amide elsewhere in the
                # molecule no longer hides the N.pl check for this atom.
                if atom.GetFormalCharge() == 1:
                    key = base_key + '_4'    # N.4
                elif idx in amide_atoms:
                    key = base_key + '_am'   # N.am
                elif idx in amide_pl_atoms:
                    key = 'N_pl'             # N.pl
            elif atomic_num == 8 and idx in carboxy_atoms:
                key = base_key + '_co2'      # O in CO2

            # A suffixed key that has no table entry (e.g. an aromatic
            # amide-like nitrogen) falls back to the plain hybridization key;
            # a missing plain key still raises KeyError.
            atom_type = type_table[key] if key in type_table else type_table[base_key]

            atom_name = Mol2Writer.idx2element[atomic_num]
            coord = conf.GetAtomPosition(idx)
            subst_id = 1
            subst_name = 'UNL'
            try:
                charge = atom.GetPropsAsDict()['_GasteigerCharge']
            except KeyError:
                # No Gasteiger charge has been computed for this atom.
                charge = 0.00

            parts.append(Mol2Writer.atom_base_str.format(
                idx + 1, atom_name, coord.x, coord.y, coord.z,
                atom_type, subst_id, subst_name, charge))

        parts.append('@<TRIPOS>BOND\n')

        # traverse all bonds
        for bond in mol.GetBonds():
            begin_idx = bond.GetBeginAtomIdx()
            end_idx = bond.GetEndAtomIdx()
            bond_type = Mol2Writer.bond2type[bond.GetBondTypeAsDouble()]
            # A bond whose two ends both belong to an amide match and that
            # involves a nitrogen is written as an amide bond.
            if begin_idx in amide_atoms and end_idx in amide_atoms:
                begin_num = mol.GetAtomWithIdx(begin_idx).GetAtomicNum()
                end_num = mol.GetAtomWithIdx(end_idx).GetAtomicNum()
                if begin_num == 7 or end_num == 7:
                    bond_type = 'am'

            parts.append(Mol2Writer.bond_base_str.format(
                bond.GetIdx() + 1, begin_idx + 1, end_idx + 1, bond_type))

        parts.append('@<TRIPOS>SUBSTRUCTURE\n     1 UNL     1\n')

        return ''.join(parts)

In [None]:
# Notebook feedback: report that this cell has been executed.
print("Mol2 Reader Loaded")