In [1]:
import numpy as np
from rdkit import Chem

In [2]:
class SequentialSMILESLoader:
    """Read molecules in batches from a whitespace-delimited SMILES file.

    Every non-blank line is split on whitespace. The first token is used as
    the SMILES string and, when ``attach_names`` is enabled, the second token
    is used as the molecule name. The file is read lazily: each call to
    :meth:`getNext` continues where the previous call stopped.

    Parameters
    ----------
    smifilename : str
        Path of the SMILES file; opened for reading in text mode.
    skip_failures : bool, default True
        When SMILES are parsed (``as_smiles=False``), silently drop entries
        for which ``Chem.MolFromSmiles`` returns ``None``. When False, those
        ``None`` values are kept in the returned batch.
    attach_names : bool, default False
        Return the molecule names alongside the molecules.

    The loader can be used as a context manager so the file handle is
    released deterministically::

        with SequentialSMILESLoader("mols.smi") as loader:
            batch = loader.getNext(100)
    """

    def __init__(self, smifilename, skip_failures=True, attach_names=False):
        # Assigned before open() so close()/__del__ remain safe if open() raises.
        self.file = None
        self.counter = 0                # non-blank lines consumed so far (skipped ones included)
        self.autoskip = skip_failures   # drop unparsable SMILES instead of returning None
        self.name = attach_names        # whether names are returned with the molecules
        self.ended = False              # set once a read has run off the end of the file
        self.file = open(smifilename, "r")

    def getNext(self, num=1, as_smiles=False):
        """Return the next batch of up to ``num`` molecules.

        Parameters
        ----------
        num : int, default 1
            Number of molecules to collect. A negative value reads until the
            end of the file; ``0`` returns an empty batch without reading.
        as_smiles : bool, default False
            Return the raw SMILES strings instead of passing them through
            ``Chem.MolFromSmiles``.

        Returns
        -------
        list, or (list, list) when names are attached, or None
            The molecules (and their names). The batch is shorter than
            ``num`` when the file runs out. ``None`` is returned once an
            earlier call has already hit the end of the file (or after
            :meth:`close`).
        """
        if self.ended:
            return None

        ret = []
        names = []

        # An empty request must not consume the file: the stop condition
        # below can only trigger after at least one molecule was collected.
        if num != 0:
            for ln in self.file:
                fields = ln.split()
                if not fields:
                    continue  # blank line

                self.counter += 1
                mol = fields[0]
                if not as_smiles:
                    mol = Chem.MolFromSmiles(mol)
                    if self.autoskip and mol is None:
                        continue  # unparsable entry: counted, but not returned

                ret.append(mol)
                if self.name:
                    # A line without a name token yields an empty name
                    # rather than an IndexError.
                    names.append(fields[1] if len(fields) > 1 else "")

                if len(ret) == num:
                    break
            else:
                # Loop finished without filling the batch: file is exhausted.
                self.ended = True

        if self.name:
            return ret, names
        return ret

    def drain(self, as_smiles=False):
        """Return everything that is left in the file as a single batch."""
        return self.getNext(-1, as_smiles)

    def close(self):
        """Close the underlying file; later ``getNext`` calls return None."""
        # getattr: the attribute is missing if __init__ never ran.
        handle = getattr(self, "file", None)
        if handle is not None and not handle.closed:
            handle.close()
        self.ended = True

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.close()
        return False  # never suppress exceptions from the with-body

    def __del__(self):
        self.close()

    def __help__(self):
        print("This is help")

In [9]:
class RDKitMoleculeWrapper:
    """Hold an RDKit molecule, accepting either a molecule or a SMILES string.

    Parameters
    ----------
    mol : Chem.rdchem.Mol or str
        An RDKit molecule (any ``Mol`` subclass, e.g. ``RWMol``) is stored
        as-is. Anything else is passed to ``Chem.MolFromSmiles`` and the
        result is stored.
    """

    def __init__(self, mol):
        # isinstance rather than an exact type comparison, so subclasses of
        # Mol are kept instead of being handed to the SMILES parser.
        if isinstance(mol, Chem.rdchem.Mol):
            self.mol = mol
        else:
            self.mol = Chem.MolFromSmiles(mol)

    def __call__(self):
        """Return the wrapped molecule (same as :meth:`get`)."""
        return self.mol

    def get(self):
        """Return the wrapped molecule."""
        return self.mol

    def getSize(self, includeHs=False):
        """Return the number of atoms in the wrapped molecule.

        Parameters
        ----------
        includeHs : bool, default False
            When False, return the heavy-atom count. When True, count the
            atoms of a copy produced by ``Chem.AddHs``; the wrapped molecule
            itself is not modified.
        """
        if includeHs:
            return Chem.AddHs(self.mol).GetNumAtoms()
        return self.mol.GetNumHeavyAtoms()

In [None]:
# Print a confirmation once this cell is reached; the message names the
# library (presumably the classes defined in this notebook - confirm).
print("Loaded Library 'General (RDKit)'")