In [1]:
from rdkit.Chem import AllChem as Chem
from rdkit.Chem import TorsionFingerprints
import pandas as pd

In [2]:
mol_pdb = pd.read_csv("chignolin/1uao.pdb", header=None, delim_whitespace=True)

hydrogen_idxs = set(mol_pdb.loc[mol_pdb[2].str.startswith("H")].index.tolist())
indices = mol_pdb.index.tolist()
num_non_H = len(indices) - len(hydrogen_idxs)
reindexed_atoms = []

non_H_count = 0
H_count = 0
for index in indices:
    if index in hydrogen_idxs:
        reindex = num_non_H + H_count
        H_count += 1
    else:
        reindex = non_H_count
        non_H_count += 1
    reindexed_atoms.append(reindex + 1)

ParserError: Error tokenizing data. C error: Expected 10 fields in line 2, saw 13


In [10]:
with open('chignolin/1uao.pdb', 'r') as f:
    lines = f.readlines()

reordered_lines = [lines[reindexed_atoms.index(i + 1) + 1] for i in range(len(reindexed_atoms))]

reindexed_lines = []
for idx, line in enumerate(reordered_lines):
    prev_idx = line.split()[1]
    new_idx = str(idx + 1)
    
    offset = len(prev_idx) - len(new_idx)
    start_idx = line.find(prev_idx)
    
    if offset < 0:
        replaced = line[:start_idx + offset] + new_idx + line[start_idx + len(prev_idx):]
    else:
        replaced = line[:start_idx] + new_idx.rjust(len(prev_idx), " ") + line[start_idx + len(prev_idx):]
    reindexed_lines.append(replaced)
    
reindexed_lines.insert(0, lines[0])
reindexed_lines.append(lines[-1])

In [11]:
with open('chignolin.pdb', 'w') as fo:
    fo.writelines(reindexed_lines)

In [12]:
def generate_chignolin() -> Chem.Mol:
    """Generates chignolin molecule.
    """

    chignolin_pdb_fn = "chignolin.pdb"
    chignolin = Chem.rdmolfiles.MolFromPDBFile(chignolin_pdb_fn, removeHs=False)
    Chem.SanitizeMol(chignolin)
    return chignolin

chignolin = generate_chignolin()

In [13]:
nonring, ring = TorsionFingerprints.CalculateTorsionLists(chignolin)
nonring_A = [list(atoms[0]) for atoms, ang in nonring]

In [14]:
nonring_A

[[3, 2, 1, 0],
 [5, 4, 2, 1],
 [14, 5, 4, 2],
 [7, 6, 5, 14],
 [8, 7, 6, 5],
 [15, 14, 5, 6],
 [17, 16, 14, 15],
 [22, 17, 16, 14],
 [19, 18, 17, 22],
 [20, 19, 18, 17],
 [24, 22, 17, 16],
 [26, 24, 22, 17],
 [30, 29, 26, 27],
 [32, 31, 29, 26],
 [38, 32, 31, 29],
 [34, 33, 32, 38],
 [35, 34, 33, 32],
 [36, 35, 34, 33],
 [40, 38, 32, 33],
 [41, 40, 38, 39],
 [42, 41, 40, 38],
 [44, 42, 41, 40],
 [46, 45, 41, 40],
 [48, 47, 45, 46],
 [49, 48, 47, 45],
 [50, 49, 48, 47],
 [52, 51, 49, 50],
 [53, 52, 51, 49],
 [55, 53, 52, 51],
 [57, 56, 52, 53],
 [59, 58, 56, 57],
 [70, 59, 58, 56],
 [61, 60, 59, 70],
 [62, 61, 60, 59],
 [71, 70, 59, 58],
 [73, 72, 70, 71],
 [74, 73, 72, 70],
 [75, 74, 73, 72]]

In [3]:
def generate_chignolin() -> Chem.Mol:
    """Generates chignolin molecule.
    """

    chignolin_pdb_fn = "chignolin/1uao.pdb"
    chignolin = Chem.rdmolfiles.MolFromPDBFile(chignolin_pdb_fn, removeHs=False)
    Chem.SanitizeMol(chignolin)
    return chignolin

In [4]:
chignolin = generate_chignolin()
cleaned_chig = Chem.rdmolops.RemoveHs(chignolin)
cleaned_chig = Chem.AddHs(cleaned_chig)
nonring, ring = TorsionFingerprints.CalculateTorsionLists(cleaned_chig)
nonring_B = [list(atoms[0]) for atoms, ang in nonring]

In [5]:
nonring_B

[[3, 2, 1, 0],
 [5, 4, 2, 1],
 [14, 5, 4, 2],
 [7, 6, 5, 14],
 [8, 7, 6, 5],
 [15, 14, 5, 6],
 [17, 16, 14, 15],
 [22, 17, 16, 14],
 [19, 18, 17, 22],
 [20, 19, 18, 17],
 [24, 22, 17, 16],
 [26, 24, 22, 17],
 [30, 29, 26, 27],
 [32, 31, 29, 26],
 [38, 32, 31, 29],
 [34, 33, 32, 38],
 [35, 34, 33, 32],
 [36, 35, 34, 33],
 [40, 38, 32, 33],
 [41, 40, 38, 39],
 [42, 41, 40, 38],
 [44, 42, 41, 40],
 [46, 45, 41, 40],
 [48, 47, 45, 46],
 [49, 48, 47, 45],
 [50, 49, 48, 47],
 [52, 51, 49, 50],
 [53, 52, 51, 49],
 [55, 53, 52, 51],
 [57, 56, 52, 53],
 [59, 58, 56, 57],
 [70, 59, 58, 56],
 [61, 60, 59, 70],
 [62, 61, 60, 59],
 [71, 70, 59, 58],
 [73, 72, 70, 71],
 [74, 73, 72, 70],
 [75, 74, 73, 72]]

In [15]:
nonring_A == nonring_B

True