In [1]:
import re
from collections import defaultdict
import pandas as pd
import numpy as np
# Read the whole STRIDE output up front; later steps iterate over `lines`.
with open("stride.dat", "r") as stride_file:
    lines = list(stride_file)

# Regex for one hydrogen-bond record. Both residues on the line share the same
# layout: residue name, a literal "-" column, the captured residue number, and
# a second integer that is matched but not captured.
# NOTE(review): because the "-" column is matched literally, a line carrying
# anything else in that position will not match -- confirm this is intended.
_RESIDUE_FIELDS = r"(\w+)\s+-\s+(\d+)\s+\d+"
pattern = re.compile(
    r"(?:DNR|ACC)\s+" + _RESIDUE_FIELDS + r"\s+->\s+" + _RESIDUE_FIELDS
)

# Collect every directed (donor, acceptor) residue pair found on DNR/ACC lines.
# Each residue is represented as (residue_name, residue_number).
#
# A directed pair is recorded only the first time it is seen, so every entry of
# `hb_counts` ends up with the value 1 (presumably the same bond can be listed
# on both a DNR and an ACC line -- TODO confirm against the STRIDE format).
# Direction is NOT collapsed here: (A -> B) and (B -> A) stay separate keys and
# are merged by the later step that sorts each pair.
seen_pairs = set()
hb_counts = defaultdict(int)

for line in lines:
    if not line.startswith(("DNR", "ACC")):
        continue

    match = pattern.search(line)
    if match is None:
        # Record line that does not follow the expected layout; skip it.
        continue

    res1, res1_pdb, res2, res2_pdb = match.groups()
    first = (res1, int(res1_pdb))
    second = (res2, int(res2_pdb))

    # On a DNR line the first residue is the donor; on an ACC line it is the
    # acceptor, so the roles are swapped.
    if line.startswith("DNR"):
        donor, acceptor = first, second
    else:  # ACC
        donor, acceptor = second, first

    pair = (donor, acceptor)

    # Count each directed pair only once.
    if pair not in seen_pairs:
        seen_pairs.add(pair)
        hb_counts[pair] += 1


# Collapse direction: (A, B) and (B, A) are folded into a single key by sorting
# the two residues, and their counts are added together.
dedup_counts = defaultdict(int)
for directed_pair, n_bonds in hb_counts.items():
    undirected_pair = tuple(sorted(directed_pair))
    dedup_counts[undirected_pair] += n_bonds

# Convert results to a DataFrame with one row per undirected residue pair.
# Residues are rendered as "<name>-<number>".
# The column list is passed explicitly so that an empty `dedup_counts` still
# yields a frame with the expected columns; without it the frame would have no
# columns and a later `hb_df['Num_Hydrogen_Bonds']` lookup would raise KeyError.
hb_df = pd.DataFrame(
    [
        {
            "Residue_1": f"{res1[0]}-{res1[1]}",
            "Residue_2": f"{res2[0]}-{res2[1]}",
            "Num_Hydrogen_Bonds": count
        }
        for (res1, res2), count in dedup_counts.items()
    ],
    columns=["Residue_1", "Residue_2", "Num_Hydrogen_Bonds"],
)

# Save to CSV (optional)
# hb_df.to_csv("hydrogen_bonds_all_dedup.csv", index=False)

# Show the result
print(hb_df.head())


  Residue_1 Residue_2  Num_Hydrogen_Bonds
0    ILE-43     THR-3                   2
1    ILE-41     TRP-5                   2
2    ASP-39     SER-7                   2
3     ALA-9    TYR-37                   1
4     ALA-9    LEU-35                   1


In [2]:
# Inspect the full mapping of undirected residue pair -> hydrogen-bond count.
dedup_counts

defaultdict(int,
            {(('ILE', 43), ('THR', 3)): 2,
             (('ILE', 41), ('TRP', 5)): 2,
             (('ASP', 39), ('SER', 7)): 2,
             (('ALA', 9), ('TYR', 37)): 1,
             (('ALA', 9), ('LEU', 35)): 1,
             (('LEU', 104), ('LYS', 10)): 1,
             (('GLN', 33), ('LEU', 11)): 2,
             (('LEU', 13), ('LEU', 31)): 1,
             (('LEU', 31), ('PHE', 14)): 1,
             (('LEU', 15), ('SER', 208)): 1,
             (('ASP', 210), ('LEU', 15)): 1,
             (('GLN', 29), ('TYR', 16)): 2,
             (('ILE', 17), ('THR', 194)): 1,
             (('THR', 18), ('THR', 27)): 1,
             (('GLY', 19), ('THR', 27)): 1,
             (('ARG', 21), ('TYR', 25)): 1,
             (('ARG', 21), ('GLY', 24)): 1,
             (('ALA', 153), ('THR', 30)): 2,
             (('ALA', 151), ('PHE', 32)): 2,
             (('ALA', 273), ('PHE', 34)): 2,
             (('ASP', 36), ('LYS', 271)): 1,
             (('ASP', 95), ('THR', 40)): 2,
            

In [8]:
# Build a symmetric residue-by-residue hydrogen-bond count matrix and scale it
# by a per-bond energy.
n_residues = 283  # matrix dimension; presumably the protein length -- TODO confirm
eps_hb = 0.75*4.184 # kJ/mol per hydrogen bond (0.75 kcal/mol * 4.184 kJ/kcal)
hb_contact_matrix = np.zeros((n_residues, n_residues), dtype=int)
for (res1, res2), count in dedup_counts.items():
    # Residue numbers are used directly as 1-based matrix positions.
    i = res1[1] - 1 # 0-based index
    j = res2[1] - 1 # 0-based index
    # Guard against silent wrap-around: a residue number <= 0 would give a
    # negative index, which NumPy interprets as counting from the end.
    if not (0 <= i < n_residues and 0 <= j < n_residues):
        raise IndexError(
            f"residue numbers {res1[1]} and {res2[1]} must lie in 1..{n_residues}"
        )
    hb_contact_matrix[i, j] = count
    hb_contact_matrix[j, i] = count


hb_interaction_strength = eps_hb*hb_contact_matrix


In [10]:
# Show the interaction strengths at every matrix entry with at least one
# counted hydrogen bond. The matrix is filled symmetrically, so each residue
# pair contributes two entries to this flat array.
hb_interaction_strength[hb_contact_matrix>0]

array([6.276, 6.276, 6.276, 3.138, 3.138, 3.138, 6.276, 3.138, 3.138,
       3.138, 3.138, 6.276, 3.138, 3.138, 3.138, 3.138, 3.138, 3.138,
       3.138, 3.138, 3.138, 6.276, 6.276, 3.138, 3.138, 6.276, 6.276,
       6.276, 3.138, 3.138, 3.138, 6.276, 6.276, 6.276, 6.276, 6.276,
       6.276, 6.276, 3.138, 3.138, 3.138, 3.138, 3.138, 3.138, 3.138,
       3.138, 3.138, 3.138, 3.138, 3.138, 3.138, 3.138, 3.138, 3.138,
       3.138, 3.138, 3.138, 3.138, 3.138, 3.138, 3.138, 3.138, 3.138,
       3.138, 3.138, 3.138, 3.138, 3.138, 3.138, 3.138, 3.138, 3.138,
       3.138, 3.138, 3.138, 3.138, 3.138, 3.138, 3.138, 3.138, 3.138,
       3.138, 3.138, 3.138, 3.138, 3.138, 3.138, 3.138, 3.138, 3.138,
       3.138, 3.138, 3.138, 3.138, 6.276, 3.138, 3.138, 6.276, 3.138,
       3.138, 6.276, 6.276, 3.138, 3.138, 3.138, 3.138, 3.138, 3.138,
       3.138, 3.138, 3.138, 3.138, 3.138, 3.138, 3.138, 3.138, 3.138,
       3.138, 3.138, 3.138, 3.138, 3.138, 3.138, 3.138, 3.138, 3.138,
       3.138, 3.138,

In [11]:
# Sanity check: the same product that is assigned to eps_hb.
0.75*4.184

3.138

In [12]:
# Total number of hydrogen bonds, summed over all residue-pair rows of hb_df.
hb_df['Num_Hydrogen_Bonds'].sum()

197