In [1]:
import numpy as np
import yaml
import re

In [28]:
def parse_residue_list(residue_items):
    """
    Expand a residue specification into a flat list of raw residue indices.

    The indices are returned as written in the config; no shift to 0-based
    indexing is applied here.

    Parameters
    ----------
    residue_items : int, str, or iterable of int/str
        Each entry is an integer, a string holding a single integer ("7"),
        or a string holding an inclusive range ("3-9"). A bare int or str
        (what YAML yields for `residues: 12` or `residues: 1-50`) is treated
        as a one-element list.

    Returns
    -------
    list of int
        Residue indices in the order they were listed. A range whose start
        is greater than its end contributes nothing. Entries that are neither
        int nor str are skipped.

    Raises
    ------
    ValueError
        If a string entry is not an integer or a "start-end" pair of integers.
    """
    # A scalar spec must not be iterated directly: a str would be walked
    # character by character ("12" -> [1, 2]) and an int is not iterable.
    if isinstance(residue_items, (int, str)):
        residue_items = [residue_items]

    residues = []
    for item in residue_items:
        if isinstance(item, int):
            residues.append(item)
        elif isinstance(item, str):
            if '-' in item:
                # "start-end" is inclusive on both ends.
                start, end = map(int, item.split('-'))
                residues.extend(range(start, end + 1))
            else:
                residues.append(int(item))
    return residues

def read_yaml_config(filepath):
    """
    Load domain definitions and interaction strengths from a YAML file.

    Top-level keys read from the file:
      - 'n_residues'   : total residue count; residues are taken to be
                         numbered 1..n_residues.
      - 'intra_domains': mapping of domain name -> mapping with keys
                         'residues' (see parse_residue_list) and 'strength'.
      - 'inter_domains': mapping of "D1-D2" -> strength. Each pair is stored
                         in both orders so lookups are symmetric.

    Residues in 1..n_residues that no domain lists are collected into an
    implicit domain named 'X' with intra strength 1.0. The implicit domain
    gets inter strength 1.0 towards every other domain, unless the config
    already gives an explicit value for that pair.

    Parameters
    ----------
    filepath : str or path-like
        Path of the YAML file to read.

    Returns
    -------
    tuple
        (domain_to_residues, intra_strengths, inter_strengths) where
        domain_to_residues maps domain -> list of raw (1-based) residues,
        intra_strengths maps domain -> strength, and inter_strengths maps
        (domain, domain) tuples -> strength.

    Raises
    ------
    KeyError
        If one of the three top-level keys, or a domain's 'residues' /
        'strength' key, is missing.
    ValueError
        If the config defines a domain named 'X' while some residues are
        unassigned (the implicit domain would overwrite it), or if an
        inter-domain key is not exactly two names separated by '-'.
    """
    with open(filepath, 'r') as f:
        config = yaml.safe_load(f)

    # An empty `intra_domains:` / `inter_domains:` key loads as None;
    # treat that as "no entries" instead of failing on `.items()`.
    intra = config['intra_domains'] or {}
    inter = config['inter_domains'] or {}
    n_residues = int(config['n_residues'])

    domain_to_residues = {}
    intra_strengths = {}
    # Every residue that appears in some domain definition.
    all_residues = set()

    for domain, values in intra.items():
        residues = parse_residue_list(values['residues'])
        domain_to_residues[domain] = residues
        intra_strengths[domain] = values['strength']
        all_residues.update(residues)

    # Residues are assumed to start at 1.
    full_residues = set(range(1, n_residues + 1))
    unassigned_residues = sorted(full_residues - all_residues)

    has_implicit_domain = bool(unassigned_residues)
    if has_implicit_domain:
        if 'X' in domain_to_residues:
            # Overwriting a user-defined 'X' would silently drop its residues.
            raise ValueError(
                "domain name 'X' is reserved for unassigned residues, but the "
                "config defines a domain 'X' and leaves residues unassigned"
            )
        domain_to_residues['X'] = unassigned_residues
        intra_strengths['X'] = 1.0

    inter_strengths = {}
    for pair_str, strength in inter.items():
        # Strip each name so "A - B" and "A-B" refer to the same pair.
        names = [name.strip() for name in pair_str.split('-')]
        if len(names) != 2:
            raise ValueError(
                f"inter-domain key {pair_str!r} must have the form 'D1-D2'"
            )
        d1, d2 = names
        inter_strengths[(d1, d2)] = strength
        inter_strengths[(d2, d1)] = strength  # ensure symmetry

    # Default couplings apply only to the implicit domain, and never replace
    # a value the config states explicitly.
    if has_implicit_domain:
        for other in domain_to_residues:
            if other != 'X':
                inter_strengths.setdefault(('X', other), 1.0)
                inter_strengths.setdefault((other, 'X'), 1.0)

    return domain_to_residues, intra_strengths, inter_strengths

def generate_interaction_matrix(domain_to_residues, intra_strengths, inter_strengths):
    """
    Build the square residue-by-residue interaction-strength matrix.

    Rows and columns follow the residues in ascending order: the smallest
    residue maps to index 0, the next one to index 1, and so on. This equals
    "residue number minus one" only when the residues are exactly 1..n
    without gaps.

    Parameters
    ----------
    domain_to_residues : dict
        Maps domain -> iterable of residue numbers. If a residue is listed
        under several domains, the domain iterated last owns it.
    intra_strengths : dict
        Maps domain -> strength used for pairs of residues in that domain.
    inter_strengths : dict
        Maps (domain_a, domain_b) -> strength. If a pair is missing, the
        reversed pair is tried; if that is missing too, the strength is 0.0.

    Returns
    -------
    numpy.ndarray
        Float array of shape (n, n), n being the number of distinct residues.

    Raises
    ------
    KeyError
        If a domain that owns at least one residue has no entry in
        intra_strengths.
    """
    residue_to_domain = {}
    for domain, residues in domain_to_residues.items():
        for res in residues:
            residue_to_domain[res] = domain

    # Distinct residues in ascending order; position in this list is the
    # matrix index.
    residue_list = sorted(residue_to_domain)

    # Number the domains that actually own a residue.
    domain_index = {}
    for res in residue_list:
        domain_index.setdefault(residue_to_domain[res], len(domain_index))

    # The strength depends only on the pair of domains, so resolve it once
    # per domain pair instead of once per residue pair.
    n_domains = len(domain_index)
    domain_table = np.zeros((n_domains, n_domains))
    for dom_i, row in domain_index.items():
        for dom_j, col in domain_index.items():
            if dom_i == dom_j:
                # intra-domain interaction
                domain_table[row, col] = intra_strengths[dom_i]
            elif (dom_i, dom_j) in inter_strengths:
                domain_table[row, col] = inter_strengths[(dom_i, dom_j)]
            elif (dom_j, dom_i) in inter_strengths:
                # fall back to the reversed pair
                domain_table[row, col] = inter_strengths[(dom_j, dom_i)]
            # otherwise the entry keeps its initial 0.0

    # Expand the domain table to residue resolution: entry (i, j) is the
    # table value for (domain of residue i, domain of residue j).
    codes = np.array(
        [domain_index[residue_to_domain[res]] for res in residue_list],
        dtype=np.intp,
    )
    return domain_table[codes[:, None], codes[None, :]]





In [29]:
# Load the domain definitions and build the residue-by-residue interaction matrix.
# "domain.yaml" is a relative path, so it is looked up in the kernel's working directory.
domain_to_residues, intra_strengths, inter_strengths = read_yaml_config("domain.yaml")
matrix = generate_interaction_matrix(domain_to_residues, intra_strengths, inter_strengths)

# Disabled spot checks: generate_interaction_matrix returns only the matrix,
# so `res_idx` (a residue -> matrix index map) is not defined in this notebook.
# print(matrix[res_idx[1], res_idx[22]])   # Within domain A
# print(matrix[res_idx[1], res_idx[60]])   # Between A and B

In [32]:
# Spot check of a single matrix entry at zero-based position (0, 282).
# NOTE(review): presumably residues 1 and 283, assuming the residues are exactly 1..n — confirm.
matrix[0,282]

1.0