# Analyze dynophores

## Aim of this notebook

Explore how to analyze dynophores in a Jupyter notebook!

In [1]:
from collections import defaultdict
import json
from pathlib import Path

import pandas as pd

## Read data files

In [2]:
DATA = Path('../data/out/DynophoreApp')

In [3]:
dynophores_files = [file for file in DATA.glob('*')]
dynophores_files[:3]

[PosixPath('../data/out/DynophoreApp/1KE7-1_Data__superFeature_HBA[4619]%12.3.txt'),
 PosixPath('../data/out/DynophoreApp/1KE7-1_Data__superFeature_HBA[4618]%0.2.txt'),
 PosixPath('../data/out/DynophoreApp/1KE7-1_Data__superFeature_AR[4622,4615,4623,4613,4614,4621]%4.0.txt')]

In [4]:
len(dynophores_files)

38

In [5]:
# Initialize dynophore
dynophore = {'dynophore': []}

# Get superfeatures
superfeature_names = set([file.stem.split('_')[4].split('%')[0] for file in dynophores_files])

for superfeature_name in superfeature_names:
    
    # Get all files with this superfeature
    superfeature_files = [file for file in dynophores_files if superfeature_name in file.stem]
    
    # Get superfeature file
    assert len([file for file in superfeature_files if len(file.stem.split('_')) == 5]) == 1
    superfeature_file = [file for file in superfeature_files if len(file.stem.split('_')) == 5][0]
    
    # Set superfeature
    superfeature = {'name': superfeature_name, 'file': str(superfeature_file), 'env_partners': []}
    
    # Get environmental partner files for this superfeature
    for file in superfeature_files:
        if len(file.stem.split('_')) == 7:
            env_partner_file = file
            env_partner_name = env_partner_file.stem.split('_')[6].split('%')[0]
            env_partner = {'name': env_partner_name, 'file': str(env_partner_file)}
            superfeature['env_partners'].append(env_partner)
        
    dynophore['dynophore'].append(superfeature)

In [6]:
with open(DATA / '..' / 'dynophore_metadata.json', 'w') as f:
    json.dump(dynophore, f)

### Superfeatures

#### Superfeatures occurrence

In [7]:
occurrence_superfeatures = pd.concat(
    [pd.read_csv(superfeature['file'], header=None) for superfeature in dynophore['dynophore']], 
    axis=1
)
occurrence_superfeatures.columns = [superfeature['name'] for superfeature in dynophore['dynophore']]
occurrence_superfeatures.head()

Unnamed: 0,HBA[4606],HBA[4596],"H[4599,4602,4601,4608,4609,4600]","AR[4622,4615,4623,4613,4614,4621]",HBD[4598],HBD[4612],HBA[4619],"H[4615,4623,4622,4613,4621,4614]",HBA[4618],"AR[4605,4607,4603,4606,4604]"
0,0,1,1,0,0,0,1,1,0,0
1,0,1,1,0,0,0,1,1,0,0
2,0,0,1,0,0,0,0,1,0,0
3,0,0,1,0,0,0,0,1,0,0
4,0,0,1,0,0,1,0,1,0,0


### Interactions

In [8]:
def read_interactions(dynophore, type='occurrence'):
    
    types = ['occurrence', 'distance']
    if type == 'occurrence':
        type_ix = 1
    elif type == 'distance':
        type_ix = 0
    else:
        raise ValueError(f'Wrong type. Select from: {", ".join(types)}')
    
    interactions = defaultdict()

    for superfeature in dynophore['dynophore']:

        interaction = pd.concat(
            [pd.read_csv(env_partner['file'], header=None)[type_ix] for env_partner in superfeature['env_partners']], 
            axis=1
        )
        interaction.columns = [env_partner['name'] for env_partner in superfeature['env_partners']]
        interactions[superfeature['name']] = interaction

    return interactions

#### Interactions occurrence

In [9]:
interactions_occurrence = read_interactions(dynophore, type='occurrence')
print(interactions_occurrence.keys())
interactions_occurrence['HBD[4612]'].head()

dict_keys(['HBA[4606]', 'HBA[4596]', 'H[4599,4602,4601,4608,4609,4600]', 'AR[4622,4615,4623,4613,4614,4621]', 'HBD[4598]', 'HBD[4612]', 'HBA[4619]', 'H[4615,4623,4622,4613,4621,4614]', 'HBA[4618]', 'AR[4605,4607,4603,4606,4604]'])


Unnamed: 0,GLN-131-A[2061],ASP-86-A[1319],LEU-83-A[1263],GLN-131-A[2057],ASP-86-A[1320]
0,0,0,0,0,0
1,0,0,0,0,0
2,0,0,0,0,0
3,0,0,0,0,0
4,0,0,1,0,0


#### Interactions distances

In [10]:
interactions_distance = read_interactions(dynophore, type='distance')
print(interactions_distance.keys())
interactions_distance['HBD[4612]'].head()

dict_keys(['HBA[4606]', 'HBA[4596]', 'H[4599,4602,4601,4608,4609,4600]', 'AR[4622,4615,4623,4613,4614,4621]', 'HBD[4598]', 'HBD[4612]', 'HBA[4619]', 'H[4615,4623,4622,4613,4621,4614]', 'HBA[4618]', 'AR[4605,4607,4603,4606,4604]'])


Unnamed: 0,GLN-131-A[2061],ASP-86-A[1319],LEU-83-A[1263],GLN-131-A[2057],ASP-86-A[1320]
0,11.944555,7.128676,3.427919,6.76952,8.252505
1,11.944555,7.128676,3.427919,6.76952,8.252505
2,12.278903,8.653643,3.582377,6.767191,7.264928
3,12.66895,7.213809,3.543346,7.105429,8.994092
4,12.911131,9.34115,3.175053,7.417255,7.662991
