In [None]:
import json
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy import stats

In [None]:
'''
Load in the Mouse Protein Complex JSON file created above
'''
file_path = '/content/mus_musculus_protein_complex_compiled_data.json'
with open(file_path, 'r') as f:
  data = json.load(f)


def _protein_identifiers(participants):
  """Return the `identifier` of every participant whose `interactorType` is 'protein'."""
  return [member['identifier'] for member in participants if member['interactorType']=='protein']


# Map each complex key to the identifiers of its protein participants
# (participants with any other interactorType are left out).
complex_protein_interactors = {
    complex_id: _protein_identifiers(entry['participants'])
    for complex_id, entry in data.items()
}

In [None]:
'''
Load in Proteomics Data for all Mouse Strains

'''
strains = ['dba','fvb','cej','c57', 'balbc','aj']
conditions = ['iso', 'ctrl']
# Only the "after" imputation tables are read: the p-values computed later compare
# DMI quantified turnover means for interactors within a complex.
imputations = ['after']
pathway = '/content/'

# One CSV per (strain, condition, imputation) combination, named
# "<cond>_<strain>_<imp>.csv" under `pathway`.
all_data = []
for strain, cond, imp in [
    (s, c, i) for s in strains for c in conditions for i in imputations
]:
  filename = f"{pathway}{cond}_{strain}_{imp}.csv"
  # Tag every row with the file it came from so the stacked table can be filtered later.
  dat = pd.read_csv(filename).assign(cond=cond, strain=strain, imp=imp)
  all_data.append(dat)

# Stack all tables (row labels of the individual files are kept as-is) and make sure
# the UniProt and cond columns are strings.
turnover_data = pd.concat(all_data).astype({'UniProt': str, 'cond': str})

In [None]:
# Display the stacked turnover table (every strain/condition file concatenated above).
turnover_data

Unnamed: 0,UniProt,median.K,cond,strain,imp
0,A2A432,0.700000,iso,dba,after
1,A2A8U2,0.450111,iso,dba,after
2,A2AAJ9,0.102450,iso,dba,after
3,A2ADF7,0.352111,iso,dba,after
4,A2ADY9,0.455000,iso,dba,after
...,...,...,...,...,...
2604,Q9Z2W0,0.095750,ctrl,aj,after
2605,Q9Z2X1,0.314000,ctrl,aj,after
2606,Q9Z2Y3,0.325714,ctrl,aj,after
2607,Q9Z2Y8,0.118833,ctrl,aj,after


In [None]:
# get log2 of turnover value for a UniProt ID from turnover_data
def turnovers(uniprot_id, cond='iso', table=None):
  """Return the log2-transformed `median.K` values recorded for one protein.

  Args:
    uniprot_id: UniProt accession, compared for equality against the `UniProt` column.
    cond: value of the `cond` column to keep; defaults to 'iso'.
    table: DataFrame with `UniProt`, `cond` and `median.K` columns. When omitted,
      the notebook-level `turnover_data` is used.

  Returns:
    pandas Series holding log2(median.K) for the matching rows, with the row labels
    of `table` preserved; the Series is empty when no row matches.
  """
  if table is None:
    table = turnover_data
  mask = (table['UniProt'] == uniprot_id) & (table['cond'] == cond)
  # NOTE: values are not filtered before the transform, so a median.K of 0 gives -inf
  # and a negative one gives NaN.
  return np.log2(table.loc[mask, 'median.K'])


def turnovers_ctrl(uniprot_id, table=None):
  """Return log2(median.K) for one protein in the 'ctrl' condition (see `turnovers`)."""
  return turnovers(uniprot_id, cond='ctrl', table=table)


In [None]:
## Obtain list of interactors for each complex
## Calculate one p-value per complex with a one-way ANOVA (scipy.stats.f_oneway) in which
## every protein interactor contributes one group of log2 turnover values

cpxs = ["CPX-616", "CPX-5823", "CPX-3009", "CPX-5152"]


def _turnovers_by_interactor(complex_ids, turnover_fn):
  """Build {complex ID: {UniProt ID: turnover values}} for the given complexes.

  `turnover_fn` is called once per protein interactor with its UniProt ID
  (e.g. `turnovers` for ISO, `turnovers_ctrl` for CTRL).
  """
  return {
      cpx: {prot: turnover_fn(prot) for prot in complex_protein_interactors[cpx]}
      for cpx in complex_ids
  }


def _anova_pvalues(turnovers_by_complex):
  """Return {complex ID: one-way ANOVA p-value across its interactors' turnover values}.

  Interactors without any turnover value are left out of the test, because a single
  empty group makes `f_oneway` return NaN for the whole complex. When fewer than two
  non-empty groups remain the test is undefined and NaN is stored instead of raising.
  """
  pvalues = {}
  for cpx, per_protein in turnovers_by_complex.items():
    groups = [values for values in per_protein.values() if len(values) > 0]
    if len(groups) < 2:
      pvalues[cpx] = float('nan')
    else:
      pvalues[cpx] = stats.f_oneway(*groups).pvalue
  return pvalues


### ISO ###
# Turnover values for each protein interactor of each complex (ISO condition)
fig4d_cpxs_dict = _turnovers_by_interactor(cpxs, turnovers)
coherence_iso = _anova_pvalues(fig4d_cpxs_dict)

### CTRL ###
# Turnover values for each protein interactor of each complex (CTRL condition)
fig4d_cpxs_ctrl_dict = _turnovers_by_interactor(cpxs, turnovers_ctrl)
coherence_ctrl = _anova_pvalues(fig4d_cpxs_ctrl_dict)


In [None]:
# ANOVA p-value per complex, computed from the ISO-condition turnover values.
coherence_iso

{'CPX-616': 0.001098410355840822,
 'CPX-5823': 6.485491152822282e-08,
 'CPX-3009': 0.00043004733894076366,
 'CPX-5152': 0.0028123486015905717}

In [None]:
# ANOVA p-value per complex, computed from the CTRL-condition turnover values.
coherence_ctrl

{'CPX-616': 0.16171282805794004,
 'CPX-5823': 0.008904107770296764,
 'CPX-3009': 0.0370817185215137,
 'CPX-5152': 0.7028259371241417}

In [None]:
# Store the ISO and CTRL p-value dictionaries together in one JSON file
# (written to the current working directory).
with open('coherence_scores.json', 'w') as f:
  f.write(json.dumps({'coherence_iso': coherence_iso, 'coherence_ctrl': coherence_ctrl}))