In [152]:
import pandas as pd

In [153]:
# Epitope identifiers used as the membership test when mismatched epitopes are
# split into the `rel_ep_hla` / `irel_ep_hla` columns.
# NOTE(review): presumably the DESA epitopes considered clinically relevant
# (associated with a bad outcome) — confirm against the study definition.
RELEVANT_DESA_BAD = {
    '45EV',
    '67F',
    '70DA',
    '70DRA',
    '71TD',
    '76ED',
    '76ET',
    '80TLR',
    '144QL',
    '158T',
}

def load_mfi(path):
    """Load the MFI table from a semicolon-separated CSV file.

    Only the columns listed in ``usecols`` are read. After loading:

    * ``Specificity`` is split on ``','`` and stored as a ``set`` of strings
      per row.
    * ``MedianFI`` and ``TMeanFI`` are read as text, their decimal comma is
      replaced by a dot, and the values are converted to ``float32`` and then
      truncated to ``int``.
    * Rows whose ``Locus`` equals ``'DP'`` are dropped.

    Parameters
    ----------
    path : str or path-like
        Location of the CSV file; passed straight to ``pandas.read_csv``.

    Returns
    -------
    pandas.DataFrame
        The filtered table; the original row index is kept (not reset).
    """
    # Columns holding fluorescence intensities written with a decimal comma.
    intensity_cols = ['MedianFI', 'TMeanFI']
    dtype = {
        'TransplantID': 'int32',
        'LocusLong': 'category',
        'Locus': 'category',
        'SerologicalEquivalent': 'category',
        'ManConcl_Immucor': 'category',
    }
    # Force the intensity columns to text. Without this, a column in which no
    # value happens to contain a decimal comma is parsed as numeric by pandas
    # and the comma replacement below would fail on non-string values.
    dtype.update({col: str for col in intensity_cols})
    usecols = [
        'TransplantID', 'LocusLong', 'Locus', 'Specificity',
        'SerologicalEquivalent', 'MedianFI', 'TMeanFI', 'ManConcl_Immucor']
    df = pd.read_csv(path, sep=';', dtype=dtype, usecols=usecols)
    df['Specificity'] = df['Specificity'].apply(lambda x: set(x.split(',')))
    for col in intensity_cols:
        df[col] = (df[col]
                    .str.replace(',', '.', regex=False)  # literal comma, not a regex
                    .astype('float32')
                    .astype('int')  # truncates the fractional part
        )
    # Finally drop the HLA's belonging to DP group
    return df[df['Locus'] != 'DP']


In [154]:
# Load the epitope-mismatch table and keep only the three columns used below.
# NOTE(review): the path is specific to one user's home directory, and
# unpickling executes arbitrary code — only load pickles from a trusted source.
path = '~/Repos/STRIDE/STRIDE-Analytics/data/20210614-mismatch_ep_db-extended.pickle'
df = pd.read_pickle(path)[['TransplantID', 'Epitope_Mismatch', 'DESA']]

In [155]:
# Boolean mask: True for rows whose DESA entry is truthy.
ind = df['DESA'].apply(bool)

# Split each masked row's Epitope_Mismatch mapping (epitope -> HLA) into the
# entries whose epitope is in RELEVANT_DESA_BAD and the remaining entries.
# Rows outside the mask receive NaN through index alignment on assignment.
# NOTE(review): the filter runs over the Epitope_Mismatch keys, not over the
# DESA set itself, so an epitope can land in `rel_ep_hla` without being in
# that row's DESA — confirm this is intended.
df['rel_ep_hla'] = df.loc[ind, 'Epitope_Mismatch'].apply(
    lambda mismatch: {ep: hla for ep, hla in mismatch.items() if ep in RELEVANT_DESA_BAD}
)
df['irel_ep_hla'] = df.loc[ind, 'Epitope_Mismatch'].apply(
    lambda mismatch: {ep: hla for ep, hla in mismatch.items() if ep not in RELEVANT_DESA_BAD}
)

In [156]:
# Restrict to the rows selected by `ind` (truthy DESA) and display the result.
df_desa = df[ind]
df_desa

Unnamed: 0,TransplantID,Epitope_Mismatch,DESA,rel_ep_hla,irel_ep_hla
5,327,"{'75IL': 'DQA1*02:01', '175E': 'DQA1*02:01', '...","{62GRN, 74Y, 44RMA, 97V, 94I, 71SA, 62GE}",{},"{'75IL': 'DQA1*02:01', '175E': 'DQA1*02:01', '..."
6,369,"{'85VV': 'DRB1*11:04', '86V': 'DRB1*12:01', '8...","{38L, 85A, 37L, 57V[DR], 26L[DR]}",{},"{'85VV': 'DRB1*11:04', '86V': 'DRB1*12:01', '8..."
7,5580,"{'65QIA': 'B*07:02', '177DK': 'B*07:02', '113H...","{45EV, 55PP, 55PPD}",{'45EV': 'DQB1*03:01'},"{'65QIA': 'B*07:02', '177DK': 'B*07:02', '113H..."
9,72,"{'26Y': 'DRB3*01:01', '37FL': 'DRB3*01:01', '3...","{9F[DQ], 30H[DQ], 67VG, 55RPD, 30D, 86A, 55R, ...",{},"{'26Y': 'DRB3*01:01', '37FL': 'DRB3*01:01', '3..."
12,279,"{'71TTS': 'B*50:01', '76ES': 'B*50:01', '113YN...","{96HK, 11STS, 70Q, 71E, 70DA, 67LQ, 98Q, 25R, ...",{'70DA': 'DRB1*13:02'},"{'71TTS': 'B*50:01', '76ES': 'B*50:01', '113YN..."
...,...,...,...,...,...
4657,954,"{'80K': 'C*02:02', '16S': 'C*02:02', '211T': '...","{211T, 163RW, 80K, 163R, 16S, 77NGT, 76ANT, 90...",{},"{'80K': 'C*02:02', '16S': 'C*02:02', '211T': '..."
4663,961,"{'9D': 'C*06:02', '24S': 'B*37:01', '69RA': 'C...",{30RV},"{'71TD': 'B*37:01', '76ET': 'B*37:01', '76ED':...","{'9D': 'C*06:02', '24S': 'B*37:01', '69RA': 'C..."
4672,973,"{'151ARV': 'B*40:01', '9H': 'B*40:01', '45KE':...","{125SQ, 144KR, 66NM, 97I, 163R, 152HA, 116I, 6...",{},"{'151ARV': 'B*40:01', '9H': 'B*40:01', '45KE':..."
4673,974,"{'13SE': 'DRB3*01:01', '37N': 'DRB1*03:01', '3...","{71TD, 76ED, 76ET, 80TLR, 82LR}","{'71TD': 'B*37:01', '76ET': 'B*37:01', '76ED':...","{'13SE': 'DRB3*01:01', '37N': 'DRB1*03:01', '3..."


In [157]:
# df[ind]['HLA'].apply(lambda x: list(x.keys())).tolist()

In [158]:


# Load the MFI table via load_mfi.
# NOTE(review): absolute, user-specific path — will not resolve on another machine.
df_mfi = load_mfi('/Users/Danial/UMCUtrecht/RawData/MFI.csv')

In [159]:
# Display the loaded MFI table for inspection.
df_mfi

Unnamed: 0,TransplantID,LocusLong,Locus,Specificity,SerologicalEquivalent,MedianFI,TMeanFI,ManConcl_Immucor
16,1,DQB1,DQ,"{DQA1*05:01, DQB1*02:01}",DQ2,440,532,Negative
17,1,DQB1,DQ,"{DQA1*02:01, DQB1*02:02}",DQ2,430,534,Negative
18,1,DQB1,DQ,"{DQB1*02:02, DQA1*03:02}",DQ2,2799,3173,Positive
19,1,DQB1,DQ,"{DQA1*05:01, DQB1*02:02}",DQ2,349,435,Negative
20,1,DQB1,DQ,"{DQB1*03:01, DQA1*03:02}",DQ7,8568,9105,Positive
...,...,...,...,...,...,...,...,...
209161,999,C,C,{C*06:02},Cw6,2313,2409,Positive
209162,999,C,C,{C*07:01},Cw7,209,223,Negative
209163,999,C,C,{C*07:02},Cw7,156,169,Negative
209164,999,C,C,{C*08:01},Cw8,135,140,Negative


In [161]:
def make_ep_hla_mfi_tuple(tx_id, ep_hla_dict, mfi_df=None):
    """Attach a TMeanFI value to every (epitope, HLA) pair of one transplant.

    For each ``epitope -> HLA`` entry, the MFI rows whose ``Specificity`` set
    contains that HLA are selected and the ``TMeanFI`` of the first matching
    row is taken. When several rows match, only the first one is used.

    Parameters
    ----------
    tx_id
        Transplant identifier; only used in the "not found" message.
    ep_hla_dict : dict
        Mapping of epitope to HLA.
    mfi_df : pandas.DataFrame, optional
        MFI rows to search; needs the columns ``Specificity`` (a container
        supporting ``in``) and ``TMeanFI``. When omitted, the notebook-global
        ``mfi`` is used, which keeps existing two-argument calls working.

    Returns
    -------
    list of tuple
        ``(epitope, hla, tmean_fi)`` for every pair with a matching MFI row.
        Pairs without a match are reported with ``print(tx_id, ep, hla)`` and
        left out of the list.
    """
    if mfi_df is None:
        # Backward-compatible fallback to the notebook-global slice.
        mfi_df = mfi

    _list = []
    for ep, hla in ep_hla_dict.items():
        # Force a boolean mask: on a frame without rows `apply` yields an
        # empty non-boolean Series, which would not be treated as a row mask.
        mask = mfi_df['Specificity'].apply(lambda x: hla in x).astype(bool)
        hits = mfi_df.loc[mask, 'TMeanFI']
        if hits.empty:
            # Explicit "no MFI row for this HLA" path; genuine errors (missing
            # column, undefined global, ...) are no longer swallowed.
            print(tx_id, ep, hla)
            continue
        _list.append((ep, hla, hits.values[0]))

    return _list



from collections import  defaultdict

# Column-wise accumulator: one list per output column, one entry per transplant.
results = defaultdict(list)

desa_rows = df_desa[['TransplantID', 'rel_ep_hla', 'irel_ep_hla']]
for tx_id, rel_hla, irel_hla in desa_rows.itertuples(index=False, name=None):
    results['TransplantID'].append(tx_id)

    # make_ep_hla_mfi_tuple is called without an MFI frame and reads the
    # notebook-global `mfi`, so it has to be rebound to this transplant's
    # rows before every call.
    mfi = df_mfi[df_mfi['TransplantID'] == tx_id]

    # An empty tuple marks "no relevant epitopes"; a list (possibly empty)
    # is the result of an actual lookup.
    if rel_hla:
        rel_mfi = make_ep_hla_mfi_tuple(tx_id, rel_hla)
    else:
        rel_mfi = ()
    results['rel_ep_hla_mfi'].append(rel_mfi)

    # The analogous lookup for `irel_hla` is currently disabled.

pd.DataFrame(results)

279 70DA DRB1*13:02
3150 71TD B*37:01
3150 76ET B*44:02
3150 76ED B*37:01
3150 80TLR B*44:02
1274 70DA DRB1*13:02
5752 45EV DQB1*03:04
4606 67F DRB5*01:02
5798 45EV DQB1*03:19
1840 76ET B*13:02
1840 144QL B*13:02
1840 80TLR B*13:02
5285 76ET B*44:02
5285 80TLR B*44:02
4189 158T B*38:01
537 158T B*39:06
5197 76ET B*44:02
5197 80TLR B*44:02
4165 158T B*39:06
3800 158T B*39:06
293 158T B*39:10
1089 76ET B*38:02
1089 80TLR B*38:02
1121 45EV DQB1*03:01
1141 70DA DRB1*04:02
1214 76ET B*27:05
1214 76ED B*27:05
1214 80TLR B*27:05
130 158T B*39:06
1559 158T B*39:06
1657 70DA DRB1*13:02
1818 70DA DRB1*13:01
2142 76ET B*44:02
2142 80TLR B*44:02
241 76ET B*44:03
241 80TLR B*44:03
2463 70DA DRB1*13:01
2645 45EV DQB1*03:01
2682 158T B*39:06
2702 71TD B*37:01
2702 76ET B*37:01
2702 76ED B*37:01
2702 80TLR B*37:01
2926 70DA DRB1*13:02
2929 158T B*39:06
3040 76ET A*30:02
31 71TD B*37:01
31 76ET B*37:01
31 76ED B*37:01
31 80TLR B*37:01
3268 76ET B*13:02
3268 144QL B*13:02
3268 80TLR B*13:02
329 67F DRB1

Unnamed: 0,TransplantID,rel_ep_hla_mfi
0,327,()
1,369,()
2,5580,"[(45EV, DQB1*03:01, 4004)]"
3,72,()
4,279,[]
...,...,...
434,954,()
435,961,[]
436,973,()
437,974,"[(71TD, B*37:01, 1390), (76ET, B*37:01, 1390),..."


In [148]:
# Count the MFI rows of transplant 369 whose Specificity contains 'B*35:02'.
# Query the full table `df_mfi`: the global `mfi` is rebound by the lookup
# loop to a single transplant's rows, so filtering it for 369 returns nothing
# unless it happens to hold that transplant.
df_mfi[df_mfi.TransplantID.eq(369)]['Specificity'].apply(lambda x: 'B*35:02' in x).sum()

0