In [43]:
import pandas as pd
from lifelines import CoxPHFitter
from src.helper_methods import *
from src.pipe_store import *
from src.constants import *
from lifelines import CoxPHFitter

%matplotlib inline 
%config InlineBackend.figure_format = 'retina'
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [44]:
# DESA epitopes this notebook treats as clinically relevant.
# The per-class grouping below mirrors the Epitope_DB lookup output shown
# further down in this notebook; the value is still one flat set.
RELEVANT_DESA_BAD = {
    # class ABC
    '144QL', '158T', '71TD', '76ED', '76ET', '80TLR',
    # class DR
    '67F', '70DA', '70DRA',
    # class DQ
    '45EV',
}

# Data Loading

### Total Cohort

In [45]:
# Columns carried forward from the merged antibody/cohort table.
cols = [
    'TransplantID',
    'DESA_Status',
    'EpvsHLA_Donor',
    '#DESA',
    'DESA',
    'RecipientAge_NOTR',
    'DonorAge_NOTR',
    'TypeOfDonor_NOTR',
    'FailureCode10Y_R',
    'GraftSurvival10Y_R',
    'IL2rMoAb',
    'CIPHour_DBD',
    'CIPHour_DCD',
    'LivingDonorNum',
    # 'FailureCode10y'
]

# NOTE(review): both inputs are pickles under the user's home directory;
# unpickling can execute arbitrary code, so only load files from a trusted source.
path_cohort = '~/UMCUtrecht/KaplanMeier/DESAsurvival.pickle'
path_antibody = '~/UMCUtrecht/20211104_mismatch_ep_db.pickle'
df_cohort = pd.read_pickle(path_cohort)
df_abs = pd.read_pickle(path_antibody)

# Join antibody data to the cohort on TransplantID (merge's default join type),
# then keep only the columns listed above.
df = df_abs.merge(df_cohort, on='TransplantID')[cols]
df.columns

Index(['TransplantID', 'DESA_Status', 'EpvsHLA_Donor', '#DESA', 'DESA',
       'RecipientAge_NOTR', 'DonorAge_NOTR', 'TypeOfDonor_NOTR',
       'FailureCode10Y_R', 'GraftSurvival10Y_R', 'IL2rMoAb', 'CIPHour_DBD',
       'CIPHour_DCD', 'LivingDonorNum'],
      dtype='object')

### All the DESA found

In [46]:
# DESA entries (one collection of epitope names per row) for transplants
# whose DESA_Status is 'DESA'.
list_set_desa = df.loc[df['DESA_Status'] == 'DESA', 'DESA'].values
# Flatten them into the set of distinct epitopes seen across those transplants.
desas = set().union(*list_set_desa)
print(desas)

{'152RA', '96H', '143S', '245AS', '52PQ', '116L', '97T', '13FE', '96EV', '70DRA', '84QL', '48Q', '95V', '163LW', '116D', '70QQ', '77T[DQ]', '25Q', '9F[DQ]', '30G[B]', '170RH', '74EL', '67F', '211T', '80I', '73ID', '74R', '80TLR', '70DR', '66NM', '96QN', '144TKH', '70GT', '62LQ', '71TD', '32L', '52LL', '37YV[DR]', '31FY', '166DG', '76V', '73TVS', '77D', '149H', '97I', '114H', '129QS', '80TL', '163RW', '87F', '4R', '145RT', '66IF', '26Y', '73GQ', '142M', '140TV', '97M', '70RE', '40GR', '152HA', '62EE', '193PL', '75I', '75S', '144KR', '131S', '30C', '14W', '31F', '16Y', '57A', '71KA', '70QT', '248M', '80N', '71TTS', '26F', '77SRN', '67VT', '70IAQ', '107W', '76ED', '45EV', '30D', '13SE', '62RR', '62GK', '193AV', '138K', '207S', '70Q', '185I', '44RMA', '150AAH', '163R', '151AHE', '44RT', '74S', '38L', '37N', '98KS', '65RK', '156DA', '138MI', '62GRN', '180E', '47KHL', '70R', '65QIA', '32Y', '26L[DQ]', '45EE', '97W', '80K', '44RME', '40E', '66IS', '57V[DR]', '94I', '151H', '66IY', '70DA', '12

### Total DESA

In [47]:
# Number of distinct DESA epitopes collected in `desas`.
len(desas)

313

### Number of transplants (Tx) with DESA

In [48]:
# Count the rows whose DESA_Status equals 'DESA' by summing the boolean mask.
int(df['DESA_Status'].eq('DESA').sum())

439

### Number of transplants (Tx) with clinically relevant DESA

In [49]:
def _has_relevant_desa(desa_set):
    """Return True when the transplant's DESA set shares any epitope with RELEVANT_DESA_BAD."""
    return bool(desa_set & RELEVANT_DESA_BAD)


# Whole cohort first, then the same count restricted to each donor type.
print('Total:', df['DESA'].apply(_has_relevant_desa).sum())
for donor_type in ('Deceased', 'Living'):
    donor_rows = df[df.TypeOfDonor_NOTR == donor_type]
    print(donor_type, donor_rows['DESA'].apply(_has_relevant_desa).sum())

Total: 76
Deceased 55
Living 21


### Number of DESA per group [total 313]

In [50]:
from collections import defaultdict
from src.constants import Epitope_DB

# Every (group, epitope) pair where an observed DESA appears in that
# Epitope_DB group's epitope collection.
matches = (
    (group, ep)
    for ep in desas
    for group, epitopes in Epitope_DB.items()
    if ep in epitopes
)

# Bucket the observed DESA by Epitope_DB group.
cls_vs_desa = defaultdict(set)
for group, ep in matches:
    cls_vs_desa[group].add(ep)

# Display the number of distinct DESA per group.
{cls: len(members) for cls, members in cls_vs_desa.items()}

{'ABC': 161, 'DR': 95, 'DQ': 57}

### Relevant DESA per group

In [25]:
# Same per-group bucketing, restricted to the epitopes in RELEVANT_DESA_BAD.
# NOTE(review): this rebinds `cls_vs_desa` and relies on `defaultdict` and
# `Epitope_DB` already being imported by an earlier cell.
cls_vs_desa = defaultdict(set)
for ep in RELEVANT_DESA_BAD:
    matching_groups = [
        group for group, epitopes in Epitope_DB.items() if ep in epitopes
    ]
    for group in matching_groups:
        cls_vs_desa[group].add(ep)

In [28]:
# Show the group -> epitopes mapping as a plain dict.
dict(cls_vs_desa)

{'ABC': {'144QL', '158T', '71TD', '76ED', '76ET', '80TLR'},
 'DR': {'67F', '70DA', '70DRA'},
 'DQ': {'45EV'}}

In [None]:
# Literal copy of the relevant-DESA-per-group mapping displayed by the cell above.
# It is not assigned to any name; evaluating this cell only echoes the dict.
{
    'ABC': {'144QL', '158T', '71TD', '76ED', '76ET', '80TLR'},
    'DR': {'67F', '70DA', '70DRA'},
    'DQ': {'45EV'}
}

### Number of transplants by count of clinically relevant DESA

In [36]:
# For each transplant with DESA_Status 'DESA', count how many of its DESA are
# in RELEVANT_DESA_BAD, then tabulate how many transplants have each count.
is_desa_tx = df['DESA_Status'] == 'DESA'
n_relevant_per_tx = df.loc[is_desa_tx, 'DESA'].apply(
    lambda eps: len(eps & RELEVANT_DESA_BAD)
)
n_relevant_per_tx.value_counts()

0    363
1     53
2      9
3      9
4      4
5      1
Name: DESA, dtype: int64

### Number of transplants with clinically relevant DESA, by targeted donor HLA locus

In [40]:
def get_rel_hla(x):
    """Collect the HLA prefixes (text before '*') targeted by relevant DESA.

    `x` is iterated with `.items()` as epitope -> HLA string pairs; only
    epitopes present in RELEVANT_DESA_BAD contribute to the returned set.
    """
    loci = set()
    for ep, hla in x.items():
        if ep in RELEVANT_DESA_BAD:
            loci.add(hla.split('*')[0])
    return loci


# Tabulate the locus combinations over transplants with DESA_Status 'DESA'.
is_desa_tx = df.DESA_Status == 'DESA'
df.loc[is_desa_tx, 'EpvsHLA_Donor'].apply(get_rel_hla).value_counts()

{}              363
{B}              36
{DQB1}           28
{DRB1}            6
{DRB5}            3
{DRB1, DRB5}      1
{A}               1
{DQB1, B}         1
Name: EpvsHLA_Donor, dtype: int64

### Tineke Follow-up question: I also need detailed information about all DESAs (loci, number per patient).

In [41]:
def get_hla(x):
    """Collect the HLA prefixes (text before '*') of every value in the epitope -> HLA mapping `x`."""
    return {hla.partition('*')[0] for _ep, hla in x.items()}


# Locus combinations targeted by any DESA, counted over transplants with
# DESA_Status 'DESA'.
desa_tx_hla_map = df.loc[df['DESA_Status'] == 'DESA', 'EpvsHLA_Donor']
hla_vs_desa = desa_tx_hla_map.apply(get_hla).value_counts()
hla_vs_desa

{A}                         83
{B}                         80
{DQB1}                      57
{DRB1}                      36
{A, B}                      20
{C}                         17
{DRB3}                      15
{DRB3, DRB1}                11
{DRB4}                       9
{C, B}                       8
{DQB1, DRB1}                 7
{A, DRB1}                    6
{C, A}                       6
{DRB1, B}                    6
{DRB5}                       5
{DRB4, DRB1}                 5
{DRB1, DRB5}                 5
{DQA1, DQB1}                 5
{A, DQB1}                    4
{A, DRB1, B}                 4
{DRB4, DQB1}                 3
{C, DQB1}                    3
{DQB1, DRB3, DRB1}           2
{DQB1, DRB3}                 2
{DQA1, DQB1, DRB1}           2
{A, C, B}                    2
{A, DQB1, DQA1}              2
{A, DRB3, DRB1}              2
{A, DQB1, DRB1}              2
{DQB1, DRB1, DRB5}           1
{A, DRB4}                    1
{DRB4, DQB1, B}              1
{A, DQB1

### Number of DESA per transplant

In [64]:
# Distribution of '#DESA' over transplants whose '#DESA' is non-zero,
# ordered by the count value.
nonzero_desa_counts = df.loc[df['#DESA'].ne(0), '#DESA']
nonzero_desa_counts.value_counts().sort_index()

1     82
2     63
3     51
4     44
5     40
6     27
7     27
8     17
9      9
10    10
11    13
12     6
13     7
14     8
15     6
16     6
17     2
18     2
19     1
20     2
21     3
22     2
23     2
24     2
25     3
32     3
34     1
Name: #DESA, dtype: int64

### Number of Relevant DESA per transplant

In [61]:
# For transplants with a non-zero '#DESA', count the DESA that are in
# RELEVANT_DESA_BAD and tabulate those counts in ascending order.
desa_sets = df.loc[df['#DESA'].ne(0), 'DESA']
n_relevant = desa_sets.apply(lambda eps: len(eps & RELEVANT_DESA_BAD))
n_relevant.value_counts().sort_index()

0    363
1     53
2      9
3      9
4      4
5      1
Name: DESA, dtype: int64