In [12]:
import pandas as pd
import re

In [13]:
# Constants
# Name of the column in the cross-inhibition table whose values are matched
# against the antigen names; used below for filtering and for group lookups.
labelled_column_name = 'Labelled'

### Comparative binding to wild type vs mutant SARS-CoV-2 NP antigens by different epitope groups

In [14]:
# Read the tab-separated antigen / epitope-group table and discard every row
# that has a missing value. reset_index() is called without drop=True on
# purpose: the pre-filter row labels are kept as an 'index' column, which is
# later reused as the sorting order.
antigens_by_epitope_groups = (
    pd.read_csv("antigens_by_epitope_groups.csv", sep='\t')
    .dropna()
    .reset_index()
)
antigens_by_epitope_groups

Unnamed: 0,index,blank,group
0,1,NP1501,IA
1,2,NP1507,IA
2,3,NP1514,IA
3,4,NP1516,IA
4,5,NP1517,IA
5,7,NP1526,IB
6,8,X190,IB
7,9,X200,IB
8,10,X201,IB
9,11,NP1512,IB/II


In [15]:
# Plain list of the names in the 'blank' column; used below to select the
# matching rows of the cross-inhibition table.
antigen_names = antigens_by_epitope_groups['blank'].tolist()

### Cross inhibition raw data

In [16]:
# Raw cross-inhibition table (tab-separated), without rows containing missing values.
ci_raw_data = pd.read_csv("cross_inhibitor_raw_data.csv", sep='\t').dropna()

# Keep only the rows whose label is one of the known antigen names, renumber
# the rows from 0 and remove the 'Neat' column.
is_known_antigen = ci_raw_data[labelled_column_name].isin(antigen_names)
ci_raw_data_filtered = (
    ci_raw_data.loc[is_known_antigen]
    .reset_index(level=0, drop=True)
    .drop('Neat', axis=1)
)

In [17]:
# Display the cross-inhibition rows that remain after filtering by antigen name.
ci_raw_data_filtered

Unnamed: 0,Labelled,NP1501*,NP1502*,NP1503*,NP1508*,NP1510*,NP1514*,NP1516*,NP1517*,NP1518*,...,X202*,X211*,X213*,X215*,X217*,X220*,X221*,X223*,X233*,X271*
0,NP1501,0.449,0.715,1.0664,1.0248,1.136,0.26,0.4172,0.268,0.8512,...,0.5316,1.2267,1.2519,0.64,1.0224,1.189,0.8856,1.4616,0.769,1.228
1,NP1502,0.893,0.425,0.336,0.196,0.349,0.3625,0.665,0.45,1.0,...,0.6492,1.2087,1.2753,0.55,1.0593,1.203,0.2979,1.4184,0.809,0.407
2,NP1503,0.768,0.309,0.068,0.052,0.095,0.305,0.7126,0.408,0.9888,...,0.642,1.197,1.2582,0.6075,1.0791,1.215,0.1035,1.4704,0.93,0.109
3,NP1507,0.422,0.856,0.7216,0.6088,0.785,0.1825,0.4256,0.154,1.0352,...,0.6432,1.2213,1.2672,0.5125,1.0215,1.244,0.6624,1.4168,1.018,0.843
4,NP1508,0.732,0.388,0.1456,0.0848,0.19,0.345,0.8456,0.411,1.1344,...,0.6492,1.2474,1.2699,0.5575,1.0089,1.151,0.207,1.376,1.052,0.233
5,NP1510,0.781,0.382,0.2152,0.1056,0.233,0.495,0.7826,0.527,1.0,...,0.6744,1.1547,1.1934,0.575,0.9513,1.117,0.2331,1.42,0.942,0.22
6,NP1512,0.79,0.789,0.876,0.7504,0.979,0.735,1.015,0.737,0.816,...,0.6012,1.1826,1.0251,0.7425,0.8532,1.123,0.6534,1.2544,1.018,0.627
7,NP1514,0.448,0.822,1.0968,1.0088,1.189,0.2475,0.4858,0.253,1.0208,...,0.6816,1.2384,1.224,0.8075,1.0413,1.332,0.8712,1.4616,0.772,0.964
8,NP1516,0.385,1.034,0.9832,0.9304,1.053,0.1775,0.3052,0.152,0.9504,...,0.5916,1.2078,1.1844,0.5925,1.0116,1.261,0.792,1.4944,0.878,0.974
9,NP1517,0.425,0.644,0.7952,0.7304,0.885,0.155,0.2758,0.079,1.1504,...,0.5016,1.1646,1.1601,0.5075,1.0179,1.245,0.7209,1.4376,0.882,1.011


### Cross inhibition sorted by homology

In [18]:
# Position of the label column, so that the 'group' column can be inserted
# immediately after it.
labelled_column_location = ci_raw_data_filtered.columns.get_loc(labelled_column_name)

In [19]:
# Get groups and sorting order and insert
#
# Build a single lookup table keyed by antigen name instead of re-scanning
# antigens_by_epitope_groups with a boolean mask for every row. Only the first
# entry per name is kept, so a name listed twice resolves to its first match.
antigen_lookup = (
    antigens_by_epitope_groups
    .drop_duplicates(subset='blank')
    .set_index('blank')
)
labelled_antigens = ci_raw_data_filtered[labelled_column_name]

# Fail loudly if a labelled antigen has no entry in the lookup table; a silent
# NaN here would only surface later as an obscure error while sorting.
unmatched = labelled_antigens[~labelled_antigens.isin(antigen_lookup.index)]
if not unmatched.empty:
    raise KeyError(
        f"No epitope group found for antigens: {unmatched.unique().tolist()}")

# The epitope group goes directly after the label column.
groups = labelled_antigens.map(antigen_lookup['group'])
ci_raw_data_filtered.insert(labelled_column_location + 1, 'group', groups)

# The antigen table's 'index' column defines the row order; it is appended as
# the last column so the rows can be sorted by it.
sorting_order = labelled_antigens.map(antigen_lookup['index'])
ci_raw_data_filtered.insert(
    len(ci_raw_data_filtered.columns), 'sorting_order', sorting_order)


In [20]:
# Index rows by (group, antigen label) and sort them by sorting_order.
#
# set_index replaces the existing positional index, so both columns can be
# promoted to index levels in one step. No groupby is needed: the rows are
# only re-labelled and re-ordered, not aggregated.
ci_raw_data_filtered = (
    ci_raw_data_filtered
    .set_index(['group', labelled_column_name])
    # The key casts to int so the ordering is numeric.
    .sort_values(by='sorting_order', key=lambda x: x.astype(int))
)


In [21]:
def ci_column_compare(x, antigens=None):
    """Sort key that places a column according to the antigen ordering.

    Parameters
    ----------
    x : str
        Column name. A single trailing '*' (as in 'NP1501*') is removed before
        the lookup; names without that marker are looked up unchanged.
    antigens : pandas.DataFrame, optional
        Table with a 'blank' column (antigen names) and an 'index' column
        (sort position). Defaults to the notebook-level
        ``antigens_by_epitope_groups``.

    Returns
    -------
    The 'index' value of the first row whose 'blank' equals the antigen name,
    or ``float('inf')`` when there is no such row, so that unmatched columns
    sort after all matched ones.
    """
    if antigens is None:
        antigens = antigens_by_epitope_groups

    # Strip only the '*' marker; slicing off the last character unconditionally
    # would mangle names that do not carry it (e.g. 'sorting_order').
    antigen_name = x[:-1] if x.endswith('*') else x

    # Exact comparison: a substring/regex match would let 'X200' also hit
    # 'X2001' and would interpret characters such as '.' as wildcards.
    index = antigens.loc[antigens['blank'] == antigen_name, 'index']
    return index.values[0] if len(index) > 0 else float('inf')

# Reorder the columns to follow the antigen ordering. sorted() is stable, so
# columns with equal keys (including all unmatched ones) keep their current
# relative order.
cols = sorted(ci_raw_data_filtered.columns.tolist(), key=ci_column_compare)

ci_raw_data_filtered = ci_raw_data_filtered.loc[:, cols]

In [22]:
# Display the final table: rows indexed by (group, Labelled) and ordered by
# sorting_order, columns ordered by ci_column_compare.
ci_raw_data_filtered

Unnamed: 0_level_0,Unnamed: 1_level_0,NP1501*,NP1514*,NP1516*,NP1517*,X200*,X201*,NP1521*,NP1502*,NP1503*,NP1508*,...,X217*,X223*,X233*,NP1524*,NP3715*,NP3706*,X211*,X215*,X220*,sorting_order
group,Labelled,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
IA,NP1501,0.449,0.26,0.4172,0.268,0.8073,1.055,0.596,0.715,1.0664,1.0248,...,1.0224,1.4616,0.769,1.1392,1.0766,0.9114,1.2267,0.64,1.189,1
IA,NP1507,0.422,0.1825,0.4256,0.154,0.4491,0.963,0.583,0.856,0.7216,0.6088,...,1.0215,1.4168,1.018,1.1104,1.0514,0.8862,1.2213,0.5125,1.244,2
IA,NP1514,0.448,0.2475,0.4858,0.253,0.891,1.038,0.626,0.822,1.0968,1.0088,...,1.0413,1.4616,0.772,1.2512,1.1725,0.84,1.2384,0.8075,1.332,3
IA,NP1516,0.385,0.1775,0.3052,0.152,0.6606,1.063,0.455,1.034,0.9832,0.9304,...,1.0116,1.4944,0.878,1.0936,1.0682,0.7833,1.2078,0.5925,1.261,4
IA,NP1517,0.425,0.155,0.2758,0.079,0.5103,1.121,0.414,0.644,0.7952,0.7304,...,1.0179,1.4376,0.882,1.0624,1.0563,0.8673,1.1646,0.5075,1.245,5
IB,NP1526,0.492,0.22,0.4074,0.21,0.2556,0.206,0.453,0.465,0.3544,0.2528,...,1.0755,1.3896,0.833,1.0688,1.0276,0.9786,1.1907,0.65,1.267,7
IB,X190,0.442,0.3275,0.4452,0.239,0.0846,0.168,0.349,0.386,0.0576,0.0448,...,0.9054,1.3192,0.895,0.9232,0.8638,0.7581,1.2681,0.535,1.186,8
IB,X200,0.509,0.39,0.4466,0.437,0.1746,0.401,0.345,0.397,0.308,0.2192,...,1.0557,1.4328,1.029,0.9832,1.0045,0.777,1.1106,0.475,1.232,9
IB,X201,0.443,0.3875,0.4802,0.426,0.1917,0.309,0.389,0.421,0.3024,0.24,...,0.9891,1.4024,0.922,0.8632,0.8008,1.0395,1.0107,0.4725,1.178,10
IB/II,NP1512,0.79,0.735,1.015,0.737,0.5643,0.439,0.638,0.789,0.876,0.7504,...,0.8532,1.2544,1.018,1.1096,0.9975,0.7623,1.1826,0.7425,1.123,11
