In [1]:
import os

import pandas as pd
# Turn off pandas' chained-assignment (SettingWithCopy) check for the whole notebook.
pd.set_option('mode.chained_assignment', None)

In [2]:
# Root of the preclinical analysis directory. The default is the original
# author's local checkout; set MOALMANAC_PRECLINICAL_ROOT to point the notebook
# at another location without editing the code.
preclinical_root = os.environ.get(
    'MOALMANAC_PRECLINICAL_ROOT',
    '/Users/brendan/Github/moalmanac-paper/analyses/preclinical',
)

# Every input below is a tab-separated table under <preclinical_root>/formatted.
handle = f'{preclinical_root}/formatted/sanger.gdsc.txt'
gdsc = pd.read_csv(handle, sep='\t')

handle = f'{preclinical_root}/formatted/ccle.variants.txt'
variants = pd.read_csv(handle, sep='\t')

handle = f'{preclinical_root}/formatted/ccle.copy-numbers.txt'
cna = pd.read_csv(handle, sep='\t')

handle = f'{preclinical_root}/formatted/sanger.fusions.txt'
fusion = pd.read_csv(handle, sep='\t')

handle = f'{preclinical_root}/formatted/cell-lines.summary.txt'
summary = pd.read_csv(handle, sep='\t')

In [3]:
# Keep only the Pictilisib rows of the GDSC table. An explicit copy is taken so
# the columns added below are written to an independent frame instead of a
# slice of `gdsc` (no reliance on silenced SettingWithCopy warnings).
df = (
    gdsc
    .loc[gdsc['therapy_name'].eq('Pictilisib')]
    .copy()
    .reset_index(drop=True)
)

# Lookup tables keyed by the `sanger` column of the cell line summary.
sanger_to_ccle = summary.set_index('sanger')['ccle_name'].to_dict()
sanger_to_broad = summary.set_index('sanger')['broad'].to_dict()

# `model_id` values absent from the summary table map to NaN.
df['broad'] = df['model_id'].map(sanger_to_broad)
df['ccle'] = df['model_id'].map(sanger_to_ccle)

df.head()

Unnamed: 0,DATASET,model_id,therapy_name,ln_ic50,AUC,z_score,broad,ccle
0,GDSC2,SIDM00412,Pictilisib,-3.286888,0.291951,-3.766375,ACH-001709,WSUNHL_HAEMATOPOIETIC_AND_LYMPHOID_TISSUE
1,GDSC2,SIDM00407,Pictilisib,-3.169828,0.301248,-3.672634,ACH-000611,SUDHL6_HAEMATOPOIETIC_AND_LYMPHOID_TISSUE
2,GDSC2,SIDM01148,Pictilisib,-2.089101,0.440165,-2.807196,ACH-000050,NCIH929_HAEMATOPOIETIC_AND_LYMPHOID_TISSUE
3,GDSC2,SIDM01057,Pictilisib,-1.772624,0.480963,-2.553763,ACH-000081,GDM1_HAEMATOPOIETIC_AND_LYMPHOID_TISSUE
4,GDSC2,SIDM00406,Pictilisib,-1.758086,0.480973,-2.542122,ACH-000660,SUDHL5_HAEMATOPOIETIC_AND_LYMPHOID_TISSUE


In [4]:
def _models_with(table, mask):
    """Return the unique `model_id` values of the rows of `table` selected by `mask`.

    Parameters
    ----------
    table : pandas.DataFrame
        Feature table containing a `model_id` column.
    mask : pandas.Series of bool
        Row selector aligned with `table`.

    Returns
    -------
    list
        Distinct `model_id` values in order of first appearance.
    """
    return table.loc[mask, 'model_id'].drop_duplicates().tolist()


is_pik3ca_variant = variants['feature'].eq('PIK3CA')

pik3ca_mut_variants = _models_with(variants, is_pik3ca_variant)
pik3ca_mut_variants_missense = _models_with(
    variants, is_pik3ca_variant & variants['alteration_type'].eq('Missense')
)
pik3ca_mut_variants_H1047R = _models_with(
    variants, is_pik3ca_variant & variants['alteration'].eq('p.H1047R')
)
pik3ca_mut_cna = _models_with(cna, cna['feature'].eq('PIK3CA'))
# A fusion counts when PIK3CA appears as either the feature or the partner.
pik3ca_mut_fusions = _models_with(
    fusion, fusion['feature'].eq('PIK3CA') | fusion['partner'].eq('PIK3CA')
)

# Variant and copy number hits are matched against df['ccle'];
# fusion hits are matched against df['model_id'].
has_variant = df['ccle'].isin(pik3ca_mut_variants)
idx = has_variant | df['ccle'].isin(pik3ca_mut_cna) | df['model_id'].isin(pik3ca_mut_fusions)

# Flags are stored as 1.0 / 0.0 floats. Only the flag columns are written, so
# missing values in any other column (e.g. unmapped `broad` / `ccle` IDs) stay
# missing instead of being zero-filled by a frame-wide fillna.
df['PIK3CA'] = idx.astype(float)
df['PIK3CA somatic variant'] = has_variant.astype(float)
df['PIK3CA missense'] = df['ccle'].isin(pik3ca_mut_variants_missense).astype(float)
df['PIK3CA p.H1047R'] = df['ccle'].isin(pik3ca_mut_variants_H1047R).astype(float)

In [5]:
# Print the value counts of every column whose name contains 'PIK3CA',
# each followed by a blank line.
pik3ca_columns = df.columns[df.columns.str.contains('PIK3CA')]
for column in pik3ca_columns:
    counts = df[column].value_counts()
    print(column, counts, '', sep='\n')

PIK3CA
0.0    604
1.0    162
Name: PIK3CA, dtype: int64

PIK3CA somatic variant
0.0    663
1.0    103
Name: PIK3CA somatic variant, dtype: int64

PIK3CA missense
0.0    668
1.0     98
Name: PIK3CA missense, dtype: int64

PIK3CA p.H1047R
0.0    745
1.0     21
Name: PIK3CA p.H1047R, dtype: int64



In [6]:
# Write the annotated table to a single-sheet workbook, without the row index.
output_path = 'supplementary-figure-6.xlsx'
df.to_excel(output_path, sheet_name='SF6', index=False)