In [1]:
import glob as glob
import pandas as pd

redact_germline = True

# Process retrospective cohorts
Both the illustrate notebook and the calculations notebook require processed outputs from both MOAlmanac and PHIAL. Here we clean up the outputs of each tool and annotate them against each other, so that for every molecular feature we know how the other software binned it.

In [2]:
def load_handles_simple(handles):
    """Read tab-separated tables and stack them, tagging each row with its patient.

    The patient identifier is derived from the file name: the text after the
    last '/' and before '_complete_muts_indels_scna_detailed.txt'.

    Args:
        handles: iterable of file paths to tab-separated tables.

    Returns:
        One DataFrame holding the rows of every file, with a fresh integer
        index and a 'patient_id' column.
    """
    suffix = '_complete_muts_indels_scna_detailed.txt'
    frames = [
        pd.read_csv(path, sep='\t').assign(patient_id=path.split('/')[-1].split(suffix)[0])
        for path in handles
    ]
    return pd.concat(frames, ignore_index=True)
    
def load_handles(handles):
    """Read tab-separated tables and concatenate them into one DataFrame.

    Args:
        handles: iterable of file paths to tab-separated tables.

    Returns:
        The row-wise concatenation of all tables with a fresh integer index.
    """
    tables = [pd.read_csv(path, sep='\t') for path in handles]
    return pd.concat(tables, ignore_index=True)

# glob returns paths in arbitrary, filesystem-dependent order. Sorting makes the row
# order reproducible, which matters because duplicates are later dropped with keep='first'.
robinson_phial_handles = sorted(glob.glob('2015-Robinson/data/phial/*.txt'))
vanallen_phial_handles = sorted(glob.glob('2015-VanAllen/data/phial/*.txt'))

robinson_handles = sorted(glob.glob('2015-Robinson/data/almanac/*/*.actionable.txt'))
vanallen_handles = sorted(glob.glob('2015-VanAllen/data/almanac/*/*.actionable.txt'))

robinson_phial = load_handles_simple(robinson_phial_handles)
vanallen_phial = load_handles_simple(vanallen_phial_handles)

robinson_almanac = load_handles(robinson_handles)
vanallen_almanac = load_handles(vanallen_handles)

# Tag every row with the cohort it was loaded from.
robinson_almanac['cohort'] = 'SU2C'
robinson_phial['cohort'] = 'SU2C'
vanallen_almanac['cohort'] = 'Melanoma'
vanallen_phial['cohort'] = 'Melanoma'

phial = pd.concat([robinson_phial, vanallen_phial], ignore_index=True)
almanac = pd.concat([robinson_almanac, vanallen_almanac], ignore_index=True)
if redact_germline:
    # .copy() so that columns added later are written to an independent frame, not a slice.
    almanac = almanac[~almanac['feature_type'].eq('Germline Variant')].copy()

# Drop PHIAL rows binned as 'Filtered Calls' and rows classified as lincRNA.
phial = phial[~phial['Score_bin'].eq('Filtered Calls')]
phial = phial[~phial['Variant_Classification'].eq('lincRNA')]

# Keep only PHIAL rows whose gene is listed in the TARGET comparison sheet.
target = pd.read_excel('../knowledge-bases/target/almanac-comparison.xlsx')
phial = phial[phial['Gene'].isin(target['Gene'])].copy()

In [3]:
def annotate_feature_string_phial(dataframe):
    """Add a 'feature_string' column ("<Gene>.<Alteration>") to a PHIAL table.

    'Deleted' and 'Amplified' in the 'Alteration' column are first renamed to
    'Deletion' and 'Amplification'. For rows whose 'Variant_Classification' is
    'Splice_Site' or 'Start_Codon_Del', that classification is appended to the
    feature string. Missing genes or alterations contribute an empty string.

    Args:
        dataframe: table with 'Gene', 'Alteration' and 'Variant_Classification'
            columns. It is modified in place.

    Returns:
        The same DataFrame object, with 'Alteration' normalised and
        'feature_string' added.
    """
    copy_number_terms = {'Deleted': 'Deletion', 'Amplified': 'Amplification'}
    dataframe['Alteration'] = dataframe['Alteration'].replace(copy_number_terms)

    gene = dataframe['Gene'].fillna('')
    alteration = dataframe['Alteration'].fillna('')
    dataframe['feature_string'] = gene + '.' + alteration

    needs_suffix = dataframe['Variant_Classification'].isin(['Splice_Site', 'Start_Codon_Del'])
    dataframe.loc[needs_suffix, 'feature_string'] = (
        dataframe.loc[needs_suffix, 'feature_string']
        + dataframe.loc[needs_suffix, 'Variant_Classification']
    )
    return dataframe

def annotate_feature_string_almanac(dataframe):
    """Add a 'feature_string' column to a MOAlmanac table, formatted per feature type.

    Formats, keyed on 'feature_type':
        Somatic Variant      -> "<feature>.<alteration>"
        Copy Number          -> "<feature>.<alteration_type>"
        Rearrangement        -> "<alteration>"
        Germline Variant     -> "<feature>.<alteration>.Germline"
        Aneuploidy, Mutational Burden, Mutational Signature -> "<feature>"
    Rows of any other feature type keep an empty string. Rows whose
    'alteration_type' is 'Splice Site' or 'Start_Codon_Del' additionally get that
    value appended, with spaces replaced by underscores.

    Every row selection is computed from the ``dataframe`` argument, so the
    function does not depend on a module-level ``almanac`` variable.

    Args:
        dataframe: table with 'feature_type', 'feature', 'alteration' and
            'alteration_type' columns. It is modified in place.

    Returns:
        The same DataFrame object with 'feature_string' added.
    """
    feature_type = dataframe['feature_type']
    is_mutation = feature_type.eq('Somatic Variant')
    is_copynumber = feature_type.eq('Copy Number')
    is_fusion = feature_type.eq('Rearrangement')
    is_germline = feature_type.eq('Germline Variant')
    is_feature_only = feature_type.isin(['Aneuploidy', 'Mutational Burden', 'Mutational Signature'])

    dataframe['feature_string'] = ''
    dataframe.loc[is_mutation, 'feature_string'] = (
        dataframe.loc[is_mutation, 'feature'].fillna('') + '.'
        + dataframe.loc[is_mutation, 'alteration'].fillna('')
    )
    dataframe.loc[is_copynumber, 'feature_string'] = (
        dataframe.loc[is_copynumber, 'feature'].fillna('') + '.'
        + dataframe.loc[is_copynumber, 'alteration_type'].fillna('')
    )
    # Rearrangements use the alteration text as-is (no gene prefix, no NaN filling).
    dataframe.loc[is_fusion, 'feature_string'] = dataframe.loc[is_fusion, 'alteration']
    dataframe.loc[is_germline, 'feature_string'] = (
        dataframe.loc[is_germline, 'feature'].fillna('') + '.'
        + dataframe.loc[is_germline, 'alteration'].fillna('') + '.Germline'
    )
    dataframe.loc[is_feature_only, 'feature_string'] = dataframe.loc[is_feature_only, 'feature'].fillna('')

    idx_class = dataframe['alteration_type'].isin(['Splice Site', 'Start_Codon_Del'])
    dataframe.loc[idx_class, 'feature_string'] += dataframe.loc[idx_class, 'alteration_type'].str.replace(' ', '_')
    return dataframe

# PHIAL: build feature strings, prefix them with the patient id, drop rows left without one.
phial = annotate_feature_string_phial(phial)
phial['patient_feature_string'] = (
    phial['patient_id'].fillna('') + '.' + phial['feature_string'].fillna('')
)
phial = phial[~phial['feature_string'].eq('')]

# MOAlmanac: same three steps.
almanac = annotate_feature_string_almanac(almanac)
almanac['patient_feature_string'] = (
    almanac['patient_id'].fillna('') + '.' + almanac['feature_string'].fillna('')
)
almanac = almanac[~almanac['feature_string'].eq('')]

In [4]:
# This feature will be phased out.
# Copy number rows binned as 'Investigate Actionability - High' are promoted to
# 'Putatively Actionable' in each of the four bin columns.
# Boolean masks are combined here rather than `Index & Index`: using `&` as a set
# intersection on Index objects was deprecated and then removed in pandas 2.0.
is_copy_number = almanac['feature_type'].eq('Copy Number')
for column in ['score_bin', 'sensitive_score_bin', 'resistance_score_bin', 'prognostic_score_bin']:
    is_high = almanac[column].eq('Investigate Actionability - High')
    almanac.loc[is_copy_number & is_high, column] = 'Putatively Actionable'

# Removing double counted fusions.
# This is a feature, not a bug, for reviewing individual fusions; however,
# it results in double counting for aggregated reporting.
almanac = almanac.drop_duplicates(['patient_feature_string'], keep='first')

In [5]:
almanac['feature_type'].value_counts()

Somatic Variant         1132
Copy Number              751
Mutational Signature     371
Rearrangement            232
Aneuploidy               137
Mutational Burden         47
Name: feature_type, dtype: int64

In [6]:
# PHIAL uses the bin "Investigate Biological Significance" for CNAs that fall within a
# TARGET gene but whose directionality does not match. Since there are only 95 of them,
# they are relabelled as "Investigate Actionability". There is no exact map to Almanac
# bins, because PHIAL/TARGET does not check whether the event has been catalogued as a
# CNA, only the direction / alteration_type. Specifically, PHIAL asks,
# "does TARGET have 'Amplification' catalogued for this gene?", whereas MOAlmanac asks,
# "Is this a catalogued gene? Is this a catalogued CNA? Is this a catalogued Amplification?"
#
# PHIAL gives "High Priority" to somatic variants whose gene appears in PHIAL but does
# not match any further; these are most similar to the "Biologically Relevant" category.
phial['Score_bin'] = phial['Score_bin'].replace({
    'Investigate Biological Significance': 'Investigate Actionability',
    'High Priority': 'Biologically Relevant',
})

# Collapse the High / Low sub-levels of MOAlmanac's Investigate Actionability bin.
almanac['score_bin'] = almanac['score_bin'].replace(
    ['Investigate Actionability - High', 'Investigate Actionability - Low'],
    'Investigate Actionability',
)

In [7]:
# Build an empty (patient x feature) grid and fill in the bin each tool assigned.
# Patients are taken from the MOAlmanac table only.
# NOTE(review): a PHIAL patient absent from MOAlmanac has no row in this grid, so the
# PHIAL assignment below presumably relies on every PHIAL patient also being in MOAlmanac - confirm.
all_patients = pd.Index(almanac['patient_id'].drop_duplicates().sort_values())
# Features are the union of the feature strings seen by either tool.
all_features = (
    pd.Index(almanac['feature_string'].drop_duplicates().sort_values())
    .union(
        pd.Index(phial['feature_string'].drop_duplicates().sort_values())
    ).drop_duplicates().tolist()
)

# One row per (patient, feature) combination; both bin columns start out empty.
all_considered = pd.DataFrame(columns=['almanac_bin', 'phial_bin'],
                             index=pd.MultiIndex.from_product([all_patients, all_features]))

# Per-tool lookup tables keyed on (patient_id, feature_string), keeping the first row of any duplicate key.
almanac_indexed = almanac.loc[:, ['patient_id', 'feature_string', 'score_bin']].drop_duplicates(['patient_id', 'feature_string']).set_index(['patient_id', 'feature_string'])
phial_indexed = phial.loc[:, ['patient_id', 'feature_string', 'Score_bin']].drop_duplicates(['patient_id', 'feature_string']).set_index(['patient_id', 'feature_string'])

# Copy each tool's bin into the grid rows that the tool reported.
all_considered.loc[almanac_indexed.index, 'almanac_bin'] = (
    almanac_indexed.loc[almanac_indexed.index, 'score_bin']
)

all_considered.loc[phial_indexed.index, 'phial_bin'] = (
    phial_indexed.loc[phial_indexed.index, 'Score_bin']
)

In [8]:
# Count how many of the two tools binned each (patient, feature) pair. Only the two bin
# columns are counted, so re-running this cell does not fold an existing 'sum' column
# into the count.
all_considered['sum'] = all_considered[['almanac_bin', 'phial_bin']].notna().sum(axis=1)
df = all_considered[all_considered['sum'].gt(0)]
# Keep rows that MOAlmanac binned, or that PHIAL placed in one of the bins of interest.
df = df[~df['almanac_bin'].isnull() | df['phial_bin'].isin(['Actionable', 'High Priority', 'Investigate Actionability', 'Investigate Biological Significance', 'Biologically Relevant'])]
df = df.reset_index().rename(columns={'level_1': 'feature_string'})

# Blank out any PHIAL bin outside the three compared bins.
df.loc[~df['phial_bin'].isin(['Actionable', 'Investigate Actionability', 'Biologically Relevant']), 'phial_bin'] = pd.NA
df['almanac_bin'] = df['almanac_bin'].fillna(pd.NA)
# Exact-value rename to MOAlmanac's bin name. A substring-based .str.replace would also
# rewrite any value that merely contains 'Actionable'.
df['phial_bin'] = df['phial_bin'].replace({'Actionable': 'Putatively Actionable'})

In [9]:
# Patient ids containing 'MEL' are labelled 'MEL'; all others are labelled 'SU2C'.
idx = df['patient_id'].str.contains('MEL')
df['cohort'] = 'SU2C'
df.loc[idx, 'cohort'] = 'MEL'
df.head()

Unnamed: 0,patient_id,feature_string,almanac_bin,phial_bin,sum,cohort
0,MEL-IPI_Pat02,BLM.Amplification,Biologically Relevant,,1,MEL
1,MEL-IPI_Pat02,BRAF.p.V600E,Putatively Actionable,Putatively Actionable,2,MEL
2,MEL-IPI_Pat02,COL1A1--CITED4,Investigate Actionability,,1,MEL
3,MEL-IPI_Pat02,COSMIC Signature 7,Biologically Relevant,,1,MEL
4,MEL-IPI_Pat02,CTNNB1.Amplification,Biologically Relevant,,1,MEL


In [10]:
df.tail()

Unnamed: 0,patient_id,feature_string,almanac_bin,phial_bin,sum,cohort
3065,TP_2064,COSMIC Signature 15,Biologically Relevant,,1,SU2C
3066,TP_2064,COSMIC Signature 6,Biologically Relevant,,1,SU2C
3067,TP_2064,MPL.p.A134T,Investigate Actionability,Biologically Relevant,2,SU2C
3068,TP_2064,TP53.p.H179R,Investigate Actionability,Investigate Actionability,2,SU2C
3069,TP_2064,Whole genome doubling,Investigate Actionability,,1,SU2C


In [11]:
# Attach MOAlmanac annotation columns, one left merge per column, matching rows on
# patient and feature string.
merge_keys = ['patient_id', 'feature_string']
for annotation in ['sensitive_predictive_implication',
                   'resistance_predictive_implication',
                   'prognostic_predictive_implication',
                   'clinvar']:
    df = df.merge(almanac.loc[:, [annotation] + merge_keys], on=merge_keys, how='left')

In [12]:
# Infer the feature type from the feature string. Rules are applied top to bottom, so a
# later match overrides an earlier one; rows matching no rule become 'Somatic variant'.
feature_type_rules = [
    (df['feature_string'].str.contains('Amplification'), 'Somatic copy number'),
    (df['feature_string'].str.contains('Deletion'), 'Somatic copy number'),
    (df['feature_string'].str.contains('--'), 'Rearrangement'),
    (df['feature_string'].str.contains('Germline'), 'Germline'),
    (df['feature_string'].eq('Whole genome doubling'), 'Aneuploidy'),
    (df['feature_string'].eq('High Mutational Burden'), 'Tumor mutational burden'),
    (df['feature_string'].str.contains('COSMIC Signature'), 'Mutational signature'),
]

df['feature_type'] = ''
for matches, type_label in feature_type_rules:
    df.loc[matches, 'feature_type'] = type_label
df.loc[df['feature_type'].eq(''), 'feature_type'] = 'Somatic variant'

# Preview only: the reformatted strings are displayed, not stored.
df['feature_string'].str.replace('.', ' ').str.replace('p ', 'p.')

0           BLM Amplification
1                BRAF p.V600E
2              COL1A1--CITED4
3          COSMIC Signature 7
4        CTNNB1 Amplification
                ...          
3065      COSMIC Signature 15
3066       COSMIC Signature 6
3067              MPL p.A134T
3068             TP53 p.H179R
3069    Whole genome doubling
Name: feature_string, Length: 3070, dtype: object

In [13]:
df['almanac_bin'].value_counts()

Biologically Relevant        1104
Investigate Actionability    1099
Putatively Actionable         467
Name: almanac_bin, dtype: int64

In [14]:
df['phial_bin'].value_counts()

Biologically Relevant        834
Investigate Actionability    547
Putatively Actionable         73
Name: phial_bin, dtype: int64

In [15]:
# CDKN2A and CDKN2B deletions are relabelled as one combined 'CDKN2A/B.Deletion' event,
# then split off from the remaining rows.
idx_cdkn2ab = df['feature_string'].isin(['CDKN2A.Deletion', 'CDKN2B.Deletion'])
df.loc[idx_cdkn2ab, 'feature_string'] = 'CDKN2A/B.Deletion'
df_cdkn2ab = df[idx_cdkn2ab].copy()
df_other = df[~idx_cdkn2ab].copy()

In [16]:
df_cdkn2ab.shape

(85, 11)

In [17]:
df_cdkn2ab['phial_bin'].fillna('').value_counts()

Investigate Actionability    68
                             17
Name: phial_bin, dtype: int64

In [18]:
df_cdkn2ab['almanac_bin'].fillna('').value_counts()

Putatively Actionable    83
                          2
Name: almanac_bin, dtype: int64

In [19]:
# Collapse the CDKN2A/B rows to one row per patient. Each bin column takes the first
# non-missing value observed for that patient, or NA when there is none.
# The loop names `label` and `group` are kept because a later cell displays `group`.
for label, group in df_cdkn2ab.groupby('patient_id'):
    for bin_column in ['almanac_bin', 'phial_bin']:
        observed = group[bin_column].dropna().tolist()
        df_cdkn2ab.loc[group.index, bin_column] = observed[0] if observed else pd.NA
df_cdkn2ab = df_cdkn2ab.drop_duplicates(['patient_id'], keep='first')
df = pd.concat([df_cdkn2ab, df_other], ignore_index=True)

In [20]:
group

Unnamed: 0,patient_id,feature_string,almanac_bin,phial_bin,sum,cohort,sensitive_predictive_implication,resistance_predictive_implication,prognostic_predictive_implication,clinvar,feature_type
2663,SC_9054,CDKN2A/B.Deletion,Putatively Actionable,Investigate Actionability,2,SU2C,Preclinical,,,,Somatic copy number
2664,SC_9054,CDKN2A/B.Deletion,Putatively Actionable,Investigate Actionability,2,SU2C,Preclinical,,,,Somatic copy number


In [21]:
df[df['feature_string'].eq('CDKN2A/B.Deletion')]['almanac_bin'].value_counts()

Putatively Actionable    43
Name: almanac_bin, dtype: int64

In [22]:
df[df['feature_string'].eq('CDKN2A/B.Deletion')]['phial_bin'].value_counts()

Investigate Actionability    35
Name: phial_bin, dtype: int64

In [23]:
df['feature_type'].value_counts()

Somatic variant            1444
Somatic copy number         798
Mutational signature        371
Rearrangement               232
Aneuploidy                  137
Tumor mutational burden      47
Name: feature_type, dtype: int64

In [24]:
# Short display label per row, chosen by feature type.
idx_direct = df['feature_type'].isin(['Mutational signature', 'Rearrangement'])
idx_wgd = df['feature_type'].eq('Aneuploidy')
idx_tmb = df['feature_type'].eq('Tumor mutational burden')
idx_germline = df['feature_type'].eq('Germline')
idx_cn = df['feature_type'].eq('Somatic copy number')
idx_som = df['feature_type'].eq('Somatic variant')

# Text before the first '.' of the feature string.
leading_token = df['feature_string'].str.split('.').str[0]
# Abbreviated copy number wording with '.' turned into a space.
copy_number_label = (
    df['feature_string']
    .str.replace('Amplification', 'Amp')
    .str.replace('Deletion', 'Del')
    .str.replace('.', ' ')
)

df['feature_str_simple'] = ''
df.loc[idx_direct, 'feature_str_simple'] = df.loc[idx_direct, 'feature_string']
df.loc[idx_wgd, 'feature_str_simple'] = 'WGD'
df.loc[idx_tmb, 'feature_str_simple'] = 'TMB'
df.loc[idx_germline, 'feature_str_simple'] = leading_token[idx_germline].add(' Germline')
df.loc[idx_som, 'feature_str_simple'] = leading_token[idx_som]
df.loc[idx_cn, 'feature_str_simple'] = copy_number_label[idx_cn]


In [25]:
df['almanac_bin'].value_counts()

Biologically Relevant        1104
Investigate Actionability    1099
Putatively Actionable         427
Name: almanac_bin, dtype: int64

In [26]:
df['phial_bin'].value_counts()

Biologically Relevant        834
Investigate Actionability    514
Putatively Actionable         73
Name: phial_bin, dtype: int64

In [27]:
# Write the per-patient, per-feature comparison table as a tab-separated file, without the index.
df.to_csv('retrospective.actionability.txt', sep='\t', index=False)

In [28]:
# Germline variants whose score_bin is not 'Biologically Relevant'; for the missense
# subset, tabulate the 'exac_common' column.
# NOTE(review): with redact_germline = True the germline rows were removed when the
# cohorts were loaded, so this tabulation is expected to be empty.
germline = almanac[almanac['feature_type'].eq('Germline Variant') & ~almanac['score_bin'].isin(['Biologically Relevant'])]
germline[germline['alteration_type'].eq('Missense')]['exac_common'].value_counts()

Series([], Name: exac_common, dtype: int64)

## Construct matrix of feature types x samples
0 = Wild type, biologically relevant only, or flagged as Investigate Actionability / Putatively Actionable by PHIAL alone  
1 = Investigate Actionability or Putatively Actionable in Molecular Oncology Almanac only  
2 = Investigate Actionability or Putatively Actionable in both Molecular Oncology Almanac and PHIAL  

In [29]:
# Encode each row: 0 by default, 1 when MOAlmanac bins it as Putatively Actionable or
# Investigate Actionability, 2 when PHIAL does so as well.
actionable_bins = ['Putatively Actionable', 'Investigate Actionability']
idx_almanac = df['almanac_bin'].fillna('').isin(actionable_bins)
idx_phial = df['phial_bin'].fillna('').isin(actionable_bins)

df['value'] = 0
df.loc[idx_almanac, 'value'] = 1
df.loc[idx_almanac & idx_phial, 'value'] = 2

In [30]:
# Exact-value rename. Unlike the substring-based .str.replace, re-running this cell
# cannot turn 'Germline variant' into 'Germline variant variant'.
df['feature_type'] = df['feature_type'].replace({'Germline': 'Germline variant'})

def pivot_features_table(dataframe, cohort):
    """Build a feature type x patient matrix of the highest 'value' per cell for one cohort.

    Args:
        dataframe: table with 'cohort', 'patient_id', 'feature_type' and 'value' columns.
        cohort: cohort label to keep (compared against the 'cohort' column).

    Returns:
        DataFrame indexed by feature type in a fixed display order, with one
        column per patient. Feature type / patient pairs without a row are 0;
        feature types absent from the cohort are all-NaN rows. Patient columns
        are sorted in descending order of their values, comparing feature
        types in display order.
    """
    display_order = ['Somatic variant', 'Somatic copy number', 'Rearrangement', 'Germline variant', 'Aneuploidy', 'Tumor mutational burden', 'Mutational signature']

    in_cohort = dataframe['cohort'].eq(cohort)
    cohort_rows = dataframe.loc[in_cohort, ['patient_id', 'feature_type', 'value']]
    # Sorting descending first means keep='first' retains the highest value per pair.
    strongest = (
        cohort_rows
        .sort_values('value', ascending=False)
        .drop_duplicates(['patient_id', 'feature_type'], keep='first')
    )
    matrix = strongest.pivot_table(columns='patient_id', index='feature_type', values='value', fill_value=0)
    matrix = matrix.reindex(display_order)

    by_patient = matrix.T
    ordered = by_patient.sort_values(by=matrix.index.tolist(), axis=0, ascending=False)
    return ordered.T

# One feature type x patient matrix per cohort label ('MEL' and 'SU2C').
vanallen_pivoted = pivot_features_table(df, 'MEL')
robinson_pivoted = pivot_features_table(df, 'SU2C')

In [31]:
# Write both cohort matrices as tab-separated files (the feature type index is included).
vanallen_pivoted.to_csv('retrospective.pivoted.vanallen.txt', sep='\t')
robinson_pivoted.to_csv('retrospective.pivoted.robinson.txt', sep='\t')

## Fraction of samples with therapeutic sensitivity by evidence

In [32]:
def return_max_per_patient(dataframe, samples):
    """Return the strongest sensitivity evidence label observed for each sample.

    Args:
        dataframe: table with 'patient_id' and numeric 'sensitive_score'
            columns, where scores run from 1.0 ('Inferential') to 6.0
            ('FDA-Approved').
        samples: patient identifiers to report on; used as the result index.

    Returns:
        Series named 'max_imp' indexed by ``samples``. Each entry is the label
        of the patient's maximum score, or 'No sensitive' for patients with no
        rows in ``dataframe``.

    Raises:
        KeyError: if a patient's maximum score is not one of the known levels.
    """
    inverse_map = {
        0.0: 'No sensitive',
        1.0: 'Inferential', 2.0: 'Preclinical', 3.0: 'Clinical evidence', 4.0: 'Clinical trial',
        5.0: 'Guideline', 6.0: 'FDA-Approved'}
    series = pd.Series(inverse_map[0.0], index=samples, name='max_imp')
    # Group by the bare column name rather than a one-element list: with a list, pandas >= 2.0
    # yields 1-tuples as group keys, and .loc would then be addressed by a tuple.
    for patient_id, scores in dataframe.groupby('patient_id')['sensitive_score']:
        series.loc[patient_id] = inverse_map[scores.max()]
    return series

# Numeric rank of each evidence level; a higher number means stronger evidence.
values_map = {'Inferential': 1.0,
              'Preclinical': 2.0,
              'Clinical evidence': 3.0,
              'Clinical trial': 4.0,
              'Guideline': 5.0,
              'FDA-Approved': 6.0}

# Rows that carry a sensitivity annotation, with that annotation converted to its rank.
sensitive = df[df['sensitive_predictive_implication'].notnull()].reset_index(drop=True)
sensitive['sensitive_score'] = sensitive['sensitive_predictive_implication'].replace(values_map)

robinson_samples = df[df['cohort'].eq('SU2C')]['patient_id'].drop_duplicates().tolist()
vanallen_samples = df[df['cohort'].eq('MEL')]['patient_id'].drop_duplicates().tolist()

robinson_max = return_max_per_patient(sensitive[sensitive['cohort'].eq('SU2C')], robinson_samples)
vanallen_max = return_max_per_patient(sensitive[sensitive['cohort'].eq('MEL')], vanallen_samples)

evidence_order = ['FDA-Approved', 'Guideline',
                  'Clinical trial', 'Clinical evidence',
                  'Preclinical', 'Inferential']

labels = ['FDA', 'Guideline', 'Clinical trial',
          'Clinical evidence', 'Preclinical', 'Inferential',
          'No event associated with therapeutic sensitivity']


def _count_max_evidence(max_per_patient, samples):
    """Return (counts, whole-percent strings) per evidence level plus a final 'WT' row."""
    counts = max_per_patient.value_counts().reindex(evidence_order).fillna(0.0)
    # 'WT' holds the samples left over after every evidence level has been counted.
    counts['WT'] = len(samples) - counts.astype(int).sum()
    # round(x, 2) * 100 can land just below an integer (0.29 * 100 == 28.999999999999996),
    # so round again before the integer cast instead of letting astype(int) truncate to 28.
    percents = (round(counts / len(samples), 2) * 100).round().astype(int).astype(str)
    return counts, percents


def _build_counts_table(counts, percents):
    """Combine counts and percents into the evidence / counts / fraction / label table."""
    table = pd.concat([counts, percents], axis=1).reset_index()
    table.columns = ['evidence', 'counts', 'fraction']
    table['fraction'] = table['fraction'].astype(int).divide(100)
    table['label'] = labels
    # The rounded fractions need not sum to exactly 1. For display, the final row absorbs
    # the remainder. It is computed from the data rather than hard-coded, so it stays
    # correct when the inputs change.
    table.loc[6, 'fraction'] = max(0.0, round(1 - table.loc[:5, 'fraction'].sum(), 2))
    return table


robinson_max_vc, robinson_max_vc_fract = _count_max_evidence(robinson_max, robinson_samples)
vanallen_max_vc, vanallen_max_vc_fract = _count_max_evidence(vanallen_max, vanallen_samples)

vanallen_counts_sens = _build_counts_table(vanallen_max_vc, vanallen_max_vc_fract)
robinson_counts_sens = _build_counts_table(robinson_max_vc, robinson_max_vc_fract)

vanallen_counts_sens.to_csv('retrospective.sensitivity-by-evidence.vanallen.txt', sep='\t', index=False)
robinson_counts_sens.to_csv('retrospective.sensitivity-by-evidence.robinson.txt', sep='\t', index=False)

## Count number of features called between PHIAL/TARGET and MOAlmanac for MEL and Prostate
Per feature type, we count the number of events per cohort and per methodology

In [33]:
feature_types = ['Somatic variant', 'Somatic copy number', 'Rearrangement', 'Germline variant',
                 'Tumor mutational burden', 'Mutational signature', 'Aneuploidy']
index_values = ['PHIAL/TARGET', 'Molecular Oncology Almanac']

idx_mel = df['cohort'].eq('MEL')
idx_su2c = df['cohort'].eq('SU2C')
idx_almanac = df['almanac_bin'].fillna('').isin(['Putatively Actionable', 'Investigate Actionability'])
idx_phial = df['phial_bin'].fillna('').isin(['Putatively Actionable', 'Investigate Actionability'])


def _count_feature_types(idx_cohort):
    """Count actionable rows per feature type for one cohort, one table row per method."""
    counts = pd.DataFrame(0, columns=feature_types, index=index_values).T
    counts.loc[feature_types, 'PHIAL/TARGET'] = (
        df[idx_cohort & idx_phial]['feature_type'].value_counts()
    )
    counts.loc[feature_types, 'Molecular Oncology Almanac'] = (
        df[idx_cohort & idx_almanac]['feature_type'].value_counts()
    )
    # Feature types with no matching rows come back missing from value_counts; report them as 0.
    counts = counts.fillna(0)
    return counts.T


counts_mel = _count_feature_types(idx_mel)
counts_su2c = _count_feature_types(idx_su2c)

counts_mel.to_csv('retrospective.feature-type-counts.vanallen.txt', sep='\t', index_label='method')
counts_su2c.to_csv('retrospective.feature-type-counts.robinson.txt', sep='\t', index_label='method')


In [34]:
counts_mel

Unnamed: 0,Somatic variant,Somatic copy number,Rearrangement,Germline variant,Tumor mutational burden,Mutational signature,Aneuploidy
PHIAL/TARGET,167.0,98.0,0.0,0.0,0.0,0.0,0.0
Molecular Oncology Almanac,584.0,138.0,19.0,0.0,43.0,5.0,62.0


In [35]:
counts_su2c

Unnamed: 0,Somatic variant,Somatic copy number,Rearrangement,Germline variant,Tumor mutational burden,Mutational signature,Aneuploidy
PHIAL/TARGET,108.0,214.0,0.0,0.0,0.0,0.0,0.0
Molecular Oncology Almanac,226.0,227.0,108.0,0.0,4.0,35.0,75.0
