# Calculations for retrospective cohorts
This notebook performs the calculations that are explicitly mentioned in, or related to, the paper's section titled "Evaluating expanded molecular profiling and actionability in two retrospective cohorts".

In [1]:
import pandas as pd
import glob as glob

# Toggle: when True, rows whose feature_type is 'Germline' are removed from df right after loading.
redact_germline = True

## Load MOAlmanac outputs

In [2]:
# Read the tab-separated actionability table; optionally strip germline rows.
df = pd.read_csv('retrospective.actionability.txt', sep='\t')
if redact_germline:
    is_germline = df['feature_type'] == 'Germline'
    df = df.loc[~is_germline]
df.head()

Unnamed: 0,patient_id,feature_string,almanac_bin,phial_bin,sum,cohort,sensitive_predictive_implication,resistance_predictive_implication,prognostic_predictive_implication,clinvar,feature_type,feature_str_simple
0,MEL-IPI_Pat04,CDKN2A/B.Deletion,Putatively Actionable,Investigate Actionability,2,MEL,Preclinical,,,,Somatic copy number,CDKN2A/B Del
1,MEL-IPI_Pat07,CDKN2A/B.Deletion,Putatively Actionable,,1,MEL,Preclinical,,,,Somatic copy number,CDKN2A/B Del
2,MEL-IPI_Pat100,CDKN2A/B.Deletion,Putatively Actionable,Investigate Actionability,2,MEL,Preclinical,,,,Somatic copy number,CDKN2A/B Del
3,MEL-IPI_Pat106,CDKN2A/B.Deletion,Putatively Actionable,Investigate Actionability,2,MEL,Preclinical,,,,Somatic copy number,CDKN2A/B Del
4,MEL-IPI_Pat119,CDKN2A/B.Deletion,Putatively Actionable,Investigate Actionability,2,MEL,Preclinical,,,,Somatic copy number,CDKN2A/B Del


In [3]:
# Number of events per MOAlmanac bin (events without a bin are not counted).
df['almanac_bin'].value_counts()

Biologically Relevant        1104
Investigate Actionability    1099
Putatively Actionable         427
Name: almanac_bin, dtype: int64

In [4]:
# Number of events per PHIAL bin (events without a bin are not counted).
df['phial_bin'].value_counts()

Biologically Relevant        834
Investigate Actionability    514
Putatively Actionable         73
Name: phial_bin, dtype: int64

## How many patients had > 0 events

In [5]:
# Denominators for the per-cohort fractions printed below.
# NOTE(review): presumably the total number of patients in each cohort — confirm.
N_PATIENTS_MEL = 110
N_PATIENTS_SU2C = 150

actionable_bins = ['Putatively Actionable', 'Investigate Actionability']

# Row masks over df: binned as actionable by each tool, and cohort membership.
idx_almanac = df['almanac_bin'].fillna('').isin(actionable_bins)
idx_phial = df['phial_bin'].fillna('').isin(actionable_bins)
idx_mel = df['cohort'].eq('MEL')
idx_su2c = df['cohort'].eq('SU2C')


def count_patients(mask):
    """Return the number of distinct patient_id values among the rows of df selected by mask."""
    return df.loc[mask, 'patient_id'].drop_duplicates().shape[0]


n_almanac_mel = count_patients(idx_almanac & idx_mel)
n_almanac_su2c = count_patients(idx_almanac & idx_su2c)
n_phial_mel = count_patients(idx_phial & idx_mel)
n_phial_su2c = count_patients(idx_phial & idx_su2c)

print('Almanac')
print('Mel', n_almanac_mel, n_almanac_mel / N_PATIENTS_MEL)
print('SU2C', n_almanac_su2c, n_almanac_su2c / N_PATIENTS_SU2C)
print('')
print('PHIAL')
print('Mel', n_phial_mel, n_phial_mel / N_PATIENTS_MEL)
print('SU2C', n_phial_su2c, n_phial_su2c / N_PATIENTS_SU2C)

Almanac
Mel 109 0.990909090909091
SU2C 148 0.9866666666666667

PHIAL
Mel 101 0.9181818181818182
SU2C 131 0.8733333333333333


## How many somatic variants and copy number alterations were called

In [6]:
# Masks over df: feature type (variant / copy number) and MOAlmanac bin.
actionable_bins = ['Putatively Actionable', 'Investigate Actionability']
idx_v = df['feature_type'] == 'Somatic variant'
idx_cn = df['feature_type'] == 'Somatic copy number'
idx_a = df['almanac_bin'].fillna('').isin(actionable_bins)
idx_b = df['almanac_bin'] == 'Biologically Relevant'

# Events MOAlmanac binned as actionable that are somatic variants or copy number alterations.
idx = idx_a & (idx_v | idx_cn)
tmp = df.loc[idx]
print(tmp.shape)
tmp.head()

(1175, 12)


Unnamed: 0,patient_id,feature_string,almanac_bin,phial_bin,sum,cohort,sensitive_predictive_implication,resistance_predictive_implication,prognostic_predictive_implication,clinvar,feature_type,feature_str_simple
0,MEL-IPI_Pat04,CDKN2A/B.Deletion,Putatively Actionable,Investigate Actionability,2,MEL,Preclinical,,,,Somatic copy number,CDKN2A/B Del
1,MEL-IPI_Pat07,CDKN2A/B.Deletion,Putatively Actionable,,1,MEL,Preclinical,,,,Somatic copy number,CDKN2A/B Del
2,MEL-IPI_Pat100,CDKN2A/B.Deletion,Putatively Actionable,Investigate Actionability,2,MEL,Preclinical,,,,Somatic copy number,CDKN2A/B Del
3,MEL-IPI_Pat106,CDKN2A/B.Deletion,Putatively Actionable,Investigate Actionability,2,MEL,Preclinical,,,,Somatic copy number,CDKN2A/B Del
4,MEL-IPI_Pat119,CDKN2A/B.Deletion,Putatively Actionable,Investigate Actionability,2,MEL,Preclinical,,,,Somatic copy number,CDKN2A/B Del


In [7]:
# PHIAL's actionable bins among the selected events: print the total, then show the breakdown.
phial_pa_ia = tmp['phial_bin'].value_counts().loc[['Putatively Actionable', 'Investigate Actionability']]
print(phial_pa_ia.sum())
phial_pa_ia

423


Putatively Actionable         73
Investigate Actionability    350
Name: phial_bin, dtype: int64

In [8]:
# Counts used by the summary sentence in the next cell.
phial_counts = tmp['phial_bin'].value_counts()
n_almanac_pa_ia = tmp['almanac_bin'].value_counts().sum()
n_almanac_phial_pa_ia = phial_counts.loc[['Putatively Actionable', 'Investigate Actionability']].sum()
n_almanac_phial_br = phial_counts.loc['Biologically Relevant']
# Percentage of the MOAlmanac-actionable events that PHIAL also binned as actionable.
fraction_almanac_phial_pa_ia = 100 * (n_almanac_phial_pa_ia / n_almanac_pa_ia)

In [9]:
# Render the counts computed in the previous cell as a sentence (displayed as the cell's output).
f"Of the {n_almanac_pa_ia} somatic variants and copy number alterations noted by MOAlmanac, \
{n_almanac_phial_pa_ia} ({fraction_almanac_phial_pa_ia}%) were highlighted for putative actionability or \
investigate actionability by PHIAL, with an additional {n_almanac_phial_br} noted by PHIAL for biological relevance."

'Of the 1175 somatic variants and copy number alterations noted by MOAlmanac, 423 (36.0%) were highlighted for putative actionability or investigate actionability by PHIAL, with an additional 369 noted by PHIAL for biological relevance.'

## Restricting considered features
Features considered: Somatic variants, copy number alterations  
Evidence considered: FDA-approved, guideline, clinical trial, or clinical evidence  
Matches considered: First putatively actionable and then also investigate actionability

In [10]:
# Putatively actionable somatic variants / copy number alterations for which at least one
# assertion type (sensitivity, resistance, prognosis) has one of the listed evidence levels.
feature_types = ['Somatic variant', 'Somatic copy number']
evidence_types = ['FDA-Approved', 'Guideline', 'Clinical trial', 'Clinical evidence']

idx_match = df['almanac_bin'].fillna('').eq('Putatively Actionable')
idx_features = df['feature_type'].isin(feature_types)
idx_evidence_s, idx_evidence_r, idx_evidence_p = (
    df[f'{assertion}_predictive_implication'].isin(evidence_types)
    for assertion in ('sensitive', 'resistance', 'prognostic')
)
idx_evidence = idx_evidence_s | idx_evidence_r | idx_evidence_p

tmp = df.loc[idx_match & idx_features & idx_evidence]
print(tmp.shape)
tmp

(312, 12)


Unnamed: 0,patient_id,feature_string,almanac_bin,phial_bin,sum,cohort,sensitive_predictive_implication,resistance_predictive_implication,prognostic_predictive_implication,clinvar,feature_type,feature_str_simple
45,MEL-IPI_Pat02,BRAF.p.V600E,Putatively Actionable,Putatively Actionable,2,MEL,FDA-Approved,Guideline,Guideline,,Somatic variant,BRAF
60,MEL-IPI_Pat03,BRAF.p.V600E,Putatively Actionable,Putatively Actionable,2,MEL,FDA-Approved,Guideline,Guideline,,Somatic variant,BRAF
95,MEL-IPI_Pat07,BRAF.p.V600E,Putatively Actionable,Putatively Actionable,2,MEL,FDA-Approved,Guideline,Guideline,,Somatic variant,BRAF
109,MEL-IPI_Pat08,BRAF.p.V600E,Putatively Actionable,Putatively Actionable,2,MEL,FDA-Approved,Guideline,Guideline,,Somatic variant,BRAF
116,MEL-IPI_Pat08,PTEN.Deletion,Putatively Actionable,Investigate Actionability,2,MEL,Preclinical,Clinical evidence,,,Somatic copy number,PTEN Del
...,...,...,...,...,...,...,...,...,...,...,...,...
3011,TP_2061,PIK3CA.Amplification,Putatively Actionable,Investigate Actionability,2,SU2C,Clinical trial,,,,Somatic copy number,PIK3CA Amp
3012,TP_2061,PTEN.Deletion,Putatively Actionable,Investigate Actionability,2,SU2C,Preclinical,Clinical evidence,,,Somatic copy number,PTEN Del
3013,TP_2061,RB1.Deletion,Putatively Actionable,Investigate Actionability,2,SU2C,,,Clinical evidence,,Somatic copy number,RB1 Del
3017,TP_2061,TP53.Deletion,Putatively Actionable,Investigate Actionability,2,SU2C,,,Clinical evidence,,Somatic copy number,TP53 Del


In [11]:
# Distinct patients with at least one event in the selection above; later cells reuse this as a baseline set.
pa_patients = tmp['patient_id'].drop_duplicates()

In [12]:
# Event and patient tallies for the current selection, overall and per cohort.
n_events = len(tmp)
n_patients = len(pa_patients)
is_mel = tmp['cohort'] == 'MEL'
is_su2c = tmp['cohort'] == 'SU2C'
n_patients_mel = len(tmp.loc[is_mel, 'patient_id'].unique())
n_patients_su2c = len(tmp.loc[is_su2c, 'patient_id'].unique())
# Events that PHIAL placed in any bin, and their share of all selected events (percent).
n_phial = len(tmp['phial_bin'].dropna())
frac = round(100 * (n_phial / n_events), 2)
tmp['phial_bin'].value_counts()

Investigate Actionability    161
Putatively Actionable         57
Name: phial_bin, dtype: int64

In [13]:
# Render the tallies from the previous cell as a sentence (displayed as the cell's output).
f"MOAlmanac identified {n_events} such putatively actionable events from {n_patients} ({n_patients_mel} melanoma, \
{n_patients_su2c} mCRPC), {n_phial} ({frac}%) of which were flagged by PHIAL for clinical relevance"

'MOAlmanac identified 312 such putatively actionable events from 191 (73 melanoma, 118 mCRPC), 218 (69.87%) of which were flagged by PHIAL for clinical relevance'

In [14]:
# For each cohort, print its name, its five most frequent features, and a blank separator line.
for cohort_label, cohort_events in tmp.groupby('cohort'):
    top_features = cohort_events['feature_string'].value_counts().head()
    print(cohort_label, top_features, '', sep='\n')

MEL
BRAF.p.V600E         39
PTEN.Deletion         9
MET.Amplification     9
NRAS.p.Q61R           8
NRAS.p.Q61L           7
Name: feature_string, dtype: int64

SU2C
AR.Amplification     82
PTEN.Deletion        40
RB1.Deletion         21
MYC.Amplification    14
TP53.Deletion        10
Name: feature_string, dtype: int64



In [15]:
# Same evidence filter as for the putatively actionable selection, but also admitting
# events binned as Investigate Actionability.
feature_types = ['Somatic variant', 'Somatic copy number']
evidence_types = ['FDA-Approved', 'Guideline', 'Clinical trial', 'Clinical evidence']

implication_hits = df[[
    'sensitive_predictive_implication',
    'resistance_predictive_implication',
    'prognostic_predictive_implication',
]].isin(evidence_types)
idx_evidence_s = implication_hits['sensitive_predictive_implication']
idx_evidence_r = implication_hits['resistance_predictive_implication']
idx_evidence_p = implication_hits['prognostic_predictive_implication']

idx_match = df['almanac_bin'].fillna('').isin(['Putatively Actionable', 'Investigate Actionability'])
idx_features = df['feature_type'].isin(feature_types)

tmp = df.loc[idx_match & idx_features & implication_hits.any(axis=1)]
print(tmp.shape)
tmp

(943, 12)


Unnamed: 0,patient_id,feature_string,almanac_bin,phial_bin,sum,cohort,sensitive_predictive_implication,resistance_predictive_implication,prognostic_predictive_implication,clinvar,feature_type,feature_str_simple
45,MEL-IPI_Pat02,BRAF.p.V600E,Putatively Actionable,Putatively Actionable,2,MEL,FDA-Approved,Guideline,Guideline,,Somatic variant,BRAF
52,MEL-IPI_Pat02,MSH2.p.D887N,Investigate Actionability,Biologically Relevant,2,MEL,Clinical evidence,,,,Somatic variant,MSH2
55,MEL-IPI_Pat02,STAG2.p.F467I,Investigate Actionability,,1,MEL,,,Clinical evidence,,Somatic variant,STAG2
58,MEL-IPI_Pat02,ZRSR2.p.N261I,Investigate Actionability,,1,MEL,,,Clinical evidence,,Somatic variant,ZRSR2
60,MEL-IPI_Pat03,BRAF.p.V600E,Putatively Actionable,Putatively Actionable,2,MEL,FDA-Approved,Guideline,Guideline,,Somatic variant,BRAF
...,...,...,...,...,...,...,...,...,...,...,...,...
3013,TP_2061,RB1.Deletion,Putatively Actionable,Investigate Actionability,2,SU2C,,,Clinical evidence,,Somatic copy number,RB1 Del
3017,TP_2061,TP53.Deletion,Putatively Actionable,Investigate Actionability,2,SU2C,,,Clinical evidence,,Somatic copy number,TP53 Del
3020,TP_2064,AR.Amplification,Putatively Actionable,Investigate Actionability,2,SU2C,Guideline,Guideline,,,Somatic copy number,AR Amp
3022,TP_2064,BLM.p.V54I,Investigate Actionability,,1,SU2C,,,Clinical evidence,,Somatic variant,BLM


In [16]:
# Patients present in the broader (PA + IA) selection but absent from pa_patients.
ia_patients = tmp['patient_id'].drop_duplicates()
ia_index = pd.Index(ia_patients)
pa_index = pd.Index(pa_patients)
diff_patients = ia_index.difference(pa_index)
len(diff_patients)

54

In [17]:
# Share of all patients that only gain a qualifying event once Investigate Actionability is included.
# NOTE(review): 260 presumably is the combined cohort size, i.e. the 110 (MEL) + 150 (SU2C)
# denominators used for the per-cohort fractions earlier in this notebook — confirm.
N_PATIENTS_TOTAL = 110 + 150
round(len(diff_patients) / N_PATIENTS_TOTAL, 3)

0.208

In [18]:
# Feature counts among the selected events that MOAlmanac binned as Investigate Actionability.
tmp[tmp['almanac_bin'].eq('Investigate Actionability')]['feature_string'].value_counts()

ESR1.Deletion                10
NRAS.p.Q61K                  10
BRAF.p.D594Y                  5
BRAF.p.E611*                  5
TP53.p.R175H                  4
                             ..
PTPN11.p.P312LSplice_Site     1
BRAF.p.V197A                  1
BLM.p.G891E                   1
ERCC2.p.R253C                 1
TP53.p.M246V                  1
Name: feature_string, Length: 562, dtype: int64

In [19]:
# Number of distinct patients carrying NRAS.p.Q61K in the current selection.
tmp[tmp['feature_string'].eq('NRAS.p.Q61K')]['patient_id'].drop_duplicates().shape[0]

10

In [20]:
# Of the patients in diff_patients, how many have a selected event that PHIAL also binned as actionable.
# isin() is already False where phial_bin is missing, so no separate not-null mask is needed.
phial_actionable = tmp['phial_bin'].isin(['Putatively Actionable', 'Investigate Actionability'])
in_diff = tmp['patient_id'].isin(diff_patients)
tmp.loc[in_diff & phial_actionable, 'patient_id'].drop_duplicates().shape

(31,)

## How many events did PHIAL call that Almanac didn't?

In [21]:
target = pd.read_excel('../knowledge-bases/target/almanac-comparison.xlsx')

# Events PHIAL binned as actionable for which MOAlmanac assigned no bin at all.
idx_phial_only = (
    df['phial_bin'].isin(['Putatively Actionable', 'Investigate Actionability'])
    & df['almanac_bin'].isnull()
)
# .copy() so that adding a column below writes to an independent frame rather than a slice of df
# (avoids pandas' SettingWithCopyWarning).
tmp = df[idx_phial_only].copy()

# Gene symbol = text before the first '.' of the feature string (e.g. 'BRAF.p.V600E' -> 'BRAF').
tmp['gene'] = tmp['feature_string'].str.split('.').str[0]

# NOTE(review): isin() is handed a whole DataFrame here. pandas iterates a DataFrame by its
# column labels, so the genes are likely being compared against target's column names rather
# than its gene values, which would make every row False. Select target's gene column
# explicitly once its name is confirmed.
tmp['gene'].isin(target[target['in moalmanac'].eq(0)]).value_counts()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  This is separate from the ipykernel package so we can avoid doing imports until


False    113
Name: gene, dtype: int64

In [22]:
# PHIAL bins of the events selected in the previous cell (PHIAL-actionable, no MOAlmanac bin).
tmp['phial_bin'].value_counts()

Investigate Actionability    113
Name: phial_bin, dtype: int64

## Expanding evidence
Features considered: Somatic variants, copy number alterations  
Evidence considered: Preclinical  
Matches considered: Putatively actionable

In [23]:
# Putatively actionable somatic variants / copy number alterations with Preclinical evidence
# on at least one assertion type.
feature_types = ['Somatic variant', 'Somatic copy number']
evidence_types = ['Preclinical']

idx_match = df['almanac_bin'].fillna('').eq('Putatively Actionable')
idx_features = df['feature_type'].isin(feature_types)
idx_evidence_s, idx_evidence_r, idx_evidence_p = (
    df[column].isin(evidence_types)
    for column in (
        'sensitive_predictive_implication',
        'resistance_predictive_implication',
        'prognostic_predictive_implication',
    )
)
has_evidence = idx_evidence_s | idx_evidence_r | idx_evidence_p

tmp = df.loc[idx_match & idx_features & has_evidence]
print(tmp.shape)
tmp.head()

(120, 12)


Unnamed: 0,patient_id,feature_string,almanac_bin,phial_bin,sum,cohort,sensitive_predictive_implication,resistance_predictive_implication,prognostic_predictive_implication,clinvar,feature_type,feature_str_simple
0,MEL-IPI_Pat04,CDKN2A/B.Deletion,Putatively Actionable,Investigate Actionability,2,MEL,Preclinical,,,,Somatic copy number,CDKN2A/B Del
1,MEL-IPI_Pat07,CDKN2A/B.Deletion,Putatively Actionable,,1,MEL,Preclinical,,,,Somatic copy number,CDKN2A/B Del
2,MEL-IPI_Pat100,CDKN2A/B.Deletion,Putatively Actionable,Investigate Actionability,2,MEL,Preclinical,,,,Somatic copy number,CDKN2A/B Del
3,MEL-IPI_Pat106,CDKN2A/B.Deletion,Putatively Actionable,Investigate Actionability,2,MEL,Preclinical,,,,Somatic copy number,CDKN2A/B Del
4,MEL-IPI_Pat119,CDKN2A/B.Deletion,Putatively Actionable,Investigate Actionability,2,MEL,Preclinical,,,,Somatic copy number,CDKN2A/B Del


In [24]:
# Tallies for the current selection: events, distinct patients, and events PHIAL placed in any bin.
n_events = len(tmp)
n_patients = len(tmp['patient_id'].unique())
n_phial = len(tmp['phial_bin'].dropna())

In [25]:
# Render the tallies from the previous cell as a sentence; the percentage is n_phial / n_events.
f"...with {n_events} putatively actionable events from {n_patients} patients, \
{n_phial}({round(100*(n_phial / n_events), 2)}%) were also highlighted by PHIAL."

'...with 120 putatively actionable events from 107 patients, 86(71.67%) were also highlighted by PHIAL.'

Features considered: Somatic variants, copy number alterations  
Evidence considered: Inferential  
Matches considered: Putatively actionable

In [26]:
# Putatively actionable somatic variants / copy number alterations with Inferential evidence
# on at least one assertion type.
feature_types = ['Somatic variant', 'Somatic copy number']
evidence_types = ['Inferential']

implication_hits = df[[
    'sensitive_predictive_implication',
    'resistance_predictive_implication',
    'prognostic_predictive_implication',
]].isin(evidence_types)
idx_evidence_s = implication_hits['sensitive_predictive_implication']
idx_evidence_r = implication_hits['resistance_predictive_implication']
idx_evidence_p = implication_hits['prognostic_predictive_implication']

idx_match = df['almanac_bin'].fillna('').isin(['Putatively Actionable'])
idx_features = df['feature_type'].isin(feature_types)

tmp = df.loc[idx_match & idx_features & implication_hits.any(axis=1)]
print(tmp.shape)
tmp.head()

(19, 12)


Unnamed: 0,patient_id,feature_string,almanac_bin,phial_bin,sum,cohort,sensitive_predictive_implication,resistance_predictive_implication,prognostic_predictive_implication,clinvar,feature_type,feature_str_simple
1377,MEL-IPI_Pat54,CCND1.Amplification,Putatively Actionable,,1,MEL,Inferential,,Clinical evidence,,Somatic copy number,CCND1 Amp
1394,MEL-IPI_Pat55,CCND1.Amplification,Putatively Actionable,Investigate Actionability,2,MEL,Inferential,,Clinical evidence,,Somatic copy number,CCND1 Amp
1403,MEL-IPI_Pat56,CCND1.Amplification,Putatively Actionable,Investigate Actionability,2,MEL,Inferential,,Clinical evidence,,Somatic copy number,CCND1 Amp
1533,MEL-IPI_Pat64,CCND1.Amplification,Putatively Actionable,Investigate Actionability,2,MEL,Inferential,,Clinical evidence,,Somatic copy number,CCND1 Amp
1586,MEL-IPI_Pat74,CCND1.Amplification,Putatively Actionable,Investigate Actionability,2,MEL,Inferential,,Clinical evidence,,Somatic copy number,CCND1 Amp


In [27]:
# Which features make up the current selection, by frequency.
tmp['feature_string'].value_counts()

CCND1.Amplification    15
CDKN2C.Deletion         3
CD274.Amplification     1
Name: feature_string, dtype: int64

In [28]:
# Tallies for the current selection, rendered as a sentence (displayed as the cell's output).
n_events = tmp.shape[0]
n_patients = tmp['patient_id'].drop_duplicates().shape[0]
# Drug name spelling corrected: "Palbociclib".
f"Inferential evidence {n_events} additional putatively actionable copy number alterations from {n_patients}, \
most prominently CCND1 amplifications for reported sensitivity to Palbociclib."

'Inferential evidence 19 additional putatively actionable copy number alterations from 19, most prominently CCND1 amplifications for reported sensitivity to Palbocicilib.'

## In total for somatic variants and copy number alterations
Features considered: Somatic variants, copy number alterations  
Evidence considered: FDA-approved, guideline, clinical trial, clinical evidence, preclinical, inferential  
Matches considered: Putatively actionable, Investigate actionability

In [29]:
# Somatic variants / copy number alterations binned as Putatively Actionable or Investigate
# Actionability, with any of the listed evidence levels on at least one assertion type.
feature_types = ['Somatic variant', 'Somatic copy number']
evidence_types = ['FDA-Approved', 'Guideline', 'Clinical trial', 'Clinical evidence', 'Preclinical', 'Inferential']

idx_match = df['almanac_bin'].fillna('').isin(['Putatively Actionable', 'Investigate Actionability'])
idx_features = df['feature_type'].isin(feature_types)
idx_evidence_s, idx_evidence_r, idx_evidence_p = (
    df[f'{assertion}_predictive_implication'].isin(evidence_types)
    for assertion in ('sensitive', 'resistance', 'prognostic')
)
idx_any_evidence = idx_evidence_s | idx_evidence_r | idx_evidence_p

tmp = df.loc[idx_match & idx_features & idx_any_evidence]
print(tmp.shape)
tmp

(1175, 12)


Unnamed: 0,patient_id,feature_string,almanac_bin,phial_bin,sum,cohort,sensitive_predictive_implication,resistance_predictive_implication,prognostic_predictive_implication,clinvar,feature_type,feature_str_simple
0,MEL-IPI_Pat04,CDKN2A/B.Deletion,Putatively Actionable,Investigate Actionability,2,MEL,Preclinical,,,,Somatic copy number,CDKN2A/B Del
1,MEL-IPI_Pat07,CDKN2A/B.Deletion,Putatively Actionable,,1,MEL,Preclinical,,,,Somatic copy number,CDKN2A/B Del
2,MEL-IPI_Pat100,CDKN2A/B.Deletion,Putatively Actionable,Investigate Actionability,2,MEL,Preclinical,,,,Somatic copy number,CDKN2A/B Del
3,MEL-IPI_Pat106,CDKN2A/B.Deletion,Putatively Actionable,Investigate Actionability,2,MEL,Preclinical,,,,Somatic copy number,CDKN2A/B Del
4,MEL-IPI_Pat119,CDKN2A/B.Deletion,Putatively Actionable,Investigate Actionability,2,MEL,Preclinical,,,,Somatic copy number,CDKN2A/B Del
...,...,...,...,...,...,...,...,...,...,...,...,...
3020,TP_2064,AR.Amplification,Putatively Actionable,Investigate Actionability,2,SU2C,Guideline,Guideline,,,Somatic copy number,AR Amp
3021,TP_2064,AURKA.Amplification,Putatively Actionable,,1,SU2C,Preclinical,,Preclinical,,Somatic copy number,AURKA Amp
3022,TP_2064,BLM.p.V54I,Investigate Actionability,,1,SU2C,,,Clinical evidence,,Somatic variant,BLM
3026,TP_2064,MPL.p.A134T,Investigate Actionability,Biologically Relevant,2,SU2C,Preclinical,,,,Somatic variant,MPL


In [30]:
# Distinct patients per cohort in the current selection.
patients_per_cohort = {
    cohort_label: len(cohort_events['patient_id'].unique())
    for cohort_label, cohort_events in tmp.groupby('cohort')
}
for cohort_label, n_cohort_patients in patients_per_cohort.items():
    print(cohort_label, n_cohort_patients)

MEL 109
SU2C 140


In [31]:
# PHIAL bin counts among the selected events; events PHIAL did not bin are not counted.
tmp['phial_bin'].value_counts()

Biologically Relevant        369
Investigate Actionability    350
Putatively Actionable         73
Name: phial_bin, dtype: int64

In [32]:
# PHIAL bin counts as a fraction of ALL selected events (including those PHIAL did not bin).
tmp['phial_bin'].value_counts() / tmp.shape[0]

Biologically Relevant        0.314043
Investigate Actionability    0.297872
Putatively Actionable        0.062128
Name: phial_bin, dtype: float64

## Validation sequencing

In [33]:
# Collect the per-sample *.actionable.txt outputs of both studies.
# glob returns matches in arbitrary order; sort so the concatenated row order (and therefore
# the integer index after ignore_index=True) is reproducible across machines and runs.
handles = sorted(
    glob.glob('2015-Robinson/data/*/*/*.actionable.txt')
    + glob.glob('2015-VanAllen/data/*/*/*.actionable.txt')
)
tmp = pd.concat([pd.read_csv(handle, sep='\t') for handle in handles], ignore_index=True)

# Keep somatic variants in an actionable score bin that have validation coverage recorded.
# Note: these files spell the type 'Somatic Variant' (capital V), unlike 'Somatic variant' in df.
idx_somatic = tmp['feature_type'].eq('Somatic Variant')
idx_bin = tmp['score_bin'].isin([
    'Putatively Actionable',
    'Investigate Actionability - High',
    'Investigate Actionability - Low',
])
idx_has_validation = tmp['validation_total_coverage'].notnull()
tmp = tmp[idx_somatic & idx_bin & idx_has_validation]
tmp

Unnamed: 0,score_bin,sensitive_predictive_implication,resistance_predictive_implication,prognostic_predictive_implication,feature_type,feature,alteration_type,alteration,tumor_f,total_coverage,...,prognostic_url,number_germline_mutations_in_gene,validation_total_coverage,validation_tumor_f,validation_detection_power,feature_display,preclinical_efficacy_observed,patient_id,tumor_sample_barcode,normal_sample_barcode
4,Investigate Actionability - High,,Clinical evidence,Clinical trial,Somatic Variant,TP53,Frameshift,p.P322fs,0.7905,296.0,...,https://doi.org/10.1200/JCO.2017.75.7310,1.0,0.0,0.0000,0.0000,TP53 p.P322fs (Frameshift),,SC_9054,SC_9054-Tumor,SC_9054-Normal
13,Investigate Actionability - High,,Clinical evidence,Clinical evidence,Somatic Variant,TP53,Missense,p.V173L,0.1545,123.0,...,https://www.nccn.org/professionals/physician_g...,1.0,65.0,0.3538,0.9848,TP53 p.V173L (Missense),,MO_1020,MO_1020-Tumor,MO_1020-Normal
20,Investigate Actionability - High,,Clinical evidence,Clinical trial,Somatic Variant,TP53,Splice Site,,0.6491,57.0,...,https://doi.org/10.1200/JCO.2017.75.7310,1.0,1.0,1.0000,0.0000,TP53 (Splice Site),,PROS10448-1115156,PROS10448-1115156-Tumor-SM-5EEBN,PROS10448-1115156-Normal-SM-4YS2E
25,Investigate Actionability - High,Preclinical,Clinical evidence,Clinical trial,Somatic Variant,PTEN,Frameshift,p.VL317fs,0.7667,30.0,...,https://doi.org/10.1200/JCO.2017.75.7310,,0.0,0.0000,0.0000,PTEN p.VL317fs (Frameshift),1.0,SC_9062,SC_9062-Tumor,SC_9062-Normal
36,Investigate Actionability - High,Preclinical,Clinical evidence,Clinical trial,Somatic Variant,PTEN,Nonsense,p.E235*,0.6811,185.0,...,https://doi.org/10.1200/JCO.2017.75.7310,,212.0,0.1792,1.0000,PTEN p.E235* (Nonsense),1.0,SC_9096,SC_9096-Tumor,SC_9096-Normal
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3585,Putatively Actionable,FDA-Approved,Guideline,Guideline,Somatic Variant,BRAF,Missense,p.V600E,0.4099,405.0,...,https://www.nccn.org/professionals/physician_g...,,106.0,0.6698,1.0000,BRAF p.V600E (Missense),1.0,MEL-IPI_Pat86,MEL-IPI_Pat86-Tumor-SM-4DK3C,MEL-IPI_Pat86-Normal-SM-53U59
3590,Investigate Actionability - High,FDA-Approved,Guideline,Guideline,Somatic Variant,BRAF,Missense,p.L597I,0.0126,396.0,...,https://www.nccn.org/professionals/physician_g...,,113.0,0.0000,0.1145,BRAF p.L597I (Missense),1.0,MEL-IPI_Pat86,MEL-IPI_Pat86-Tumor-SM-4DK3C,MEL-IPI_Pat86-Normal-SM-53U59
3591,Investigate Actionability - High,FDA-Approved,Guideline,Guideline,Somatic Variant,BRAF,Missense,p.H585N,0.0158,380.0,...,https://www.nccn.org/professionals/physician_g...,,116.0,0.0000,0.1849,BRAF p.H585N (Missense),1.0,MEL-IPI_Pat86,MEL-IPI_Pat86-Tumor-SM-4DK3C,MEL-IPI_Pat86-Normal-SM-53U59
3592,Investigate Actionability - Low,FDA-Approved,Guideline,Guideline,Somatic Variant,BRAF,Nonsense,p.E611*,0.0219,411.0,...,https://www.nccn.org/professionals/physician_g...,,122.0,0.0000,0.3431,BRAF p.E611* (Nonsense),1.0,MEL-IPI_Pat86,MEL-IPI_Pat86-Tumor-SM-4DK3C,MEL-IPI_Pat86-Normal-SM-53U59


In [34]:
# Shape of the subset whose validation detection power is at least 0.80.
print(tmp[tmp['validation_detection_power'].ge(0.80)].shape)

(190, 39)


In [35]:
# Per-feature counts among events with validation detection power >= 0.80.
tmp[tmp['validation_detection_power'].ge(0.80)]['feature_display'].value_counts()

BRAF p.V600E (Missense)     10
AR p.L702H (Missense)        6
NRAS p.Q61K (Missense)       5
AR p.T878A (Missense)        4
AR p.H875Y (Missense)        4
                            ..
AR p.D297Y (Missense)        1
TP53 p.P278A (Missense)      1
PTEN p.F241S (Missense)      1
NRAS p.Q61L (Missense)       1
PALB2 p.V928M (Missense)     1
Name: feature_display, Length: 154, dtype: int64

## Extended features
Features considered: Germline variants, fusions, mutational burden, mutational signatures, aneuploidy  
Evidence considered: any evidence level (no evidence filter is applied in the cells of this section)  
Matches considered: First putatively actionable and then also investigate actionability

In [36]:
# Events of the non-SNV/CNA feature types that MOAlmanac binned as actionable.
extended_feature_types = [
    'Germline',
    'Rearrangement',
    'Aneuploidy',
    'Tumor mutational burden',
    'Mutational signature',
]
idx_f = df['feature_type'].isin(extended_feature_types)
idx_a = df['almanac_bin'].fillna('').isin(['Putatively Actionable', 'Investigate Actionability'])

tmp = df.loc[idx_f & idx_a]
print(tmp.shape)
tmp.head()

(351, 12)


Unnamed: 0,patient_id,feature_string,almanac_bin,phial_bin,sum,cohort,sensitive_predictive_implication,resistance_predictive_implication,prognostic_predictive_implication,clinvar,feature_type,feature_str_simple
46,MEL-IPI_Pat02,COL1A1--CITED4,Investigate Actionability,,1,MEL,Guideline,,,,Rearrangement,COL1A1--CITED4
57,MEL-IPI_Pat02,Whole genome doubling,Investigate Actionability,,1,MEL,,,Inferential,,Aneuploidy,WGD
62,MEL-IPI_Pat03,COL1A1--MALAT1,Investigate Actionability,,1,MEL,Guideline,,,,Rearrangement,COL1A1--MALAT1
66,MEL-IPI_Pat03,High Mutational Burden,Investigate Actionability,,1,MEL,Clinical evidence,,,,Tumor mutational burden,TMB
73,MEL-IPI_Pat03,Whole genome doubling,Investigate Actionability,,1,MEL,,,Inferential,,Aneuploidy,WGD


In [37]:
# (rows, columns) of the extended-feature selection.
tmp.shape

(351, 12)

In [38]:
# Germline events whose ClinVar label is pathogenic or likely pathogenic.
# When redact_germline is True, germline rows were already removed from df at load time,
# so this selection is empty.
pathogenic_labels = ['Pathogenic', 'Pathogenic/Likely pathogenic', 'Likely pathogenic']
is_germline = tmp['feature_type'] == 'Germline'
is_pathogenic = tmp['clinvar'].isin(pathogenic_labels)
tmp_germline = tmp.loc[is_germline & is_pathogenic]
tmp_germline.shape

(0, 12)

In [39]:
# Display the (possibly empty) pathogenic germline selection.
tmp_germline

Unnamed: 0,patient_id,feature_string,almanac_bin,phial_bin,sum,cohort,sensitive_predictive_implication,resistance_predictive_implication,prognostic_predictive_implication,clinvar,feature_type,feature_str_simple


In [40]:
# Rearrangement events in the selection: table shape, then shape of the distinct-patient Series.
tmp_rearrangement = tmp.loc[tmp['feature_type'] == 'Rearrangement']
rearrangement_patients = tmp_rearrangement['patient_id'].drop_duplicates()
print(tmp_rearrangement.shape)
print(rearrangement_patients.shape)

(127, 12)
(82,)


In [41]:
# Patients with a TMPRSS2--ERG event who are not already in pa_patients.
is_erg_fusion = tmp['feature_string'] == 'TMPRSS2--ERG'
erg_patients = tmp.loc[is_erg_fusion, 'patient_id'].drop_duplicates()
erg_only = pd.Index(erg_patients).difference(pd.Index(pa_patients))
len(erg_only)

10

In [42]:
# Inspect the TMPRSS2--ERG rows of the extended-feature selection.
tmp[tmp['feature_string'].eq('TMPRSS2--ERG')]

Unnamed: 0,patient_id,feature_string,almanac_bin,phial_bin,sum,cohort,sensitive_predictive_implication,resistance_predictive_implication,prognostic_predictive_implication,clinvar,feature_type,feature_str_simple
1791,MO_1008,TMPRSS2--ERG,Putatively Actionable,,1,SU2C,Preclinical,,Clinical evidence,,Rearrangement,TMPRSS2--ERG
1898,MO_1015,TMPRSS2--ERG,Putatively Actionable,,1,SU2C,Preclinical,,Clinical evidence,,Rearrangement,TMPRSS2--ERG
1907,MO_1040,TMPRSS2--ERG,Putatively Actionable,,1,SU2C,Preclinical,,Clinical evidence,,Rearrangement,TMPRSS2--ERG
1917,MO_1071,TMPRSS2--ERG,Putatively Actionable,,1,SU2C,Preclinical,,Clinical evidence,,Rearrangement,TMPRSS2--ERG
1941,MO_1095,TMPRSS2--ERG,Putatively Actionable,,1,SU2C,Preclinical,,Clinical evidence,,Rearrangement,TMPRSS2--ERG
1960,MO_1114,TMPRSS2--ERG,Putatively Actionable,,1,SU2C,Preclinical,,Clinical evidence,,Rearrangement,TMPRSS2--ERG
1966,MO_1118,TMPRSS2--ERG,Putatively Actionable,,1,SU2C,Preclinical,,Clinical evidence,,Rearrangement,TMPRSS2--ERG
1975,MO_1124,TMPRSS2--ERG,Putatively Actionable,,1,SU2C,Preclinical,,Clinical evidence,,Rearrangement,TMPRSS2--ERG
1999,MO_1162,TMPRSS2--ERG,Putatively Actionable,,1,SU2C,Preclinical,,Clinical evidence,,Rearrangement,TMPRSS2--ERG
2006,MO_1176,TMPRSS2--ERG,Putatively Actionable,,1,SU2C,Preclinical,,Clinical evidence,,Rearrangement,TMPRSS2--ERG


In [43]:
# Distinct patients per cohort flagged with 'High Mutational Burden'.
tmb = tmp.loc[tmp['feature_string'] == 'High Mutational Burden']
for cohort_label, cohort_tmb in tmb.groupby('cohort'):
    n_cohort_patients = len(cohort_tmb['patient_id'].unique())
    print(cohort_label, n_cohort_patients)

MEL 43
SU2C 4


In [44]:
# Distinct patients with at least one mutational-signature event in the selection (shown as a shape tuple).
tmp[tmp['feature_type'].eq('Mutational signature')]['patient_id'].drop_duplicates().shape

(40,)

In [45]:
# Number of distinct patients with at least one aneuploidy event in the selection.
tmp[tmp['feature_type'].eq('Aneuploidy')]['patient_id'].drop_duplicates().shape[0]

137

## Therapeutic sensitivity by evidence

In [46]:
# Rank of each evidence level; a lower number sorts first and is therefore the one kept per patient.
evidence_map = {
    'FDA-Approved': 0,
    'Guideline': 1,
    'Clinical trial': 2,
    'Clinical evidence': 3,
    'Preclinical': 4,
    'Inferential': 5
}

# All events (any feature type) that carry a therapeutic-sensitivity assertion.
# Selecting with .loc and adding the rank via .assign builds a new frame, so no column is
# written into a slice of df (this is what raised SettingWithCopyWarning before).
sensitive_events = df.loc[
    df['sensitive_predictive_implication'].notnull(),
    ['patient_id', 'sensitive_predictive_implication'],
]
tmp = (
    sensitive_events
    .assign(evidence_map=sensitive_events['sensitive_predictive_implication'].map(evidence_map))
    .sort_values('evidence_map')
    # one row per patient: the first row after sorting, i.e. the lowest evidence rank
    .drop_duplicates(subset='patient_id', keep='first')
)

# Melanoma patients are identified by the 'MEL-IPI' substring in their patient_id.
idx_mel = tmp['patient_id'].str.contains('MEL-IPI')
tmp_mel_vcs = tmp[idx_mel]['sensitive_predictive_implication'].value_counts()
tmp_mel_vcs

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  del sys.path[0]


FDA-Approved         63
Clinical evidence    19
Guideline            12
Preclinical          11
Inferential           1
Clinical trial        1
Name: sensitive_predictive_implication, dtype: int64

In [47]:
# Melanoma patients whose per-patient retained sensitivity evidence is FDA-Approved or Guideline.
tmp_mel_vcs['FDA-Approved'] + tmp_mel_vcs['Guideline']

75

In [48]:
# Same per-patient evidence breakdown for every patient not matched by idx_mel.
tmp_su2c_vcs = tmp.loc[~idx_mel, 'sensitive_predictive_implication'].value_counts()
tmp_su2c_vcs

Guideline            87
Preclinical          31
FDA-Approved         13
Clinical trial        7
Inferential           2
Clinical evidence     2
Name: sensitive_predictive_implication, dtype: int64

In [49]:
# Non-melanoma patients whose per-patient retained sensitivity evidence is FDA-Approved or Guideline.
tmp_su2c_vcs['FDA-Approved'] + tmp_su2c_vcs['Guideline']

100

In [50]:
# Patient id lists by the evidence level retained for each patient in tmp.
retained_evidence = tmp['sensitive_predictive_implication']
patients_fda_guideline = tmp.loc[retained_evidence.isin(['FDA-Approved', 'Guideline']), 'patient_id'].tolist()
patients_all = tmp['patient_id'].tolist()
patients_preclinical = tmp.loc[retained_evidence.isin(['Preclinical']), 'patient_id'].tolist()


In [51]:
# Same assignment as in the previous cell; re-running it here leaves patients_all unchanged.
patients_all = tmp['patient_id'].tolist()

In [52]:
# Rank of each evidence level; a lower number sorts first and is therefore the one kept per patient.
evidence_map = {
    'FDA-Approved': 0,
    'Guideline': 1,
    'Clinical trial': 2,
    'Clinical evidence': 3,
    'Preclinical': 4,
    'Inferential': 5
}

# Events with a therapeutic-sensitivity assertion, restricted to somatic variants and copy number.
# Selecting with .loc and adding the rank via .assign builds a new frame, so no column is
# written into a slice of df (this is what raised SettingWithCopyWarning before).
idx_sensitive = df['sensitive_predictive_implication'].notnull()
idx_somatic = df['feature_type'].isin(['Somatic variant', 'Somatic copy number'])
sensitive_somatic_events = df.loc[
    idx_sensitive & idx_somatic,
    ['patient_id', 'sensitive_predictive_implication'],
]
tmp = (
    sensitive_somatic_events
    .assign(evidence_map=sensitive_somatic_events['sensitive_predictive_implication'].map(evidence_map))
    .sort_values('evidence_map')
    # one row per patient: the first row after sorting, i.e. the lowest evidence rank
    .drop_duplicates(subset='patient_id', keep='first')
)

# Patients whose retained somatic-only evidence is FDA-Approved or Guideline.
patients_fda_guideline_somatic = (tmp[
    tmp['sensitive_predictive_implication'].isin(['FDA-Approved', 'Guideline'])]['patient_id'].tolist())

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  del sys.path[0]


In [53]:
# Patients in patients_fda_guideline that are missing from the somatic-only list.
all_feature_index = pd.Index(patients_fda_guideline)
somatic_only_index = pd.Index(patients_fda_guideline_somatic)
tmp_patients = all_feature_index.difference(somatic_only_index)
print(tmp_patients)
print(tmp_patients.shape)

Index(['MEL-IPI_Pat06', 'MO_1012', 'PROS01448-1115183', 'PROS01448-1115244',
       'SC_9034', 'SC_9066', 'SC_9081', 'SC_9082', 'SC_9086', 'TP_2032',
       'TP_2060'],
      dtype='object')
(11,)


In [54]:
# Events of those patients whose sensitivity evidence is FDA-Approved or Guideline.
idx_patient = df['patient_id'].isin(tmp_patients)
idx_top_tier = df['sensitive_predictive_implication'].isin(['FDA-Approved', 'Guideline'])
tmp = df.loc[idx_patient & idx_top_tier]
tmp

Unnamed: 0,patient_id,feature_string,almanac_bin,phial_bin,sum,cohort,sensitive_predictive_implication,resistance_predictive_implication,prognostic_predictive_implication,clinvar,feature_type,feature_str_simple
90,MEL-IPI_Pat06,COL1A1--TGM2,Investigate Actionability,,1,MEL,Guideline,,,,Rearrangement,COL1A1--TGM2
1813,MO_1012,CREBZF--COL1A1,Investigate Actionability,,1,SU2C,Guideline,,,,Rearrangement,CREBZF--COL1A1
1819,MO_1012,EMC1--ALK,Investigate Actionability,,1,SU2C,FDA-Approved,,,,Rearrangement,EMC1--ALK
1823,MO_1012,FIP1L1--THG1L,Investigate Actionability,,1,SU2C,FDA-Approved,,,,Rearrangement,FIP1L1--THG1L
1830,MO_1012,HIC2--PDGFRB,Investigate Actionability,,1,SU2C,FDA-Approved,,,,Rearrangement,HIC2--PDGFRB
1831,MO_1012,High Mutational Burden,Investigate Actionability,,1,SU2C,Guideline,,,,Tumor mutational burden,TMB
1837,MO_1012,KIF13A--EML4,Investigate Actionability,,1,SU2C,Guideline,,,,Rearrangement,KIF13A--EML4
1843,MO_1012,MUTYH--FIP1L1,Investigate Actionability,,1,SU2C,FDA-Approved,,,,Rearrangement,MUTYH--FIP1L1
1846,MO_1012,NTRK1--CHAF1B,Investigate Actionability,,1,SU2C,FDA-Approved,,,,Rearrangement,NTRK1--CHAF1B
1847,MO_1012,NTRK1--RP11-566K19.5,Investigate Actionability,,1,SU2C,FDA-Approved,,,,Rearrangement,NTRK1--RP11-566K19.5


In [55]:
# How often each feature string occurs among the rows currently held in `tmp`.
tmp.loc[:, 'feature_string'].value_counts()

High Mutational Burden    2
COL1A1--KLK3              1
COL1A1--TGM2              1
NTRK2--ACOX2              1
COL1A1--SPARC             1
NTRK1--RP11-566K19.5      1
COL1A1--SEC61A1           1
CDK5RAP2--COL1A1          1
MT-RNR2--COL1A1           1
SAMD14--NTRK3             1
HIC2--PDGFRB              1
COL3A1--COL1A1            1
RFX6--BCR                 1
MUTYH--FIP1L1             1
CREBZF--COL1A1            1
NTRK1--CHAF1B             1
COL1A1--LRP1              1
PDGFRA--NTF4              1
COL1A1--BCL2L2-PABPN1     1
COL1A1--CPE               1
COL1A1--MT-CO2            1
COL1A1--C1R               1
EMC1--ALK                 1
FIP1L1--THG1L             1
KIF13A--EML4              1
Name: feature_string, dtype: int64

In [56]:
# Rows of `tmp` whose feature string mentions NTRK.
tmp.loc[tmp['feature_string'].str.contains('NTRK')]

Unnamed: 0,patient_id,feature_string,almanac_bin,phial_bin,sum,cohort,sensitive_predictive_implication,resistance_predictive_implication,prognostic_predictive_implication,clinvar,feature_type,feature_str_simple
1846,MO_1012,NTRK1--CHAF1B,Investigate Actionability,,1,SU2C,FDA-Approved,,,,Rearrangement,NTRK1--CHAF1B
1847,MO_1012,NTRK1--RP11-566K19.5,Investigate Actionability,,1,SU2C,FDA-Approved,,,,Rearrangement,NTRK1--RP11-566K19.5
1848,MO_1012,NTRK2--ACOX2,Investigate Actionability,,1,SU2C,FDA-Approved,,,,Rearrangement,NTRK2--ACOX2
1864,MO_1012,SAMD14--NTRK3,Investigate Actionability,,1,SU2C,FDA-Approved,,,,Rearrangement,SAMD14--NTRK3


In [57]:
# Patients in `patients_all` that are not in `patients_fda_guideline`.
patients_otherwise_variant_negative = pd.Index(patients_all).difference(patients_fda_guideline)

# NOTE(review): 260 is hard-coded; presumably the total number of patients across
# both cohorts — confirm and consider defining it once near the top of the notebook.
for patient_list in (patients_fda_guideline, patients_all):
    print(260 - len(patient_list))

85
11


In [58]:
# Number of patient IDs collected in `patients_preclinical`
# (rows of `tmp` whose sensitive_predictive_implication is 'Preclinical').
len(patients_preclinical)

42

In [59]:
# Feature strings of the 'Preclinical' sensitivity rows for patients in `patients_preclinical`.
df.loc[
    df['sensitive_predictive_implication'].eq('Preclinical')
    & df['patient_id'].isin(patients_preclinical),
    'feature_string'
]

3             CDKN2A/B.Deletion
13            CDKN2A/B.Deletion
15            CDKN2A/B.Deletion
16            CDKN2A/B.Deletion
19            CDKN2A/B.Deletion
30            CDKN2A/B.Deletion
200                FLT3.p.H809D
551              ARID1A.p.P855S
560              FGFR1.Deletion
561              FGFR2.Deletion
563               PTEN.Deletion
882     AKT3.p.L233LSplice_Site
890       MIR17HG.Amplification
913              FGFR1.Deletion
933               PIK3CA.p.V71I
971              FGFR2.Deletion
1554             FGFR2.Deletion
1555              MAP2K1.p.F53I
1907               TMPRSS2--ERG
1910              TMPRSS2--ETV4
1930              PTEN.Deletion
1938             PIK3CA.p.C901F
1940              PTEN.Deletion
1941               TMPRSS2--ERG
1963               PTEN.p.R130*
1966               TMPRSS2--ERG
1972              PTEN.Deletion
1975               TMPRSS2--ERG
1999               TMPRSS2--ERG
2043              PTEN.Deletion
2080              PTEN.Deletion
2082    

In [60]:
# Full rows of the 'Preclinical' sensitivity assertions for patients in `patients_preclinical`.
df.loc[
    df['sensitive_predictive_implication'].eq('Preclinical')
    & df['patient_id'].isin(patients_preclinical)
]

Unnamed: 0,patient_id,feature_string,almanac_bin,phial_bin,sum,cohort,sensitive_predictive_implication,resistance_predictive_implication,prognostic_predictive_implication,clinvar,feature_type,feature_str_simple
3,MEL-IPI_Pat106,CDKN2A/B.Deletion,Putatively Actionable,Investigate Actionability,2,MEL,Preclinical,,,,Somatic copy number,CDKN2A/B Del
13,MEL-IPI_Pat162,CDKN2A/B.Deletion,Putatively Actionable,Investigate Actionability,2,MEL,Preclinical,,,,Somatic copy number,CDKN2A/B Del
15,MEL-IPI_Pat165,CDKN2A/B.Deletion,Putatively Actionable,,1,MEL,Preclinical,,,,Somatic copy number,CDKN2A/B Del
16,MEL-IPI_Pat166,CDKN2A/B.Deletion,Putatively Actionable,Investigate Actionability,2,MEL,Preclinical,,,,Somatic copy number,CDKN2A/B Del
19,MEL-IPI_Pat24,CDKN2A/B.Deletion,Putatively Actionable,Investigate Actionability,2,MEL,Preclinical,,,,Somatic copy number,CDKN2A/B Del
30,MEL-IPI_Pat66,CDKN2A/B.Deletion,Putatively Actionable,Investigate Actionability,2,MEL,Preclinical,,,,Somatic copy number,CDKN2A/B Del
200,MEL-IPI_Pat109,FLT3.p.H809D,Investigate Actionability,Biologically Relevant,2,MEL,Preclinical,Clinical evidence,,,Somatic variant,FLT3
551,MEL-IPI_Pat133,ARID1A.p.P855S,Investigate Actionability,,1,MEL,Preclinical,,Clinical evidence,,Somatic variant,ARID1A
560,MEL-IPI_Pat135,FGFR1.Deletion,Investigate Actionability,,1,MEL,Preclinical,,,,Somatic copy number,FGFR1 Del
561,MEL-IPI_Pat135,FGFR2.Deletion,Investigate Actionability,,1,MEL,Preclinical,,,,Somatic copy number,FGFR2 Del


## Table of sample counts by assertion type, evidence, and match

In [61]:
# Columns holding the evidence level per assertion type (every column whose name
# contains 'predictive_implication').
columns = df.columns[df.columns.str.contains('predictive_implication')]

# Fixed column / row orderings for the exported tables.
evidence_columns = ['FDA-Approved', 'Guideline', 'Clinical trial', 'Clinical evidence', 'Preclinical', 'Inferential']
match_columns = ['Putatively Actionable', 'Investigate Actionability', 'Biologically Relevant']
types_columns = ['Somatic variant', 'Somatic copy number', 'Germline', 'Rearrangement', 'Mutational signature', 'Tumor mutational burden', 'Aneuploidy']

outname = 'retrospective.feature_types_by_evidence.xlsx'

# One sheet per (cohort, assertion type): number of distinct patients for each
# feature type (rows) at each evidence level (columns).
with pd.ExcelWriter(outname) as writer:
    for label, group in df.groupby('cohort'):
        for column in columns:
            # e.g. 'sensitive_predictive_implication' -> '<cohort> sensitive'
            sheetname = '{} {}'.format(label, column.split('_')[0])
            (
                # Use the cohort's own rows (`group`), not the full `df`; otherwise
                # every cohort's sheet would contain the same pooled counts.
                group[group[column].isin(evidence_columns)]
                .loc[:, ['patient_id', 'feature_type', column]]
                .drop_duplicates()
                .pivot_table(index='feature_type', columns=column, values='patient_id', aggfunc=lambda x: len(x.unique()))
                # Force a consistent layout; combinations that do not occur become 0.
                .reindex(evidence_columns, axis=1)
                .reindex(types_columns)
                .fillna(0)
                .astype(int)
                .to_excel(writer, sheet_name=sheetname)
            )

## In conclusion

In [62]:
# Somatic variant / copy number events that MOAlmanac bins as Putatively Actionable
# or Investigate Actionability. Missing bins never match `isin`, so no fill is needed.
tmp = df.loc[
    df['almanac_bin'].isin(['Putatively Actionable', 'Investigate Actionability'])
    & df['feature_type'].isin(['Somatic variant', 'Somatic copy number'])
]
tmp_patients = tmp['patient_id'].drop_duplicates()
print(tmp.shape)
print(len(tmp_patients))

(1175, 12)
249


In [63]:
# Same selection as for MOAlmanac, but using the PHIAL bin column.
# Missing bins never match `isin`, so no fill is needed.
tmp = df.loc[
    df['phial_bin'].isin(['Putatively Actionable', 'Investigate Actionability'])
    & df['feature_type'].isin(['Somatic variant', 'Somatic copy number'])
]
print(tmp.shape)
print(len(tmp['patient_id'].unique()))

(587, 12)
232


In [64]:
# MOAlmanac Putatively Actionable / Investigate Actionability events from every
# feature type other than somatic variants and copy number alterations.
tmp = df.loc[
    df['almanac_bin'].isin(['Putatively Actionable', 'Investigate Actionability'])
    & ~df['feature_type'].isin(['Somatic variant', 'Somatic copy number'])
]
tmp_patients_2 = tmp['patient_id'].drop_duplicates()

# Rows from patients that were not already captured in `tmp_patients`.
rows_from_new_patients = tmp[~tmp['patient_id'].isin(tmp_patients)]
print(tmp.shape)
print(rows_from_new_patients.shape)
print(rows_from_new_patients['patient_id'].drop_duplicates().shape)

(351, 12)
(12, 12)
(8,)


In [65]:
# Union of the patients with a Putatively Actionable / Investigate Actionability
# MOAlmanac event from somatic variants or copy number (`tmp_patients`) and from
# any other feature type (`tmp_patients_2`).
pd.Index(tmp_patients).union(pd.Index(tmp_patients_2))

Index(['MEL-IPI_Pat02', 'MEL-IPI_Pat03', 'MEL-IPI_Pat04', 'MEL-IPI_Pat06',
       'MEL-IPI_Pat07', 'MEL-IPI_Pat08', 'MEL-IPI_Pat100', 'MEL-IPI_Pat101',
       'MEL-IPI_Pat103', 'MEL-IPI_Pat104',
       ...
       'TP_2001', 'TP_2009', 'TP_2010', 'TP_2020', 'TP_2032', 'TP_2034',
       'TP_2054', 'TP_2060', 'TP_2061', 'TP_2064'],
      dtype='object', name='patient_id', length=257)

## Figure 2 caption

In [66]:
# Row masks: feature type, and "actionable" bin per method (a = MOAlmanac, p = PHIAL).
# Missing bins never match `isin`, so no fill is needed.
idx_v = df['feature_type'] == 'Somatic variant'
idx_cn = df['feature_type'] == 'Somatic copy number'
idx_a = df['almanac_bin'].isin(['Putatively Actionable', 'Investigate Actionability'])
idx_p = df['phial_bin'].isin(['Putatively Actionable', 'Investigate Actionability'])

# Distinct patients with an actionable somatic variant / copy number event, first for
# MOAlmanac and then for PHIAL. After the loop `idx` and `tmp` hold the PHIAL values.
for idx_method in (idx_a, idx_p):
    idx = idx_method & (idx_v | idx_cn)
    tmp = df.loc[idx, 'patient_id'].drop_duplicates()
    print(tmp.shape)

(249,)
(232,)


In [67]:
# Rows selected by `idx` (at this point the PHIAL somatic variant / copy number mask)
# for which MOAlmanac assigned no bin at all.
df.loc[idx & df['almanac_bin'].isna()]

Unnamed: 0,patient_id,feature_string,almanac_bin,phial_bin,sum,cohort,sensitive_predictive_implication,resistance_predictive_implication,prognostic_predictive_implication,clinvar,feature_type,feature_str_simple
39,MO_1014,CDKN2A/B.Deletion,,Investigate Actionability,1,SU2C,,,,,Somatic copy number,CDKN2A/B Del
149,MEL-IPI_Pat101,ETV1.Amplification,,Investigate Actionability,1,MEL,,,,,Somatic copy number,ETV1 Amp
156,MEL-IPI_Pat101,PIK3R1.Deletion,,Investigate Actionability,1,MEL,,,,,Somatic copy number,PIK3R1 Del
158,MEL-IPI_Pat101,ROS1.Deletion,,Investigate Actionability,1,MEL,,,,,Somatic copy number,ROS1 Del
160,MEL-IPI_Pat101,SMAD4.Deletion,,Investigate Actionability,1,MEL,,,,,Somatic copy number,SMAD4 Del
...,...,...,...,...,...,...,...,...,...,...,...,...
2961,SC_9109,TSC2.Deletion,,Investigate Actionability,1,SU2C,,,,,Somatic copy number,TSC2 Del
2967,TP_2001,MITF.Deletion,,Investigate Actionability,1,SU2C,,,,,Somatic copy number,MITF Del
2979,TP_2010,TMPRSS2.Deletion,,Investigate Actionability,1,SU2C,,,,,Somatic copy number,TMPRSS2 Del
2986,TP_2020,NOTCH2.Deletion,,Investigate Actionability,1,SU2C,,,,,Somatic copy number,NOTCH2 Del
