# Create a mapping between therapies catalogued in the Molecular Oncology Almanac and those profiled in GDSC
We read the fitted dose response curves from GDSC1 and GDSC2 and create a dictionary that maps almanac therapies, as keys, to a list of GDSC therapies.

In summary,
- The Molecular Oncology Almanac contains 137 therapies. 
- GDSC2 tested 192 therapies and GDSC1 tested 345, 257 of which do not appear in GDSC2.
- We mapped almanac therapies to GDSC, with GDSC2 measurements taking priority. 
- In all, 59 almanac therapies were mapped to GDSC therapies.

In [1]:
import glob
import pandas as pd
import json
import tinydb

# Unique drug names, sorted, from the GDSC1 fitted dose response table.
gdsc1 = (
    pd.read_excel('../source/gdsc/GDSC1_fitted_dose_response_25Feb20.xlsx')
    .loc[:, 'DRUG_NAME']
    .drop_duplicates()
    .sort_values()
)

# Unique drug names, sorted, from the GDSC2 fitted dose response table.
gdsc2 = (
    pd.read_excel('../source/gdsc/GDSC2_fitted_dose_response_25Feb20.xlsx')
    .loc[:, 'DRUG_NAME']
    .drop_duplicates()
    .sort_values()
)

# Keep only the GDSC1 drug names that are absent from GDSC2, so that the two
# name lists never overlap.
gdsc1 = gdsc1.loc[~gdsc1.isin(gdsc2)]

# Open the almanac document store with TinyDB.
almanac = tinydb.TinyDB('../../moalmanac/moalmanac.json')


In [2]:
# Show the names of the tables stored in the almanac database.
almanac.tables()

{'Aneuploidy',
 'Copy Number',
 'Germline Variant',
 'Microsatellite Stability',
 'Mutational Burden',
 'Mutational Signature',
 'Rearrangement',
 'Release',
 'Somatic Variant',
 '_default',
 'genes'}

In [3]:
# Gather a (therapy_name, therapy_type) pair from every record of each
# feature-type table listed below.
records = []
for feature_type in (
    'Aneuploidy',
    'Copy Number',
    'Germline Variant',
    'Microsatellite Stability',
    'Mutational Burden',
    'Mutational Signature',
    'Rearrangement',
    'Somatic Variant',
):
    records.extend(
        (record['therapy_name'], record['therapy_type'])
        for record in almanac.table(feature_type).all()
    )

# From here on `almanac` is a DataFrame of therapies rather than the TinyDB
# handle: rows with a missing value and repeated (name, type) pairs are
# dropped, and the remainder is sorted by name, then type.
almanac = pd.DataFrame(records, columns=['therapy_name', 'therapy_type'])
almanac = almanac.dropna().drop_duplicates()
almanac = almanac.sort_values(['therapy_name', 'therapy_type'])
almanac

Unnamed: 0,therapy_name,therapy_type
185,5-Fluorouracil,Chemotherapy
583,AMG 510,Targeted therapy
474,AZD3759,Targeted therapy
71,AZD8186,Targeted therapy
1,Abiraterone,Hormone therapy
...,...,...
754,VX-680,Targeted therapy
264,Vandetanib,Targeted therapy
278,Veliparib,Targeted therapy
10,Vemurafenib,Targeted therapy


In [4]:
# Seed one entry per almanac therapy name: its therapy type plus an initially
# empty list of GDSC drug names. If a name occurs on several rows, the last
# row's therapy type is the one kept.
dictionary = {
    therapy: {'type': therapy_type, 'gdsc': []}
    for therapy, therapy_type in zip(almanac['therapy_name'], almanac['therapy_type'])
}

## Map GDSC1
Earlier in the notebook we subset GDSC1 for therapies that do not appear in GDSC2. We map any almanac therapies to these drugs.

In [5]:
# Almanac therapy names that exactly match a GDSC1-only drug name, sorted.
almanac.loc[almanac['therapy_name'].isin(gdsc1), 'therapy_name'].sort_values()

221       Alectinib
381       Bleomycin
211       Bosutinib
60     Cabozantinib
398       Cetuximab
56         Imatinib
762    Lenalidomide
379      Omipalisib
98        Pazopanib
240       Ponatinib
523     Quizartinib
275       Rucaparib
608         SU11274
77        Sunitinib
278       Veliparib
Name: therapy_name, dtype: object

In [6]:
# An almanac therapy whose name exactly matches a GDSC1-only drug maps to
# that same name.
for therapy in sorted(almanac.loc[almanac['therapy_name'].isin(gdsc1), 'therapy_name']):
    dictionary[therapy].update(gdsc=[therapy])

## Map GDSC2
Likewise, we map any almanac therapies to GDSC2 therapies

In [7]:
# Almanac therapy names that exactly match a GDSC2 drug name, sorted.
almanac.loc[almanac['therapy_name'].isin(gdsc2), 'therapy_name'].sort_values()

185    5-Fluorouracil
474           AZD3759
71            AZD8186
466          Afatinib
656         Alpelisib
14         Bortezomib
103         Cisplatin
58         Crizotinib
357        Dabrafenib
212         Dasatinib
463         Erlotinib
26          Gefitinib
230               JQ1
30          Lapatinib
319           MK-2206
219         Nilotinib
90          Niraparib
11           Olaparib
462       Osimertinib
13        Palbociclib
70         Pictilisib
542       Ruxolitinib
9         Selumetinib
237         Sorafenib
101       Talazoparib
68          Tamoxifen
363        Trametinib
Name: therapy_name, dtype: object

In [8]:
# Display the sorted, de-duplicated GDSC2 drug names for manual inspection.
gdsc2

19581     5-Fluorouracil
105110            ABT737
107352          AGI-5198
70082           AGI-6780
126888           AMG-319
               ...      
64214            Wnt-C59
38410             XAV939
35075           YK-4-279
14067           ZM447439
84651        Zoledronate
Name: DRUG_NAME, Length: 192, dtype: object

In [9]:
# An almanac therapy whose name exactly matches a GDSC2 drug maps to that
# same name.
for therapy in sorted(almanac.loc[almanac['therapy_name'].isin(gdsc2), 'therapy_name']):
    dictionary[therapy].update(gdsc=[therapy])

# Hand-assigned mappings for almanac therapies that have no identically named
# GDSC drug.
dictionary['AMG 510'].update(gdsc=['KRAS (G12C) Inhibitor-12'])
dictionary['nutlin-3'].update(gdsc=['Nutlin-3a (-)'])
dictionary['Vemurafenib'].update(gdsc=['PLX-4720'])

## Combination therapies
Some therapies catalogued in the Molecular Oncology Almanac are combinations (cocktails) of several agents; we map them to GDSC therapies here.

In [10]:
# List the combination therapies, i.e. names containing a literal '+'.
# The pattern is a raw string so that the backslash reaches the regex engine
# as written; in a plain string '\+' is an invalid escape sequence.
almanac['therapy_name'][almanac['therapy_name'].str.contains(r'\+')]

272                           Azacitidine + Panobinostat
208                               Bevacizumab + Olaparib
575                              Buparlisib + Trametinib
38                Capecitabine + Trastuzumab + Tucatinib
54             Carbogen and nicotinamide  + radiotherapy
367                              Cetuximab + Encorafenib
400                              Cetuximab + Vemurafenib
41     Chemotherapy + Hyaluronidase-zzxf + Pertuzumab...
17                          Chemotherapy + Pembrolizumab
37                            Chemotherapy + Trastuzumab
373                            Cobimetinib + Vemurafenib
358                              Dabrafenib + Trametinib
42     Docetaxel + Hyaluronidase-zzxf + Pertuzumab + ...
470                               Durvalumab + Gefitinib
469                             Durvalumab + Osimertinib
589                         FGFR1 inhibitor + Trametinib
590                        FGFR1 inhibitor + Trametinib 
391                            

In [11]:
# Hand-assigned GDSC drug names for combination therapies, keyed by the
# almanac therapy name.
combination_mappings = {
    'Buparlisib + Trametinib': ['Buparlisib', 'Trametinib'],
    'Cetuximab + Vemurafenib': ['PLX-4720', 'Cetuximab'],
    'Cobimetinib + Vemurafenib': ['PLX-4720'],
    'Dabrafenib + Trametinib': ['Dabrafenib', 'Trametinib'],
    'Durvalumab + Osimertinib': ['Osimertinib'],
    'FGFR1 inhibitor + Trametinib': ['Trametinib'],
    'GANT61 + Obatoclax': ['Obatoclax Mesylate'],
    'Ipilimumab + Vemurafenib': ['PLX-4720'],
    'Lapatinib + Trastuzumab': ['Lapatinib'],
    'Letrozole + Tamoxifen': ['Tamoxifen'],
    'Neratinib + Vemurafenib': ['PLX-4720'],
    'Selumetinib + Vemurafenib': ['PLX-4720', 'Selumetinib'],
    'Palbociclib + Trametinib': ['Palbociclib', 'Trametinib'],
    'Trametinib + Vemurafenib': ['PLX-4720', 'Trametinib'],
}
for combination, gdsc_names in combination_mappings.items():
    # Store a copy so the dictionary entry does not alias the table above.
    dictionary[combination]['gdsc'] = list(gdsc_names)

In [12]:
# Almanac therapies that ended up with at least one GDSC drug assigned,
# in dictionary order.
catalogued_therapies = [key for key in dictionary if dictionary[key]['gdsc']]
for key in catalogued_therapies:
    print(key)
count = len(catalogued_therapies)
count

5-Fluorouracil
AMG 510
AZD3759
AZD8186
Afatinib
Alectinib
Alpelisib
Bleomycin
Bortezomib
Bosutinib
Buparlisib + Trametinib
Cabozantinib
Cetuximab
Cetuximab + Vemurafenib
Cisplatin
Cobimetinib + Vemurafenib
Crizotinib
Dabrafenib
Dabrafenib + Trametinib
Dasatinib
Durvalumab + Osimertinib
Erlotinib
FGFR1 inhibitor + Trametinib
GANT61 + Obatoclax
Gefitinib
Imatinib
Ipilimumab + Vemurafenib
JQ1
Lapatinib
Lapatinib + Trastuzumab
Lenalidomide
Letrozole + Tamoxifen
MK-2206
Neratinib + Vemurafenib
Nilotinib
Niraparib
Olaparib
Omipalisib
Osimertinib
Palbociclib
Palbociclib + Trametinib
Pazopanib
Pictilisib
Ponatinib
Quizartinib
Rucaparib
Ruxolitinib
SU11274
Selumetinib
Selumetinib + Vemurafenib
Sorafenib
Sunitinib
Talazoparib
Tamoxifen
Trametinib
Trametinib + Vemurafenib
Veliparib
Vemurafenib
nutlin-3


59

## Export

In [13]:
def write(handle, dictionary):
    """Serialize ``dictionary`` as JSON into the file at path ``handle``.

    The file is opened in write mode, so existing content is replaced. Keys
    are written in sorted order and nested levels are indented by four spaces.
    """
    with open(handle, 'w') as json_handle:
        json_handle.write(json.dumps(dictionary, sort_keys=True, indent=4))

# Export the almanac-to-GDSC mapping as JSON; the relative path resolves
# against the current working directory.
write('almanac-gdsc-mappings.json', dictionary)