# Create a mapping between therapies catalogued in the Molecular Oncology Almanac and those profiled in GDSC
We read the fitted dose response curves from GDSC1 and GDSC2 and create a dictionary mapping almanac therapies (the keys) to lists of GDSC therapies.

In summary,
- The Molecular Oncology Almanac contains 124 therapies. 
- GDSC2 tested 192 therapies and GDSC1 tested 345, 257 of which do not appear in GDSC2.
- We mapped almanac therapies to GDSC, with GDSC2 measurements taking priority. 
- In all, 59 therapies were mapped to GDSC therapies.

To do items
- Relabel combination therapies so they are always alphabetically sorted
- All therapies, even those in combinations, should have the first letter capitalized

In [1]:
import glob
import pandas as pd
import json

# Load the fitted dose-response tables and keep one sorted entry per drug name.
gdsc1 = pd.read_excel('source/gdsc/GDSC1_fitted_dose_response_25Feb20.xlsx')
gdsc1 = gdsc1['DRUG_NAME'].drop_duplicates().sort_values()

gdsc2 = pd.read_excel('source/gdsc/GDSC2_fitted_dose_response_25Feb20.xlsx')
gdsc2 = gdsc2['DRUG_NAME'].drop_duplicates().sort_values()

# Keep only the GDSC1 drugs that are absent from GDSC2, so a drug name is
# never present in both series.
gdsc1 = gdsc1[~gdsc1.isin(gdsc2)]

# NOTE(review): absolute, machine-specific path -- the notebook only runs where
# this checkout of the almanac database exists.
handles = glob.glob('/Users/brendan/Github/moalmanac-admin/database/almanac/*')

columns = ['therapy_name', 'therapy_type']

# Only the two therapy columns of each tab-separated almanac table are kept.
# (The per-file `dtype` label that used to be attached to every table was
# dropped again by this same column selection, so it is no longer computed.)
almanac = pd.concat(
    [pd.read_csv(handle, sep='\t').loc[:, columns] for handle in handles],
    ignore_index=True,
)

# Discard rows without a therapy name, harmonise the capitalisation of the
# therapy type, and collapse repeated (name, type) pairs.
almanac = almanac.dropna(subset=['therapy_name']).sort_values('therapy_name')
almanac['therapy_type'] = almanac['therapy_type'].replace('Targeted Therapy', 'Targeted therapy')
almanac = almanac.drop_duplicates()
almanac.shape

(124, 2)

In [2]:
# Seed one entry per almanac therapy: its therapy type plus an initially empty
# list that the following cells fill with matching GDSC drug names.
dictionary = {
    name: {'type': kind, 'gdsc': []}
    for name, kind in zip(almanac['therapy_name'], almanac['therapy_type'])
}

## Map GDSC1
Earlier in the notebook we subset GDSC1 for therapies that do not appear in GDSC2. We map any almanac therapies to these drugs.

In [3]:
# Almanac therapy names that also occur in the GDSC1 drug-name series.
in_gdsc1 = almanac['therapy_name'].isin(gdsc1)
almanac.loc[in_gdsc1, 'therapy_name'].sort_values()

122       Alectinib
286       Bleomycin
216       Bosutinib
164    Cabozantinib
455       Cetuximab
161        Imatinib
632    Lenalidomide
284      Omipalisib
16        Pazopanib
141       Ponatinib
402     Quizartinib
311       Rucaparib
487         SU11274
721       Sunitinib
173       Veliparib
Name: therapy_name, dtype: object

In [4]:
# A therapy whose name occurs verbatim in the GDSC1 series maps to itself.
gdsc1_matches = almanac.loc[almanac['therapy_name'].isin(gdsc1), 'therapy_name']
for name in gdsc1_matches.sort_values():
    dictionary[name]['gdsc'] = [name]

## Map GDSC2
Likewise, we map any almanac therapies to GDSC2 therapies

In [5]:
# Almanac therapy names that also occur in the GDSC2 drug-name series.
in_gdsc2 = almanac['therapy_name'].isin(gdsc2)
almanac.loc[in_gdsc2, 'therapy_name'].sort_values()

177    5-Fluorouracil
361           AZD3759
553           AZD8186
374          Afatinib
530         Alpelisib
664        Bortezomib
26          Cisplatin
706        Crizotinib
297        Dabrafenib
192         Dasatinib
485         Erlotinib
354         Gefitinib
131               JQ1
679         Lapatinib
231           MK-2206
210         Nilotinib
307          Olaparib
367       Osimertinib
134       Palbociclib
538        Pictilisib
423       Ruxolitinib
517       Selumetinib
240         Sorafenib
19        Talazoparib
712         Tamoxifen
300        Trametinib
Name: therapy_name, dtype: object

In [6]:
# Display the GDSC2 drug names (de-duplicated and sorted when they were loaded).
gdsc2

19581     5-Fluorouracil
105110            ABT737
107352          AGI-5198
70082           AGI-6780
126888           AMG-319
               ...      
64214            Wnt-C59
38410             XAV939
35075           YK-4-279
14067           ZM447439
84651        Zoledronate
Name: DRUG_NAME, Length: 192, dtype: object

In [7]:
# A therapy whose name occurs verbatim in the GDSC2 series maps to itself.
gdsc2_matches = almanac.loc[almanac['therapy_name'].isin(gdsc2), 'therapy_name']
for name in gdsc2_matches.sort_values():
    dictionary[name]['gdsc'] = [name]

# Hand-curated matches for therapies whose almanac name differs from the
# drug name recorded under 'gdsc'.
manual_matches = {
    'AMG 510': ['KRAS (G12C) Inhibitor-12'],
    'nutlin-3': ['Nutlin-3a (-)'],
    'Vemurafenib': ['PLX-4720'],
}
for name, gdsc_names in manual_matches.items():
    dictionary[name]['gdsc'] = gdsc_names

## Combination therapies
Some therapies catalogued in the Molecular Oncology Almanac are cocktails; we catalogue them here.

In [8]:
# Combination therapies are the names containing a literal '+'. The pattern is
# a raw string so that the regex escape `\+` is not parsed as an (invalid)
# Python string escape, which emits a warning on recent interpreters.
almanac['therapy_name'][almanac['therapy_name'].str.contains(r'\+')]

454                      Buparlisib + Trametinib
699    Carbogen and nicotinamide  + radiotherapy
268                      Dabrafenib + Trametinib
357                       Durvalumab + Gefitinib
356                     Durvalumab + Osimertinib
296                           GANT61 + Obatoclax
282                     Ipilimumab + Vemurafenib
294                  Ixazomib + Interferon-alpha
680                      Lapatinib + Trastuzumab
708           Neoadjuvant chemotherapy + surgery
93                        Nivolumab + ipilimumab
453                     Palbociclib + Trametinib
167                   Panobinostat + Azacitidine
325                    Pembrolizumab + Nivolumab
682                     Pertuzumab + Trastuzumab
386                        Tamoxifen + Letrozole
469                 Trametinib + FGFR1 Inhibitor
686                   Trastuzumab + Chemotherapy
677                      Trastuzumab + Lapatinib
305                      Vemurafenib + Cetuximab
278                 

In [9]:
# Hand-curated matches for combination therapies: each combination lists the
# GDSC drug names assigned to its components. Components with no name assigned
# here are simply left out, and Vemurafenib is recorded as 'PLX-4720', the same
# name used for single-agent Vemurafenib.
#
# NOTE(review): 'Durvalumab + Gefitinib' is among the almanac combinations and
# Gefitinib matches GDSC2 directly, yet it receives no entry here, unlike
# 'Durvalumab + Osimertinib' -- confirm whether that omission is intended.
combination_matches = {
    'Buparlisib + Trametinib': ['Buparlisib', 'Trametinib'],
    'Dabrafenib + Trametinib': ['Dabrafenib', 'Trametinib'],
    'Durvalumab + Osimertinib': ['Osimertinib'],
    'GANT61 + Obatoclax': ['Obatoclax Mesylate'],
    'Ipilimumab + Vemurafenib': ['PLX-4720'],
    'Lapatinib + Trastuzumab': ['Lapatinib'],
    'Palbociclib + Trametinib': ['Palbociclib', 'Trametinib'],
    'Tamoxifen + Letrozole': ['Tamoxifen'],
    'Trametinib + FGFR1 Inhibitor': ['Trametinib'],
    'Trastuzumab + Lapatinib': ['Lapatinib'],
    'Vemurafenib + Cetuximab': ['PLX-4720', 'Cetuximab'],
    'Vemurafenib + Cobimetinib': ['PLX-4720'],
    'Vemurafenib + Neratinib': ['PLX-4720'],
    'Vemurafenib + Selumetinib': ['PLX-4720', 'Selumetinib'],
    'Vemurafenib + Trametinib': ['PLX-4720', 'Trametinib'],
}
for combination, gdsc_names in combination_matches.items():
    dictionary[combination]['gdsc'] = gdsc_names

In [10]:
# Print every almanac therapy that has at least one GDSC match, then show how
# many there are.
mapped = [name for name, entry in dictionary.items() if entry['gdsc']]
for name in mapped:
    print(name)
count = len(mapped)
count

5-Fluorouracil
AMG 510
AZD3759
AZD8186
Afatinib
Alectinib
Alpelisib
Bleomycin
Bortezomib
Bosutinib
Buparlisib + Trametinib
Cabozantinib
Cetuximab
Cisplatin
Crizotinib
Dabrafenib
Dabrafenib + Trametinib
Dasatinib
Durvalumab + Osimertinib
Erlotinib
GANT61 + Obatoclax
Gefitinib
Imatinib
Ipilimumab + Vemurafenib
JQ1
Lapatinib
Lapatinib + Trastuzumab
Lenalidomide
MK-2206
Nilotinib
Olaparib
Omipalisib
Osimertinib
Palbociclib
Palbociclib + Trametinib
Pazopanib
Pictilisib
Ponatinib
Quizartinib
Rucaparib
Ruxolitinib
SU11274
Selumetinib
Sorafenib
Sunitinib
Talazoparib
Tamoxifen
Tamoxifen + Letrozole
Trametinib
Trametinib + FGFR1 Inhibitor
Trastuzumab + Lapatinib
Veliparib
Vemurafenib
Vemurafenib + Cetuximab
Vemurafenib + Cobimetinib
Vemurafenib + Neratinib
Vemurafenib + Selumetinib
Vemurafenib + Trametinib
nutlin-3


59

## Export

In [11]:
def write(handle, dictionary):
    """Write ``dictionary`` to the file at ``handle`` as JSON.

    The file is opened in text write mode, so existing content is replaced.
    Keys are emitted in sorted order and nesting is indented by four spaces.
    """
    with open(handle, 'w') as out:
        out.write(json.dumps(dictionary, sort_keys=True, indent=4))

# Export the almanac-to-GDSC mapping as JSON; the path is relative, so the file
# lands in the current working directory.
write('almanac-gdsc-mappings.json', dictionary)