In [32]:
import json
import requests 
import csv

### Initial Parameters configuration:

In [33]:
# CSV file read by convertCsvToDict(); each row is unpacked as (geneId, geneName).
csvPath = 'gene_results.csv'
#--------------------------
# Mutation URL: base endpoint + molecular-profile branch + query string.
URL_base = "https://www.cbioportal.org/api/molecular-profiles/"
branch_mutations = "gbm_tcga_mutations/mutations/"
# NOTE(review): the very large pageSize presumably fetches everything in page 0 - confirm.
para_mutations = "fetch?projection=SUMMARY&pageSize=10000000&pageNumber=0&direction=ASC"
#------------------------
# Copy-number URL: branch + query string, appended to URL_base.
branch_copynum = "gbm_tcga_gistic/discrete-copy-number/"
para_copynum = "fetch?discreteCopyNumberEventType=ALL&projection=SUMMARY"
#------------------------
# Sample list id sent in the JSON body of both POST requests.
sampleListId = 'gbm_tcga_cnaseq'

In [34]:
# Initialize variables:
# list_geneId starts empty and is reassigned once the gene names are resolved.
list_geneId = []
list_geneName = ['TP53','MDM2','MDM4']  # gene names to look up and analyse

### Entrez gene ID lookup:

In [35]:
# csvPath = 'gene_results.csv'
def convertCsvToDict(csvPath):
    """Read a two-column CSV file into a ``{geneName: geneId}`` dictionary.

    Each non-empty row must contain exactly two fields, in the order
    ``geneId, geneName``. Values are kept as the strings read from the file.
    If a gene name occurs more than once, the last row wins.

    Args:
        csvPath: Path of the CSV file to read.

    Returns:
        dict mapping gene name (str) to gene id (str).

    Raises:
        OSError: If the file cannot be opened.
        ValueError: If a non-empty row does not have exactly two fields.
    """
    dic = {}
    # The context manager closes the file even if a row is malformed;
    # newline='' is the mode the csv module expects for its input files.
    with open(csvPath, 'r', newline='') as handle:
        for row in csv.reader(handle):
            if not row:
                # csv.reader yields [] for blank lines; they carry no data.
                continue
            geneId, geneName = row
            dic[geneName] = geneId
    return dic

In [36]:
csv_to_dic = convertCsvToDict(csvPath) # maps gene name -> id string, e.g. {'TP53': '7157', ...}

In [37]:
def lookup(csv_to_dic, key):
    """Return the gene id stored for ``key``, or a message string on failure.

    Args:
        csv_to_dic: Mapping of gene name to gene id.
        key: Gene name to look up.

    Returns:
        The stored id when ``key`` is present with a truthy value;
        ``'Please type geneName.'`` when ``key`` is falsy;
        ``'No such a geneId.'`` when ``key`` is missing or its value is falsy.
    """
    if key:
        if key in csv_to_dic:
            gene_id = csv_to_dic[key]
            if gene_id:
                return gene_id
        return 'No such a geneId.'
    return 'Please type geneName.'

In [38]:
def getGeneId(csv_to_dic, list_geneName):
    """Resolve each gene name to its id and return the mapping.

    Despite the name of the returned dictionary, its keys are gene names and
    its values are whatever ``lookup`` returns for that name (the id string,
    or one of ``lookup``'s message strings when the name cannot be resolved).
    The mapping is also printed.

    Args:
        csv_to_dic: Mapping of gene name to gene id.
        list_geneName: Iterable of gene names to resolve.

    Returns:
        dict mapping each requested gene name to its ``lookup`` result.
    """
    dict_geneId_geneName = {
        geneName: lookup(csv_to_dic, geneName) for geneName in list_geneName
    }
    print(dict_geneId_geneName)
    return dict_geneId_geneName
    

In [39]:
# Resolve the configured gene names, then convert the id strings to ints.
# int() raises ValueError here if lookup() returned one of its message
# strings instead of a numeric id.
dict_geneId_geneName = getGeneId(csv_to_dic, list_geneName)
list_geneId = [int(val) for val in dict_geneId_geneName.values()]
list_geneId

{'TP53': '7157', 'MDM2': '4193', 'MDM4': '4194'}


[7157, 4193, 4194]

### REST processing:

#### Make a post request:

In [40]:
def POSTRequester(URL, json_body, timeout=60):
    """POST ``json_body`` as JSON to ``URL`` and return the decoded JSON reply.

    Args:
        URL: Full request URL.
        json_body: JSON-serialisable request body.
        timeout: Seconds to wait for the server before giving up. Passed
            straight to ``requests.post``; ``None`` waits indefinitely.

    Returns:
        The response body decoded from JSON.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.Timeout: If the server does not respond within ``timeout``.
    """
    r = requests.post(URL, json=json_body, timeout=timeout)
    # Fail here on an error status rather than handing an error payload to
    # callers that expect a list of records.
    r.raise_for_status()
    return r.json()

In [41]:
# Full endpoint for the mutation fetch request.
URL_mutation = URL_base + branch_mutations + para_mutations
# Disabled example: len(POSTRequester(URL_mutation, json_)) with geneId=7157, sampleListId=gbm_tcga_cnaseq

#### Make a GET request:

In [42]:
def GETRequester(URL, timeout=60):
    """GET ``URL`` and return the decoded JSON reply.

    Args:
        URL: Full request URL.
        timeout: Seconds to wait for the server before giving up. Passed
            straight to ``requests.get``; ``None`` waits indefinitely.

    Returns:
        The response body decoded from JSON.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.Timeout: If the server does not respond within ``timeout``.
    """
    r = requests.get(URL, timeout=timeout)
    # Fail here on an error status rather than returning an error payload
    # whose length would be mistaken for a record count.
    r.raise_for_status()
    return r.json()

In [43]:
# Number of entries returned by the patients endpoint of the gbm_tcga study.
# Used below as the denominator of the mutation and copy-number rates.
URL_patients = "https://www.cbioportal.org/api/studies/gbm_tcga/patients/"
size_patients = len(GETRequester(URL_patients)) 
size_patients

606

#### Group Mutation Count:

In [44]:
def GetGroupMutationCount(list_geneId):
    """Count the mutation records returned for each requested gene id.

    Posts ``list_geneId`` together with the module-level ``sampleListId`` to
    ``URL_mutation`` and tallies the returned records by their
    ``'entrezGeneId'`` field. Each gene id is printed as it is processed.

    Every requested gene appears in the result, with a count of 0 when the
    response holds no record for it. This keeps the result the same length
    as the copy-number counts, which ``output`` requires.

    NOTE(review): this counts records, not distinct patients; if one patient
    can have several records for the same gene, the count overstates the
    number of affected cases - confirm against the API.

    Args:
        list_geneId: List of Entrez gene ids (ints) to query.

    Returns:
        dict mapping each requested gene id to its number of records.
    """
    json_ = {"entrezGeneIds": list_geneId, "sampleListId": sampleListId}
    mutations = POSTRequester(URL_mutation, json_)

    # Tally in a single pass over the response instead of one pass per gene.
    counts = {}
    for item in mutations:
        gene = item['entrezGeneId']
        counts[gene] = counts.get(gene, 0) + 1

    dict_mutations = {}
    for geneId in list_geneId:
        print(geneId)
        # Default to 0 so genes without any record are still reported.
        dict_mutations[geneId] = counts.get(geneId, 0)

    return dict_mutations

In [45]:
# Number of mutation records per requested gene id.
dict_mutations = GetGroupMutationCount(list_geneId)
dict_mutations

7157
4193
4194


{7157: 95, 4193: 2, 4194: 1}

#### Group mutation rates:

In [46]:
# Mutation count per gene as a percentage of size_patients, to one decimal.
# Scale to percent *before* rounding: rounding the fraction first and then
# multiplying by 100 can reintroduce floating-point noise (values such as
# 10.100000000000001).
group_mutation_rates = {}
for geneid, mutation in dict_mutations.items():
    group_mutation_rates[geneid] = round(100 * mutation / size_patients, 1)
print(group_mutation_rates)

{7157: 15.7, 4193: 0.3, 4194: 0.2}


#### Get copy number:

In [47]:
def getCopyNum(URL_copynum, list_geneId):
    """Count the altered copy-number records returned for each gene id.

    Posts ``list_geneId`` together with the module-level ``sampleListId`` to
    ``URL_copynum``. A record counts towards a gene when its
    ``'entrezGeneId'`` equals that gene id and its ``'alteration'`` is
    neither ``0`` nor ``'NA'``.

    Args:
        URL_copynum: Full URL of the copy-number fetch endpoint.
        list_geneId: List of Entrez gene ids (ints) to query.

    Returns:
        dict mapping every requested gene id to its count (0 if none match).
    """
    payload = {"entrezGeneIds": list_geneId, "sampleListId": sampleListId}
    records = POSTRequester(URL_copynum, payload)

    def is_altered(record, gene):
        # 'alteration' is only inspected once the gene id has matched.
        return (
            record['entrezGeneId'] == gene
            and record['alteration'] != 0
            and record['alteration'] != 'NA'
        )

    return {
        gene: sum(1 for record in records if is_altered(record, gene))
        for gene in list_geneId
    }

In [48]:
# Full endpoint for the discrete copy-number fetch request.
URL_copynum = URL_base + branch_copynum + para_copynum
# Alternative gene ids for an ad-hoc run: list_geneId = [7157, 3569, 7040]

# Number of altered copy-number records per requested gene id.
dict_countCopyNum = getCopyNum(URL_copynum, list_geneId)
print(dict_countCopyNum)

{7157: 61, 4193: 65, 4194: 83}


#### Group copy number altered rate:

In [49]:
def getGroupCopyNumAlteredRate(dict_countCopyNum, size_patients):
    """Convert per-gene copy-number counts into percentages of all patients.

    The value is scaled to percent before rounding to one decimal. Rounding
    the fraction to three decimals and multiplying by 100 afterwards gives
    the same nominal precision but can leave floating-point noise such as
    ``10.100000000000001``.

    Args:
        dict_countCopyNum: Mapping of gene id to number of altered records.
        size_patients: Total number of patients (the denominator).

    Returns:
        dict mapping gene id to its rate in percent, rounded to one decimal.

    Raises:
        ZeroDivisionError: If ``size_patients`` is 0 and there is at least
            one gene to process.
    """
    return {
        geneId: round(100 * copyNum / size_patients, 1)
        for geneId, copyNum in dict_countCopyNum.items()
    }

In [50]:
# Copy-number alteration rate per gene id, relative to size_patients.
group_CopyNumAlteredRates = getGroupCopyNumAlteredRate(dict_countCopyNum, size_patients)
group_CopyNumAlteredRates

{7157: 10.100000000000001, 4193: 10.7, 4194: 13.700000000000001}

#### Combine single-gene mutation and copy-number alteration rates:

In [51]:
def combMutationAndCopyNum(group_mutation_rates, group_CopyNumAlteredRates):
    """Add the mutation rate and copy-number rate of every shared gene id.

    Only gene ids present in both inputs appear in the result, in the order
    they occur in ``group_mutation_rates``. Shared ids are found with a
    direct dictionary lookup rather than by comparing every pair of keys.

    NOTE(review): adding the two rates treats the two kinds of alteration as
    non-overlapping; if one case can carry both, the sum overstates the share
    of affected cases - confirm whether that is intended.

    Args:
        group_mutation_rates: Mapping of gene id to mutation rate.
        group_CopyNumAlteredRates: Mapping of gene id to copy-number rate.

    Returns:
        dict mapping gene id to the sum of both rates, rounded to 3 decimals.
    """
    return {
        gene_id: round(rate_mut + group_CopyNumAlteredRates[gene_id], 3)
        for gene_id, rate_mut in group_mutation_rates.items()
        if gene_id in group_CopyNumAlteredRates
    }

In [52]:
# Per-gene sum of mutation rate and copy-number alteration rate.
combMutationAndCopyNum(group_mutation_rates, group_CopyNumAlteredRates)

{7157: 25.8, 4193: 11.0, 4194: 13.9}

### Output:

In [53]:
def checkGeneNames(dict_geneId_geneName, geneId):
    """Return the first gene name whose stored id equals ``geneId``.

    Both the stored id and ``geneId`` are converted with ``int`` before they
    are compared, so ids may be given as ints or numeric strings.

    Args:
        dict_geneId_geneName: Mapping of gene name to gene id.
        geneId: Gene id to search for.

    Returns:
        The matching gene name, or ``None`` when no entry matches.
    """
    matching_names = (
        name
        for name, gid in dict_geneId_geneName.items()
        if int(gid) == int(geneId)
    )
    return next(matching_names, None)

In [54]:
# Display the per-gene mutation rates that are passed to output() below.
group_mutation_rates

{7157: 15.7, 4193: 0.3, 4194: 0.2}

In [55]:
# Display the per-gene copy-number rates that are passed to output() below.
group_CopyNumAlteredRates

{7157: 10.100000000000001, 4193: 10.7, 4194: 13.700000000000001}

In [56]:
def output(group_mutation_rates, group_CopyNumAlteredRates):
    """Print a textual summary of the mutation and copy-number rates.

    Gene names are resolved through the module-level ``dict_geneId_geneName``.

    * One gene: prints its mutation rate, its copy-number rate and the
      combined rate on three separate lines.
    * Several genes: prints the combined rate of each gene, followed by the
      sum of all combined rates rounded to an integer.

    Args:
        group_mutation_rates: Mapping of gene id to mutation rate.
        group_CopyNumAlteredRates: Mapping of gene id to copy-number rate.

    Returns:
        ``"Error in length."`` when the two mappings differ in size,
        ``"No Gene."`` when both are empty, otherwise ``None``.
    """
    n_mutation = len(group_mutation_rates)
    n_copynum = len(group_CopyNumAlteredRates)

    # Guard clauses: nothing is printed for mismatched or empty input.
    if n_mutation != n_copynum:
        return "Error in length."
    if n_copynum == 0:
        return "No Gene."

    if n_copynum == 1:
        # Exactly one entry on each side, so unpacking cannot fail.
        (only_id, mutation_rate), = group_mutation_rates.items()
        (copynum_rate,) = group_CopyNumAlteredRates.values()

        geneName = checkGeneNames(dict_geneId_geneName, only_id)
        print(geneName, 'is mutated in ', mutation_rate, '% of all cases.')

        print(geneName, ' is copy number altered in ', copynum_rate, '% of all cases.')

        comb_rate = combMutationAndCopyNum(group_mutation_rates, group_CopyNumAlteredRates)
        print('Cases with at least one mutation or copy number alteration in ', geneName, ': ', list(comb_rate.values())[0], '% of all cases.')
        return None

    # Two or more genes: one line per gene, then the running total.
    comb_rate = combMutationAndCopyNum(group_mutation_rates, group_CopyNumAlteredRates)
    running_total = 0
    for gene_id in comb_rate:
        combined = comb_rate[gene_id]
        geneName = checkGeneNames(dict_geneId_geneName, gene_id)
        print(geneName, 'is mutated or copy number altered in ', combined ,'% of cases.')
        running_total = running_total + combined
    print('Cases with at least one mutation or copy number alteration in one of the genes:', round(running_total), '%')
    return None

In [57]:
# Print the summary for the configured genes.
output(group_mutation_rates, group_CopyNumAlteredRates)

TP53 is mutated or copy number altered in  25.8 % of cases.
MDM2 is mutated or copy number altered in  11.0 % of cases.
MDM4 is mutated or copy number altered in  13.9 % of cases.
Cases with at least one mutation or copy number alteration in one of the genes: 51 %
