In [64]:
import requests
from itertools import chain

In [86]:
# Base URL of the translator API; it is prefixed to each endpoint path in the requests below.
# Keep the trailing slash: endpoints are appended by plain string concatenation.
api_url = "http://ec2-52-15-200-208.us-east-2.compute.amazonaws.com/"

## Start with the HGNC symbols for the Fanconi anemia (FA) genes

In [87]:
# Symbols of the FA genes that seed every lookup in this notebook.
# NOTE(review): 22 symbols are listed, but the exactMatches lookup that follows
# returns only 12 Wikidata ids. Several entries (e.g. FANCD1, FANCJ, FANCN, FANCO)
# look like FA aliases rather than approved HGNC symbols, and FANCD1 is presumably
# the same gene as BRCA2 -- confirm, and consider switching to approved symbols.
fa_genes = [
    'FANCA', 'FANCB', 'FANCC', 'FANCE', 'FANCF', 'FANCG',
    'FANCL', 'FANCM', 'FANCD2', 'FANCI', 'UBE2T',
    'FANCD1', 'BRCA2',
    'FANCJ', 'FANCN', 'FANCO', 'FANCP', 'FANCQ',
    'FANCR', 'FANCS', 'FANCV', 'FANCU',
]
len(fa_genes)

22

### Get the Wikidata IDs for these HGNC symbols from Wikidata using garbanzo

In [88]:
# Resolve the HGNC symbols to Wikidata items through the exactMatches endpoint.
# All CURIEs are sent together as one space-separated `c` query parameter.
c = ' '.join("HGNCS:" + x for x in fa_genes)
endpoint = "translator/exactMatches/"
params = {'c': c}
# The timeout keeps the kernel from hanging indefinitely on an unresponsive host.
r = requests.get(api_url + endpoint, params=params, timeout=60)
# Fail loudly on an HTTP error instead of trying to parse an error page as JSON.
r.raise_for_status()
# Keep only the Wikidata CURIEs. Matching on the "wd:" prefix (rather than a
# substring test) avoids picking up unrelated ids that merely contain "wd".
# Symbols without a match are simply absent from the result, so `qids` can be
# shorter than `fa_genes`.
qids = [x for x in r.json() if x.startswith("wd:")]

In [89]:
# Show the resolved Wikidata ids, followed by how many there are.
print(qids, len(qids), sep="\n")

['wd:Q18041564', 'wd:Q17927502', 'wd:Q18041663', 'wd:Q18044458', 'wd:Q18250517', 'wd:Q17927069', 'wd:Q17853272', 'wd:Q17927077', 'wd:Q17927056', 'wd:Q18039587', 'wd:Q17927471', 'wd:Q17927524']
12


### Get all orthologs from Wikidata using garbanzo

In [101]:
# Request the statements for the gene items resolved above.
# Passing the list makes requests repeat the `c` parameter once per id,
# which is visible in the printed URL.
endpoint = 'translator/statements'
params = {'c': qids}
# The timeout keeps the kernel from hanging indefinitely on an unresponsive host.
r = requests.get(api_url + endpoint, params=params, timeout=60)
# Print the final URL before checking the status so a failing request can still be inspected.
print(r.url)
# Fail loudly on an HTTP error instead of letting a later r.json() call choke on an error page.
r.raise_for_status()

http://ec2-52-15-200-208.us-east-2.compute.amazonaws.com/translator/statements?c=wd%3AQ18041564&c=wd%3AQ17927502&c=wd%3AQ18041663&c=wd%3AQ18044458&c=wd%3AQ18250517&c=wd%3AQ17927069&c=wd%3AQ17853272&c=wd%3AQ17927077&c=wd%3AQ17927056&c=wd%3AQ18039587&c=wd%3AQ17927471&c=wd%3AQ17927524


In [102]:
# Inspect the raw payload: a list of statement dicts, each with 'id', 'subject',
# 'predicate' and 'object' entries (the latter three carry an 'id' and a 'name').
r.json()

[{'id': 'Q18041663-13943EC0-4544-4251-A4DC-14A0B38A82BC',
  'object': {'id': 'wd:Q7187', 'name': 'gene'},
  'predicate': {'id': 'wd:P31', 'name': 'instance of'},
  'subject': {'id': 'wd:Q18041663', 'name': 'FANCI'}},
 {'id': 'Q18044458-38CCF3C6-9277-4943-9A21-C4D76AC10FB8',
  'object': {'id': 'wd:Q7187', 'name': 'gene'},
  'predicate': {'id': 'wd:P31', 'name': 'instance of'},
  'subject': {'id': 'wd:Q18044458', 'name': 'FANCM'}},
 {'id': 'Q17927524-1D71748E-4C58-479F-8ACA-E9B2878E1E67',
  'object': {'id': 'wd:Q7187', 'name': 'gene'},
  'predicate': {'id': 'wd:P31', 'name': 'instance of'},
  'subject': {'id': 'wd:Q17927524', 'name': 'FANCG'}},
 {'id': 'Q17927502-D9BDB5EE-663A-4CD8-8022-37E8A5F6E1DF',
  'object': {'id': 'wd:Q7187', 'name': 'gene'},
  'predicate': {'id': 'wd:P31', 'name': 'instance of'},
  'subject': {'id': 'wd:Q17927502', 'name': 'FANCF'}},
 {'id': 'Q18039587-C2FE6736-C0F1-4E15-9874-7EEB04231FFF',
  'object': {'id': 'wd:Q7187', 'name': 'gene'},
  'predicate': {'id': 'wd:

In [103]:
# Keep only the statements whose predicate is labelled "ortholog".
ortholog_statements = [
    stmt
    for stmt in r.json()
    if stmt['predicate']['name'] == "ortholog"
]
# Pool the ids found on both ends of every ortholog statement, so each queried
# gene is kept alongside its orthologs; the set removes duplicates.
ortholog_qids = {
    stmt[role]['id']
    for stmt in ortholog_statements
    for role in ('object', 'subject')
}
print(len(ortholog_qids))

35


In [104]:
## Get NCBI Gene ids for the ortholog items collected above (35 in the recorded run)
endpoint = "translator/exactMatches/"
# Sort before joining: iteration order of a set of strings can change between
# kernel sessions, and a sorted list keeps the request reproducible.
params = {'c': ' '.join(sorted(ortholog_qids))}
# The timeout keeps the kernel from hanging indefinitely on an unresponsive host.
r = requests.get(api_url + endpoint, params=params, timeout=60)
# Fail loudly on an HTTP error instead of trying to parse an error page as JSON.
r.raise_for_status()
# Keep only NCBI Gene CURIEs. A prefix match is stricter than a substring test
# and will not pick up other identifiers that merely contain "NCBI".
ncbi = [x for x in r.json() if x.startswith("NCBIGene:")]

In [105]:
# Display the NCBI Gene CURIEs that were resolved for the ortholog items.
ncbi

['NCBIGene:29089',
 'NCBIGene:361435',
 'NCBIGene:104806',
 'NCBIGene:72775',
 'NCBIGene:100360594',
 'NCBIGene:60534',
 'NCBIGene:100040608',
 'NCBIGene:67030',
 'NCBIGene:2187',
 'NCBIGene:211651',
 'NCBIGene:691105',
 'NCBIGene:2177',
 'NCBIGene:208836',
 'NCBIGene:501552',
 'NCBIGene:499155',
 'NCBIGene:675',
 'NCBIGene:2189',
 'NCBIGene:305600',
 'NCBIGene:312641',
 'NCBIGene:14087',
 'NCBIGene:309643',
 'NCBIGene:2176',
 'NCBIGene:67196',
 'NCBIGene:2188',
 'NCBIGene:55120',
 'NCBIGene:2178',
 'NCBIGene:2175',
 'NCBIGene:57697',
 'NCBIGene:55215',
 'NCBIGene:360254',
 'NCBIGene:360847',
 'NCBIGene:14088',
 'NCBIGene:24361',
 'NCBIGene:12190',
 'NCBIGene:237211']