In [4]:
import re

from toolz.dicttoolz import itemfilter,get_in
from toolz.itertoolz import partition_all,first
from tqdm.notebook import tqdm
import yaml
from Bio import Entrez
import requests

In [5]:
%load_ext watermark

In [6]:
# *Always* tell NCBI who you are
Entrez.email = "seth.sims@gmail.com"

def retrieve_annotation(id_list):
    """Annotates Entrez Gene IDs using Bio.Entrez, in particular epost (to
    submit the data to NCBI) and esummary to retrieve the information.
    Returns a list of dictionaries with the annotations."""

    request = Entrez.epost(db="gene", id=','.join(id_list))
    try:
        result = Entrez.read(request)
    except RuntimeError as e:
        # FIXME: How generate NAs instead of causing an error with invalid IDs?
        print("An error occurred while retrieving the annotations.")
        print("The error returned was %s" % e)
        print(','.join(id_list))
        return []

    webEnv = result["WebEnv"]
    queryKey = result["QueryKey"]
    data = Entrez.esummary(db="gene", webenv=webEnv, query_key=queryKey)
    annotations = Entrez.read(data)

    #print("Retrieved %d annotations for %d genes" % (len(annotations), len(id_list)))
    return get_in(['DocumentSummarySet', 'DocumentSummary'], annotations)

In [81]:
response = requests.get(
    'http://rest.genenames.org/fetch/symbol/LIPT2',
    headers={
        'Accept': 'application/json'
    }
)
print(response)

<Response [200]>


In [82]:
response.json()

{'responseHeader': {'status': 0, 'QTime': 1},
 'response': {'numFound': 1,
  'start': 0,
  'docs': [{'hgnc_id': 'HGNC:37216',
    'symbol': 'LIPT2',
    'name': 'lipoyl(octanoyl) transferase 2',
    'status': 'Approved',
    'locus_type': 'gene with protein product',
    'prev_name': ['lipoyl(octanoyl) transferase 2 (putative)'],
    'alias_name': ['lipoate-protein ligase B',
     'lipoyl/octanoyl transferase',
     'octanoyl-[acyl-carrier-protein]-protein N-octanoyltransferase'],
    'location': '11q13.4',
    'date_approved_reserved': '2009-09-09T00:00:00Z',
    'date_modified': '2021-12-07T00:00:00Z',
    'date_name_changed': '2017-08-08T00:00:00Z',
    'enzyme_id': ['2.3.1.181'],
    'entrez_id': '387787',
    'mgd_id': ['MGI:1914414'],
    'orphanet': 456990,
    'pubmed_id': [28628643, 28757203],
    'refseq_accession': ['NM_001144869'],
    'vega_id': 'OTTHUMG00000165646',
    'lsdb': ['LRG_1089|http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_1089.xml'],
    'ensembl_gene_id': 'ENS

In [7]:
with open('../data/gene-names.yaml') as in_fd:
    name_map = yaml.load(in_fd, yaml.FullLoader)

selection_pattern = re.compile(r'LOC\d+')
def predicate(itm):
    k,v = itm
    return v is None and k is not None and selection_pattern.fullmatch(k)

loc_map = itemfilter(predicate, name_map)
for ids in tqdm(list(partition_all(10, [name[3:].strip() for name in loc_map]))):
    resp = retrieve_annotation(ids)

    for doc in tqdm(resp, leave=False):
        response = requests.get(
            f'http://rest.genenames.org/fetch/entrez_id/{doc["CurrentID"]}',
            headers={
                'Accept': 'application/json'
            }
        )
        if not requests.codes.ok:
            continue

        hgnc_data = response.json()['response']
        if hgnc_data['numFound'] != 1:
            continue

        name_map[doc['Name']] = hgnc_data['docs'][0]['symbol']
        
with open('../data/gene-names.yaml', 'wt') as out_fd:
    yaml.dump(name_map, stream=out_fd)

0it [00:00, ?it/s]

In [3]:
def predicate(itm):
    k,v = itm
    return v is None and k is not None

with open('../data/gene-names.yaml') as in_fd:
    name_map = yaml.load(in_fd, yaml.FullLoader)

skipped = 0
to_update = itemfilter(predicate, name_map)
with tqdm(desc='overall progress', total=len(to_update)) as pbar:
    for ids in partition_all(100, to_update):
        for symbol in tqdm(ids, leave=False, desc='Current batch'):
            response = requests.get(
                f'http://rest.genenames.org/fetch/symbol/{symbol}',
                headers={
                    'Accept': 'application/json'
                }
            )
            if not requests.codes.ok:
                continue

            hgnc_data = response.json()['response']
            if hgnc_data['numFound'] != 1:
                name_map[symbol] = False
                skipped += 1
                pbar.set_description(f'overall progress (skipped: {skipped})')
            else:
                name_map[symbol] = hgnc_data['docs'][0]['symbol']
                
            pbar.update()
            
        with open('../data/gene-names.yaml', 'wt') as out_fd:
            yaml.dump(name_map, stream=out_fd)

overall progress:   0%|          | 0/18681 [00:00<?, ?it/s]

Current batch:   0%|          | 0/100 [00:00<?, ?it/s]

Current batch:   0%|          | 0/100 [00:00<?, ?it/s]

Current batch:   0%|          | 0/100 [00:00<?, ?it/s]

Current batch:   0%|          | 0/100 [00:00<?, ?it/s]

Current batch:   0%|          | 0/100 [00:00<?, ?it/s]

Current batch:   0%|          | 0/100 [00:00<?, ?it/s]

Current batch:   0%|          | 0/100 [00:00<?, ?it/s]

Current batch:   0%|          | 0/100 [00:00<?, ?it/s]

Current batch:   0%|          | 0/100 [00:00<?, ?it/s]

Current batch:   0%|          | 0/100 [00:00<?, ?it/s]

Current batch:   0%|          | 0/100 [00:00<?, ?it/s]

Current batch:   0%|          | 0/100 [00:00<?, ?it/s]

Current batch:   0%|          | 0/100 [00:00<?, ?it/s]

Current batch:   0%|          | 0/100 [00:00<?, ?it/s]

Current batch:   0%|          | 0/100 [00:00<?, ?it/s]

Current batch:   0%|          | 0/100 [00:00<?, ?it/s]

Current batch:   0%|          | 0/100 [00:00<?, ?it/s]

Current batch:   0%|          | 0/100 [00:00<?, ?it/s]

Current batch:   0%|          | 0/100 [00:00<?, ?it/s]

Current batch:   0%|          | 0/100 [00:00<?, ?it/s]

Current batch:   0%|          | 0/100 [00:00<?, ?it/s]

Current batch:   0%|          | 0/100 [00:00<?, ?it/s]

Current batch:   0%|          | 0/100 [00:00<?, ?it/s]

Current batch:   0%|          | 0/100 [00:00<?, ?it/s]

Current batch:   0%|          | 0/100 [00:00<?, ?it/s]

Current batch:   0%|          | 0/100 [00:00<?, ?it/s]

Current batch:   0%|          | 0/100 [00:00<?, ?it/s]

Current batch:   0%|          | 0/100 [00:00<?, ?it/s]

Current batch:   0%|          | 0/100 [00:00<?, ?it/s]

Current batch:   0%|          | 0/100 [00:00<?, ?it/s]

Current batch:   0%|          | 0/100 [00:00<?, ?it/s]

Current batch:   0%|          | 0/100 [00:00<?, ?it/s]

Current batch:   0%|          | 0/100 [00:00<?, ?it/s]

Current batch:   0%|          | 0/100 [00:00<?, ?it/s]

Current batch:   0%|          | 0/100 [00:00<?, ?it/s]

Current batch:   0%|          | 0/100 [00:00<?, ?it/s]

Current batch:   0%|          | 0/100 [00:00<?, ?it/s]

Current batch:   0%|          | 0/100 [00:00<?, ?it/s]

Current batch:   0%|          | 0/100 [00:00<?, ?it/s]

Current batch:   0%|          | 0/100 [00:00<?, ?it/s]

Current batch:   0%|          | 0/100 [00:00<?, ?it/s]

Current batch:   0%|          | 0/100 [00:00<?, ?it/s]

Current batch:   0%|          | 0/100 [00:00<?, ?it/s]

Current batch:   0%|          | 0/100 [00:00<?, ?it/s]

Current batch:   0%|          | 0/100 [00:00<?, ?it/s]

Current batch:   0%|          | 0/100 [00:00<?, ?it/s]

Current batch:   0%|          | 0/100 [00:00<?, ?it/s]

Current batch:   0%|          | 0/100 [00:00<?, ?it/s]

Current batch:   0%|          | 0/100 [00:00<?, ?it/s]

Current batch:   0%|          | 0/100 [00:00<?, ?it/s]

Current batch:   0%|          | 0/100 [00:00<?, ?it/s]

Current batch:   0%|          | 0/100 [00:00<?, ?it/s]

Current batch:   0%|          | 0/100 [00:00<?, ?it/s]

Current batch:   0%|          | 0/100 [00:00<?, ?it/s]

Current batch:   0%|          | 0/100 [00:00<?, ?it/s]

Current batch:   0%|          | 0/100 [00:00<?, ?it/s]

Current batch:   0%|          | 0/100 [00:00<?, ?it/s]

Current batch:   0%|          | 0/100 [00:00<?, ?it/s]

Current batch:   0%|          | 0/100 [00:00<?, ?it/s]

Current batch:   0%|          | 0/100 [00:00<?, ?it/s]

Current batch:   0%|          | 0/100 [00:00<?, ?it/s]

Current batch:   0%|          | 0/100 [00:00<?, ?it/s]

Current batch:   0%|          | 0/100 [00:00<?, ?it/s]

Current batch:   0%|          | 0/100 [00:00<?, ?it/s]

Current batch:   0%|          | 0/100 [00:00<?, ?it/s]

Current batch:   0%|          | 0/100 [00:00<?, ?it/s]

Current batch:   0%|          | 0/100 [00:00<?, ?it/s]

Current batch:   0%|          | 0/100 [00:00<?, ?it/s]

Current batch:   0%|          | 0/100 [00:00<?, ?it/s]

Current batch:   0%|          | 0/100 [00:00<?, ?it/s]

Current batch:   0%|          | 0/100 [00:00<?, ?it/s]

Current batch:   0%|          | 0/100 [00:00<?, ?it/s]

Current batch:   0%|          | 0/100 [00:00<?, ?it/s]

Current batch:   0%|          | 0/100 [00:00<?, ?it/s]

Current batch:   0%|          | 0/100 [00:00<?, ?it/s]

Current batch:   0%|          | 0/100 [00:00<?, ?it/s]

Current batch:   0%|          | 0/100 [00:00<?, ?it/s]

Current batch:   0%|          | 0/100 [00:00<?, ?it/s]

Current batch:   0%|          | 0/100 [00:00<?, ?it/s]

Current batch:   0%|          | 0/100 [00:00<?, ?it/s]

Current batch:   0%|          | 0/100 [00:00<?, ?it/s]

Current batch:   0%|          | 0/100 [00:00<?, ?it/s]

Current batch:   0%|          | 0/100 [00:00<?, ?it/s]

Current batch:   0%|          | 0/100 [00:00<?, ?it/s]

Current batch:   0%|          | 0/100 [00:00<?, ?it/s]

Current batch:   0%|          | 0/100 [00:00<?, ?it/s]

Current batch:   0%|          | 0/100 [00:00<?, ?it/s]

Current batch:   0%|          | 0/100 [00:00<?, ?it/s]

Current batch:   0%|          | 0/100 [00:00<?, ?it/s]

Current batch:   0%|          | 0/100 [00:00<?, ?it/s]

Current batch:   0%|          | 0/100 [00:00<?, ?it/s]

Current batch:   0%|          | 0/100 [00:00<?, ?it/s]

Current batch:   0%|          | 0/100 [00:00<?, ?it/s]

Current batch:   0%|          | 0/100 [00:00<?, ?it/s]

Current batch:   0%|          | 0/100 [00:00<?, ?it/s]

Current batch:   0%|          | 0/100 [00:00<?, ?it/s]

Current batch:   0%|          | 0/100 [00:00<?, ?it/s]

Current batch:   0%|          | 0/100 [00:00<?, ?it/s]

Current batch:   0%|          | 0/100 [00:00<?, ?it/s]

Current batch:   0%|          | 0/100 [00:00<?, ?it/s]

Current batch:   0%|          | 0/100 [00:00<?, ?it/s]

Current batch:   0%|          | 0/100 [00:00<?, ?it/s]

Current batch:   0%|          | 0/100 [00:00<?, ?it/s]

Current batch:   0%|          | 0/100 [00:00<?, ?it/s]

Current batch:   0%|          | 0/100 [00:00<?, ?it/s]

Current batch:   0%|          | 0/100 [00:00<?, ?it/s]

Current batch:   0%|          | 0/100 [00:00<?, ?it/s]

Current batch:   0%|          | 0/100 [00:00<?, ?it/s]

Current batch:   0%|          | 0/100 [00:00<?, ?it/s]

Current batch:   0%|          | 0/100 [00:00<?, ?it/s]

Current batch:   0%|          | 0/100 [00:00<?, ?it/s]

Current batch:   0%|          | 0/100 [00:00<?, ?it/s]

Current batch:   0%|          | 0/100 [00:00<?, ?it/s]

Current batch:   0%|          | 0/100 [00:00<?, ?it/s]

Current batch:   0%|          | 0/100 [00:00<?, ?it/s]

Current batch:   0%|          | 0/100 [00:00<?, ?it/s]

Current batch:   0%|          | 0/100 [00:00<?, ?it/s]

Current batch:   0%|          | 0/100 [00:00<?, ?it/s]

Current batch:   0%|          | 0/100 [00:00<?, ?it/s]

Current batch:   0%|          | 0/100 [00:00<?, ?it/s]

Current batch:   0%|          | 0/100 [00:00<?, ?it/s]

Current batch:   0%|          | 0/100 [00:00<?, ?it/s]

Current batch:   0%|          | 0/100 [00:00<?, ?it/s]

Current batch:   0%|          | 0/100 [00:00<?, ?it/s]

Current batch:   0%|          | 0/100 [00:00<?, ?it/s]

Current batch:   0%|          | 0/100 [00:00<?, ?it/s]

Current batch:   0%|          | 0/100 [00:00<?, ?it/s]

Current batch:   0%|          | 0/100 [00:00<?, ?it/s]

Current batch:   0%|          | 0/100 [00:00<?, ?it/s]

Current batch:   0%|          | 0/100 [00:00<?, ?it/s]

Current batch:   0%|          | 0/100 [00:00<?, ?it/s]

Current batch:   0%|          | 0/100 [00:00<?, ?it/s]

Current batch:   0%|          | 0/100 [00:00<?, ?it/s]

Current batch:   0%|          | 0/100 [00:00<?, ?it/s]

Current batch:   0%|          | 0/100 [00:00<?, ?it/s]

Current batch:   0%|          | 0/100 [00:00<?, ?it/s]

Current batch:   0%|          | 0/100 [00:00<?, ?it/s]

Current batch:   0%|          | 0/100 [00:00<?, ?it/s]

Current batch:   0%|          | 0/100 [00:00<?, ?it/s]

Current batch:   0%|          | 0/100 [00:00<?, ?it/s]

Current batch:   0%|          | 0/100 [00:00<?, ?it/s]

Current batch:   0%|          | 0/100 [00:00<?, ?it/s]

Current batch:   0%|          | 0/100 [00:00<?, ?it/s]

Current batch:   0%|          | 0/100 [00:00<?, ?it/s]

Current batch:   0%|          | 0/100 [00:00<?, ?it/s]

Current batch:   0%|          | 0/100 [00:00<?, ?it/s]

Current batch:   0%|          | 0/100 [00:00<?, ?it/s]

Current batch:   0%|          | 0/100 [00:00<?, ?it/s]

Current batch:   0%|          | 0/100 [00:00<?, ?it/s]

Current batch:   0%|          | 0/100 [00:00<?, ?it/s]

Current batch:   0%|          | 0/100 [00:00<?, ?it/s]

Current batch:   0%|          | 0/100 [00:00<?, ?it/s]

Current batch:   0%|          | 0/100 [00:00<?, ?it/s]

Current batch:   0%|          | 0/100 [00:00<?, ?it/s]

Current batch:   0%|          | 0/100 [00:00<?, ?it/s]

Current batch:   0%|          | 0/100 [00:00<?, ?it/s]

Current batch:   0%|          | 0/100 [00:00<?, ?it/s]

Current batch:   0%|          | 0/100 [00:00<?, ?it/s]

Current batch:   0%|          | 0/100 [00:00<?, ?it/s]

Current batch:   0%|          | 0/100 [00:00<?, ?it/s]

Current batch:   0%|          | 0/100 [00:00<?, ?it/s]

Current batch:   0%|          | 0/100 [00:00<?, ?it/s]

Current batch:   0%|          | 0/100 [00:00<?, ?it/s]

Current batch:   0%|          | 0/100 [00:00<?, ?it/s]

Current batch:   0%|          | 0/100 [00:00<?, ?it/s]

Current batch:   0%|          | 0/100 [00:00<?, ?it/s]

Current batch:   0%|          | 0/100 [00:00<?, ?it/s]

Current batch:   0%|          | 0/100 [00:00<?, ?it/s]

Current batch:   0%|          | 0/100 [00:00<?, ?it/s]

Current batch:   0%|          | 0/100 [00:00<?, ?it/s]

Current batch:   0%|          | 0/100 [00:00<?, ?it/s]

Current batch:   0%|          | 0/100 [00:00<?, ?it/s]

Current batch:   0%|          | 0/100 [00:00<?, ?it/s]

Current batch:   0%|          | 0/100 [00:00<?, ?it/s]

Current batch:   0%|          | 0/100 [00:00<?, ?it/s]

Current batch:   0%|          | 0/100 [00:00<?, ?it/s]

Current batch:   0%|          | 0/100 [00:00<?, ?it/s]

Current batch:   0%|          | 0/100 [00:00<?, ?it/s]

Current batch:   0%|          | 0/100 [00:00<?, ?it/s]

Current batch:   0%|          | 0/100 [00:00<?, ?it/s]

Current batch:   0%|          | 0/100 [00:00<?, ?it/s]

Current batch:   0%|          | 0/100 [00:00<?, ?it/s]

Current batch:   0%|          | 0/100 [00:00<?, ?it/s]

Current batch:   0%|          | 0/100 [00:00<?, ?it/s]

Current batch:   0%|          | 0/100 [00:00<?, ?it/s]

Current batch:   0%|          | 0/100 [00:00<?, ?it/s]

Current batch:   0%|          | 0/81 [00:00<?, ?it/s]

In [43]:
# *Always* tell NCBI who you are
Entrez.email = "seth.sims@gmail.com"

def search_entrez(this_id):
    request = Entrez.esearch(db="gene", term=f'({this_id}[gene])AND(Homo sapiens[orgn])', retmax=1)
    try:
        result = Entrez.read(request)
        return get_in(['IdList', 0], result, no_default=True)
    except Exception as e:
        # FIXME: How generate NAs instead of causing an error with invalid IDs?
        print("An error occurred while retrieving the annotations.")
        print("The error returned was %s" % e)
        print(this_id)
        return None
    
def retrieve_annotation(id_list):
    """Annotates Entrez Gene IDs using Bio.Entrez, in particular epost (to
    submit the data to NCBI) and esummary to retrieve the information.
    Returns a list of dictionaries with the annotations."""

    request = Entrez.epost(db="gene", id=','.join(id_list))
    try:
        result = Entrez.read(request)
    except RuntimeError as e:
        # FIXME: How generate NAs instead of causing an error with invalid IDs?
        print("An error occurred while retrieving the annotations.")
        print("The error returned was %s" % e)
        print(','.join(id_list))
        return []

    webEnv = result["WebEnv"]
    queryKey = result["QueryKey"]
    data = Entrez.esummary(db="gene", webenv=webEnv, query_key=queryKey)
    annotations = Entrez.read(data)

    #print("Retrieved %d annotations for %d genes" % (len(annotations), len(id_list)))
    return get_in(['DocumentSummarySet', 'DocumentSummary'], annotations, no_default=True)

def predicate(itm):
    k,v = itm
    return not v and k is not None

with open('../data/gene-names.yaml') as in_fd:
    name_map = yaml.load(in_fd, yaml.FullLoader)

skipped = 0
to_update = itemfilter(predicate, name_map)
ids = {}
with tqdm(desc='searching symbols', total=len(to_update)) as pbar:
    for symbols in partition_all(100, to_update):
        ids.update({
            search_entrez(symbol) : symbol
                for symbol in tqdm(symbols, leave=False)
        })
        pbar.update(n=len(symbols))
        
with tqdm(desc='Updating Names', total=len(ids)) as pbar:
    for id_batch in tqdm(partition_all(100, ids), total=(len(ids)+50)//100, leave=False):
        resp = retrieve_annotation(filter(bool, id_batch))
        
        for this_id,doc in tqdm(zip(id_batch, resp), leave=False, total=len(id_batch)):
            pbar.update()
            name_map[ids[this_id]] = str(doc['Name'])
            
        with open('../data/gene-names.yaml', 'wt') as out_fd:
            yaml.dump(name_map, stream=out_fd)

searching symbols:   0%|          | 0/1484 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

An error occurred while retrieving the annotations.
The error returned was list index out of range
LOC157542
An error occurred while retrieving the annotations.
The error returned was list index out of range
2017/12/01


  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

An error occurred while retrieving the annotations.
The error returned was list index out of range
FLJ10038
An error occurred while retrieving the annotations.
The error returned was list index out of range
FLJ26850
An error occurred while retrieving the annotations.
The error returned was list index out of range
FLJ33360
An error occurred while retrieving the annotations.
The error returned was list index out of range
FLJ34503
An error occurred while retrieving the annotations.
The error returned was list index out of range
FLJ36777
An error occurred while retrieving the annotations.
The error returned was list index out of range
LOC100130417
An error occurred while retrieving the annotations.
The error returned was list index out of range
FLJ41941
An error occurred while retrieving the annotations.
The error returned was list index out of range
FLJ45079


  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

An error occurred while retrieving the annotations.
The error returned was list index out of range
LOC100126784
An error occurred while retrieving the annotations.
The error returned was list index out of range
LOC100128239
An error occurred while retrieving the annotations.
The error returned was list index out of range
LOC100129055
An error occurred while retrieving the annotations.
The error returned was list index out of range
LOC100129550
An error occurred while retrieving the annotations.
The error returned was list index out of range
LOC100130264
An error occurred while retrieving the annotations.
The error returned was list index out of range
LOC100132111
An error occurred while retrieving the annotations.
The error returned was list index out of range
LOC100132215
An error occurred while retrieving the annotations.
The error returned was list index out of range
LOC100132831
An error occurred while retrieving the annotations.
The error returned was list index out of range
LOC10

  0%|          | 0/100 [00:00<?, ?it/s]

An error occurred while retrieving the annotations.
The error returned was list index out of range
LOC151174
An error occurred while retrieving the annotations.
The error returned was list index out of range
LOC152225
An error occurred while retrieving the annotations.
The error returned was list index out of range
LOC153684
An error occurred while retrieving the annotations.
The error returned was list index out of range
LOC220729
An error occurred while retrieving the annotations.
The error returned was list index out of range
LOC221122
An error occurred while retrieving the annotations.
The error returned was list index out of range
LOC283332
An error occurred while retrieving the annotations.
The error returned was list index out of range
LOC283856
An error occurred while retrieving the annotations.
The error returned was list index out of range
LOC283922
An error occurred while retrieving the annotations.
The error returned was list index out of range
LOC284023
An error occurred w

  0%|          | 0/100 [00:00<?, ?it/s]

An error occurred while retrieving the annotations.
The error returned was list index out of range
2017/03/11
An error occurred while retrieving the annotations.
The error returned was list index out of range
2017/03/01
An error occurred while retrieving the annotations.
The error returned was list index out of range
2017/03/02
An error occurred while retrieving the annotations.
The error returned was list index out of range
2017/03/04
An error occurred while retrieving the annotations.
The error returned was list index out of range
2017/03/05
An error occurred while retrieving the annotations.
The error returned was list index out of range
2017/03/06
An error occurred while retrieving the annotations.
The error returned was list index out of range
2017/03/07
An error occurred while retrieving the annotations.
The error returned was list index out of range
2017/03/08
An error occurred while retrieving the annotations.
The error returned was list index out of range
2017/03/09
An error o

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

An error occurred while retrieving the annotations.
The error returned was list index out of range
PP14571
An error occurred while retrieving the annotations.
The error returned was list index out of range
LOC391003


  0%|          | 0/100 [00:00<?, ?it/s]

An error occurred while retrieving the annotations.
The error returned was list index out of range
RhDVa(TT)


  0%|          | 0/100 [00:00<?, ?it/s]

An error occurred while retrieving the annotations.
The error returned was list index out of range
2017/02/03
An error occurred while retrieving the annotations.
The error returned was list index out of range
2017/09/15
An error occurred while retrieving the annotations.
The error returned was list index out of range
2017/09/10
An error occurred while retrieving the annotations.
The error returned was list index out of range
2017/09/11
An error occurred while retrieving the annotations.
The error returned was list index out of range
2017/09/12
An error occurred while retrieving the annotations.
The error returned was list index out of range
2017/09/14
An error occurred while retrieving the annotations.
The error returned was list index out of range
2017/09/01
An error occurred while retrieving the annotations.
The error returned was list index out of range
2017/09/02
An error occurred while retrieving the annotations.
The error returned was list index out of range
2017/09/03
An error o

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/84 [00:00<?, ?it/s]

Updating Names:   0%|          | 0/1357 [00:00<?, ?it/s]

  0%|          | 0/14 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/57 [00:00<?, ?it/s]

In [3]:
with open('../data/gene-names.yaml') as in_fd:
    name_map = yaml.load(in_fd, yaml.FullLoader)

def predicate(itm):
    k,v = itm
    return not v

to_update = itemfilter(predicate, name_map)
print('names left unresolved', len(to_update))
to_update

names left unresolved 0


{}

In [None]:
%watermark \
--python \
--packages Bio  \
--date --updated