In [1]:
import bz2
import json
import os
import sys
import traceback
from pymongo import MongoClient
from tqdm import tqdm
from datetime import datetime
from requests import get


In [2]:

def create_indexes(db):
    """Create every index the importer relies on in the given database.

    For each collection listed below, one ascending single-field index is
    created per field. The 'cache' and 'items' collections additionally get
    a unique compound index, which is created before their single-field ones.

    :param db: database handle; indexed by collection name, and each
        collection must expose ``create_index``.
    """
    # Collection name -> fields that each receive their own ascending index.
    single_field_indexes = {
        'cache': ['cell', 'lastAccessed'],
        'items': ['id_entity', 'entity', 'category', 'popularity'],
        'literals': ['id_entity', 'entity'],
        'mappings': ['curid', 'wikipedia_id', 'wikidata_id', 'dbpedia_id'],
        'objects': ['id_entity', 'entity'],
        'types': ['id_entity', 'entity'],
    }
    # Collection name -> compound key that must be unique within the collection.
    unique_compound_indexes = {
        'cache': [('cell', 1), ('fuzzy', 1), ('type', 1), ('kg', 1), ('limit', 1)],
        'items': [('entity', 1), ('category', 1)],
    }

    for name, field_names in single_field_indexes.items():
        target = db[name]
        compound_key = unique_compound_indexes.get(name)
        if compound_key is not None:
            target.create_index(compound_key, unique=True)
        for field_name in field_names:
            target.create_index([(field_name, 1)])  # 1 == ascending


# Initial estimation used to seed the progress bar before any line has been read.
initial_estimated_average_size = 800  # Initial average size in bytes, can be adjusted
BATCH_SIZE = 100 # Number of entities to insert in a single batch

# Abort when the interpreter was started with no extra argument.
if len(sys.argv) < 2:
    print("Usage: python script_name.py <path_to_wikidata_dump>")
    sys.exit(1)

# NOTE(review): the path is hard-coded; the command-line argument checked above is never read.
file_path = './my-data/latest-all.json.bz2'
compressed_file_size = os.path.getsize(file_path)
# Rough first guess for the number of lines: compressed bytes / assumed bytes per line.
initial_total_lines_estimate = compressed_file_size / initial_estimated_average_size

# Opened in binary mode, so iterating it yields bytes lines. It is never closed explicitly.
file = bz2.BZ2File(file_path, "r")

# MongoDB connection setup
# MONGO_ENDPOINT must have the form "host:port" (the split is unpacked into exactly two parts).
MONGO_ENDPOINT, MONGO_ENDPOINT_PORT = os.environ["MONGO_ENDPOINT"].split(":")
MONGO_ENDPOINT_PORT = int(MONGO_ENDPOINT_PORT)
MONGO_ENDPOINT_USERNAME = os.environ["MONGO_INITDB_ROOT_USERNAME"]
MONGO_ENDPOINT_PASSWORD = os.environ["MONGO_INITDB_ROOT_PASSWORD"]
# The target database is named after the local date of the run: "wikidata" + DDMMYYYY.
current_date = datetime.now()
formatted_date = current_date.strftime("%d%m%Y")
DB_NAME = f"wikidata{formatted_date}"

client = MongoClient(MONGO_ENDPOINT, MONGO_ENDPOINT_PORT, username=MONGO_ENDPOINT_USERNAME, password=MONGO_ENDPOINT_PASSWORD)
# Errors are logged to the fixed "wikidata" database, not to the dated one.
log_c = client.wikidata.log
items_c = client[DB_NAME].items
objects_c = client[DB_NAME].objects
literals_c = client[DB_NAME].literals
types_c = client[DB_NAME].types

# Buffer key -> destination collection; used when the buffer is flushed.
c_ref = {
    "items": items_c,
    "objects":objects_c, 
    "literals":literals_c, 
    "types":types_c
}

create_indexes(client[DB_NAME])

# Pending documents per destination collection (same keys as c_ref).
buffer = {
    "items": [],
    "objects": [], 
    "literals": [], 
    "types": []
}

# Wikidata snak datatype -> literal bucket name under which its values are stored.
DATATYPES_MAPPINGS = {
    'external-id':'STRING',
    'quantity': 'NUMBER',
    'globe-coordinate': 'STRING',
    'string': 'STRING',
    'monolingualtext': 'STRING',
    'commonsMedia': 'STRING',
    'time': 'DATETIME',
    'url': 'STRING',
    'geo-shape': 'GEOSHAPE',
    'math': 'MATH',
    'musical-notation': 'MUSICAL_NOTATION',
    'tabular-data': 'TABULAR_DATA'
}
# Distinct bucket names; built through a set, so the list order is not fixed.
DATATYPES = list(set(DATATYPES_MAPPINGS.values()))
# Running totals maintained by update_average_size().
total_size_processed = 0
num_entities_processed = 0



def update_average_size(new_size):
    """Fold one more line size into the running totals and return the new mean.

    Updates the module-level ``total_size_processed`` and
    ``num_entities_processed`` counters.

    :param new_size: size of the line that was just processed
    :return: total size seen so far divided by the number of lines seen so far
    """
    global total_size_processed, num_entities_processed
    num_entities_processed = num_entities_processed + 1
    total_size_processed = total_size_processed + new_size
    running_mean = total_size_processed / num_entities_processed
    return running_mean


def check_skip(obj, datatype):
    """Tell whether a claim should be ignored by the importer.

    A claim is skipped when its snak carries no "datavalue", or when its
    datatype is one of the lexeme-related types.

    :param obj: claim dict; its "mainsnak" entry is inspected when present,
        otherwise the dict itself is treated as the snak
    :param datatype: datatype string of the snak
    :return: True when the claim must be skipped
    """
    snak = obj["mainsnak"] if "mainsnak" in obj else obj
    has_value = "datavalue" in snak
    lexeme_types = {"wikibase-lexeme", "wikibase-form", "wikibase-sense"}
    return (not has_value) or (datatype in lexeme_types)


def get_value(obj, datatype):
    """Extract the literal value of a claim according to its datatype.

    * "globe-coordinate" -> the string "<latitude>,<longitude>"
    * "quantity" -> the "amount" field
    * "monolingualtext" -> the "text" field
    * "time" -> the "time" field
    * anything else -> the raw datavalue value

    :param obj: claim dict; its "mainsnak" entry is used when present,
        otherwise the dict itself is treated as the snak
    :param datatype: datatype string of the snak
    :return: the extracted value
    """
    snak = obj.get("mainsnak", obj)
    payload = snak["datavalue"]["value"]

    if datatype == "globe-coordinate":
        return f"{payload['latitude']},{payload['longitude']}"

    # Datatypes whose value is a dict from which a single field is kept.
    field_by_datatype = {
        "quantity": "amount",
        "monolingualtext": "text",
        "time": "time",
    }
    field = field_by_datatype.get(datatype)
    return payload if field is None else payload[field]


def flush_buffer(buffer):
    """Write every non-empty pending list to its collection and reset it.

    :param buffer: dict mapping a key of ``c_ref`` to the list of documents
        waiting to be inserted; emptied lists are replaced by fresh ones
    """
    for name, pending in buffer.items():
        if not pending:
            continue
        c_ref[name].insert_many(pending)
        buffer[name] = []
            
def _build_tree_query(roots, properties, forward):
    """Build the SPARQL text selecting every item reachable from the roots.

    :param roots: list of numeric item ids (without the "Q")
    :param properties: list of numeric property ids (without the "P")
    :param forward: True to walk root -> item, False to walk item -> root
    :return: the complete query string, prefixes included
    """
    prefixes = (
        "PREFIX wikibase: <http://wikiba.se/ontology#>\n"
        "PREFIX wd: <http://www.wikidata.org/entity/>\n"
        "PREFIX wdt: <http://www.wikidata.org/prop/direct/>\n"
        "PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>\n"
    )
    # Several properties are combined with the property-path alternation "|".
    path = "(" + "|".join(f"wdt:P{prop}" for prop in properties) + ")*"
    root_terms = [f"wd:Q{root}" for root in roots]

    if len(root_terms) == 1:
        # Keep the root inline as a constant, as the single-root query always did.
        root_term = root_terms[0]
        values_clause = ""
    else:
        root_term = "?root"
        values_clause = "  VALUES ?root { " + " ".join(root_terms) + " }\n"

    if forward:
        triple = f"{root_term} {path} ?WD_id ."
    else:
        triple = f"?WD_id {path} {root_term} ."

    return prefixes + "SELECT ?WD_id WHERE {\n" + values_clause + "  " + triple + "\n}"


def get_wikidata_item_tree_item_idsSPARQL(root_items, forward_properties=None, backward_properties=None):
    """Return ids of WikiData items, which are in the tree spanned by the given root items and claims relating them
        to other items.

    The ids are obtained with one request to the Wikidata SPARQL endpoint. When both property lists are given,
    only ``forward_properties`` is used (``backward_properties`` is ignored).

    :param root_items: iterable[int] One or multiple item entities that are the root elements of the tree
    :param forward_properties: iterable[int] | None property-claims to follow forward; that is, if root item R has
        a claim P:I, and P is in the list, the search will branch recursively to item I as well.
    :param backward_properties: iterable[int] | None property-claims to follow in reverse; that is, if (for a root
        item R) an item I has a claim P:R, and P is in the list, the search will branch recursively to item I as well.
    :return: list[int]: numeric ids of the WikiData items in the tree; an empty list when no root is given
    :raises ValueError: when neither forward_properties nor backward_properties is provided
    """
    if forward_properties:
        properties, forward = list(forward_properties), True
    elif backward_properties:
        properties, forward = list(backward_properties), False
    else:
        # Without a property there is no SELECT to send; fail before hitting the network.
        raise ValueError("forward_properties or backward_properties must be provided")

    roots = list(root_items)
    if not roots:
        return []

    query = _build_tree_query(roots, properties, forward)

    url = 'https://query.wikidata.org/bigdata/namespace/wdq/sparql'
    data = get(url, params={'query': query, 'format': 'json'}).json()

    ids = []
    for binding in data['results']['bindings']:
        # The value is an entity URI; keep its last path segment without the "Q".
        tail = binding["WD_id"]["value"].split("/")[-1].lstrip("Q")
        try:
            ids.append(int(tail))
        except ValueError:
            # Not a plain item id (e.g. a property such as "P31"): ignore it.
            continue
    return ids
            
def _classify_ner_type(item, description, geolocation_subclass, organization_subclass):
    """Return the NER label (PERS, LOC, ORG, OTHERS) of an item, or None for anything that is not an item."""
    if item.get("type") != "item" or "claims" not in item:
        return None

    numeric_ids = [
        claim.get("mainsnak", {}).get("datavalue", {}).get("value", {}).get("numeric-id")
        for claim in item["claims"].get("P31", [])
    ]
    if not numeric_ids:
        return "OTHERS"

    # PERSON: any "instance of (P31) human (Q5)" statement makes the item a person.
    if 5 in numeric_ids:
        return "PERS"

    # LOCATION: a P31 target in the geographic-location set, or a telling English description.
    description_text = description.get('value', '').lower()
    looks_like_place = any(k in description_text for k in ("district", "city", "country", "capital"))
    if looks_like_place or any(n in geolocation_subclass for n in numeric_ids):
        return "LOC"

    # ORGANIZATION: a P31 target in the organization set.
    if any(n in organization_subclass for n in numeric_ids):
        return "ORG"

    return "OTHERS"


def _build_urls(entity, labels):
    """Return the Wikidata, Wikipedia and DBpedia URLs derived from the entity id and its English label."""
    english = labels.get("en", {})
    lang = english.get("language", "")
    title = english.get("value", "")
    # Upper-case only the first character: str.capitalize() would also lower-case the rest
    # ("George Washington" -> "George washington").
    dbpedia_title = title[:1].upper() + title[1:]
    return {
        "WD_id_URL": "http://www.wikidata.org/wiki/" + entity,
        "WP_id_URL": "http://" + lang + ".wikipedia.org/wiki/" + title.replace(" ", "_"),
        "dbpedia_URL": "http://dbpedia.org/resource/" + dbpedia_title.replace(" ", "_"),
    }


def parse_data(item, i, geolocation_subclass, organization_subclass):
    """Turn one Wikidata entity into four documents and queue them for insertion.

    One document is appended to each list of the module-level ``buffer``
    ("items", "objects", "literals", "types"); the buffer is flushed once it
    holds at least ``BATCH_SIZE`` items. A summary line is printed per entity.

    The NER label is chosen from all "instance of" (P31) claims with the
    priority PERS > LOC > ORG > OTHERS, so it does not depend on claim order.
    It is None when the entity is not of type "item" or has no "claims" key.

    :param item: entity dict; reads "id", "type", "labels", "aliases",
        "descriptions", "sitelinks" and "claims"
    :param i: value stored as "id_entity" in every produced document
    :param geolocation_subclass: container of numeric item ids counted as locations
    :param organization_subclass: container of numeric item ids counted as organizations
    :raises KeyError: when "id" is missing, or a claim has a datatype absent
        from ``DATATYPES_MAPPINGS``
    """
    entity = item["id"]
    labels = item.get("labels", {})
    aliases = item.get("aliases", {})
    description = item.get('descriptions', {}).get('en', {})
    sitelinks = item.get("sitelinks", {})
    # Tolerate entities without a "claims" key instead of raising KeyError.
    predicates = item.get("claims", {})
    popularity = max(len(sitelinks), 1)

    all_labels = {lang: label["value"] for lang, label in labels.items()}
    # Aliases are de-duplicated per language.
    all_aliases = {
        lang: list({alias["value"] for alias in lang_aliases})
        for lang, lang_aliases in aliases.items()
    }

    # kind: anything with a "subclass of" (P279) claim is a type; properties override that.
    category = "entity"
    if "P279" in predicates:
        category = "type"
    if entity[0] == "P":
        category = "predicate"

    NERtype = _classify_ner_type(item, description, geolocation_subclass, organization_subclass)
    url_dict = _build_urls(entity, labels)

    print(f"Item: {labels.get('en', {}).get('value', '')} --> NERtype: {NERtype}, kind: {category}, description: {description.get('value', '')}")

    objects = {}
    literals = {datatype: {} for datatype in DATATYPES}
    types = {"P31": []}
    documents = {
        "items": {
            "id_entity": i,
            "entity": entity,
            "description": description,
            "labels": all_labels,
            "aliases": all_aliases,
            "types": types,
            "popularity": popularity,
            "category": category,   # kind (entity, type or predicate)
            "NERtype": NERtype,     # PERS, LOC, ORG, OTHERS or None
            "URLs": url_dict,
        },
        "objects": {
            "id_entity": i,
            "entity": entity,
            "objects": objects,
        },
        "literals": {
            "id_entity": i,
            "entity": entity,
            "literals": literals,
        },
        "types": {
            "id_entity": i,
            "entity": entity,
            "types": types,
        },
    }

    for predicate, claims in predicates.items():
        for obj in claims:
            datatype = obj["mainsnak"]["datatype"]

            if check_skip(obj, datatype):
                continue

            if datatype in ("wikibase-item", "wikibase-property"):
                value = obj["mainsnak"]["datavalue"]["value"]["id"]

                # Both "instance of" (P31) and "occupation" (P106) targets are stored under "P31".
                if predicate in ("P31", "P106"):
                    types["P31"].append(value)

                objects.setdefault(value, []).append(predicate)
            else:
                value = get_value(obj, datatype)
                bucket = literals[DATATYPES_MAPPINGS[datatype]]
                bucket.setdefault(predicate, []).append(value)

    for key in buffer:
        buffer[key].append(documents[key])

    # ">=" so that a buffer that somehow grew past the batch size is still flushed.
    if len(buffer["items"]) >= BATCH_SIZE:
        flush_buffer(buffer)


def _subclass_ids(root_id):
    """Return, as a set, the numeric ids obtained by following "subclass of" (P279) backwards from root_id."""
    return set(get_wikidata_item_tree_item_idsSPARQL([root_id], backward_properties=[279]))


def parse_wikidata_dump():
    """Read the opened dump line by line and queue every entity for insertion.

    First the location and organization class sets are fetched over SPARQL
    (root class minus the excluded branches). Then every line of the
    module-level ``file`` is decoded as JSON and handed to ``parse_data``;
    lines that are not valid JSON are skipped, and any other failure is
    recorded in ``log_c``. Whatever is still buffered at the end is flushed.

    :raises RuntimeError: when a SPARQL response cannot be decoded; without
        the class sets no entity could be classified
    """
    try:
        # Geographic location (Q2221906) minus food, educational institution,
        # government agency, international organization and time zone.
        geolocation_subclass = _subclass_ids(2221906)
        for excluded_root in (2095, 2385804, 327333, 484652, 12143):
            geolocation_subclass -= _subclass_ids(excluded_root)

        # Organization (Q43229) minus country, city, capital, administrative
        # territorial entity, venue (Q17350442), sports league and family (Q8436).
        organization_subclass = _subclass_ids(43229)
        for excluded_root in (6256, 515, 5119, 15916867, 17350442, 623109, 8436):
            organization_subclass -= _subclass_ids(excluded_root)
    except json.decoder.JSONDecodeError as err:
        # Continuing would make every entity fail on the undefined class sets.
        raise RuntimeError("Could not retrieve the subclass lists from the SPARQL endpoint") from err

    # NOTE(review): the estimate divides the compressed file size by the size of
    # decompressed lines, so the displayed total is only a rough indication.
    with tqdm(total=initial_total_lines_estimate) as pbar:
        for i, line in enumerate(file):
            item = None
            try:
                # Entity lines end with ",\n" except the last one, which ends with "\n" only;
                # the "[" and "]" lines fail to decode and are skipped below.
                comma = b"," if isinstance(line, bytes) else ","
                item = json.loads(line.strip().rstrip(comma))
                current_average_size = update_average_size(len(line))

                # Dynamically update the total based on the current average size
                pbar.total = round(compressed_file_size / current_average_size)
                pbar.update(1)

                parse_data(item, i, geolocation_subclass, organization_subclass)
            except json.decoder.JSONDecodeError:
                continue
            except Exception as e:
                entity_id = item.get("id") if isinstance(item, dict) else None
                log_c.insert_one({
                    "entity": entity_id,
                    "error": str(e),
                    "traceback_str": traceback.format_exc(),
                })

        if len(buffer["items"]) > 0:
            flush_buffer(buffer)

# Run the import, then report the mean line size that was observed.
parse_wikidata_dump()
if num_entities_processed:
    final_average_size = total_size_processed / num_entities_processed
    print(f"Final average size of an entity: {final_average_size} bytes")
else:
    # Nothing was parsed, so there is no average to compute (avoids ZeroDivisionError).
    final_average_size = None
    print("No entities were processed; the average entity size is undefined.")
# Optionally store this value for future use



  0%|          | 12/408160 [00:00<2:53:09, 39.29it/s]

Item: Belgium --> NERtype: LOC, kind: entity, description: country in western Europe
Item: happiness --> NERtype: OTHERS, kind: type, description: mental or emotional state of well-being characterized by pleasant emotions
Item: George Washington --> NERtype: PERS, kind: entity, description: president of the United States from 1789 to 1797
Item: Jack Bauer --> NERtype: OTHERS, kind: entity, description: character from the television series 24
Item: Douglas Adams --> NERtype: PERS, kind: entity, description: English author and humourist (1952–2001)
Item: Paul Otlet --> NERtype: PERS, kind: entity, description: Belgian author, librarian and anti-colonial thinker
Item: Wikidata --> NERtype: OTHERS, kind: entity, description: free knowledge graph hosted by Wikimedia and edited by volunteers
Item: Portugal --> NERtype: LOC, kind: entity, description: country in Southwestern Europe
Item: Antarctica --> NERtype: LOC, kind: entity, description: polar continent in the Southern Hemisphere
Item: p

  0%|          | 19/427271 [00:00<2:36:53, 45.39it/s]

Item: November --> NERtype: OTHERS, kind: type, description: eleventh month in the Julian and Gregorian calendars
Item: lion --> NERtype: OTHERS, kind: type, description: species of big cat
Item: dog --> NERtype: OTHERS, kind: type, description: domestic animal
Item: kitten --> NERtype: OTHERS, kind: type, description: young cat
Item: People's Republic of China --> NERtype: LOC, kind: entity, description: country in East Asia
Item: Brazil --> NERtype: LOC, kind: entity, description: country in South America
Item: Yorkshire --> NERtype: LOC, kind: entity, description: historic county of England
Item: pizza --> NERtype: OTHERS, kind: type, description: popular Italian dish with a flat dough-based base and toppings
Item: pasta --> NERtype: OTHERS, kind: type, description: Italian food made from flour, eggs and water and shaped in different forms, usually cooked and served with a sauce


  0%|          | 30/345330 [00:00<2:48:42, 34.11it/s]

Item: Germany --> NERtype: LOC, kind: entity, description: country in Central Europe
Item: George W. Bush --> NERtype: PERS, kind: entity, description: President of the United States from 2001 to 2009
Item: right angle --> NERtype: OTHERS, kind: type, description: 90° angle (π/2 radians): an angle that bisects the angle formed by two halves of a straight line
Item: Malta --> NERtype: OTHERS, kind: entity, description: sovereign state in Southern Europe situated on an archipelago in the Mediterranean Sea
Item: Talisker distillery --> NERtype: ORG, kind: entity, description: whisky distillery in Highland, Scotland, UK
Item: Tours --> NERtype: LOC, kind: entity, description: city and commune in Indre-et-Loire, Centre-Val de Loire, France
Item: Diego Velázquez --> NERtype: PERS, kind: entity, description: Spanish painter (1599-1660)
Item: Chile --> NERtype: LOC, kind: entity, description: country in South America
Item: dictatorship --> NERtype: OTHERS, kind: type, description: autocratic f

  0%|          | 51/451086 [00:01<2:00:13, 62.53it/s]

Item: World War II --> NERtype: OTHERS, kind: entity, description: 1939–1945 global conflict
Item: Augusto Pinochet --> NERtype: PERS, kind: entity, description: dictator of Chile from 1973 to 1990
Item: Bahrain --> NERtype: LOC, kind: entity, description: country in the Persian Gulf
Item: astrobiology --> NERtype: OTHERS, kind: type, description: study of the formation of life on Earth and elsewhere
Item: Pioneer plaque --> NERtype: OTHERS, kind: entity, description: plaque attached to the Pioneer 10 and Pioneer 11 spacecraft in case extraterrestrial life finds them
Item: zoology --> NERtype: OTHERS, kind: type, description: scientific study of animals
Item: Gmina Kurów --> NERtype: LOC, kind: entity, description: Polish rural gmina in Lublin Voivodeship
Item: Ich bin ein Berliner --> NERtype: OTHERS, kind: entity, description: speech given by John F. Kennedy in West Berlin in June 1963
Item: Encyclopédie --> NERtype: OTHERS, kind: entity, description: general encyclopedia published i

  0%|          | 65/486758 [00:01<2:17:02, 59.19it/s]

Item: Lisbon --> NERtype: LOC, kind: entity, description: capital city of Portugal
Item: Beaujolais wine --> NERtype: OTHERS, kind: type, description: wine from the Beaujolais region of France
Item: Nicolaus Copernicus --> NERtype: PERS, kind: entity, description: Polish mathematician and astronomer (1473–1543)
Item: Neil Young --> NERtype: PERS, kind: entity, description: Canadian singer, songwriter and filmmaker (born 1945)
Item: planet --> NERtype: OTHERS, kind: type, description: celestial body directly orbiting a star or stellar remnant
Item: Harald Krichel --> NERtype: PERS, kind: entity, description: German photographer; former vice chair of Wikimedia Deutschland
Item: Po --> NERtype: LOC, kind: entity, description: longest river in Italy
Item: Rennes --> NERtype: LOC, kind: entity, description: capital city of the region of Brittany, France
Item: Lille --> NERtype: LOC, kind: entity, description: city and commune in Nord, Hauts-de-France, Northern France
Item: north --> NERtype

  0%|          | 87/505239 [00:01<1:59:08, 70.66it/s]

Item: vanadium --> NERtype: OTHERS, kind: type, description: chemical element with symbol V and atomic number 23
Item: Francesinha --> NERtype: OTHERS, kind: type, description: Portuguese sandwich
Item: croque-madame --> NERtype: OTHERS, kind: type, description: baked or fried ham and cheese sandwich served with a poached or lightly fried egg on top
Item: Pierre Corneille --> NERtype: PERS, kind: entity, description: French tragedian (1606–1684)
Item: Groningen --> NERtype: LOC, kind: entity, description: capital city of the province of Groningen, the Netherlands
Item: fungus --> NERtype: OTHERS, kind: type, description: biological kingdom, separate from plants and animals
Item: Massachusetts --> NERtype: LOC, kind: entity, description: state of the United States of America
Item: Israel --> NERtype: LOC, kind: entity, description: country in Western-Asia
Item: Lausanne --> NERtype: LOC, kind: entity, description: capital city of the canton of Vaud, Switzerland
Item: Gabriel Gonzáles Vi

  0%|          | 95/494080 [00:01<2:26:33, 56.18it/s]

Item: Thailand --> NERtype: LOC, kind: entity, description: country in Southeast Asia
Item: Meryl Streep --> NERtype: PERS, kind: entity, description: American actress (born 1949)
Item: United Arab Emirates --> NERtype: LOC, kind: entity, description: sovereign state in Southwest Asia
Item: platinum --> NERtype: OTHERS, kind: type, description: chemical element with symbol Pt and atomic number 78
Item: Novosibirsk --> NERtype: LOC, kind: entity, description: Russian city; administrative center of Siberian Federal District
Item: Nizhny Novgorod --> NERtype: LOC, kind: entity, description: capital of the Nizhny Novgorod Oblast and the Volga Federal District in central Russia
Item: Omsk --> NERtype: LOC, kind: entity, description: city in Russia
Item: Suez Canal --> NERtype: LOC, kind: entity, description: artificial sea-level waterway in Egypt
Item: scientist --> NERtype: OTHERS, kind: type, description: person who use scientific methods to study in an area of interest
Item: Erta Ale -->

  0%|          | 102/499365 [00:02<2:45:37, 50.24it/s]

Item: Mali --> NERtype: LOC, kind: entity, description: country in West Africa
Item: Angola --> NERtype: LOC, kind: entity, description: country on the west coast of Southern Africa
Item: Brač --> NERtype: LOC, kind: entity, description: island of Croatia
Item: yellow --> NERtype: OTHERS, kind: type, description: color
Item: Donald Tusk --> NERtype: PERS, kind: entity, description: Polish politician, prime minister of Poland since 2023 and between 2007–2014, president of the European Council in the period 2014–2019 (8th European legislature)
Item: toilet paper orientation --> NERtype: OTHERS, kind: entity, description: orientation of the free end of a roll of toilet paper, either over or under the roll, when used with a holder with an axle parallel to both the wall and floor
Item: Reggiolo --> NERtype: LOC, kind: entity, description: town in the province of Reggio Emilia, Emilia-Romagna, Italy
Item: list of lists of lists --> NERtype: OTHERS, kind: entity, description: Wikimedia list o

  0%|          | 125/536636 [00:02<2:04:30, 71.81it/s]

Item: Sierra Leone --> NERtype: LOC, kind: entity, description: sovereign state in West Africa
Item: Sudan --> NERtype: LOC, kind: entity, description: country in Northeast Africa
Item: Italo Balbo --> NERtype: PERS, kind: entity, description: Italian Marshal of the Air Force and minister
Item: metabolism --> NERtype: OTHERS, kind: type, description: set of chemical reactions and pathways by which living organisms transform chemical substances
Item: Narendra Modi --> NERtype: PERS, kind: entity, description: 14th and current Prime Minister of India
Item: geography --> NERtype: OTHERS, kind: type, description: science that studies the terrestrial surface, the societies that inhabit it and the territories, landscapes, places or regions that form it when interacting with each other
Item: Star Trek --> NERtype: OTHERS, kind: entity, description: science fiction media franchise
Item: Limburg --> NERtype: LOC, kind: entity, description: province of the Netherlands
Item: antimony --> NERtype:

  0%|          | 142/562065 [00:02<2:17:09, 68.28it/s]

Item: dubnium --> NERtype: OTHERS, kind: type, description: chemical element with the atomic number of 105
Item: Cottian Alps --> NERtype: LOC, kind: entity, description: mountain range in the South-Western part of the Alps
Item: Ban Ki-moon --> NERtype: PERS, kind: entity, description: 8th Secretary-General of the United Nations
Item: Kofi Annan --> NERtype: PERS, kind: entity, description: 7th Secretary-General of the United Nations (1938-2018)
Item: meitnerium --> NERtype: OTHERS, kind: entity, description: chemical element with atomic number 109 and symbol Mt
Item: jam --> NERtype: OTHERS, kind: type, description: spread made from fruit
Item: Pennine Alps --> NERtype: LOC, kind: entity, description: mountain range in the Western part of the Alps
Item: Leonard Cohen --> NERtype: PERS, kind: entity, description: Canadian poet and singer-songwriter (1934–2016)
Item: Rhaetian Alps --> NERtype: LOC, kind: entity, description: mountain range in the Central-Eastern Alps
Item: Category:Con

  0%|          | 163/587640 [00:02<2:01:06, 80.84it/s]

Item: Tripura --> NERtype: LOC, kind: entity, description: Indian state
Item: Dave Arneson --> NERtype: PERS, kind: entity, description: American game designer (1947-2009)
Item: Uetersen --> NERtype: LOC, kind: entity, description: German town in Schleswig-Holstein
Item: Otho --> NERtype: PERS, kind: entity, description: seventh Emperor of Ancient Rome (32-69)
Item: Titus --> NERtype: PERS, kind: entity, description: emperor of Ancient Rome
Item: field hockey --> NERtype: OTHERS, kind: type, description: team sport version of hockey played on grass or artificial turf with sticks and a round ball
Item: Eschwege --> NERtype: LOC, kind: entity, description: seat of Werra-Meißner-Kreis and town in Hesse, Germany
Item: Loire --> NERtype: LOC, kind: entity, description: longest river in France
Item: hacker --> NERtype: OTHERS, kind: type, description: person who uses technical knowledge to achieve a goal within a computerized system by non-standard means
Item: Mexico City --> NERtype: LOC, k

  0%|          | 197/638467 [00:03<1:32:40, 114.78it/s]

Item: Elbe --> NERtype: LOC, kind: entity, description: major river in Central Europe
Item: Dejen Gebremeskel --> NERtype: PERS, kind: entity, description: Ethiopian long-distance runner
Item: Neckar --> NERtype: LOC, kind: entity, description: right tributary of Rhine river in Germany
Item: Abel Mutai --> NERtype: PERS, kind: entity, description: Kenyan long-distance runner
Item: North Sea --> NERtype: LOC, kind: entity, description: marginal sea of the Atlantic Ocean
Item: Tabaco --> NERtype: LOC, kind: entity, description: city of the Philippines in the province of Albay
Item: Kiel --> NERtype: LOC, kind: entity, description: German city, capital of Schleswig-Holstein
Item: Potsdam --> NERtype: LOC, kind: entity, description: capital city of the German state of Brandenburg
Item: Erick Barrondo --> NERtype: PERS, kind: entity, description: Guatemalan racewalker
Item: Düsseldorf --> NERtype: LOC, kind: entity, description: capital city of the German federated state of North Rhine-West

  0%|          | 232/701827 [00:03<1:28:42, 131.81it/s]

Item: English --> NERtype: OTHERS, kind: type, description: West Germanic language
Item: Bangka Belitung Islands --> NERtype: LOC, kind: entity, description: province of Indonesia, on Bangka and Belitung, and several smaller islands
Item: Diplomacy --> NERtype: OTHERS, kind: entity, description: strategic board game
Item: Bengkulu --> NERtype: LOC, kind: entity, description: province of Indonesia
Item: Olivier Giroud --> NERtype: PERS, kind: entity, description: French association football player
Item: Yann M'Vila --> NERtype: PERS, kind: entity, description: French association football player
Item: Sidney Govou --> NERtype: PERS, kind: entity, description: French association football player
Item: Djibril Cissé --> NERtype: PERS, kind: entity, description: French association football player
Item: Bacary Sagna --> NERtype: PERS, kind: entity, description: French association football player
Item: list of moons of Jupiter --> NERtype: OTHERS, kind: entity, description: Wikimedia list arti

  0%|          | 265/738623 [00:03<1:31:03, 135.15it/s]

Item: Arica y Parinacota Region --> NERtype: LOC, kind: entity, description: administrative division in Chile
Item: Tarapacá Region --> NERtype: LOC, kind: entity, description: administrative division in Chile
Item: Mannheim --> NERtype: LOC, kind: entity, description: city in Baden-Württemberg, Germany
Item: Victoria --> NERtype: LOC, kind: entity, description: capital city of the province of British Columbia, Canada
Item: Totma --> NERtype: LOC, kind: entity, description: town in Vologda Oblast, Russia
Item: Goku --> NERtype: OTHERS, kind: entity, description: protagonist of the Dragon Ball franchise
Item: Government of the Soviet Union --> NERtype: ORG, kind: type, description: main body of the executive branch of government in the Soviet Union
Item: XXX --> NERtype: OTHERS, kind: entity, description: Wikimedia disambiguation page
Item: January 2 --> NERtype: OTHERS, kind: type, description: day of the year
Item: 1922 --> NERtype: OTHERS, kind: entity, description: year
Item: Naryan

  0%|          | 286/768889 [00:03<1:22:50, 154.63it/s]

Item: Altengamme --> NERtype: LOC, kind: entity, description: quarter in Hamburg, Germany
Item: March 4 --> NERtype: OTHERS, kind: type, description: day of the year
Item: March 10 --> NERtype: OTHERS, kind: type, description: day of the year
Item: ¿Dónde Están Corazón? --> NERtype: OTHERS, kind: entity, description: single released by Enrique Iglesias
Item: Reitbrook --> NERtype: LOC, kind: entity, description: suburb in the district Bergedorf, Germany
Item: Yaroslavl --> NERtype: LOC, kind: entity, description: city in the Yaroslavl Oblast, Russia
Item: March 19 --> NERtype: OTHERS, kind: type, description: day of the year
Item: March 22 --> NERtype: OTHERS, kind: type, description: day of the year
Item: Heroes of Might and Magic V: Hammers of Fate --> NERtype: OTHERS, kind: entity, description: first expansion pack to the video game Heroes of Might and Magic V
Item: Sanaa --> NERtype: LOC, kind: entity, description: capital city of Yemen
Item: 1970 --> NERtype: OTHERS, kind: entity,

  0%|          | 325/812812 [00:03<1:31:02, 148.73it/s]

Item: 1965 --> NERtype: OTHERS, kind: entity, description: year
Item: arithmetic coding --> NERtype: OTHERS, kind: entity, description: form of entropy encoding used in lossless data compression
Item: June 25 --> NERtype: OTHERS, kind: type, description: day of the year
Item: June 23 --> NERtype: OTHERS, kind: type, description: day of the year
Item: Palermo F.C. --> NERtype: ORG, kind: entity, description: association football club in Palermo, Sicily, Italy
Item: Wilhelm II --> NERtype: PERS, kind: entity, description: German Emperor and King of Prussia from 1888 to 1918
Item: Parma --> NERtype: LOC, kind: entity, description: city in the northern Italian region of Emilia-Romagna
Item: July 1 --> NERtype: OTHERS, kind: type, description: day of the year
Item: epoch --> NERtype: OTHERS, kind: type, description: moment in time used as a reference point for some time-varying astronomical quantity
Item: Implode --> NERtype: OTHERS, kind: entity, description: data compression algorithm
Ite

  0%|          | 387/899137 [00:04<1:10:21, 212.90it/s]

Item: November 29 --> NERtype: OTHERS, kind: type, description: day of the year
Item: November 20 --> NERtype: OTHERS, kind: type, description: day of the year
Item: November 22 --> NERtype: OTHERS, kind: type, description: day of the year
Item: November 16 --> NERtype: OTHERS, kind: type, description: day of the year
Item: Mack Wilberg --> NERtype: PERS, kind: entity, description: American conductor
Item: cassoulet --> NERtype: OTHERS, kind: type, description: french cuisine
Item: Aqueduct of the Gier --> NERtype: LOC, kind: entity, description: ancient Roman aqueduct
Item: Aqueduc de l'Yzeron --> NERtype: LOC, kind: entity, description: Ancient aqueduct in France
Item: Aisne --> NERtype: LOC, kind: entity, description: department of France
Item: Georg Henrik Lybecker --> NERtype: PERS, kind: entity, description: early 18th century Swedish officer
Item: Lyon Tablet --> NERtype: OTHERS, kind: entity, description: bronze tablet with part of a speech by Roman emperor Claudius
Item: Circu

  0%|          | 433/950216 [00:04<1:15:18, 210.21it/s]

Item: Justin Chadwick --> NERtype: PERS, kind: entity, description: British actor and director
Item: Kashi Vishwanath Temple --> NERtype: OTHERS, kind: entity, description: Hindu temple in Varanasi, Uttar Pradesh, India
Item: Anceriz --> NERtype: LOC, kind: entity, description: locality and former civil parish in Portugal
Item: Krasnodar Krai --> NERtype: LOC, kind: entity, description: administrative division (krai) in the North Caucasus region of southern Russia
Item: Caldera --> NERtype: LOC, kind: entity, description: Chilean city
Item: Carahue --> NERtype: LOC, kind: entity, description: city and commune in southern Chile
Item: Casablanca --> NERtype: LOC, kind: entity, description: Chilean commune
Item: Cauquenes --> NERtype: LOC, kind: entity, description: city in Chile
Item: Gera --> NERtype: LOC, kind: entity, description: town in Thuringia, Germany
Item: Praia --> NERtype: LOC, kind: entity, description: capital of Cape Verde
Item: Maykop --> NERtype: LOC, kind: entity, descr

  0%|          | 455/970530 [00:04<1:27:25, 184.95it/s]

Item: Weimar --> NERtype: LOC, kind: entity, description: city in the federal state of Thuringia, Germany
Item: V. P. Kalairajan --> NERtype: PERS, kind: entity, description: Indian politician
Item: Nalai Manithan --> NERtype: OTHERS, kind: entity, description: 1989 Tamil film directed by Velu Prabhakaran
Item: Mahajana College --> NERtype: ORG, kind: entity, description: provincial school in Tellippalai, Sri Lanka
Item: Rozpor --> NERtype: ORG, kind: entity, description: punk band from Bratislava
Item: Category:Armenian genocide memorials --> NERtype: OTHERS, kind: entity, description: Wikimedia category
Item: map --> NERtype: OTHERS, kind: type, description: visual representation of a concept space; symbolic depiction emphasizing relationships between elements of some space, such as objects, regions, or themes
Item: Ramesses IX --> NERtype: PERS, kind: entity, description: Egyptian pharaoh of the 20th dynasty
Item: Mravenečník --> NERtype: LOC, kind: entity, description: mountain in 

  0%|          | 501/1018219 [00:04<1:38:09, 172.80it/s]

Item: Sauerland --> NERtype: LOC, kind: entity, description: rural, hilly area in North Rhine-Westphalia, Germany
Item: 42 BC --> NERtype: OTHERS, kind: entity, description: year
Item: 47 BC --> NERtype: OTHERS, kind: entity, description: year
Item: Rothaar Mountains --> NERtype: LOC, kind: entity, description: mountain range in North Rhine-Westphalia and Hesse, Germany
Item: Union of South American Nations --> NERtype: ORG, kind: entity, description: intergovernmental union
Item: Category:Moldavian Soviet Socialist Republic --> NERtype: OTHERS, kind: entity, description: Wikimedia category
Item: Place Carnot --> NERtype: LOC, kind: entity, description: square in Lyon, Auvergne-Rhône-Alpes, France
Item: Michel Platini --> NERtype: PERS, kind: entity, description: French association football player, manager and executive
Item: Category:History of Benin --> NERtype: OTHERS, kind: entity, description: Wikimedia category
Item: Adémar de Chabannes --> NERtype: PERS, kind: entity, descriptio

  0%|          | 561/1091982 [00:04<1:17:51, 233.62it/s]

Item: Komodo National Park --> NERtype: LOC, kind: entity, description: national park in Indonesia
Item: Category:Political parties by ideology --> NERtype: OTHERS, kind: entity, description: Wikimedia category
Item: Berlin Recycling Volleys --> NERtype: ORG, kind: entity, description: German volleyball club
Item: flag of the Faroe Islands --> NERtype: OTHERS, kind: entity, description: national flag
Item: Rancagua --> NERtype: LOC, kind: entity, description: city in O'Higgins Region, Chile
Item: Solomon Islands dollar --> NERtype: OTHERS, kind: entity, description: currency of the Solomon Islands
Item: University of Southern California --> NERtype: ORG, kind: entity, description: private university in Los Angeles, California, United States
Item: Kölsch --> NERtype: OTHERS, kind: type, description: dialect of the Ripuarian Central German group of languages
Item: Category:History of the Philippines --> NERtype: OTHERS, kind: entity, description: Wikimedia category
Item: Mary Stewart -->

  0%|          | 586/1115968 [00:05<1:18:12, 237.67it/s]

Item: Portuguese --> NERtype: OTHERS, kind: type, description: Western Romance language of the Indo-European language family
Item: Template:Footy-stub --> NERtype: OTHERS, kind: entity, description: Wikimedia template
Item: 2007 Alpine Skiing World Cup --> NERtype: OTHERS, kind: entity, description: 2006–2007 season of the FIS Alpine Skiing World Cup
Item: Blondel de Nesle --> NERtype: PERS, kind: entity, description: French trouvère
Item: Kinorhyncha --> NERtype: OTHERS, kind: type, description: phylum of small marine pseudocoelomate invertebrates
Item: Onychophora --> NERtype: OTHERS, kind: type, description: phylum of animals
Item: Walter of Châtillon --> NERtype: PERS, kind: entity, description: 12th-century French writer and theologian
Item: Stavropol --> NERtype: LOC, kind: entity, description: capital city of Stavropol Krai in North Caucaus, Russia
Item: 1956 --> NERtype: OTHERS, kind: entity, description: year
Item: François-Édouard Picot --> NERtype: PERS, kind: entity, descri

  0%|          | 634/1136469 [00:05<1:48:49, 173.94it/s]

Item: Great Dane --> NERtype: OTHERS, kind: type, description: German dog breed
Item: 1231 --> NERtype: OTHERS, kind: entity, description: year
Item: Yoshkar-Ola --> NERtype: LOC, kind: entity, description: capital city of the Mari El Republic, Russia
Item: Ars Magica --> NERtype: OTHERS, kind: entity, description: tabletop role-playing game (generic rpg family)
Item: Isabella Teotochi Albrizzi --> NERtype: PERS, kind: entity, description: Italian author (1760-1836)
Item: 1247 --> NERtype: OTHERS, kind: entity, description: year
Item: 1254 --> NERtype: OTHERS, kind: entity, description: year
Item: 1256 --> NERtype: OTHERS, kind: entity, description: year
Item: 1258 --> NERtype: OTHERS, kind: entity, description: year
Item: 1259 --> NERtype: OTHERS, kind: entity, description: year
Item: 1274 --> NERtype: OTHERS, kind: entity, description: year
Item: 1275 --> NERtype: OTHERS, kind: entity, description: year
Item: 1278 --> NERtype: OTHERS, kind: entity, description: year
Item: 1290 --> NE

  0%|          | 654/1141605 [00:05<1:47:38, 176.67it/s]

Item: 1305 --> NERtype: OTHERS, kind: entity, description: year
Item: Fulcher of Chartres --> NERtype: PERS, kind: entity, description: French chronicler of the First Crusade
Item: Meschede --> NERtype: LOC, kind: entity, description: town in the Hochsauerland district, in North Rhine-Westphalia, Germany
Item: Conon de Béthune --> NERtype: PERS, kind: entity, description: crusader and "trouvère" poet, born in 1150 in north of France
Item: Fra Angelico --> NERtype: PERS, kind: entity, description: Italian Early Renaissance painter
Item: Abu Nuwas --> NERtype: PERS, kind: entity, description: Iraqi poet
Item: Matsuo Bashō --> NERtype: PERS, kind: entity, description: Japanese poet
Item: South Savo --> NERtype: LOC, kind: entity, description: region of Finland
Item: Ur --> NERtype: LOC, kind: entity, description: ancient Mesopotamian city-state
Item: Bjelovar --> NERtype: LOC, kind: entity, description: city and settlement in Bjelovar-Bilogora County, Croatia
Item: Bashkortostan --> NERty

  0%|          | 691/1157627 [00:05<1:57:50, 163.64it/s]

Item: North Padang Lawas --> NERtype: LOC, kind: entity, description: regency of Indonesia, on the island of Sumatra
Item: Lhasa --> NERtype: LOC, kind: entity, description: prefecture-level city and capital of Tibet
Item: giant star --> NERtype: OTHERS, kind: type, description: type of star with a radius 10-100 times, and luminosity 10-1000x that of the Sun
Item: red dwarf --> NERtype: OTHERS, kind: type, description: type of small and relatively-cool star
Item: Red Dwarf --> NERtype: OTHERS, kind: entity, description: BBC science-fiction comedy drama television programme
Item: Aachen Cathedral --> NERtype: LOC, kind: entity, description: Roman-Catholic cathedral in Aachen, Germany
Item: West Nias --> NERtype: LOC, kind: entity, description: regency of Indonesia, on the island of Nias
Item: spaceflight --> NERtype: OTHERS, kind: type, description: flight into or through outer space
Item: Chuck Berry --> NERtype: PERS, kind: entity, description: American musician (1926–2017)
Item: Osna

  0%|          | 735/1194443 [00:06<1:48:17, 183.72it/s]

Item: Teuva --> NERtype: LOC, kind: entity, description: municipality in the region of Southern Ostrobothnia in Finland
Item: 1509 --> NERtype: OTHERS, kind: entity, description: year
Item: 1415 --> NERtype: OTHERS, kind: entity, description: year
Item: Plotius Tucca --> NERtype: PERS, kind: entity, description: Roman poet
Item: Steinfurt --> NERtype: LOC, kind: entity, description: German district of North Rhine-Westphalia
Item: 1516 --> NERtype: OTHERS, kind: entity, description: year
Item: Coventry --> NERtype: LOC, kind: entity, description: city in the West Midlands in England, UK
Item: Lippe --> NERtype: LOC, kind: entity, description: German district in North Rhine-Westphalia
Item: Rhein-Kreis Neuss --> NERtype: LOC, kind: entity, description: district in North Rhine-Westphalia, Germany
Item: Kitos War --> NERtype: OTHERS, kind: entity, description: Jewish-Roman conflict (115-117)
Item: Pavia --> NERtype: LOC, kind: entity, description: Italian comune
Item: 1517 --> NERtype: OTH

  0%|          | 785/1230801 [00:06<1:37:12, 210.87it/s]

Item: Maçanet de Cabrenys --> NERtype: LOC, kind: entity, description: municipality in the comarca of Alt Empordà, Girona, Catalonia, Spain
Item: 1564 --> NERtype: OTHERS, kind: entity, description: year
Item: Pokhara --> NERtype: LOC, kind: entity, description: metropolitan city of Nepal
Item: Canigou --> NERtype: LOC, kind: entity, description: mountain in the Pyrenees of southern France
Item: 1569 --> NERtype: OTHERS, kind: entity, description: year
Item: Croatian --> NERtype: OTHERS, kind: type, description: standardized variety of Serbo-Croatian language, used by Croats
Item: Dorfhain --> NERtype: LOC, kind: entity, description: municipality in the Sächsische Schweiz-Osterzgebirge district, in Saxony, Germany
Item: UTC+01:30 --> NERtype: OTHERS, kind: entity, description: Identifier for a time offset from UTC of +1:30 (no longer used)
Item: Hartmannsdorf-Reichenau --> NERtype: LOC, kind: entity, description: municipality in the Sächsische Schweiz-Osterzgebirge district, in Saxony,

  0%|          | 807/1248793 [00:06<1:48:07, 192.36it/s]

Item: Tess Gerritsen --> NERtype: PERS, kind: entity, description: Chinese-American novelist
Item: 1652 --> NERtype: OTHERS, kind: entity, description: year
Item: 1732 --> NERtype: OTHERS, kind: entity, description: year
Item: UTC+07:30 --> NERtype: OTHERS, kind: entity, description: Identifier for a time offset from UTC of +7:30 (no longer used)
Item: Human herpesvirus 6 --> NERtype: OTHERS, kind: entity, description: virus which caused human herpesvirus 6 Infection
Item: 1821 --> NERtype: OTHERS, kind: entity, description: calendar year
Item: Franxault --> NERtype: LOC, kind: entity, description: commune in Côte-d'Or, France
Item: 1656 --> NERtype: OTHERS, kind: entity, description: year
Item: Alsdorf --> NERtype: LOC, kind: entity, description: municipality in the district of Aachen, in North Rhine-Westphalia, Germany
Item: Freising --> NERtype: LOC, kind: entity, description: town in Upper Bavaria, Germany
Item: Bitterfeld-Wolfen --> NERtype: LOC, kind: entity, description: town in

  0%|          | 844/1254922 [00:06<2:17:56, 151.52it/s]

Item: ecology --> NERtype: OTHERS, kind: type, description: scientific study of the relationships between living organisms
Item: genetics --> NERtype: OTHERS, kind: type, description: science of genes, heredity, and variation in living organisms
Item: Qin dynasty --> NERtype: OTHERS, kind: entity, description: dynasty that ruled in China from 221 to 206 BC
Item: Marie Curie --> NERtype: PERS, kind: entity, description: Polish and French physicist and chemist (1867–1934)
Item: Province of Girona --> NERtype: LOC, kind: entity, description: province of Catalonia, Spain
Item: Simone de Beauvoir --> NERtype: PERS, kind: entity, description: French philosopher, social theorist and activist (1908–1986)
Item: Ziying --> NERtype: PERS, kind: entity, description: king of Qin in 207 BC
Item: Pirates II: Stagnetti's Revenge --> NERtype: OTHERS, kind: entity, description: 2008 American pornographic movie directed by Joone
Item: Emperor Zhao of Han --> NERtype: PERS, kind: entity, description: empe

  0%|          | 879/1267752 [00:06<2:15:03, 156.34it/s]

Item: sheep --> NERtype: OTHERS, kind: type, description: domesticated ruminant bred for meat, wool, and milk
Item: Muaro Jambi --> NERtype: LOC, kind: entity, description: regency of Indonesia, on the island of Sumatra
Item: Sarolangun --> NERtype: LOC, kind: entity, description: regency of Indonesia, on the island of Sumatra
Item: Emperor Yuan of Jin --> NERtype: PERS, kind: entity, description: First Emperor of Eastern Jin Dynasty freom 318 to 323
Item: Emperor Cheng of Jin --> NERtype: PERS, kind: entity, description: emperor of the Jin Dynasty (265–420) (321-342)
Item: Emperor Mu of Jin --> NERtype: PERS, kind: entity, description: emperor of the Jin Dynasty (265–420)
Item: Kathryn Uhrich --> NERtype: PERS, kind: entity, description: American chemist
Item: Emperor Xianzong of Western Xia --> NERtype: PERS, kind: entity, description: emperor of the Western Xia Dynasty
Item: Tenderloin --> NERtype: LOC, kind: entity, description: neighborhood in downtown San Francisco, California
It

  0%|          | 918/1285776 [00:07<2:12:57, 161.05it/s]

Item: Heidi Weng --> NERtype: PERS, kind: entity, description: Norwegian cross-country skier and fell runner (1991-)
Item: 1765 --> NERtype: OTHERS, kind: entity, description: year
Item: 1770 --> NERtype: OTHERS, kind: entity, description: year
Item: 1692 --> NERtype: OTHERS, kind: entity, description: year
Item: 1773 --> NERtype: OTHERS, kind: entity, description: year
Item: water polo --> NERtype: OTHERS, kind: type, description: ballgame-team sport played in water by teams competing to put the ball into the opponent's goal
Item: 1699 --> NERtype: OTHERS, kind: entity, description: year
Item: Grazia Deledda --> NERtype: PERS, kind: entity, description: Italian novelist (1871-1936)
Item: Rokan Hulu --> NERtype: LOC, kind: entity, description: regency of Indonesia, on the island of Sumatra
Item: 1788 --> NERtype: OTHERS, kind: entity, description: year
Item: 1876 --> NERtype: OTHERS, kind: entity, description: year
Item: Birobidzhan --> NERtype: LOC, kind: entity, description: town and

  0%|          | 944/1291997 [00:07<2:16:09, 158.03it/s]

Item: Landkreis Bergstraße --> NERtype: LOC, kind: entity, description: district in Hesse, Germany
Item: Bulgarian --> NERtype: OTHERS, kind: type, description: South Slavic language
Item: pedagogy --> NERtype: OTHERS, kind: type, description: theory and practice of education
Item: Düsseldorf Government Region --> NERtype: LOC, kind: entity, description: government region of North Rhine-Westphalia, Germany
Item: Kirovsky District --> NERtype: LOC, kind: entity, description: district of Leningrad Oblast, Russia
Item: truth --> NERtype: OTHERS, kind: type, description: what is in accord with fact or reality
Item: Luzhsky District --> NERtype: LOC, kind: entity, description: district in Leningrad Oblast, Russia
Item: Magadan Oblast --> NERtype: LOC, kind: entity, description: federal subject of Russia
Item: mouse --> NERtype: OTHERS, kind: type, description: hand-held device used to move a pointer on a computer display
Item: Ivan IV Vasilyevich --> NERtype: PERS, kind: entity, description

  0%|          | 977/1303472 [00:07<2:27:11, 147.48it/s]

Item: Enz --> NERtype: LOC, kind: entity, description: district of Baden-Württemberg, Germany
Item: Ortenau --> NERtype: LOC, kind: entity, description: district of Baden-Württemberg, Germany
Item: Iława --> NERtype: LOC, kind: entity, description: town and administrative center of Iława district in Warmian-Masurian Voivodeship of northeastern Poland
Item: Les Cordeliers --> NERtype: LOC, kind: entity, description: quarter in the 2nd arrondissement of Lyon, France
Item: Cyrillic script --> NERtype: OTHERS, kind: type, description: writing system developed in Bulgaria and used for various oriental Eurasian languages
Item: Rosenthal-Bielatal --> NERtype: LOC, kind: entity, description: municipality in the Sächsische Schweiz-Osterzgebirge district, in Saxony, Germany
Item: Struppen --> NERtype: LOC, kind: entity, description: municipality in the Sächsische Schweiz-Osterzgebirge district, in Saxony, Germany
Item: Biratnagar --> NERtype: LOC, kind: entity, description: metropolitan city in 

  0%|          | 1013/1317651 [00:07<2:19:17, 157.54it/s]

Item: Peter the Great --> NERtype: PERS, kind: entity, description: tsar and 1st Emperor, founder of the Russian Empire
Item: Logar River --> NERtype: LOC, kind: entity, description: Afghan river
Item: 1st Chess Olympiad --> NERtype: OTHERS, kind: entity, description: FIDE chess tournament for national teams
Item: Ghorband River --> NERtype: LOC, kind: entity, description: river of Afghanistan
Item: 1992 Summer Olympics --> NERtype: OTHERS, kind: entity, description: Games of the XXV Olympiad, in Barcelona, Spain
Item: Visual Effects Society Awards 2008 --> NERtype: OTHERS, kind: entity, description: award for the best visual effects in film and television
Item: Visual Effects Society Awards 2009 --> NERtype: OTHERS, kind: entity, description: award for the best visual effects in film and television
Item: Open Sud de France --> NERtype: OTHERS, kind: type, description: tennis tournament
Item: mountain --> NERtype: OTHERS, kind: type, description: large natural elevation of the Earth's 

  0%|          | 1053/1332143 [00:07<2:08:02, 173.27it/s]

Item: 2006 Venezuelan presidential election --> NERtype: OTHERS, kind: entity, description: presidential election of Venezuela
Item: Deportivo de La Coruña --> NERtype: ORG, kind: entity, description: Spanish association football club
Item: RCD Espanyol de Barcelona --> NERtype: ORG, kind: entity, description: sports club in Spain
Item: Schönfeld --> NERtype: LOC, kind: entity, description: municipality in the district of Meissen, in Saxony, Germany
Item: Boeing 747-8 --> NERtype: OTHERS, kind: type, description: wide-body airliner, current production series of the 747
Item: Aemilius Macer --> NERtype: PERS, kind: entity, description: Roman poet
Item: 2002 Venezuelan coup d'état attempt --> NERtype: ORG, kind: entity, description: Venezuelan coup attempt of 2002
Item: HTML --> NERtype: OTHERS, kind: type, description: family of markup languages for displaying information viewable in a web browser
Item: Augustin-Louis Cauchy --> NERtype: PERS, kind: entity, description: French mathemati

  0%|          | 1099/1346424 [00:08<2:08:31, 174.46it/s]

Item: Sileshi Sihine --> NERtype: PERS, kind: entity, description: Ethiopian long-distance runner
Item: Gandaki Zone --> NERtype: LOC, kind: entity, description: former administrative zone of Nepal
Item: Richard Mateelong --> NERtype: PERS, kind: entity, description: Kenyan long-distance runner
Item: Candiolo --> NERtype: LOC, kind: entity, description: Italian comune
Item: David Oliver --> NERtype: PERS, kind: entity, description: American hurdler
Item: Lumbini Zone --> NERtype: LOC, kind: entity, description: former administrative zone of Nepal
Item: Carema --> NERtype: LOC, kind: entity, description: Italian comune
Item: religion --> NERtype: OTHERS, kind: type, description: social-cultural system
Item: United States Grand Prix West --> NERtype: OTHERS, kind: type, description: Formula 1 Grand Prix
Item: Castelnuovo Nigra --> NERtype: LOC, kind: entity, description: Italian comune
Item: Hungarian Grand Prix --> NERtype: OTHERS, kind: type, description: Formula 1 Grand Prix
Item: Cer

  0%|          | 1117/1344000 [00:08<2:31:20, 147.89it/s]

Item: Sima Qian --> NERtype: PERS, kind: entity, description: Chinese historian and writer
Item: kidney --> NERtype: OTHERS, kind: type, description: internal organ in most animals, including vertebrates and some invertebrates
Item: rabbit --> NERtype: OTHERS, kind: type, description: mammals of the family Leporidae
Item: Deutsche Telekom --> NERtype: ORG, kind: entity, description: partially privatized German telecommunications company
Item: Isolabella --> NERtype: LOC, kind: entity, description: Italian comune
Item: Timbuktu --> NERtype: LOC, kind: entity, description: city in Mali
Item: Arnold Palmer --> NERtype: PERS, kind: entity, description: American golfer
Item: Lothal --> NERtype: LOC, kind: entity, description: prominent city of the ancient Indus valley civilization (present-day Gujarat, India)
Item: Muhammad --> NERtype: PERS, kind: entity, description: founder and main prophet of Islam (c. 570–632)
Item: Moncalieri --> NERtype: LOC, kind: entity, description: Italian comune

  0%|          | 1153/1337013 [00:08<2:29:01, 149.40it/s]

Item: Michael Schumacher --> NERtype: PERS, kind: entity, description: German racing driver
Item: 2006 Winter Olympics --> NERtype: OTHERS, kind: entity, description: 20th edition of Winter Olympics, in Turin, Italy
Item: Kelly Smith --> NERtype: PERS, kind: entity, description: English footballer (born 1978)
Item: The New York Times --> NERtype: OTHERS, kind: entity, description: American daily newspaper (founded 1851)
Item: Berlin-Tegel Airport --> NERtype: OTHERS, kind: entity, description: former international airport in Berlin, Germany
Item: Amsterdam Airport Schiphol --> NERtype: LOC, kind: entity, description: airport in Haarlemmermeer, Netherlands
Item: traffic separation scheme --> NERtype: OTHERS, kind: type, description: maritime traffic-management route-system
Item: Îles Belep --> NERtype: LOC, kind: entity, description: commune in Nouvelle-Calédonie, France
Item: Boulouparis --> NERtype: LOC, kind: entity, description: commune in Nouvelle-Calédonie, France
Item: Noumea -->

  0%|          | 1201/1348714 [00:08<2:09:06, 173.96it/s]

Item: Fanta --> NERtype: OTHERS, kind: type, description: line of fruit-flavored carbonated beverages
Item: Hollands Kroon --> NERtype: LOC, kind: entity, description: municipality in the Netherlands
Item: N --> NERtype: OTHERS, kind: entity, description: letter of the Latin alphabet
Item: S --> NERtype: OTHERS, kind: entity, description: 19th letter in the English and Latin alphabet
Item: Schermer --> NERtype: LOC, kind: entity, description: former municipality in the Netherlands
Item: W --> NERtype: OTHERS, kind: entity, description: letter of the Latin alphabet
Item: Zhengtong Emperor --> NERtype: PERS, kind: entity, description: emperor of the Ming Dynasty
Item: Â --> NERtype: OTHERS, kind: entity, description: letter of the Latin alphabet
Item: Hongzhi Emperor --> NERtype: PERS, kind: entity, description: 9th emperor of the Ming Dynasty (1470–1505)
Item: Dutch Wikipedia --> NERtype: OTHERS, kind: entity, description: Dutch-language edition of Wikipedia, the free encyclopedia
Item:

  0%|          | 1219/1355589 [00:08<2:16:58, 164.80it/s]

Item: Frankfurter Allgemeine Zeitung --> NERtype: ORG, kind: entity, description: German daily newspaper
Item: Daïtro --> NERtype: ORG, kind: entity, description: French band
Item: Romano Canavese --> NERtype: LOC, kind: entity, description: Italian comune
Item: Rosta --> NERtype: LOC, kind: entity, description: Italian comune
Item: Salza di Pinerolo --> NERtype: LOC, kind: entity, description: Italian comune
Item: San Giorgio Canavese --> NERtype: LOC, kind: entity, description: Italian comune
Item: San Maurizio Canavese --> NERtype: LOC, kind: entity, description: Italian comune
Item: barrel --> NERtype: OTHERS, kind: type, description: hollow cylindrical container
Item: Sauze d'Oulx --> NERtype: LOC, kind: entity, description: Italian comune
Item: Sciolze --> NERtype: LOC, kind: entity, description: Italian comune
Item: Jegenstorf --> NERtype: LOC, kind: entity, description: municipality in the Bern-Mittelland administrative district, Switzerland
Item: AlunaGeorge --> NERtype: ORG, 

  0%|          | 1255/1368132 [00:09<2:20:42, 161.91it/s]

Item: Ebersberg --> NERtype: LOC, kind: entity, description: district of Bavaria, Germany
Item: Hersiliidae --> NERtype: OTHERS, kind: entity, description: family of arachnids
Item: Lower Bavaria --> NERtype: LOC, kind: entity, description: administrative regions of Bavaria, Germany
Item: Upper Bavaria --> NERtype: LOC, kind: entity, description: administrative regions of Bavaria, Germany
Item: Lupus --> NERtype: OTHERS, kind: entity, description: constellation in the southern celestial hemisphere
Item: Horologium --> NERtype: OTHERS, kind: entity, description: constellation in the southern celestial hemisphere
Item: Tom Cleverley --> NERtype: PERS, kind: entity, description: English association football player (born 1989)
Item: Idiopidae --> NERtype: OTHERS, kind: entity, description: family of armoured trapdoor spiders
Item: Gavin Mahon --> NERtype: PERS, kind: entity, description: English association football player (born 1977)
Item: Category:Draco (constellation) --> NERtype: OTHER

  0%|          | 1323/1398658 [00:09<1:42:21, 227.51it/s]

Item: Bondowoso --> NERtype: LOC, kind: entity, description: regency of Indonesia, on island of Java
Item: Muḥammad ibn Jābir al-Ḥarrānī al-Battānī --> NERtype: PERS, kind: entity, description: Islamic astronomer and astrologer from Harran (before 858–929)
Item: Nganjuk --> NERtype: LOC, kind: entity, description: regency of Indonesia, on island of Java
Item: Pasuruan --> NERtype: LOC, kind: entity, description: regency of Indonesia, on island of Java
Item: Stephen Breyer --> NERtype: PERS, kind: entity, description: US Supreme Court justice from 1994 to 2022
Item: Situbondo --> NERtype: LOC, kind: entity, description: regency of Indonesia, on island of Java
Item: Tuban --> NERtype: LOC, kind: entity, description: regency of Indonesia, on island of Java
Item: The Independent --> NERtype: ORG, kind: entity, description: British national daily newspaper
Item: Border Gateway Protocol --> NERtype: OTHERS, kind: entity, description: protocol for communicating routing information on the Inte

  0%|          | 1372/1409190 [00:09<1:46:46, 219.75it/s]

Item: temperature --> NERtype: OTHERS, kind: type, description: physical property of matter that quantitatively expresses the common notions of hot and cold
Item: Sue Black --> NERtype: PERS, kind: entity, description: British computer scientist
Item: Gianyar --> NERtype: LOC, kind: entity, description: regency of Indonesia, on island of Bali
Item: Jembrana --> NERtype: LOC, kind: entity, description: regency of Indonesia, on island of Bali
Item: !DonnerwetteR! --> NERtype: OTHERS, kind: entity, description: 2006 album by Prinz Pi
Item: Xanana Gusmão --> NERtype: PERS, kind: entity, description: Prime Minister and former President of East Timor
Item: 100 Hekate --> NERtype: OTHERS, kind: entity, description: main-belt asteroid
Item: 1006 Lagrangea --> NERtype: OTHERS, kind: entity, description: asteroid
Item: cosmic radiation --> NERtype: OTHERS, kind: type, description: high-speed, i.e. high-energy particle, mainly originating in outer space, outside the Solar system
Item: 10195 Nebra

  0%|          | 1397/1411439 [00:09<1:56:29, 201.75it/s]

Item: Bafétimbi Gomis --> NERtype: PERS, kind: entity, description: French-Senegalese association football player
Item: Saint-Maurice-de-Beynost --> NERtype: LOC, kind: entity, description: commune in Ain, France
Item: CrunchBang Linux --> NERtype: OTHERS, kind: type, description: lightweight Debian based Linux distribution
Item: Symphony No. 9 --> NERtype: OTHERS, kind: entity, description: choral symphony by Ludwig van Beethoven
Item: CD Mirandés --> NERtype: ORG, kind: entity, description: Spanish football club based in Miranda de Ebro
Item: Quercus --> NERtype: OTHERS, kind: entity, description: tree or shrub in the genus Quercus
Item: Ell & Nikki --> NERtype: ORG, kind: entity, description: Azerbaijani pop duo
Item: 8½ --> NERtype: OTHERS, kind: entity, description: 1963 film directed by Federico Fellini
Item: Tiana --> NERtype: LOC, kind: entity, description: town in Catalonia, Spain
Item: addiction --> NERtype: OTHERS, kind: type, description: state characterized by compulsive e

  0%|          | 1444/1414321 [00:10<1:57:12, 200.90it/s]

Item: Brest --> NERtype: LOC, kind: entity, description: port city in the Finistère department, Brittany, France
Item: sexually transmitted infection --> NERtype: OTHERS, kind: type, description: infection transmitted through human sexual behavior
Item: Template:Location map Germany North Rhine-Westphalia --> NERtype: OTHERS, kind: entity, description: Wikimedia template
Item: Starobrno Brewery --> NERtype: ORG, kind: entity, description: Czech brewery
Item: Template:Location map Iceland --> NERtype: OTHERS, kind: entity, description: Wikimedia template
Item: Template:Location map Sweden, 40south --> NERtype: OTHERS, kind: entity, description: Wikimedia template
Item: Watson --> NERtype: OTHERS, kind: entity, description: artificial intelligence computer system made by IBM
Item: Template:Location map Andorra --> NERtype: OTHERS, kind: entity, description: Wikimedia template
Item: Template:Location map Spain --> NERtype: OTHERS, kind: entity, description: Wikimedia template
Item: Templa

  0%|          | 1485/1405044 [00:10<2:08:12, 182.45it/s]

Item: Castellfollit de la Roca --> NERtype: LOC, kind: entity, description: municipality in the comarca of Garrotxa, in the province of Girona, Catalonia, Spain
Item: Mieres --> NERtype: LOC, kind: entity, description: municipality in the province of Girona and autonomous community of Catalonia, Spain
Item: Montagut i Oix --> NERtype: LOC, kind: entity, description: municipality in the province of Girona and autonomous community of Catalonia, Spain
Item: Bossòst --> NERtype: LOC, kind: entity, description: municipality in the Aran Valley, province of Lleida (Catalonia), Northern Spain
Item: Mayenne --> NERtype: LOC, kind: entity, description: department in northwest France
Item: perpetual motion --> NERtype: OTHERS, kind: type, description: work continuously done without an external supply of energy
Item: Canejan --> NERtype: LOC, kind: entity, description: municipality in the comarca of the Aran Valley in Catalonia, Spain
Item: Kálmán Kalocsay --> NERtype: PERS, kind: entity, descript

  0%|          | 1504/1404216 [00:10<2:29:05, 156.80it/s]

Item: Santa --> NERtype: LOC, kind: entity, description: municipality of the Philippines in the province of Ilocos Sur
Item: tank --> NERtype: OTHERS, kind: type, description: armoured fighting vehicle intended as a primary offensive weapon in front-line ground combat
Item: Detlev Blanke --> NERtype: PERS, kind: entity, description: German interlinguistics lecturer
Item: Franko Luin --> NERtype: PERS, kind: entity, description: Swedish type designer (1941-2005)
Item: Sinait --> NERtype: LOC, kind: entity, description: municipality of the Philippines in the province of Ilocos Sur
Item: Antwerp --> NERtype: LOC, kind: entity, description: municipality in the province of Antwerp, Belgium
Item: Tagudin --> NERtype: LOC, kind: entity, description: municipality of the Philippines in the province of Ilocos Sur
Item: Assenede --> NERtype: LOC, kind: entity, description: municipality in East Flanders, Belgium
Item: Hisashi Inoue --> NERtype: PERS, kind: entity, description: Japanese novelist, p

  0%|          | 1542/1405452 [00:10<2:33:18, 152.62it/s]

Item: Brunyola --> NERtype: LOC, kind: entity, description: village in the province of Girona and autonomous community of Catalonia, Spain
Item: Mathieu Gorgelin --> NERtype: PERS, kind: entity, description: French footballer
Item: Futaleufú --> NERtype: LOC, kind: entity, description: town in Chile
Item: Delta Andromedae --> NERtype: OTHERS, kind: entity, description: multiple star system in the northern constellation of Andromeda
Item: Galvarino --> NERtype: LOC, kind: entity, description: town in Chile
Item: Bellatrix --> NERtype: OTHERS, kind: entity, description: star in the
Item: Hualpén --> NERtype: LOC, kind: entity, description: city in Chile
Item: Algol --> NERtype: OTHERS, kind: entity, description: triple star system in the constellation Perseus
Item: nuclear fusion --> NERtype: OTHERS, kind: type, description: nuclear reaction in which atomic nuclei combine
Item: georgerobinsonite --> NERtype: OTHERS, kind: type, description: hydrous chromate mineral
Item: motukoreaite -->

  0%|          | 1576/1407837 [00:10<2:35:29, 150.73it/s]

Item: Epsilon Cygni --> NERtype: OTHERS, kind: entity, description: star in the constellation Cygnus
Item: Piacenza --> NERtype: LOC, kind: entity, description: Italian comune
Item: University of Orléans --> NERtype: ORG, kind: entity, description: French university in Orléans created in 1966
Item: Zeta Herculis --> NERtype: OTHERS, kind: entity, description: multiple star system in the constellation Hercules
Item: Lengue --> NERtype: OTHERS, kind: type, description: Bantu language of southern Equatorial Guinea
Item: Delta Herculis --> NERtype: OTHERS, kind: entity, description: multiple-star system in the constellation Hercules
Item: Domaine universitaire de Talence Pessac Gradignan --> NERtype: LOC, kind: entity, description: campus of University of Bordeaux
Item: Montserrat --> NERtype: LOC, kind: entity, description: British overseas territory in the Caribbean
Item: Meänkieli --> NERtype: OTHERS, kind: type, description: de jure one of the official minority languages of Sweden spok

  0%|          | 1612/1410539 [00:11<2:29:44, 156.81it/s]

Item: Berlin Conference --> NERtype: OTHERS, kind: entity, description: international conference that regulated the distribution of European colonization and trade in Africa
Item: Verges --> NERtype: LOC, kind: entity, description: municipality in Catalonia, Spain
Item: Ultramort --> NERtype: LOC, kind: entity, description: village in the province of Girona and autonomous community of Catalonia, Spain
Item: Yopal --> NERtype: LOC, kind: entity, description: capital of the Colombian department of Casanare
Item: Beta Cassiopeiae --> NERtype: OTHERS, kind: entity, description: star in the constellation Cassiopeia
Item: Vaulx-en-Velin --> NERtype: LOC, kind: entity, description: commune in the metropolis of Lyon, France
Item: Palol de Revardit --> NERtype: LOC, kind: entity, description: village in the province of Girona and autonomous community of Catalonia, Spain
Item: Crespià --> NERtype: LOC, kind: entity, description: village in the province of Girona and autonomous community of Catal

  0%|          | 1628/1402306 [00:11<3:02:48, 127.70it/s]

Item: Llívia --> NERtype: LOC, kind: entity, description: town of Cerdanya, province of Girona, Catalonia, Spain
Item: Bohol --> NERtype: LOC, kind: entity, description: province of the Philippines
Item: P Cygni --> NERtype: OTHERS, kind: entity, description: variable star in the constellation Cygnus
Item: Centelles --> NERtype: LOC, kind: entity, description: municipality in the comarca of Osona in Catalonia, Spain
Item: Folgueroles --> NERtype: LOC, kind: entity, description: municipality in the comarca of Osona in Catalonia, Spain
Item: Davao del Sur --> NERtype: LOC, kind: entity, description: province of the Philippines
Item: Oristà --> NERtype: LOC, kind: entity, description: municipality in the comarca of Lluçanès in Catalonia, Spain
Item: Prats de Lluçanès --> NERtype: LOC, kind: entity, description: Spanish municipality
Item: Torelló --> NERtype: LOC, kind: entity, description: municipality in the Province of Barcelona, Catalonia, Spain
Item: Vic --> NERtype: LOC, kind: entity

  0%|          | 1670/1409659 [00:11<2:22:06, 165.13it/s]

Item: Whirlpool Galaxy --> NERtype: OTHERS, kind: entity, description: galaxy
Item: Messier 52 --> NERtype: OTHERS, kind: entity, description: open cluster in the constellation Cassiopeia
Item: probation --> NERtype: OTHERS, kind: type, description: court-ordered correctional supervision in the community; either community sentence (alternative to incarceration) or parole
Item: Tartu --> NERtype: LOC, kind: entity, description: second most populous city in Estonia
Item: NBC --> NERtype: ORG, kind: entity, description: American television and radio network
Item: Bloomberg Television --> NERtype: ORG, kind: entity, description: financial and business cable news channel
Item: Bloomberg L.P. --> NERtype: ORG, kind: entity, description: American multinational mass media corporation
Item: Alberto Bayo --> NERtype: PERS, kind: entity, description: Cuban military leader of the defeated left-wing Loyalists in the Spanish Civil War (1892-1967)
Item: Shikoku --> NERtype: LOC, kind: entity, descrip

  0%|          | 1706/1417167 [00:11<2:21:43, 166.46it/s]

Item: William Herschel --> NERtype: PERS, kind: entity, description: German-born British astronomer and composer (1738–1822)
Item: Ï --> NERtype: OTHERS, kind: entity, description: letter of the Latin alphabet
Item: Nicolas Leblanc --> NERtype: PERS, kind: entity, description: French chemist
Item: Vitoria-Gasteiz --> NERtype: LOC, kind: entity, description: municipality of Álava, autonomous community of the Basque Country, Spain
Item: wolverine --> NERtype: OTHERS, kind: entity, description: largest land-dwelling species of the family Mustelidae
Item: Teruel --> NERtype: LOC, kind: entity, description: municipality of Aragon, Spain, in the province of Teruel and the comarca of Communidad de Teruel
Item: Limmat --> NERtype: LOC, kind: entity, description: river in Switzerland
Item: Asón River --> NERtype: LOC, kind: entity, description: river in the Cantabria autonomous region of Spain
Item: Messier 102 --> NERtype: OTHERS, kind: entity, description: Galaxy
Item: neoclassicism --> NERty

  0%|          | 1772/1445176 [00:11<1:38:21, 244.57it/s]

Item: Zapardiel river --> NERtype: LOC, kind: entity, description: river in Spain
Item: Sunken Ships Monument --> NERtype: OTHERS, kind: entity, description: monument in Sevastopol
Item: Luga --> NERtype: LOC, kind: entity, description: river in Leningrad and Novgorod Oblasts, Russia
Item: fstab --> NERtype: OTHERS, kind: type, description: system configuration file
Item: Veikko Iivanainen --> NERtype: PERS, kind: entity, description: Finnish ski jumper and Nordic combined athlete
Item: Keijo Korhonen --> NERtype: PERS, kind: entity, description: Finnish ski jumper
Item: Tormes --> NERtype: LOC, kind: entity, description: river tributary to Douro, in the provinces of Ávila, Salamanca and Zamora, Castile and León, Spain
Item: Template: Location map Western Sahara --> NERtype: OTHERS, kind: entity, description: Wikimedia template
Item: Template: Location map Libya --> NERtype: OTHERS, kind: entity, description: Wikimedia template
Item: Template:Location map Mali --> NERtype: OTHERS, kind

  0%|          | 1824/1451965 [00:12<1:50:49, 218.08it/s]

Item: Ciudad Real --> NERtype: LOC, kind: entity, description: municipality in Castile–La Mancha, Spain
Item: Port Lincoln Airport --> NERtype: OTHERS, kind: entity, description: airport in Port Lincoln, South Australia
Item: Renmark Airport --> NERtype: OTHERS, kind: entity, description: airport in Renmark, South Australia
Item: Whyalla Airport --> NERtype: OTHERS, kind: entity, description: airport serving Whyalla, South Australia
Item: Val Soana --> NERtype: LOC, kind: entity, description: valley in Piemont, Italy
Item: Ringelspitz --> NERtype: LOC, kind: entity, description: mountain of the Glarus Alps
Item: José Batlle y Ordóñez --> NERtype: PERS, kind: entity, description: president of Uruguay in 1903-1907 and 1911-1915 (1856-1929)
Item: ו --> NERtype: OTHERS, kind: entity, description: sixth letter in the Hebrew alphabet
Item: Dindigul district --> NERtype: LOC, kind: entity, description: district of Tamil Nadu, India
Item: Sheung Wan station --> NERtype: OTHERS, kind: entity, d

  0%|          | 1883/1462104 [00:12<1:38:10, 247.91it/s]

Item: Tagamanent --> NERtype: LOC, kind: entity, description: village of Catalonia, Spain
Item: William John Hamilton --> NERtype: PERS, kind: entity, description: British geologist (1805-1867)
Item: William Jack --> NERtype: PERS, kind: entity, description: Scottish botanist and physician (1795-1822)
Item: Casserres --> NERtype: LOC, kind: entity, description: municipality in Catalonia, Spain
Item: 1971 Austrian Grand Prix --> NERtype: OTHERS, kind: entity, description: motor car race
Item: Bershawn Jackson --> NERtype: PERS, kind: entity, description: American hurdler
Item: Montmajor --> NERtype: LOC, kind: entity, description: municipality in Catalonia, Spain
Item: Smithton Airport --> NERtype: OTHERS, kind: entity, description: airport in Smithton, Tasmania, Australia
Item: Puig-reig --> NERtype: LOC, kind: entity, description: municipality in Catalonia, Spain
Item: Denis Nizhegorodov --> NERtype: PERS, kind: entity, description: Russian Olympic race walker
Item: 1981 Austrian Gran

  0%|          | 1909/1455634 [00:12<1:55:51, 209.13it/s]

Item: Castellolí --> NERtype: LOC, kind: entity, description: municipality in the comarca of the Anoia in Catalonia, Spain
Item: Émile Durkheim --> NERtype: PERS, kind: entity, description: French sociologist
Item: Montesquieu --> NERtype: PERS, kind: entity, description: French social commentator and political thinker (1689-1755)
Item: Charly Gaul --> NERtype: PERS, kind: entity, description: professional cyclist
Item: Gaston Geens --> NERtype: PERS, kind: entity, description: Belgian politician and former minister-president of Flanders (1931-2002)
Item: Maurice Gourdault-Montagne --> NERtype: PERS, kind: entity, description: French diplomat
Item: Fürstenfeldbruck --> NERtype: LOC, kind: entity, description: city in Bavaria, Germany
Item: Rainbow --> NERtype: ORG, kind: entity, description: English rock band
Item: Schwedt/Oder --> NERtype: LOC, kind: entity, description: town in Brandenburg state, Germany
Item: Dudelange --> NERtype: LOC, kind: entity, description: commune in Luxembou

  0%|          | 1932/1451655 [00:12<2:19:07, 173.67it/s]

Item: Province of Monza and Brianza --> NERtype: LOC, kind: entity, description: province of Italy
Item: Province of Novara --> NERtype: LOC, kind: entity, description: province of Italy
Item: Chaudenay-la-Ville --> NERtype: LOC, kind: entity, description: commune in Côte-d'Or, France
Item: Province of Ogliastra --> NERtype: LOC, kind: entity, description: province of Italy
Item: Écutigny --> NERtype: LOC, kind: entity, description: commune in Côte-d'Or, France
Item: Painblanc --> NERtype: LOC, kind: entity, description: commune in Côte-d'Or, France
Item: province of Perugia --> NERtype: LOC, kind: entity, description: province of Italy
Item: Province of Piacenza --> NERtype: LOC, kind: entity, description: province of Italy
Item: Veuvey-sur-Ouche --> NERtype: LOC, kind: entity, description: commune in Côte-d'Or, France
Item: Province of Ragusa --> NERtype: LOC, kind: entity, description: former province of Italy
Item: Province of Reggio Calabria --> NERtype: LOC, kind: entity, descrip

  0%|          | 1970/1457828 [00:13<2:24:24, 168.03it/s]

Item: Province of Venice --> NERtype: LOC, kind: entity, description: former province in the region of Veneto, Italy
Item: Thury --> NERtype: LOC, kind: entity, description: commune in Côte-d'Or, France
Item: XML Schema --> NERtype: OTHERS, kind: type, description: W3C XML schema language
Item: aerobraking --> NERtype: OTHERS, kind: type, description: spaceflight maneuver
Item: aeronomy --> NERtype: OTHERS, kind: entity, description: science of the upper region of the atmosphere
Item: Couverture maladie universelle --> NERtype: OTHERS, kind: type, description: French social welfare programme
Item: Sabarmati Riverfront, Ahmedabad --> NERtype: LOC, kind: entity, description: development at Ahmedabad, India
Item: Vijaygupta Maurya --> NERtype: PERS, kind: entity, description: Gujarati Science writer and story writer from India
Item: Charles K. Kao --> NERtype: PERS, kind: entity, description: Hong Kong-British-American physicist
Item: Franche-Comté --> NERtype: LOC, kind: entity, descript

  0%|          | 2008/1461700 [00:13<2:26:52, 165.63it/s]

Item: Wildhuser Schafberg --> NERtype: LOC, kind: entity, description: mountain of the Appenzell Alps
Item: Marganell --> NERtype: LOC, kind: entity, description: municipality in Catalonia, Spain
Item: 433 Eros --> NERtype: OTHERS, kind: entity, description: near-Earth asteroid
Item: Mura --> NERtype: LOC, kind: entity, description: municipality in Catalonia.
Item: Navàs --> NERtype: LOC, kind: entity, description: municipality in Catalonia, Spain
Item: El Pont de Vilomara i Rocafort --> NERtype: LOC, kind: entity, description: municipality in Catalonia, Spain
Item: Talamanca --> NERtype: LOC, kind: entity, description: municipality in Catalonia, Spain
Item: Meilly-sur-Rouvres --> NERtype: LOC, kind: entity, description: commune in Côte-d'Or, France
Item: Metis --> NERtype: OTHERS, kind: entity, description: moon of Jupiter
Item: Andy Lau --> NERtype: PERS, kind: entity, description: Chinese Hong Kong actor, film producer and singer
Item: canton of Beynat --> NERtype: LOC, kind: entity


KeyboardInterrupt

  0%|          | 2023/1466690 [00:29<2:27:22, 165.63it/s]