In [1]:
import csv
import pandas as pd
from owlready2 import *

from rdflib import Graph
from rdflib import URIRef, BNode, Literal
from rdflib import Namespace
from rdflib.namespace import OWL, RDF, RDFS, FOAF, XSD


In [7]:
def loadOntology(urionto):
    
    #Method from owlready
    onto = get_ontology(urionto).load()
    
    print("Classes in Ontology: " + str(len(list(onto.classes()))))
    for cls in onto.classes():                
            print("\t"+cls.iri)
    return onto


In [7]:
# using_builtin_libary("worldcities-free-100.csv")
df = pd.read_csv("worldcities-free.csv", sep=',', quotechar='"',escapechar="\\")

In [9]:
#Load ontology
onto = loadOntology("ex.owl")

Classes in Ontology: 4
	http://www.semanticweb.org/zacharias.detorakis/ontologies/2021/2/lab6#ex:country
	http://www.semanticweb.org/zacharias.detorakis/ontologies/2021/2/lab6#ex:capital
	http://www.semanticweb.org/zacharias.detorakis/ontologies/2021/2/lab6#ex:city
	http://www.semanticweb.org/zacharias.detorakis/ontologies/2021/2/lab6#ex:not_capital


In [9]:
g = Graph()
    
ex = Namespace("http://example.org/")
g.bind("ex", ex)

for index, row in df.iterrows():
    
    http://example.org/Tokyo
    http://example.org/Japan
    
    # Create the URIs for the cities and countries
    city = URIRef("http://example.org/"+row['city'].replace(" ", "_"))
    country = URIRef("http://example.org/"+row['country'].replace(" ", "_"))
    g.add((city, RDF.type, ex.city))
    g.add((country, RDF.type, ex.country))
    
    # Add city properties
    g.add((city, ex.name, Literal(row['city'])))
    g.add((city, ex.city_ascii, Literal(row['city_ascii'])))
    g.add((city, ex.admin_name, Literal(row['admin_name'])))
    g.add((city, ex.lattitude, Literal(row['lat'])))
    g.add((city, ex.longitude, Literal(row['lng'])))
    g.add((city, ex.population, Literal(row['population'])))
    g.add((city, ex.has_country, country))
    if (row['capital'] == row['capital']): #check for nan
        g.add((city, ex.capital_status, Literal(row['capital'])))

    # Add country data properties
    g.add((country, ex.iso2_code,  Literal(row['iso2'])))
    g.add((country, ex.iso3_code,  Literal(row['iso3'])))
    g.add((country, ex.name,  Literal(row['country'])))
    
    if (row['capital'] == 'primary'):
        g.add((country, ex.has_capital, city))
    
# print(g.serialize(format="turtle").decode("utf-8"))
g.serialize(destination='lab3_data_graph-full.ttl', format='ttl')

In [21]:
# Initialise the graph
g = Graph()

# parse the data created in task 3
g.parse("lab3_data_graph.ttl", format="ttl")
# g.parse("lab3_data_graph-full.ttl", format="ttl")

# construct and run the SPARQL query
qres = g.query(
    """SELECT ?name ?capital ?population ?country_name where {
      ?city rdf:type ex:city;
              ex:name ?name;
              ex:population ?population;
              ex:capital_status ?capital;
              ex:has_country ?country.
      ?country ex:name ?country_name.
              
      FILTER (STR(?capital)="primary").
      FILTER (?population>5000000).
      
    }
    ORDER BY ?country_name
    """)
print(len(qres))

print(f"'country_name','city_name','capital','population'")
for row in qres:
#     Row is a list of matched RDF terms: URIs, literals or blank nodes
    print(f"'{str(row.country_name)}','{str(row.name)}','{str(row.capital)}',{str(row.population)}")

24
'country_name','city_name','capital','population'
'Angola','Luanda','primary',8417000
'Argentina','Buenos Aires','primary',16157000
'Bangladesh','Dhaka','primary',15443000
'Chile','Santiago','primary',7007000
'China','Beijing','primary',19433000
'Colombia','Bogotá','primary',9464000
'Congo (Kinshasa)','Kinshasa','primary',13528000
'Egypt','Cairo','primary',19372000
'France','Paris','primary',11020000
'Indonesia','Jakarta','primary',34540000
'Iran','Tehran','primary',13633000
'Japan','Tokyo','primary',37977000
'Korea, South','Seoul','primary',21794000
'Malaysia','Kuala Lumpur','primary',8285000
'Mexico','Mexico City','primary',20996000
'Peru','Lima','primary',9848000
'Philippines','Manila','primary',23088000
'Russia','Moscow','primary',17125000
'Saudi Arabia','Riyadh','primary',6881000
'Sudan','Khartoum','primary',7282000
'Tanzania','Dar es Salaam','primary',6698000
'Thailand','Bangkok','primary',17066000
'United Kingdom','London','primary',10979000
'Vietnam','Hanoi','primary',778500

In [6]:
len(qres)

24

In [1]:
'''
Created on 19 Mar 2019

@author: ejimenez-ruiz
'''

'''
Parent lookup class
'''

import json
from pprint import pprint
import time
from urllib import parse, request

from entity import KGEntity 


class Lookup(object):
    '''
    classdocs
    '''
    def __init__(self, lookup_url):
        self.service_url = lookup_url
        
        
    def getJSONRequest(self, params, attempts=3):
        
        
        try:
            #urllib has been split up in Python 3. 
            #The urllib.urlencode() function is now urllib.parse.urlencode(), 
            #and the urllib.urlopen() function is now urllib.request.urlopen().
            #url = service_url + '?' + urllib.urlencode(params)
            url = self.service_url + '?' + parse.urlencode(params)
            #print(url)
            #response = json.loads(urllib.urlopen(url).read())
            
            
            req = request.Request(url)
            #Customize headers. For example dbpedia lookup returns xml by default
            req.add_header('Accept', 'application/json')
            
            
            #print(request.urlopen(req).read())
            response = json.loads(request.urlopen(req).read())
            
            return response
        
        except:
            
            print("Lookup '%s' failed. Attempts: %s" % (url, str(attempts)))
            time.sleep(60) #to avoid limit of calls, sleep 60s
            attempts-=1
            if attempts>0:
                return self.getJSONRequest(params, attempts)
            else:
                return None
    
        
        
'''
DBpedia lookup access
'''
class DBpediaLookup(Lookup):
    '''
    classdocs
    
    '''
    
    def __init__(self):
        '''
        Constructor
        '''
        super().__init__(self.getURL())
        
        
    def getURL(self):
        #OLD lookup: https://github.com/dbpedia/lookup
        #return "http://lookup.dbpedia.org/api/search/KeywordSearch"
        
        #NEW lookup: https://github.com/dbpedia/lookup-application
        #return "http://akswnc7.informatik.uni-leipzig.de/lookup/api/search"
        return "http://lookup.dbpedia.org/api/search"
        
        #TODO: prefix search allows for partial searches
        #return "http://lookup.dbpedia.org/api/search/PrefixSearch"
        
        
        
        
    
    def __createParams(self, query, limit, query_cls=''):
        
        if query_cls=='':
            params = {
                'query': query,
                'maxResults': limit,
                'format': 'json',
            }
            
        else:
            params = {
                'typeName' : query_cls,
                'query': query,
                'maxResults': limit,
                'format': 'json'
            }
            #'QueryClass' : query_cls,
            #'QueryString': query,
            #'MaxHits': limit,
        
        return params
        
        
    def getKGName(self):
        return 'DBpedia'
    
    
    
     
    
    
    '''
    Returns list of ordered entities according to relevance: dbpedia
    '''
    def __extractKGEntities(self, json, filter=''):
        
        entities = list()
        
        for element in json['docs']:
            
            types = set()
            
            #print(element)
            
            if 'type' in element:
                for t in element['type']:
                    if t != 'http://www.w3.org/2002/07/owl#Thing':
                        if t.startswith('http://dbpedia.org/ontology/') or t.startswith('http://www.wikidata.org/entity/') or t.startswith('http://schema.org/'): 
                            types.add(t)
                
            description=''
            if 'comment' in element:
                description = element['comment']
                
            ##Expected only one
            uri=''
            if 'resource' in element:
                for u in element['resource']:
                    uri=u
                
            ##Expected only one
            label=''
            if 'label' in element:
                for l in element['label']:
                    label=l
            
            kg_entity = KGEntity(
                uri,
                label,
                description,
                types,
                self.getKGName()
                )
            
            #We filter according to given URI
            if filter=='' or uri==filter:
                entities.append(kg_entity)
            #print(kg_entity)
        
        #for entity in entities:
        #    print(entity)    
        return entities
    
    
    def getKGEntities(self, query, limit, filter=''):        
        json = self.getJSONRequest(self.__createParams(query, limit), 3)
        
        if json==None:
            print("None results for", query)
            return list()
        
        return self.__extractKGEntities(json, filter) #Optionally filter by URI
    
    
    
    
'''
Wikidata web search API
'''
class WikidataAPI(Lookup):
    '''
    classdocs
    
    '''
    
    def __init__(self):
        '''
        Constructor
        '''
        super().__init__(self.getURL())
        
        
    def getURL(self):
        return "https://www.wikidata.org/w/api.php"
    
    
    
    
    
    
    def __createParams(self, query, limit, type='item'):
        
        params = {
            'action': 'wbsearchentities',
            'format' : 'json',
            'search': query,
            'type': type,
            'limit': limit,
            'language' : 'en'
        
            
        }
        
        return params
    
    
    def getKGName(self):
        return 'Wikidata'
    
    '''
    Returns list of ordered entities according to relevance: wikidata
    '''
    def __extractKGEntities(self, json, filter=''):
        
        entities = list()
        
        for element in json['search']:
            
            #empty list of type from wikidata lookup
            types = set()
            
            description=''
            if 'description' in element:
                description = element['description']
            
            kg_entity = KGEntity(
                element['concepturi'],
                element['label'],
                description,
                types,
                self.getKGName()
                )
            
            
            #We filter according to givem URI
            if filter=='' or element['concepturi']==filter:
                entities.append(kg_entity)
            
            
            
        #for entity in entities:
        #    print(entity)    
        return entities
    
    
    
    def getKGEntities(self, query, limit, type='item', filter=''):        
        json = self.getJSONRequest(self.__createParams(query, limit, type), 3)     
        
        if json==None:
            print("None results for", query)
            return list()
           
        return self.__extractKGEntities(json, filter) #Optionally filter by URI
    
        
    
    
'''    
Entity search for Google KG
'''
class GoogleKGLookup(Lookup):
    
    
    def __init__(self):
        '''
        Constructor
        '''
        super().__init__(self.getURL())
         
    
        self.api_key = 'AIzaSyA6Bf9yuMCCPh7vpElzrfBvE2ENCVWr-84'
        #open('.api_key').read()
    
        

        
    #def getAPIKey(self):
    #    return self.api_key

    def getURL(self):
        return 'https://kgsearch.googleapis.com/v1/entities:search'

    def __createParams(self, query, limit):
        
        params = {
            'query': query,
            'limit': limit,
            'indent': True,
            'key': self.api_key,
        }
        
        return params
    
    
    def getKGName(self):
        return 'GoogleKG'
    
    
    
    '''
    Returns list of ordered entities according to relevance: google
    '''
    def __extractKGEntities(self, json, filter=''):
        
        entities = list()
        
        for element in json['itemListElement']:
            
            types = set()
            
            for t in element['result']['@type']:
                if t != 'Thing':
                    types.add("http://schema.org/"+t)
            
            
            description=''
            if 'description' in  element['result']:
                description = element['result']['description']
            
            
            kg_entity = KGEntity(
                element['result']['@id'],
                element['result']['name'],
                description,
                types,
                self.getKGName()
                )
            
            #We filter according to givem URI
            if filter=='' or element['result']['@id']==filter:
                entities.append(kg_entity)
            #print(kg_entity)
        
        #for entity in entities:
        #    print(entity)    
        return entities
    
    
    
    def getKGEntities(self, query, limit, filter=''):    
        json = self.getJSONRequest(self.__createParams(query, limit), 3)
        
        if json==None:
            print("None results for", query)
            return list()
        
        return self.__extractKGEntities(json, filter) #Optionally filter by URI
    
    
        

if __name__ == '__main__':
    
    
    #query="Chicago Bulls"
    query="Congo"
    
    #Max entities to be returned
    limit=10
    
    print("Entities from Google KG:")
    kg = GoogleKGLookup()
    entities = kg.getKGEntities(query, limit)
    for ent in  entities:
        print(ent)
    
    print("\n")
    
    dbpedia = DBpediaLookup()
    entities = dbpedia.getKGEntities(query, limit)
    print("Entities from DBPedia:")
    for ent in  entities:
        print(ent)
    
    
    print("\n")
    
    
    type="item"
    #type="property"
    wikidata = WikidataAPI()
    entities = wikidata.getKGEntities(query, limit, "item")
    print("Entities from Wikidata:")
    for ent in  entities:
        print(ent)
        
        

Entities from Google KG:
<id: kg:/m/0c2ty, label: Congo River, description: River in Africa, types: {'http://schema.org/RiverBodyOfWater', 'http://schema.org/BodyOfWater', 'http://schema.org/Place', 'http://schema.org/TouristAttraction'}, source: GoogleKG>
<id: kg:/m/088xp, label: Democratic Republic of the Congo, description: Country in Central Africa, types: {'http://schema.org/Country', 'http://schema.org/AdministrativeArea', 'http://schema.org/Place'}, source: GoogleKG>
<id: kg:/m/07x7c8, label: Convict cichlid, description: Fish, types: set(), source: GoogleKG>
<id: kg:/m/01rxw, label: Republic of the Congo, description: Country in Central Africa, types: {'http://schema.org/Country', 'http://schema.org/AdministrativeArea', 'http://schema.org/Place'}, source: GoogleKG>
<id: kg:/m/02mymm, label: Kongo language, description: Spoken language, types: set(), source: GoogleKG>
<id: kg:/m/041dk7, label: Congo national football team, description: Football team, types: {'http://schema.org/O

In [28]:
entities[2].id

AttributeError: 'KGEntity' object has no attribute 'id'

In [17]:
entities[0]

<id: http://www.wikidata.org/entity/Q974, label: Democratic Republic of the Congo, description: sovereign state in Central Africa, types: set(), source: Wikidata>

In [37]:
for p in entities:
        print(p.ident)

http://www.wikidata.org/entity/Q974
http://www.wikidata.org/entity/Q3503
http://www.wikidata.org/entity/Q35930
http://www.wikidata.org/entity/Q640090
http://www.wikidata.org/entity/Q2009615
http://www.wikidata.org/entity/Q598860
http://www.wikidata.org/entity/Q775697
http://www.wikidata.org/entity/Q1125771
http://www.wikidata.org/entity/Q2597556
http://www.wikidata.org/entity/Q971
