In [1]:
import csv
import pandas as pd
from owlready2 import *

from rdflib import Graph
from rdflib import URIRef, BNode, Literal
from rdflib import Namespace
from rdflib.namespace import OWL, RDF, RDFS, FOAF, XSD


In [2]:
def loadOntology(urionto):
    """Load an ontology and print a summary of the classes it declares.

    Args:
        urionto: Ontology location, passed unchanged to ``get_ontology``.

    Returns:
        The object returned by ``get_ontology(urionto).load()``.
    """
    # get_ontology is provided by the owlready2 star-import at the top of the file.
    onto = get_ontology(urionto).load()

    # Enumerate the classes once and reuse the list for the count and the listing.
    declared = list(onto.classes())
    print(f"Classes in Ontology: {len(declared)}")
    for owl_class in declared:
        print("\t" + owl_class.iri)
    return onto


In [3]:
# Read the world-cities sample: comma-separated fields, '"' as the quote
# character and a backslash as the escape character.
# NOTE(review): pandas' default NA parsing still applies here — confirm that no
# legitimate value (e.g. a code spelled "NA") is being read as missing.
df = pd.read_csv(
    "worldcities-free-100.csv",
    sep=",",
    quotechar='"',
    escapechar="\\",
)

In [101]:
# Load the ontology from "ex.owl". loadOntology also prints the number of
# classes and the IRI of each one.
# NOTE(review): this cell's execution count (In [101]) is out of sequence with
# its neighbours, and `onto` is not referenced by any later cell visible here —
# re-check with Restart Kernel -> Run All.
onto = loadOntology("ex.owl")

Classes in Ontology: 4
	http://www.semanticweb.org/zacharias.detorakis/ontologies/2021/2/lab6#ex:country
	http://www.semanticweb.org/zacharias.detorakis/ontologies/2021/2/lab6#ex:capital
	http://www.semanticweb.org/zacharias.detorakis/ontologies/2021/2/lab6#ex:city
	http://www.semanticweb.org/zacharias.detorakis/ontologies/2021/2/lab6#ex:not_capital


In [4]:
# Build an RDF graph from the CSV rows: every row yields an ex:city resource
# and an ex:country resource (rows sharing a country name share one URI, and
# the graph de-duplicates repeated triples), then write the graph to disk.
g = Graph()

ex = Namespace("http://example.org/")
g.bind("ex", ex)


def _resource_uri(label):
    """Return the URI in the ``ex`` namespace for a label, spaces turned into underscores."""
    return ex[label.replace(" ", "_")]


def _add_literal(subject, predicate, value):
    """Add ``(subject, predicate, Literal(value))`` unless the CSV cell is missing."""
    # pandas represents empty cells as NaN; without this guard they would be
    # written into the graph as NaN literals.
    if pd.notna(value):
        g.add((subject, predicate, Literal(value)))


# Predicate -> CSV column for the literal-valued city properties.
# NOTE(review): "lattitude" is misspelled, but the predicate name is kept as-is
# because consumers of the generated graph may already rely on it.
city_literals = (
    (ex.name, 'city'),
    (ex.city_ascii, 'city_ascii'),
    (ex.admin_name, 'admin_name'),
    (ex.lattitude, 'lat'),
    (ex.longitude, 'lng'),
    (ex.population, 'population'),
    (ex.capital_status, 'capital'),
)

# Predicate -> CSV column for the literal-valued country properties.
country_literals = (
    (ex.iso2_code, 'iso2'),
    (ex.iso3_code, 'iso3'),
    (ex.name, 'country'),
)

for index, row in df.iterrows():

    # Create the URIs for the city and its country and type them.
    city = _resource_uri(row['city'])
    country = _resource_uri(row['country'])
    g.add((city, RDF.type, ex.city))
    g.add((country, RDF.type, ex.country))

    # Link the city to its country.
    g.add((city, ex.has_country, country))

    # Literal-valued properties; missing cells are skipped.
    for predicate, column in city_literals:
        _add_literal(city, predicate, row[column])
    for predicate, column in country_literals:
        _add_literal(country, predicate, row[column])

    # Only a "primary" capital is recorded as the country's capital.
    if row['capital'] == 'primary':
        g.add((country, ex.has_capital, city))

g.serialize(destination='lab3_data_graph.ttl', format='ttl')

In [5]:

# Select cities whose capital status is "primary" and whose population is
# above 5,000,000, together with the name of their country.
qres = g.query(
    """SELECT ?name ?capital ?population ?country_name where {
      ?city rdf:type ex:city;
              ex:name ?name;
              ex:population ?population;
              ex:capital_status ?capital;
              ex:has_country ?country.
      ?country ex:name ?country_name.

      FILTER (STR(?capital)="primary").
      FILTER (?population>5000000).

    }""")

# Fixed header row, in the same column order as the data rows below. It must
# not interpolate any value: the previous version read `row.population` from a
# loop variable left over from another cell and printed a number here.
print("'country_name','city_name','capital','population'")
for result in qres:
    # Each result holds the matched RDF terms (URIs, literals or blank nodes),
    # accessible by the selected variable names.
    print(f"'{result.country_name!s}','{result.name!s}','{result.capital!s}',{result.population!s}")

'country_name','city_name','capital','6048700'
'Argentina','Buenos Aires','primary',16157000
'Korea, South','Seoul','primary',21794000
'Russia','Moscow','primary',17125000
'Philippines','Manila','primary',23088000
'Mexico','Mexico City','primary',20996000
'Chile','Santiago','primary',7007000
'Egypt','Cairo','primary',19372000
'Indonesia','Jakarta','primary',34540000
'Sudan','Khartoum','primary',7282000
'Saudi Arabia','Riyadh','primary',6881000
'Malaysia','Kuala Lumpur','primary',8285000
'Vietnam','Hanoi','primary',7785000
'Peru','Lima','primary',9848000
'Thailand','Bangkok','primary',17066000
'Japan','Tokyo','primary',37977000
'Bangladesh','Dhaka','primary',15443000
'Tanzania','Dar es Salaam','primary',6698000
'Angola','Luanda','primary',8417000
'Iran','Tehran','primary',13633000
'Congo (Kinshasa)','Kinshasa','primary',13528000
'France','Paris','primary',11020000
'Colombia','Bogotá','primary',9464000
'United Kingdom','London','primary',10979000
'China','Beijing','primary',19433000
