In [3]:
# Load the gzip-compressed GDP table with pandas and store every row as RDF
# triples in an rdflib graph (one subject URI per country).
import pandas as pd

from rdflib import Namespace, Graph, Literal
from rdflib.collection import Collection
from rdflib import FOAF, DC

# Input data
file = 'vertiefungsbeispiel-gdp.csv.gz'

# Column labels, named after the predicate each column is stored under
column_names = ['default:rank', 'country', 'default:imfGDP', 'default:unGDP', 'default:gdpPerCapita', 'dpo:populationTotal']

# skiprows=1 drops the file's own first line so that `names` replaces it
df = pd.read_csv(file, compression='gzip', names=column_names, skiprows=1)

# NOTE(review): the DBpedia ontology is usually published under
# http://dbpedia.org/ontology/ - confirm which base URI the exercise expects.
DPO = Namespace('https://dbpedia.org/')
default = Namespace('http://moodle.fhgr.ch/mod/resource/view.php/')

g = Graph()

# itertuples(name=None) yields plain tuples in column order, so the values can
# be unpacked positionally without building a Series for every row.
for rank, country, imfGDP, unGDP, gdpPerCapita, populationTotal in df.itertuples(index=False, name=None):
    # Build the subject URI once per row. Indexing the namespace is the
    # supported way to look up a term by a dynamic name; attribute access can
    # collide with str methods because Namespace subclasses str.
    # e.g. "United States" -> .../United_States
    subject = default[country.strip().replace(' ', '_')]

    g.add((subject, default.rank, Literal(rank)))
    g.add((subject, default.imfGDP, Literal(imfGDP)))
    g.add((subject, default.unGDP, Literal(unGDP)))
    g.add((subject, default.gdpPerCapita, Literal(gdpPerCapita)))
    g.add((subject, DPO.populationTotal, Literal(populationTotal)))
    print((subject, default.rank, Literal(rank)))

# print(g) would only show the graph object's description, not its content;
# report how many triples were stored instead.
print(f"Graph contains {len(g)} triples")

(rdflib.term.URIRef('http://moodle.fhgr.ch/mod/resource/view.php/United_States'), rdflib.term.URIRef('http://moodle.fhgr.ch/mod/resource/view.php/rank'), rdflib.term.Literal('1', datatype=rdflib.term.URIRef('http://www.w3.org/2001/XMLSchema#integer')))
(rdflib.term.URIRef('http://moodle.fhgr.ch/mod/resource/view.php/China'), rdflib.term.URIRef('http://moodle.fhgr.ch/mod/resource/view.php/rank'), rdflib.term.Literal('2', datatype=rdflib.term.URIRef('http://www.w3.org/2001/XMLSchema#integer')))
(rdflib.term.URIRef('http://moodle.fhgr.ch/mod/resource/view.php/Japan'), rdflib.term.URIRef('http://moodle.fhgr.ch/mod/resource/view.php/rank'), rdflib.term.Literal('3', datatype=rdflib.term.URIRef('http://www.w3.org/2001/XMLSchema#integer')))
(rdflib.term.URIRef('http://moodle.fhgr.ch/mod/resource/view.php/Germany'), rdflib.term.URIRef('http://moodle.fhgr.ch/mod/resource/view.php/rank'), rdflib.term.Literal('4', datatype=rdflib.term.URIRef('http://www.w3.org/2001/XMLSchema#integer')))
(rdflib.te

In [1]:
from rdflib import Graph, Namespace, BNode, Literal
from rdflib import FOAF, DC
import gzip
import csv
import os


if __name__ == "__main__":

    input_file_path = "./vertiefungsbeispiel-gdp.csv.gz"

    # Abort early if the expected input file is missing. Raising SystemExit
    # with a message reports it on stderr and gives a non-zero exit status;
    # the interactive exit() helper is not meant for use in programs.
    if not os.path.isfile(input_file_path):
        raise SystemExit(f"Expected to find file {input_file_path} but it was not found")

    # Define namespaces. The base URI must end in a separator, otherwise the
    # terms are minted as ".../2023rank" instead of ".../2023/rank".
    default = Namespace("http://www.fhgr.ch/ke-e/gdp/2023/")
    dpo = Namespace("https://dbpedia.org/")

    # Create a graph object which can be queried against
    g = Graph()

    # Bind every prefix used in QUERY explicitly so the query does not depend
    # on which prefixes the installed rdflib version binds by default.
    g.bind("default", default)
    g.bind("dpo", dpo)
    g.bind("foaf", FOAF)

    QUERY = '''
            SELECT ?name ?rank
            WHERE {
                ?s default:rank ?rank .
                ?s foaf:name ?name .
            }
            ORDER BY ASC (?rank)
        '''

    # The file must be read in text mode ('rt'); newline="" leaves line-ending
    # handling to the csv module, as its documentation recommends.
    with gzip.open(input_file_path, "rt", newline="") as f:
        csv_reader = csv.reader(f)

        # Advance the iterator once so we skip the first row which contains the headers
        next(csv_reader, None)

        for row in csv_reader:
            # csv.reader yields an empty list for blank lines; skip them
            # instead of failing on the unpacking below.
            if not row:
                continue

            # Note that the order must be the same as the headers in the csv file...
            rank, country, imf_gdp, un_gdp, gdp_per_capita, pop = row

            # One blank node per country; its readable name is attached via foaf:name.
            country_node = BNode()

            # Convert e.g "United States" into "United_States"
            country_name = Literal(country.strip().replace(' ', '_').replace('-', '_'))

            # Rank is stored as an integer so that ORDER BY sorts numerically.
            rank = Literal(int(rank))

            # The remaining values are stored as plain string literals, exactly
            # as they were read from the csv file.
            gdp_per_capita = Literal(gdp_per_capita)
            imf_gdp = Literal(imf_gdp)
            un_gdp = Literal(un_gdp)
            population_total = Literal(pop)

            g.add((country_node, default.rank, rank))
            g.add((country_node, FOAF.name, country_name))
            g.add((country_node, default.gdpPerCapita, gdp_per_capita))
            g.add((country_node, default.imfGDP, imf_gdp))
            g.add((country_node, default.unGDP, un_gdp))
            g.add((country_node, dpo.populationTotal, population_total))

    # The graph is complete once the file is closed; list countries by rank.
    for name, rank in g.query(QUERY):
        print(f"Name: {name} Rank: {rank}")

Name: United_States Rank: 1
Name: China Rank: 2
Name: Japan Rank: 3
Name: Germany Rank: 4
Name: India Rank: 5
Name: United_Kingdom Rank: 6
Name: France Rank: 7
Name: Italy Rank: 8
Name: Brazil Rank: 9
Name: Canada Rank: 10
Name: South_Korea Rank: 11
Name: Russia Rank: 12
Name: Spain Rank: 13
Name: Australia Rank: 14
Name: Mexico Rank: 15
Name: Indonesia Rank: 16
Name: Netherlands Rank: 17
Name: Turkey Rank: 18
Name: Saudi_Arabia Rank: 19
Name: Switzerland Rank: 20
Name: Poland Rank: 21
Name: Taiwan Rank: 22
Name: Sweden Rank: 23
Name: Belgium Rank: 24
Name: Thailand Rank: 25
Name: Argentina Rank: 26
Name: Nigeria Rank: 27
Name: Iran Rank: 28
Name: Austria Rank: 29
Name: United_Arab_Emirates Rank: 30
Name: Norway Rank: 31
Name: Ireland Rank: 32
Name: Israel Rank: 33
Name: Hong_Kong Rank: 34
Name: Malaysia Rank: 35
Name: Singapore Rank: 36
Name: Philippines Rank: 37
Name: South_Africa Rank: 38
Name: Denmark Rank: 39
Name: Colombia Rank: 40
Name: Bangladesh Rank: 41
Name: Egypt Rank: 42
N