In [14]:
import csv, ujson, time
import pandas as pd
from rdflib import Graph
from pathlib import Path

In [2]:
# Directory of the life-expectancy example; later cells read the input CSV and
# the model YAML from it and write the generated output next to them.
# NOTE(review): this is an absolute path into one particular checkout — adjust
# it to your local copy of the repository before running the notebook.
EXAMPLE_DIR = Path("/workspace/drepr/drepr/examples/life_expectancy")

In [3]:
# List the files that make up the example ({EXAMPLE_DIR} is interpolated by IPython).
!ls {EXAMPLE_DIR}

life_expectancy.csv     rdf_data_cube.model.yml rdf_data_cube.out


In [5]:
# Preview the raw input. header=None keeps pandas from promoting the first row
# to column names, so both header rows of the sheet (period, then gender) show
# up as ordinary rows 0 and 1 above the per-area values.
pd.read_csv(EXAMPLE_DIR / "life_expectancy.csv", header=None)

Unnamed: 0,0,1,2,3,4,5,6
0,,2004-2006,,2005-2007,,2006-2008,
1,,Male,Female,Male,Female,Male,Female
2,Newport,76.7,80.7,77.1,80.9,77.0,81.5
3,Cardiff,78.7,83.3,78.6,83.7,78.7,83.4
4,Monmouthshire,76.6,81.3,76.5,81.5,76.6,81.7
5,Merthyr Tydfil,75.5,79.1,75.5,79.4,74.9,79.6


In [80]:
# Compile the Rust project (debug profile). `2>/dev/null` throws stderr away,
# so cargo's progress messages and any compiler diagnostics are not shown.
# NOTE(review): the next cell starts with the same command, so this cell looks
# redundant; it also assumes the kernel's working directory is the crate — confirm.
!cargo build 2>/dev/null

In [7]:
# Rebuild quietly, then run the debug binary on the bundled example.
# Everything after `--` is handed to the drepr executable:
#   1. the model description (rdf_data_cube.model.yml),
#   2. the output spec `ttl:file:<path>` (presumably: Turtle written to <path>),
#   3. the input resource `default:<path>` (presumably: resource id `default` bound to the CSV).
# RUST_BACKTRACE=1 makes a Rust panic print a backtrace.
# NOTE(review): PYTHONHOME points at one user's Anaconda install; change it to
# match the local Python installation before running.
!cargo build 2>/dev/null
!RUST_BACKTRACE=1 PYTHONHOME=/Users/rook/anaconda3/ cargo run -- \
    {EXAMPLE_DIR}/rdf_data_cube.model.yml \
    ttl:file:{EXAMPLE_DIR}/rdf_data_cube.out \
    default:{EXAMPLE_DIR}/life_expectancy.csv

[0m[0m[1m[32m    Finished[0m dev [unoptimized + debuginfo] target(s) in 0.25s
[0m[0m[1m[32m     Running[0m `/workspace/drepr/drepr/target/debug/drepr /workspace/drepr/drepr/examples/life_expectancy/rdf_data_cube.model.yml 'ttl:file:/workspace/drepr/drepr/examples/life_expectancy/rdf_data_cube.out' 'default:/workspace/drepr/drepr/examples/life_expectancy/life_expectancy.csv'`
>>> [D-REPR] runtime: 22.019915ms


In [6]:
# Timing run: same model, but built with `--release`, writing `graph_json`
# output to /tmp/rdf_data_cube.out and reading /tmp/life_expectancy.csv.
# NOTE(review): no visible cell creates /tmp/life_expectancy.csv; it is
# presumably a much larger file with the same layout as the example CSV, used
# for benchmarking — confirm and document how it is generated.
# NOTE(review): PYTHONHOME is a user-specific path; adjust for your machine.
!RUST_BACKTRACE=1 PYTHONHOME=/Users/rook/anaconda3/ cargo run --release -- \
    {EXAMPLE_DIR}/rdf_data_cube.model.yml \
    graph_json:file:/tmp/rdf_data_cube.out \
    default:/tmp/life_expectancy.csv

[0m[0m[1m[32m    Finished[0m release [optimized] target(s) in 0.31s
[0m[0m[1m[32m     Running[0m `/workspace/drepr/drepr/target/release/drepr /workspace/drepr/drepr/examples/life_expectancy/rdf_data_cube.model.yml 'graph_json:file:/tmp/rdf_data_cube.out' 'default:/tmp/life_expectancy.csv'`
>>> [D-REPR] runtime: 7.431387299s


In [16]:
def _period_uris(header):
    """Turn the period header row into one reference-period URI per column.

    The sheet only labels the first column of each period (e.g.
    ``,2004-2006,,2005-2007,``); blank cells inherit the label to their left.
    The fill is done on the *raw* labels: filling from an already converted
    cell would feed a URI into ``split("-")`` and produce a malformed period.

    Args:
        header: first CSV row as a list of strings; column 0 is the row-label
            column and is left untouched.

    Returns:
        A new list of the same length whose columns 1.. hold URIs of the form
        ``http://reference.data.gov.uk/id/gregorian-interval/<start-year>-01-01T00:00:00/P3Y``.
    """
    uris = list(header)
    # A blank cell in column 1 falls back to column 0, as in the original loop.
    label = header[0] if header else ""
    for i in range(1, len(header)):
        if header[i] != "":
            label = header[i]
        # "2004-2006" -> "2004": only the start year goes into the URI.
        uris[i] = (
            "http://reference.data.gov.uk/id/gregorian-interval/"
            + label.split("-")[0]
            + "-01-01T00:00:00/P3Y"
        )
    return uris


# Hand-written baseline for the D-REPR benchmark: read the wide CSV, emit one
# observation per (area, period, gender) cell, and dump them as JSON lines.
# perf_counter is monotonic, which is what an elapsed-time measurement needs.
start = time.perf_counter()
# newline="" lets the csv module do its own newline handling (per the csv docs).
with open("/tmp/life_expectancy.csv", "r", newline="") as f:
    rows = csv.reader(f, delimiter=",")
    year = _period_uris(next(rows))  # row 0: periods, forward-filled
    gender = next(rows)              # row 1: gender label for each column

    records = []
    for line in rows:
        # Column 0 is the area name; every other column is one observation.
        for i in range(1, len(line)):
            records.append({
                # Sequential id; len(records) avoids a counter named `id`
                # that would shadow the builtin for the rest of the kernel.
                "id": len(records),
                "data": {
                    "@type": "qb:Observation",
                    "eg:refArea": line[0],
                    "eg:gender": gender[i],
                    "eg:refPeriod": year[i],
                    "smdx-measure:obsValue": line[i]
                },
                "outlinks": []
            })

# One JSON document per line.
with open("/tmp/out.json", "w") as f:
    for r in records:
        f.write(ujson.dumps(r))
        f.write("\n")
end = time.perf_counter()
print('runtime', end - start, "seconds")

runtime 2.761937141418457 seconds


In [8]:
# Show the file that the debug run was told to write (`ttl:file:.../rdf_data_cube.out`).
!cat {EXAMPLE_DIR}/rdf_data_cube.out

@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
@prefix xml: <http://www.w3.org/XML/1998/namespace> .
@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .
@prefix qb: <http://purl.org/linked-data/cube#> .
@prefix eg: <http://example.org/> .
@prefix smdx-measure: <http://purl.org/linked-data/sdmx/2009/measure#> .


_:Observation0_2_1 a qb:Observation;
	eg:gender "Male";
	eg:refArea "Newport";
	smdx-measure:obsValue 76.7;
	eg:refPeriod <http://reference.data.gov.uk/id/gregorian-interval/2004-01-01T00:00:00/P3Y>;
	.
_:Observation0_2_2 a qb:Observation;
	eg:gender "Female";
	eg:refArea "Newport";
	smdx-measure:obsValue 80.7;
	eg:refPeriod <http://reference.data.gov.uk/id/gregorian-interval/2004-01-01T00:00:00/P3Y>;
	.
_:Observation0_2_3 a qb:Observation;
	eg:gender "Male";
	eg:refArea "Newport";
	smdx-measure:obsValue 77.1;
	eg:refPeriod <http://reference.data.gov.uk/id/gregorian-interval/2005-01-01T00:00:00/P3Y>;
	.
_:Obse

In [171]:
# Sanity check: the generated file must parse as Turtle with rdflib, and
# re-serialising it shows the triples independently of D-REPR's own writer.
g = Graph()
g.parse(source=str(EXAMPLE_DIR / "rdf_data_cube.out"), format="ttl")

serialized = g.serialize(format="n3")
# rdflib < 6 returns bytes here, rdflib >= 6 returns str; decode only when
# needed so the cell runs on both instead of failing on `str.decode`.
if isinstance(serialized, bytes):
    serialized = serialized.decode()
print(serialized)

@prefix eg: <http://example.org/> .
@prefix qb: <http://purl.org/linked-data/cube#> .
@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
@prefix smdx-measure: <http://purl.org/linked-data/sdmx/2009/measure#> .
@prefix xml: <http://www.w3.org/XML/1998/namespace> .
@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .

[] a qb:Observation ;
    eg:gender "Male" ;
    eg:refArea "Newport" ;
    eg:refPeriod <http://reference.data.gov.uk/id/gregorian-interval/2004-01-01T00:00:00/P3Y> ;
    smdx-measure:obsValue 76.7 .

[] a qb:Observation ;
    eg:gender "Female" ;
    eg:refArea "Cardiff" ;
    eg:refPeriod <http://reference.data.gov.uk/id/gregorian-interval/2005-01-01T00:00:00/P3Y> ;
    smdx-measure:obsValue 83.7 .

[] a qb:Observation ;
    eg:gender "Male" ;
    eg:refArea "Cardiff" ;
    eg:refPeriod <http://reference.data.gov.uk/id/gregorian-interval/2006-01-01T00:00:00/P3Y> ;
    smdx-measure:obsValue 78.7 .

[] a qb:Ob