# Add Derived Graphs To The Tutorial Graph



In [1]:
import io
import os
import subprocess
import sys

import numpy as np
import pandas as pd
from IPython.display import display, HTML

import papermill as pm

sys.path.insert(0,'..')
from configure_kgtk_notebooks import ConfigureKGTK

User home: /Users/pedroszekely
Current dir: /Users/pedroszekely/Documents/GitHub/kgtk/tutorial
Use-cases dir: /Users/pedroszekely/Documents/GitHub/kgtk/use-cases


In [2]:
# Parameters
# NOTE(review): presumably the papermill "parameters" cell — confirm the cell is tagged as such.
# The defaults below are absolute paths on the author's machine; override them when running elsewhere.

# Folder holding the input graph files (this notebook reads `all.tsv.gz` from it)
input_path = "/Users/pedroszekely/Downloads/kypher/projects/build-tutorial"
# Folder on local machine where to create the output and temporary folders
output_path = "/Users/pedroszekely/Downloads/kypher/projects"
# Project name; the recorded run places its output under <output_path>/<project_name>
project_name = "tutorial-derived-graphs"

In [3]:
# Names of the graph files this notebook works with; each name is later
# available as an environment variable holding the file's path.
files = ["all"]

# Set up the KGTK environment for this project from the parameters above.
ck = ConfigureKGTK()
ck.configure_kgtk(
    input_graph_path=input_path,
    output_path=output_path,
    project_name=project_name,
)

In [4]:
# Expose the labels file to KGTK under the KGTK_LABEL_FILE variable.
# NOTE(review): `label` is presumably exported by ck.configure_kgtk — confirm.
os.environ['KGTK_LABEL_FILE'] = os.environ['label']

In [5]:
# Print the environment variables configured for this project, including one entry per name in `files`.
ck.print_env_variables(files)

USE_CASES_DIR: /Users/pedroszekely/Documents/GitHub/kgtk/use-cases
GRAPH: /Users/pedroszekely/Downloads/kypher/projects/build-tutorial
STORE: /Users/pedroszekely/Downloads/kypher/projects/tutorial-derived-graphs/temp.tutorial-derived-graphs/wikidata.sqlite3.db
OUT: /Users/pedroszekely/Downloads/kypher/projects/tutorial-derived-graphs
kgtk: kgtk
TEMP: /Users/pedroszekely/Downloads/kypher/projects/tutorial-derived-graphs/temp.tutorial-derived-graphs
EXAMPLES_DIR: /Users/pedroszekely/Documents/GitHub/kgtk/examples
kypher: kgtk query --graph-cache /Users/pedroszekely/Downloads/kypher/projects/tutorial-derived-graphs/temp.tutorial-derived-graphs/wikidata.sqlite3.db
all: /Users/pedroszekely/Downloads/kypher/projects/build-tutorial/all.tsv.gz


Turn on debugging for Kypher by adding the `--debug` flag to the `kypher` command.

In [6]:
# Redefine the `kypher` command so every query runs with --debug,
# still pointing at the graph cache stored in STORE.
debug_query_prefix = "kgtk --debug query --graph-cache "
os.environ['kypher'] = debug_query_prefix + os.environ['STORE']

In [7]:
# Confirm that the `kypher` environment variable now carries the --debug flag.
!echo "$kypher"

kgtk --debug query --graph-cache /Users/pedroszekely/Downloads/kypher/projects/tutorial-derived-graphs/temp.tutorial-derived-graphs/wikidata.sqlite3.db


Load all the input files into the Kypher graph cache so that every graph alias is defined.

In [8]:
# Load each file named in `files` into the graph cache under its alias.
# In the recorded run this issues a `--limit 3` query per file, so the first rows
# (and, with --debug on, the generated SQL) are printed.
ck.load_files_into_cache(file_list=files)

kgtk --debug query --graph-cache /Users/pedroszekely/Downloads/kypher/projects/tutorial-derived-graphs/temp.tutorial-derived-graphs/wikidata.sqlite3.db -i "/Users/pedroszekely/Downloads/kypher/projects/build-tutorial/all.tsv.gz" --as all  --limit 3
[2021-10-03 17:15:03 query]: SQL Translation:
---------------------------------------------
  SELECT *
     FROM graph_1 AS graph_1_c1
     LIMIT ?
  PARAS: [3]
---------------------------------------------
node1	label	node2	id
P10	P31	Q18610173	P10-P31-Q18610173-85ef4d24-0
P1000	P31	Q18608871	P1000-P31-Q18608871-093affb5-0
P1001	P1647	P276	P1001-P1647-P276-e4e44f83-0


In [9]:
# Make the project output folder the working directory for the cells that follow.
%cd {os.environ['OUT']}

/Users/pedroszekely/Downloads/kypher/projects/tutorial-derived-graphs


In [10]:
# List the output folder (the current directory after the %cd above).
!ls

[34mtemp.tutorial-derived-graphs[m[m


In [11]:
# Check that the input graph file referenced by the `all` variable exists.
!ls {os.environ['all']}

/Users/pedroszekely/Downloads/kypher/projects/build-tutorial/all.tsv.gz


In [12]:
# Peek at the first 10 lines of the input graph that contain "data".
# NOTE(review): `grep data` matches the substring anywhere on the line, not only `datatype` edges.
!zcat < {os.environ['all']} | grep data | head

P1001	datatype	wikibase-item	P1001-datatype
P1003	datatype	external-id	P1003-datatype
P1004	datatype	external-id	P1004-datatype
P1005	datatype	external-id	P1005-datatype
P1006	datatype	external-id	P1006-datatype
P101	datatype	wikibase-item	P101-datatype
P1010	datatype	external-id	P1010-datatype
P1011	datatype	wikibase-item	P1011-datatype
P1012	datatype	wikibase-item	P1012-datatype
P1013	datatype	wikibase-item	P1013-datatype


In [30]:
!kgtk filter --verbose=False --use-mgzip=True --first-match-only \
--input-file /Users/pedroszekely/Downloads/kypher/projects/tutorial-derived-graphs/parts/all.tsv.gz \
-p '; datatype ;' \
-o /Users/pedroszekely/Downloads/kypher/projects/tutorial-derived-graphs/parts/metadata.property.datatypes.tsv.gz 

In [13]:
# Partition the input graph into separate files under $OUT/parts by running the
# partition-wikidata notebook through papermill. The executed copy is saved in TEMP.
parts_path = os.environ["OUT"] + "/parts"
pm.execute_notebook(
    os.environ["EXAMPLES_DIR"] + "/partition-wikidata.ipynb",
    os.environ["TEMP"] + "/partition-wikidata.out.ipynb",
    parameters=dict(
        wikidata_input_path = input_path + "/all.tsv.gz",
        wikidata_parts_path = parts_path,
        temp_folder_path = parts_path + "/temp",
        # Resolve the sort temp directory here so it always matches temp_folder_path
        # and does not depend on $OUT being expanded inside the child notebook.
        sort_extras = "--buffer-size 30% --temporary-directory " + parts_path + "/temp",
        verbose = False,
        gzip_command = 'gzip'
    )
# The semicolon has to end the statement to suppress the returned notebook's repr.
# On a line of its own, IPython reads `;` as its auto-quote escape and the cell evaluates to ''.
);

Executing:   0%|          | 0/49 [00:00<?, ?cell/s]

''

In [18]:
# Show the project output folder.
!echo $OUT

/Users/pedroszekely/Downloads/kypher/projects/tutorial-derived-graphs


In [12]:
# Show the temp folder; the executed copies of the sub-notebooks are written there.
os.environ["TEMP"]

'/Users/pedroszekely/Downloads/kypher/projects/tutorial-derived-graphs/temp.tutorial-derived-graphs'

In [17]:
# Build the derived files under $OUT/useful_files from the partitioned graph by running the
# "Wikidata Useful Files" notebook through papermill. The executed copy is saved in TEMP.
pm.execute_notebook(
    os.environ["USE_CASES_DIR"] + "/Wikidata Useful Files.ipynb",
    os.environ["TEMP"] + "/Wikidata Useful Files Out.ipynb",
    parameters=dict(
        output_path = os.environ["OUT"],
        output_folder = "useful_files",
        temp_folder = "temp.useful_files",
        wiki_root_folder = os.environ["OUT"] + "/parts/",
        cache_path = os.environ["TEMP"],
        languages = 'en',
        compute_pagerank = True,
        delete_database = False,
        # NOTE(review): a string, unlike the boolean flags above — confirm what the sub-notebook expects.
        debug = "true"
    )
# The semicolon has to end the statement to suppress the returned notebook's repr.
# On a line of its own, IPython reads `;` as its auto-quote escape and the cell evaluates to ''.
);

Executing:   0%|          | 0/157 [00:00<?, ?cell/s]

''

In [20]:
# List the files in the parts folder.
!ls $OUT/parts

aliases.en.tsv.gz                   metadata.property.datatypes.tsv.gz
aliases.tsv.gz                      metadata.types.tsv.gz
all.tsv.gz                          qualifiers.commonsMedia.tsv.gz
claims.commonsMedia.tsv.gz          qualifiers.external-id.tsv.gz
claims.external-id.tsv.gz           qualifiers.geo-shape.tsv.gz
claims.geo-shape.tsv.gz             qualifiers.globe-coordinate.tsv.gz
claims.globe-coordinate.tsv.gz      qualifiers.math.tsv.gz
claims.math.tsv.gz                  qualifiers.monolingualtext.tsv.gz
claims.monolingualtext.tsv.gz       qualifiers.musical-notation.tsv.gz
claims.musical-notation.tsv.gz      qualifiers.quantity.tsv.gz
claims.other.tsv.gz                 qualifiers.string.tsv.gz
claims.quantity.tsv.gz              qualifiers.tabular-data.tsv.gz
claims.string.tsv.gz                qualifiers.time.tsv.gz
claims.tabular-data.tsv.gz          qualifiers.tsv.gz
claims.time.tsv.gz                  qualifiers.url.tsv.gz
claims.tsv.gz                       quali

In [28]:
!cp -p $OUT/parts/*.tsv.gz $OUT/../tutorial-files

In [29]:
# List the files in the useful_files folder.
!ls $OUT/useful_files

aliases.en.tsv.gz                      dwd_isa_class_count.compact.tsv.gz
derived.P279.tsv.gz                    item.property.count.compact.tsv.gz
derived.P279star.tsv.gz                labels.en.tsv.gz
derived.P31.tsv.gz                     metadata.in_degree.tsv.gz
derived.P31P279star.tsv.gz             metadata.out_degree.tsv.gz
derived.P31_39_106_279star.tsv.gz      metadata.pagerank.directed.tsv.gz
derived.dwd.count.tsv.gz               metadata.pagerank.undirected.tsv.gz
derived.dwd_isa.tsv.gz                 statistics.in_degree.distribution.tsv
derived.isa.tsv.gz                     statistics.out_degree.distribution.tsv
derived.isastar.tsv.gz                 temp.useful_files
descriptions.en.tsv.gz


In [30]:
!cp $OUT/useful_files/*.tsv.gz $OUT/../tutorial-files