In [1]:
# Parameters

# Folder on local machine where to create the output and temporary folders
output_path = "/Users/pedroszekely/Downloads/kypher"

# The names of the output and temporary folders
output_folder = "kgtk"
temp_folder = "temp.kgtk"

# The location of input Wikidata files
wikidata_folder = "/Volumes/GoogleDrive/Shared drives/KGTK/datasets/wikidata-20210215/data/"

# TDM data
tdm_folder = "/Volumes/GoogleDrive/Shared drives/FAAST-ISI/TDM-data/data/kgtk/"

# FactSet data
factset_folder = "/Volumes/GoogleDrive/Shared drives/FAAST-ISI/FactSet-data/kgtk_files/"

# Location of the cache database for kypher
cache_path = "/Users/pedroszekely/Downloads/kypher"

# shortcuts to commands
kgtk = "time kgtk --debug"
# kgtk = "kgtk --debug"

In [17]:
import io
import os
import subprocess
import sys

import numpy as np
import pandas as pd

import papermill as pm

In [3]:
# The names of files in the KGTK Wikidata distirbution that we will use in this notebook.
file_names = {
    "claims": "claims.tsv.gz",
    "quantity": "claims.quantity.tsv.gz",
    "time": "claims.time.tsv.gz",
    "label": "labels.en.tsv.gz",
    "alias": "aliases.en.tsv.gz",
    "description": "descriptions.en.tsv.gz",
    "item": "claims.wikibase-item.tsv.gz",
    "property": "claims.wikibase-property.tsv.gz",
    "monolingualtext": "claims.monolingualtext.tsv.gz",
    "string": "claims.string.tsv.gz",
    "external_id": "claims.external-id.tsv.gz",
    "qualifiers": "qualifiers.tsv.gz",
    "sitelinks": "sitelinks.tsv.gz",
    "qualifiers_time": "qualifiers.time.tsv.gz",
    "coordinates": "claims.globe-coordinate.tsv.gz",
    "property_datatypes": "metadata.property.datatypes.tsv.gz",
    "isa": "derived.isa.tsv.gz",
    "isastar": "derived.isastar.tsv.gz", 
    "p279star": "derived.P279star.tsv.gz",
    "p279": "derived.P279.tsv.gz",
    "p31": "derived.P31.tsv.gz",
    "dwd_isa": "derived.dwd_isa.tsv.gz"
}

# We will define environment variables to hold the full paths to the files as we will use them in the shell commands
kgtk_environment_variables = []

os.environ['WIKIDATA'] = wikidata_folder
kgtk_environment_variables.append('WIKIDATA')

os.environ['TDM'] = tdm_folder
kgtk_environment_variables.append('TDM')

os.environ['FS'] = factset_folder
kgtk_environment_variables.append('FS')

for key, value in file_names.items():
    variable = key.upper()
    os.environ[variable] = wikidata_folder + value
    kgtk_environment_variables.append(variable)
    
# KGTK creates a SQLite database to index the knowledge graph.
if cache_path:
    os.environ['STORE'] = "{}/wikidata.sqlite3.db".format(cache_path)
else:
    os.environ['STORE'] = "{}/{}/wikidata.sqlite3.db".format(output_path, temp_folder)
kgtk_environment_variables.append('STORE')

# We will create many temporary files, so set up a folder for outputs and one for the temporary files.
os.environ['TEMP'] = "{}/{}".format(output_path, temp_folder) 
os.environ['OUT'] = "{}/{}".format(output_path, output_folder) 
kgtk_environment_variables.append('TEMP')
kgtk_environment_variables.append('OUT')

# Envronment variables with shortcuts to the commands we use often
os.environ['kgtk'] = kgtk
# Use for debugging, but careful as it causes import to dataframes to break
os.environ['kypher'] = "time kgtk --debug query --graph-cache " + os.environ['STORE']
# os.environ['kypher'] = "time kgtk query --graph-cache " + os.environ['STORE']
#os.environ['kypher'] = "kgtk query --graph-cache " + os.environ['STORE']
kgtk_environment_variables.append('kgtk')
kgtk_environment_variables.append('kypher')

# We'll save the current working directory so we can call into other example notebooks later
os.environ["EXAMPLES_DIR"] = os.getcwd()
kgtk_environment_variables.append('EXAMPLES_DIR')

kgtk_environment_variables.sort()
for variable in kgtk_environment_variables:
    print("{}: \"{}\"".format(variable, os.environ[variable]))

ALIAS: "/Volumes/GoogleDrive/Shared drives/KGTK/datasets/wikidata-20210215/data/aliases.en.tsv.gz"
CLAIMS: "/Volumes/GoogleDrive/Shared drives/KGTK/datasets/wikidata-20210215/data/claims.tsv.gz"
COORDINATES: "/Volumes/GoogleDrive/Shared drives/KGTK/datasets/wikidata-20210215/data/claims.globe-coordinate.tsv.gz"
DESCRIPTION: "/Volumes/GoogleDrive/Shared drives/KGTK/datasets/wikidata-20210215/data/descriptions.en.tsv.gz"
DWD_ISA: "/Volumes/GoogleDrive/Shared drives/KGTK/datasets/wikidata-20210215/data/derived.dwd_isa.tsv.gz"
EXAMPLES_DIR: "/Users/pedroszekely/Documents/GitHub/kgtk/examples"
EXTERNAL_ID: "/Volumes/GoogleDrive/Shared drives/KGTK/datasets/wikidata-20210215/data/claims.external-id.tsv.gz"
FS: "/Volumes/GoogleDrive/Shared drives/FAAST-ISI/FactSet-data/kgtk_files/"
ISA: "/Volumes/GoogleDrive/Shared drives/KGTK/datasets/wikidata-20210215/data/derived.isa.tsv.gz"
ISASTAR: "/Volumes/GoogleDrive/Shared drives/KGTK/datasets/wikidata-20210215/data/derived.isastar.tsv.gz"
ITEM: "/Vol

In [4]:
def kgtk_to_dataframe(kgtk):
    columns = kgtk[0].split("\t")
    data = []
    for line in kgtk[1:]:
        data.append(line.encode('utf-8').decode('utf-8').split("\t"))
    return pd.DataFrame(data, columns=columns)    

In [5]:
%cd {output_path}

/Users/pedroszekely/Downloads/kypher


Define the shortcuts for Kypher

In [6]:
!$kypher \
-i "$P31" --as p31 \
-i "$P279" --as p279 \
-i "$LABEL" --as labels \
-i "$ALIAS" --as aliases \
-i "$DESCRIPTION" --as descriptions \
-i "$P279STAR" --as p279star \
-i "$QUALIFIERS" --as qualifiers \
-i "$ITEM" --as items \
-i "$CLAIMS" --as claims \
-i "$PROPERTY" --as properties \
-i "$PROPERTY_DATATYPES" --as datatypes \
-i "$QUANTITY" --as quantities \
-i "$TIME" --as times \
-i "$EXTERNAL_ID" --as external_ids \
-i "$COORDINATES" --as coordinates \
-i "$MONOLINGUALTEXT" --as monolingual \
-i "$STRING" --as string \
--limit 3

[2021-09-27 21:55:41 query]: SQL Translation:
---------------------------------------------
  SELECT *
     FROM graph_1 AS graph_1_c1
     LIMIT ?
  PARAS: [3]
---------------------------------------------
id	node1	label	node2
P10-P31-Q18610173-85ef4d24-0	P10	P31	Q18610173
P1000-P31-Q18608871-093affb5-0	P1000	P31	Q18608871
P1001-P31-Q15720608-deeedec9-0	P1001	P31	Q15720608
        1.07 real         0.86 user         0.18 sys


# Construct The Arnold Schwarzenegger Graph `Q2685`

Approach:
- Select a subgraph of full Wikidata that includes people (Q5), organizations (Q43229), geographic regions (Q82794), and awards (Q618779). This graph contains all edges that connect instances of the target classes listed above. Output the graph using a single relation we call `link`.
- Starting from Schwarzenegger Q2685, compute reachable nodes in the graph computed in the previous step. This step will produce the collection of nodes that will be part of the Schwarzenegger graph.
- Extract from Wikidata all the edges that connect nodes from the previous step.
- Extract from Wikidata the time, quantity, monolingual and string properties.
- Extract from Wikidata the qualifiers for the edges computed in the previous steps.
- Extract from Wikidata the labels, aliases and descriptions for the Schwarzenegger nodes.

## Extract a subset ofg Wikidata to use as the base for the Schewarzenegger graph

In [127]:
!$kypher -i p31 -i items -i p279star \
--match ' \
    p31: (n1)-[]->(n1_class), \
    items: (n1)-[]->(n2), \
    p31: (n2)-[]->(n2_class), \
    p279star: (n1_class)-[]->(n1_superclass), \
    p279star: (n2_class)-[]->(n2_superclass)' \
--where 'n1_superclass in ["Q11424", "Q5", "Q43229", "Q82794", "Q618779"] and n2_superclass in ["Q11424", "Q5", "Q43229", "Q82794", "Q618779"]' \
--return 'distinct n1 as node1, "link" as label, n2 as node2' \
-o "$TEMP"/item.per.org.cw.geo.award.link.tsv.gz 

[2021-09-23 20:06:18 query]: SQL Translation:
---------------------------------------------
  SELECT DISTINCT graph_1_c1."node1" "_aLias.node1", ? "_aLias.label", graph_8_c2."node2" "_aLias.node2"
     FROM graph_1 AS graph_1_c1
     INNER JOIN graph_1 AS graph_1_c3, graph_6 AS graph_6_c4, graph_6 AS graph_6_c5, graph_8 AS graph_8_c2
     ON graph_1_c1."node1" = graph_8_c2."node1"
        AND graph_1_c1."node2" = graph_6_c4."node1"
        AND graph_1_c3."node2" = graph_6_c5."node1"
        AND graph_8_c2."node2" = graph_1_c3."node1"
        AND ((graph_6_c4."node2" IN (?, ?, ?, ?, ?)) AND (graph_6_c5."node2" IN (?, ?, ?, ?, ?)))
  PARAS: ['link', 'Q11424', 'Q5', 'Q43229', 'Q82794', 'Q618779', 'Q11424', 'Q5', 'Q43229', 'Q82794', 'Q618779']
---------------------------------------------
     8593.94 real      2910.33 user       726.15 sys


Starting from `Q2685` traverse links forward in breadfirst mode up to a fixed number of levels to build the graph

In [7]:
!$kgtk reachable-nodes \
    --root Q2685 \
    --prop link \
    --label "reachable" \
    --selflink \
    --breadth-first --depth-limit 4 \
    -i "$TEMP"/item.per.org.cw.geo.award.link.tsv.gz  \
    -o "$TEMP"/Q2685.reachable.per.org.cw.geo.award.tsv.gz

      292.56 real       285.02 user         5.55 sys


In [8]:
!$kgtk head -i "$TEMP"/Q2685.reachable.per.org.cw.geo.award.tsv.gz

node1	label	node2
Q2685	reachable	Q2685
Q2685	reachable	Q29468
Q2685	reachable	Q24004771
Q2685	reachable	Q2526255
Q2685	reachable	Q1841
Q2685	reachable	Q4220901
Q2685	reachable	Q602299
Q2685	reachable	Q10855271
Q2685	reachable	Q1153891
Q2685	reachable	Q1426204
        1.25 real         0.81 user         0.18 sys


Index the resulting file in kypher

In [9]:
!$kypher -i $TEMP/Q2685.reachable.per.org.cw.geo.award.tsv.gz --as nodesQ2685 --limit 2

[2021-09-26 10:19:12 sqlstore]: DROP graph data table graph_35 from nodesQ2685
[2021-09-26 10:19:12 sqlstore]: IMPORT graph directly into table graph_35 from /Users/pedroszekely/Downloads/kypher/temp.kgtk/Q2685.reachable.per.org.cw.geo.award.tsv.gz ...
[2021-09-26 10:19:12 query]: SQL Translation:
---------------------------------------------
  SELECT *
     FROM graph_35 AS graph_35_c1
     LIMIT ?
  PARAS: [2]
---------------------------------------------
node1	label	node2
Q2685	reachable	Q2685
Q2685	reachable	Q29468
        1.48 real         1.22 user         0.31 sys


## Build initial graph containing the item edges

Figure out which properties are used so so that we can add them as node1s and get all the info about them.

In [10]:
!$kypher -i nodesQ2685 -i properties -i datatypes -i claims \
--match ' \
    nodesQ2685: ()-[]->(n1), \
    claims: (n1)-[l {label: property}]->(), \
    datatypes: (property)-[:datatype]->(datatype) \
    ' \
--where 'datatype in ["wikibase-item", "string", "quantity", "time", "monolingualtext"]' \
--return 'distinct "Q2685" as node1, "link" as label, property as node2' \
-o "$TEMP"/Q2685.nodes.property.tsv.gz

[2021-09-26 10:19:13 query]: SQL Translation:
---------------------------------------------
  SELECT DISTINCT ? "_aLias.node1", ? "_aLias.label", graph_19_c3."node1" "_aLias.node2"
     FROM graph_19 AS graph_19_c3
     INNER JOIN graph_35 AS graph_35_c1, graph_9 AS graph_9_c2
     ON graph_19_c3."node1" = graph_9_c2."label"
        AND graph_35_c1."node2" = graph_9_c2."node1"
        AND graph_19_c3."label" = ?
        AND graph_9_c2."label" = graph_19_c3."node1"
        AND (graph_19_c3."node2" IN (?, ?, ?, ?, ?))
  PARAS: ['Q2685', 'link', 'datatype', 'wikibase-item', 'string', 'quantity', 'time', 'monolingualtext']
---------------------------------------------
[2021-09-26 10:19:13 sqlstore]: CREATE INDEX on table graph_35 column node2 ...
[2021-09-26 10:19:13 sqlstore]: ANALYZE INDEX on table graph_35 column node2 ...
       67.19 real         6.17 user         8.05 sys


Concatenate the new nodes with the ones we found via reachability

In [11]:
!kgtk cat -i "$TEMP"/Q2685.nodes.property.tsv.gz -i "$TEMP"/Q2685.reachable.per.org.cw.geo.award.tsv.gz \
-o "$TEMP"/Q2685.nodes.all.tsv.gz

Print number of nodes that we have so far for the Schw graph

In [14]:
!zcat < "$TEMP"/Q2685.nodes.all.tsv.gz | wc -l

  103799


Update the Kypher database

In [12]:
!$kypher -i "$TEMP"/Q2685.nodes.all.tsv.gz --as nodesQ2685 --limit 2

[2021-09-26 10:20:22 sqlstore]: DROP graph data table graph_35 from nodesQ2685
[2021-09-26 10:20:22 sqlstore]: IMPORT graph directly into table graph_35 from /Users/pedroszekely/Downloads/kypher/temp.kgtk/Q2685.nodes.all.tsv.gz ...
[2021-09-26 10:20:22 query]: SQL Translation:
---------------------------------------------
  SELECT *
     FROM graph_35 AS graph_35_c1
     LIMIT ?
  PARAS: [2]
---------------------------------------------
node1	label	node2
Q2685	link	P1082
Q2685	link	P112
        1.17 real         1.11 user         0.20 sys


Extract the item to item edges connecting the nodes in the Schwarzenegger graph

In [13]:
!$kypher -i nodesQ2685 -i items \
--match ' \
    nodesQ2685: ()-[]->(n1), \
    nodesQ2685: ()-[]->(n2), \
    items: (n1)-[l]->(n2) \
    ' \
--return 'distinct n1 as node1, l.label as label, n2 as node2, l as id' \
/ sort \
-o $OUT/Q2685.graph.item.tsv.gz

[2021-09-26 10:20:24 query]: SQL Translation:
---------------------------------------------
  SELECT DISTINCT graph_35_c1."node2" "_aLias.node1", graph_8_c3."label" "_aLias.label", graph_35_c2."node2" "_aLias.node2", graph_8_c3."id" "_aLias.id"
     FROM graph_35 AS graph_35_c1
     INNER JOIN graph_35 AS graph_35_c2, graph_8 AS graph_8_c3
     ON graph_35_c1."node2" = graph_8_c3."node1"
        AND graph_35_c2."node2" = graph_8_c3."node2"
  PARAS: []
---------------------------------------------
[2021-09-26 10:20:24 sqlstore]: CREATE INDEX on table graph_35 column node2 ...
[2021-09-26 10:20:24 sqlstore]: ANALYZE INDEX on table graph_35 column node2 ...
       57.68 real         9.87 user         7.10 sys


Add to the kypher database

In [15]:
!$kypher -i $OUT/Q2685.graph.item.tsv.gz --as Q2685items --limit 2

[2021-09-26 10:21:26 sqlstore]: DROP graph data table graph_36 from Q2685items
[2021-09-26 10:21:28 sqlstore]: IMPORT graph directly into table graph_36 from /Users/pedroszekely/Downloads/kypher/kgtk/Q2685.graph.item.tsv.gz ...
[2021-09-26 10:21:31 query]: SQL Translation:
---------------------------------------------
  SELECT *
     FROM graph_36 AS graph_36_c1
     LIMIT ?
  PARAS: [2]
---------------------------------------------
node1	label	node2	id
P1001	P1855	Q181574	P1001-P1855-Q181574-7f428c9b-0
P1001	P1855	Q8901	P1001-P1855-Q8901-15be5b36-0
        5.01 real         5.24 user         1.09 sys


## Extract the other types of edges

Extract the quantities

In [16]:
!$kypher -i quantities -i nodesQ2685 \
--match ' \
    nodesQ2685: ()-[]->(n1), \
    quantities: (n1)-[l]->(n2) \
    ' \
--return 'distinct n1 as node1, l.label as label, n2 as node2, l as id' \
/ sort \
-o $OUT/Q2685.graph.quantity.tsv.gz

[2021-09-26 10:22:24 query]: SQL Translation:
---------------------------------------------
  SELECT DISTINCT graph_35_c1."node2" "_aLias.node1", graph_11_c2."label" "_aLias.label", graph_11_c2."node2" "_aLias.node2", graph_11_c2."id" "_aLias.id"
     FROM graph_11 AS graph_11_c2
     INNER JOIN graph_35 AS graph_35_c1
     ON graph_35_c1."node2" = graph_11_c2."node1"
  PARAS: []
---------------------------------------------
       13.86 real         4.98 user         2.49 sys


Extract the time edges

In [17]:
!$kypher -i times -i nodesQ2685 \
--match ' \
    nodesQ2685: ()-[]->(n1), \
    times: (n1)-[l]->(n2) \
    ' \
--return 'distinct n1 as node1, l.label as label, n2 as node2, l as id' \
/ sort \
-o $OUT/Q2685.graph.time.tsv.gz

[2021-09-26 10:22:38 query]: SQL Translation:
---------------------------------------------
  SELECT DISTINCT graph_35_c1."node2" "_aLias.node1", graph_31_c2."label" "_aLias.label", graph_31_c2."node2" "_aLias.node2", graph_31_c2."id" "_aLias.id"
     FROM graph_31 AS graph_31_c2
     INNER JOIN graph_35 AS graph_35_c1
     ON graph_35_c1."node2" = graph_31_c2."node1"
  PARAS: []
---------------------------------------------
       15.28 real         3.44 user         2.73 sys


Extract the monolingual text edges

In [18]:
!$kypher -i monolingual -i nodesQ2685 \
--match ' \
    nodesQ2685: ()-[]->(n1), \
    monolingual: (n1)-[l]->(n2) \
    ' \
--return 'distinct n1 as node1, l.label as label, n2 as node2, l as id' \
/ sort \
-o $OUT/Q2685.graph.monolingual.tsv.gz

[2021-09-26 10:22:54 query]: SQL Translation:
---------------------------------------------
  SELECT DISTINCT graph_35_c1."node2" "_aLias.node1", graph_14_c2."label" "_aLias.label", graph_14_c2."node2" "_aLias.node2", graph_14_c2."id" "_aLias.id"
     FROM graph_14 AS graph_14_c2
     INNER JOIN graph_35 AS graph_35_c1
     ON graph_35_c1."node2" = graph_14_c2."node1"
  PARAS: []
---------------------------------------------
       12.76 real         3.44 user         2.27 sys


Extract the string edges

In [19]:
!$kypher -i string -i nodesQ2685 \
--match ' \
    nodesQ2685: ()-[]->(n1), \
    string: (n1)-[l]->(n2) \
    ' \
--return 'distinct n1 as node1, l.label as label, n2 as node2, l as id' \
/ sort \
-o $OUT/Q2685.graph.string.tsv.gz

[2021-09-26 10:23:07 query]: SQL Translation:
---------------------------------------------
  SELECT DISTINCT graph_35_c1."node2" "_aLias.node1", graph_33_c2."label" "_aLias.label", graph_33_c2."node2" "_aLias.node2", graph_33_c2."id" "_aLias.id"
     FROM graph_33 AS graph_33_c2
     INNER JOIN graph_35 AS graph_35_c1
     ON graph_35_c1."node2" = graph_33_c2."node1"
  PARAS: []
---------------------------------------------
       26.46 real         4.10 user         3.66 sys


## Complete the graph

In [97]:
!kgtk cat \
-i $OUT/Q2685.graph.item.tsv.gz \
-i $OUT/Q2685.graph.quantity.tsv.gz \
-i $OUT/Q2685.graph.time.tsv.gz \
-i $OUT/Q2685.graph.monolingual.tsv.gz \
-i $OUT/Q2685.graph.string.tsv.gz \
-o $OUT/Q2685.graph.item.quantity.time.monolingual.string.tsv.gz 

!$kypher -i $OUT/Q2685.graph.item.quantity.time.monolingual.string.tsv.gz --as Q2685base --limit 2

[2021-09-26 18:46:07 sqlstore]: DROP graph data table graph_34 from Q2685base
[2021-09-26 18:46:10 sqlstore]: IMPORT graph directly into table graph_44 from /Users/pedroszekely/Downloads/kypher/kgtk/Q2685.graph.item.quantity.time.monolingual.string.tsv.gz ...
[2021-09-26 18:46:16 query]: SQL Translation:
---------------------------------------------
  SELECT *
     FROM graph_44 AS graph_44_c1
     LIMIT ?
  PARAS: [2]
---------------------------------------------
node1	label	node2	id
P1001	P1855	Q181574	P1001-P1855-Q181574-7f428c9b-0
P1001	P1855	Q8901	P1001-P1855-Q8901-15be5b36-0
        9.99 real        11.36 user         1.78 sys


### Collect all the properties

Get edges for the properties

In [98]:
!$kypher -i Q2685base -i properties \
--match ' \
    Q2685base: ()-[l {label: property}]->(), \
    properties: (property)-[lp]->(n) \
    ' \
--return 'distinct property as node1, lp.label as label, n as node2, lp as id' \
/ sort \
-o $OUT/Q2685.graph.property.tsv.gz

[2021-09-26 18:46:19 query]: SQL Translation:
---------------------------------------------
  SELECT DISTINCT graph_10_c2."node1" "_aLias.node1", graph_10_c2."label" "_aLias.label", graph_10_c2."node2" "_aLias.node2", graph_10_c2."id" "_aLias.id"
     FROM graph_10 AS graph_10_c2
     INNER JOIN graph_44 AS graph_44_c1
     ON graph_10_c2."node1" = graph_44_c1."label"
        AND graph_44_c1."label" = graph_10_c2."node1"
  PARAS: []
---------------------------------------------
[2021-09-26 18:46:19 sqlstore]: CREATE INDEX on table graph_44 column label ...
[2021-09-26 18:46:19 sqlstore]: ANALYZE INDEX on table graph_44 column label ...
        7.31 real         7.54 user         0.72 sys


Update the base

In [99]:
!kgtk cat \
-i $OUT/Q2685.graph.item.quantity.time.monolingual.string.tsv.gz \
-i $OUT/Q2685.graph.property.tsv.gz \
/ compact \
-o $OUT/Q2685.graph.item.quantity.time.monolingual.string.property.tsv.gz 

!$kypher -i $OUT/Q2685.graph.item.quantity.time.monolingual.string.property.tsv.gz --as Q2685base --limit 2

[2021-09-26 18:46:58 sqlstore]: DROP graph data table graph_44 from Q2685base
[2021-09-26 18:46:59 sqlstore]: IMPORT graph directly into table graph_44 from /Users/pedroszekely/Downloads/kypher/kgtk/Q2685.graph.item.quantity.time.monolingual.string.property.tsv.gz ...
[2021-09-26 18:47:05 query]: SQL Translation:
---------------------------------------------
  SELECT *
     FROM graph_44 AS graph_44_c1
     LIMIT ?
  PARAS: [2]
---------------------------------------------
node1	label	node2	id
P1001	P1647	P276	P1001-P1647-P276-e4e44f83-0
P1001	P1659	P1269	P1001-P1659-P1269-785921cd-0
        7.54 real        10.30 user         1.14 sys


### Compute qualifiers

In [100]:
!$kypher -i qualifiers -i Q2685base \
--match ' \
    Q2685base: ()-[l]->(), \
    qualifiers: (l)-[lq {label: property}]->(n) \
    ' \
--return 'distinct l as node1, property as label, n as node2, lq as id' \
/ sort \
-o $OUT/Q2685.graph.qualifiers.tsv.gz

[2021-09-26 18:47:07 query]: SQL Translation:
---------------------------------------------
  SELECT DISTINCT graph_44_c1."id" "_aLias.node1", graph_7_c2."label" "_aLias.label", graph_7_c2."node2" "_aLias.node2", graph_7_c2."id" "_aLias.id"
     FROM graph_44 AS graph_44_c1
     INNER JOIN graph_7 AS graph_7_c2
     ON graph_44_c1."id" = graph_7_c2."node1"
        AND graph_7_c2."label" = graph_7_c2."label"
  PARAS: []
---------------------------------------------
[2021-09-26 18:47:07 sqlstore]: CREATE INDEX on table graph_44 column id ...
[2021-09-26 18:47:07 sqlstore]: ANALYZE INDEX on table graph_44 column id ...
       43.89 real        10.37 user         5.98 sys


Update the base again

In [101]:
!kgtk cat \
-i $OUT/Q2685.graph.item.quantity.time.monolingual.string.property.tsv.gz \
-i $OUT/Q2685.graph.qualifiers.tsv.gz \
/ compact \
-o $OUT/Q2685.graph.item.quantity.time.monolingual.string.property.qualifiers.tsv.gz 

!$kypher -i $OUT/Q2685.graph.item.quantity.time.monolingual.string.property.qualifiers.tsv.gz --as Q2685base --limit 2

[2021-09-26 18:48:32 sqlstore]: DROP graph data table graph_44 from Q2685base
[2021-09-26 18:48:33 sqlstore]: IMPORT graph directly into table graph_44 from /Users/pedroszekely/Downloads/kypher/kgtk/Q2685.graph.item.quantity.time.monolingual.string.property.qualifiers.tsv.gz ...
[2021-09-26 18:48:44 query]: SQL Translation:
---------------------------------------------
  SELECT *
     FROM graph_44 AS graph_44_c1
     LIMIT ?
  PARAS: [2]
---------------------------------------------
node1	label	node2	id
P1001	P1647	P276	P1001-P1647-P276-e4e44f83-0
P1001	P1659	P1269	P1001-P1659-P1269-785921cd-0
       12.61 real        17.94 user         1.54 sys


### Make sure that every q-node has at least P31 and P279
need to do it twice, once for node1 and once for node2

In [28]:
!$kypher -i Q2685base -i claims \
--match 'Q2685base: (n)-[]->(), claims: (n)-[l {label: property}]->(n2)' \
--where 'property in ["P31", "P279"]' \
--return 'distinct n as node1, property as label, n2 as node2, l as id' \
-o "$TEMP"/Q2685.node1.P31.P279.tsv.gz

[2021-09-26 10:25:33 query]: SQL Translation:
---------------------------------------------
  SELECT DISTINCT graph_34_c1."node1" "_aLias.node1", graph_9_c2."label" "_aLias.label", graph_9_c2."node2" "_aLias.node2", graph_9_c2."id" "_aLias.id"
     FROM graph_34 AS graph_34_c1
     INNER JOIN graph_9 AS graph_9_c2
     ON graph_34_c1."node1" = graph_9_c2."node1"
        AND graph_9_c2."label" = graph_9_c2."label"
        AND (graph_9_c2."label" IN (?, ?))
  PARAS: ['P31', 'P279']
---------------------------------------------
[2021-09-26 10:25:33 sqlstore]: CREATE INDEX on table graph_34 column node1 ...
[2021-09-26 10:25:33 sqlstore]: ANALYZE INDEX on table graph_34 column node1 ...
      865.92 real        96.54 user       131.06 sys


In [29]:
!$kypher -i Q2685base -i claims \
--match 'Q2685base: ()-[]->(n), claims: (n)-[l {label: property}]->(n2)' \
--where 'property in ["P31", "P279"]' \
--return 'distinct n as node1, property as label, n2 as node2, l as id' \
-o "$TEMP"/Q2685.node2.P31.P279.tsv.gz

[2021-09-26 10:39:59 query]: SQL Translation:
---------------------------------------------
  SELECT DISTINCT graph_34_c1."node2" "_aLias.node1", graph_9_c2."label" "_aLias.label", graph_9_c2."node2" "_aLias.node2", graph_9_c2."id" "_aLias.id"
     FROM graph_34 AS graph_34_c1
     INNER JOIN graph_9 AS graph_9_c2
     ON graph_34_c1."node2" = graph_9_c2."node1"
        AND graph_9_c2."label" = graph_9_c2."label"
        AND (graph_9_c2."label" IN (?, ?))
  PARAS: ['P31', 'P279']
---------------------------------------------
[2021-09-26 10:39:59 sqlstore]: CREATE INDEX on table graph_34 column node2 ...
[2021-09-26 10:40:02 sqlstore]: ANALYZE INDEX on table graph_34 column node2 ...
      911.56 real       100.93 user       135.47 sys


Recreate the base file

In [102]:
!kgtk cat \
-i $OUT/Q2685.graph.item.quantity.time.monolingual.string.property.qualifiers.tsv.gz \
-i "$TEMP"/Q2685.node2.P31.P279.tsv.gz \
/ compact \
-o $OUT/Q2685.graph.item.quantity.time.monolingual.string.property.qualifiers.P31.P279.tsv.gz 

!$kypher -i $OUT/Q2685.graph.item.quantity.time.monolingual.string.property.qualifiers.P31.P279.tsv.gz --as Q2685base --limit 2

[2021-09-26 18:49:30 sqlstore]: DROP graph data table graph_44 from Q2685base
[2021-09-26 18:49:32 sqlstore]: IMPORT graph directly into table graph_44 from /Users/pedroszekely/Downloads/kypher/kgtk/Q2685.graph.item.quantity.time.monolingual.string.property.qualifiers.P31.P279.tsv.gz ...
[2021-09-26 18:49:42 query]: SQL Translation:
---------------------------------------------
  SELECT *
     FROM graph_44 AS graph_44_c1
     LIMIT ?
  PARAS: [2]
---------------------------------------------
node1	label	node2	id
P1000	P31	Q18608871	P1000-P31-Q18608871-093affb5-0
P1001	P1647	P276	P1001-P1647-P276-e4e44f83-0
       12.22 real        17.50 user         1.49 sys


### Incorporate all nodes up to the top of the class hierarchy
When we do a breath first traversal, we may not follow enough links on the P279 hierarchy to reach the top. We need to do a full traversal on the P279 hierarchy to incorporate all the relevant classes.

Approach:
- Create a graph including P31 and P279 to do the traversal
- Create a file of all the nodes in the Schwarzenneger file to use as roots

In [73]:
!$kypher -i claims \
--match '(n1)-[l {label:property}]->(n2)' \
--where 'property in ["P31", "P279"]' \
--return 'distinct n1 as node1, "link" as label, n2 as node2' \
-o "$TEMP"/P31.P279.subgraph.tsv.gz

[2021-09-26 17:25:27 query]: SQL Translation:
---------------------------------------------
  SELECT DISTINCT graph_9_c1."node1" "_aLias.node1", ? "_aLias.label", graph_9_c1."node2" "_aLias.node2"
     FROM graph_9 AS graph_9_c1
     WHERE graph_9_c1."label" = graph_9_c1."label"
        AND (graph_9_c1."label" IN (?, ?))
  PARAS: ['link', 'P31', 'P279']
---------------------------------------------
     1486.66 real       595.18 user       169.53 sys


#### Create the roots

Find roots in node1

> This step is including qualifier ids in node1, which makes reachable nodes have more roots than necessary. Would be nice to eliminate qualifiers here.

In [103]:
!$kypher -i Q2685base \
--match '(n)-[]->()' \
--return 'distinct n as node1' \
-o "$TEMP"/Q2685.node1.tsv.gz

[2021-09-26 18:51:37 query]: SQL Translation:
---------------------------------------------
  SELECT DISTINCT graph_44_c1."node1" "_aLias.node1"
     FROM graph_44 AS graph_44_c1
  PARAS: []
---------------------------------------------
        4.79 real         4.44 user         0.31 sys


Find roots in node2

In [104]:
!$kypher -i Q2685base -i datatypes \
--match ' \
    Q2685base: ()-[l {label: property}]->(n), \
    datatypes: (property)-[:datatype]->(datatype) \
    ' \
--where 'datatype in ["wikibase-item"]' \
--return 'distinct n as node1' \
-o "$TEMP"/Q2685.node2.tsv.gz

[2021-09-26 18:51:42 query]: SQL Translation:
---------------------------------------------
  SELECT DISTINCT graph_44_c1."node2" "_aLias.node1"
     FROM graph_19 AS graph_19_c2
     INNER JOIN graph_44 AS graph_44_c1
     ON graph_19_c2."node1" = graph_44_c1."label"
        AND graph_19_c2."label" = ?
        AND graph_44_c1."label" = graph_19_c2."node1"
        AND (graph_19_c2."node2" IN (?))
  PARAS: ['datatype', 'wikibase-item']
---------------------------------------------
[2021-09-26 18:51:42 sqlstore]: CREATE INDEX on table graph_44 column label ...
[2021-09-26 18:51:44 sqlstore]: ANALYZE INDEX on table graph_44 column label ...
        3.66 real         3.26 user         0.35 sys


Combine the two files to create all the roots

In [105]:
!$kgtk cat --mode=NONE -i "$TEMP"/Q2685.node1.tsv.gz -i "$TEMP"/Q2685.node2.tsv.gz \
/ compact --mode=NONE --columns node1 \
-o "$TEMP"/Q2685.nodes.tsv.gz

!$kypher -i "$TEMP"/Q2685.nodes.tsv.gz --as Q2685node1 --limit 2

        8.95 real         9.79 user         0.61 sys
[2021-09-26 18:51:58 sqlstore]: DROP graph data table graph_37 from Q2685node1
[2021-09-26 18:51:58 sqlstore]: IMPORT graph directly into table graph_45 from /Users/pedroszekely/Downloads/kypher/temp.kgtk/Q2685.nodes.tsv.gz ...
[2021-09-26 18:51:59 query]: SQL Translation:
---------------------------------------------
  SELECT *
     FROM graph_45 AS graph_45_c1
     LIMIT ?
  PARAS: [2]
---------------------------------------------
node1
P1000
P1001
        2.29 real         2.44 user         0.43 sys


Circumvent a problem in `reachable-nodes` where it does not accept a root file with column header `node1`

In [106]:
!$kgtk rename-columns -i "$TEMP"/Q2685.nodes.tsv.gz --output-columns id --mode=NONE \
/ compact -o "$TEMP"/Q2685.roots.tsv.gz

        8.86 real         9.64 user         0.61 sys


Do a depth-first traversal of the P31/P279 graph using as roots all items in the Schewarzenegger graph

In [107]:
!$kgtk reachable-nodes \
    --rootfile "$TEMP"/Q2685.roots.tsv.gz \
    --rootfilecolumn id \
    --prop link \
    --label "reachable" \
    --selflink \
    -i "$TEMP"/P31.P279.subgraph.tsv.gz \
    -o "$TEMP"/P31.P279.reachable.tsv.gz

     3768.66 real      2439.57 user      1324.00 sys


Deduplicate the reachable nodes file

In [33]:
!$kgtk remove-columns -i "$TEMP"/P31.P279.reachable.tsv.gz --columns node1 label \
/ rename-columns --mode=NONE --output-columns node1 \
/ compact --mode=NONE --columns node1 \
-o "$TEMP"/P31.P279.reachable.dedup.tsv.gz

      157.61 real       204.91 user         5.78 sys


Put all the reachable nodes in `Q2685node1`

In [34]:
!$kgtk cat --mode=NONE \
-i "$TEMP"/Q2685.nodes.tsv.gz \
-i "$TEMP"/P31.P279.reachable.dedup.tsv.gz \
/ compact --deduplicate --mode=NONE --columns node1 \
-o "$TEMP"/Q2685.nodes.ontology.tsv.gz

!$kypher -i "$TEMP"/Q2685.nodes.ontology.tsv.gz --as Q2685node1 --limit 2

        9.51 real        10.34 user         0.68 sys
[2021-09-28 09:09:21 sqlstore]: DROP graph data table graph_45 from Q2685node1
[2021-09-28 09:09:21 sqlstore]: IMPORT graph directly into table graph_45 from /Users/pedroszekely/Downloads/kypher/temp.kgtk/Q2685.nodes.ontology.tsv.gz ...
[2021-09-28 09:09:22 query]: SQL Translation:
---------------------------------------------
  SELECT *
     FROM graph_45 AS graph_45_c1
     LIMIT ?
  PARAS: [2]
---------------------------------------------
node1
P1000
P1001
        2.30 real         2.70 user         0.35 sys


Extract all P31/P279 edges from Wikidata for all the nodes in the Schwarzenegger graph and consolidate.

In [35]:
!$kypher -i claims -i Q2685node1 \
--match ' \
    Q2685node1: (n1), \
    claims: (n1)-[l {label:property}]->(n2) \
    ' \
--where 'property in ["P31", "P279"]' \
--return 'distinct n1 as node1, property as label, n2 as node2, l as id' \
/ cat -i - -i $OUT/Q2685.graph.item.quantity.time.monolingual.string.property.qualifiers.P31.P279.tsv.gz \
/ compact \
-o $OUT/Q2685.graph.item.quantity.time.monolingual.string.property.qualifiers.P31.P279.ontology.tsv.gz \

!$kypher -i $OUT/Q2685.graph.item.quantity.time.monolingual.string.property.qualifiers.P31.P279.ontology.tsv.gz --as Q2685base --limit 2

[2021-09-28 09:10:25 query]: SQL Translation:
---------------------------------------------
  SELECT DISTINCT graph_45_c1."node1" "_aLias.node1", graph_9_c2."label" "_aLias.label", graph_9_c2."node2" "_aLias.node2", graph_9_c2."id" "_aLias.id"
     FROM graph_45 AS graph_45_c1
     INNER JOIN graph_9 AS graph_9_c2
     ON graph_45_c1."node1" = graph_9_c2."node1"
        AND graph_9_c2."label" = graph_9_c2."label"
        AND (graph_9_c2."label" IN (?, ?))
  PARAS: ['P31', 'P279']
---------------------------------------------
[2021-09-28 09:10:25 sqlstore]: CREATE INDEX on table graph_45 column node1 ...
[2021-09-28 09:10:26 sqlstore]: ANALYZE INDEX on table graph_45 column node1 ...
      968.70 real       148.87 user       147.10 sys
[2021-09-28 09:26:33 sqlstore]: DROP graph data table graph_37 from Q2685base
[2021-09-28 09:26:36 sqlstore]: IMPORT graph directly into table graph_37 from /Users/pedroszekely/Downloads/kypher/kgtk/Q2685.graph.item.quantity.time.monolingual.string.proper

I am not certain about the need for this cell, whether new nodes can appear after adding P31 and P279.

In [48]:
!$kypher -i Q2685base -i datatypes \
--match ' \
    Q2685base: ()-[l {label: property}]->(n), \
    datatypes: (property)-[:datatype]->(datatype) \
    ' \
--where 'datatype in ["wikibase-item"]' \
--return 'distinct n as node1' \
/ cat -i - -i "$TEMP"/Q2685.nodes.ontology.tsv.gz --mode=NONE \
/ compact --mode=NONE --columns node1 \
-o "$TEMP"/Q2685.nodes.ontology.star.tsv.gz \

!$kypher -i "$TEMP"/Q2685.nodes.ontology.star.tsv.gz --as Q2685node1 --limit 2

[2021-09-28 09:36:31 query]: SQL Translation:
---------------------------------------------
  SELECT DISTINCT graph_37_c1."node2" "_aLias.node1"
     FROM graph_19 AS graph_19_c2
     INNER JOIN graph_37 AS graph_37_c1
     ON graph_19_c2."node1" = graph_37_c1."label"
        AND graph_19_c2."label" = ?
        AND graph_37_c1."label" = graph_19_c2."node1"
        AND (graph_19_c2."node2" IN (?))
  PARAS: ['datatype', 'wikibase-item']
---------------------------------------------
       10.83 real        12.65 user         1.01 sys
[2021-09-28 09:36:41 sqlstore]: DROP graph data table graph_45 from Q2685node1
[2021-09-28 09:36:41 sqlstore]: IMPORT graph directly into table graph_45 from /Users/pedroszekely/Downloads/kypher/temp.kgtk/Q2685.nodes.ontology.star.tsv.gz ...
[2021-09-28 09:36:42 query]: SQL Translation:
---------------------------------------------
  SELECT *
     FROM graph_45 AS graph_45_c1
     LIMIT ?
  PARAS: [2]
---------------------------------------------
node1
P1000

## Add property datatypes

In [49]:
!$kypher -i datatypes -i Q2685base \
--match ' \
    Q2685base: ()-[r {label: property}]->(), \
    datatypes: (property)-[l:datatype]->(datatype) \
    ' \
--return 'distinct property as node1, l.label as label, datatype as node2, l as id' \
/ cat -i - -i $OUT/Q2685.graph.item.quantity.time.monolingual.string.property.qualifiers.P31.P279.ontology.tsv.gz \
/ compact \
-o $OUT/Q2685.graph.item.quantity.time.monolingual.string.property.qualifiers.P31.P279.ontology.datatype.tsv.gz \

!$kypher -i Q2685.graph.item.quantity.time.monolingual.string.property.qualifiers.P31.P279.ontology.datatype.tsv.gz --as Q2685base --limit 2

[2021-09-28 09:36:44 query]: SQL Translation:
---------------------------------------------
  SELECT DISTINCT graph_19_c2."node1" "_aLias.node1", graph_19_c2."label" "_aLias.label", graph_19_c2."node2" "_aLias.node2", graph_19_c2."id" "_aLias.id"
     FROM graph_19 AS graph_19_c2
     INNER JOIN graph_37 AS graph_37_c1
     ON graph_19_c2."node1" = graph_37_c1."label"
        AND graph_19_c2."label" = ?
        AND graph_37_c1."label" = graph_19_c2."node1"
  PARAS: ['datatype']
---------------------------------------------
       47.05 real        49.81 user         2.02 sys
[2021-09-28 09:37:30 query]: SQL Translation:
---------------------------------------------
  SELECT *
     FROM graph_37 AS graph_37_c1
     LIMIT ?
  PARAS: [2]
---------------------------------------------
node1	label	node2	id
P1000	P31	Q18608871	P1000-P31-Q18608871-093affb5-0
P1001	P1647	P276	P1001-P1647-P276-e4e44f83-0
        1.01 real         0.81 user         0.15 sys


## Build labels, aliases and descriptions

Extract the label edges

In [50]:
!$kypher -i labels -i Q2685node1 \
--match ' \
    Q2685node1: (n1)-[]->(), \
    labels: (n1)-[l]->(n2) \
    ' \
--return 'distinct n1 as node1, l.label as label, n2 as node2, l as id' \
/ sort \
-o $OUT/Q2685.graph.label.tsv.gz

[2021-09-28 09:37:33 query]: SQL Translation:
---------------------------------------------
  SELECT DISTINCT graph_45_c1."node1" "_aLias.node1", graph_4_c2."label" "_aLias.label", graph_4_c2."node2" "_aLias.node2", graph_4_c2."id" "_aLias.id"
     FROM graph_4 AS graph_4_c2
     INNER JOIN graph_45 AS graph_45_c1
     ON graph_45_c1."node1" = graph_4_c2."node1"
  PARAS: []
---------------------------------------------
[2021-09-28 09:37:33 sqlstore]: CREATE INDEX on table graph_45 column node1 ...
[2021-09-28 09:37:33 sqlstore]: ANALYZE INDEX on table graph_45 column node1 ...
       32.35 real         5.24 user         4.98 sys


Extract the alias edges

In [51]:
!$kypher -i aliases -i Q2685node1 \
--match ' \
    Q2685node1: (n1)-[]->(), \
    aliases: (n1)-[l]->(n2) \
    ' \
--return 'distinct n1 as node1, l.label as label, n2 as node2, l as id' \
/ sort \
-o $OUT/Q2685.graph.alias.tsv.gz

[2021-09-28 09:38:05 query]: SQL Translation:
---------------------------------------------
  SELECT DISTINCT graph_45_c1."node1" "_aLias.node1", graph_5_c2."label" "_aLias.label", graph_5_c2."node2" "_aLias.node2", graph_5_c2."id" "_aLias.id"
     FROM graph_45 AS graph_45_c1
     INNER JOIN graph_5 AS graph_5_c2
     ON graph_45_c1."node1" = graph_5_c2."node1"
  PARAS: []
---------------------------------------------
        7.92 real         3.98 user         1.70 sys


Extract the description edges

In [52]:
!$kypher -i descriptions -i Q2685node1 \
--match ' \
    Q2685node1: (n1)-[]->(), \
    descriptions: (n1)-[l]->(n2) \
    ' \
--return 'distinct n1 as node1, l.label as label, n2 as node2, l as id' \
/ sort \
-o $OUT/Q2685.graph.description.tsv.gz

[2021-09-28 09:38:13 query]: SQL Translation:
---------------------------------------------
  SELECT DISTINCT graph_45_c1."node1" "_aLias.node1", graph_32_c2."label" "_aLias.label", graph_32_c2."node2" "_aLias.node2", graph_32_c2."id" "_aLias.id"
     FROM graph_32 AS graph_32_c2
     INNER JOIN graph_45 AS graph_45_c1
     ON graph_45_c1."node1" = graph_32_c2."node1"
  PARAS: []
---------------------------------------------
       36.54 real         5.32 user         5.49 sys


## Compute useful derived files

### Inverses of `P279`

> To do: need to define t`P279_` property, it's datatype, label, etc.

In [53]:
!$kypher -i Q2685base \
--match '(n1)-[:P279]->(class)' \
--return 'distinct class as node1, "P279_" as label, n1 as node2' \
/ add-id --id-style wikidata \
/ sort \
-o "$OUT"/Q2685.derived.P279inv.tsv.gz

[2021-09-28 09:38:50 query]: SQL Translation:
---------------------------------------------
  SELECT DISTINCT graph_37_c1."node2" "_aLias.node1", ? "_aLias.label", graph_37_c1."node1" "_aLias.node2"
     FROM graph_37 AS graph_37_c1
     WHERE graph_37_c1."label" = ?
  PARAS: ['P279_', 'P279']
---------------------------------------------
        2.86 real         4.12 user         0.90 sys


## Final files
- base, includes all edges except labeles, aliases and descriptions
- labels
- aliases
- descriptions

In [54]:
!$kgtk cat \
-i "$OUT"/Q2685.graph.item.quantity.time.monolingual.string.property.qualifiers.P31.P279.ontology.datatype.tsv.gz \
-i "$OUT"/Q2685.graph.alias.tsv.gz \
-i "$OUT"/Q2685.graph.label.tsv.gz \
-i "$OUT"/Q2685.graph.description.tsv.gz \
-o "$OUT"/Q2685.graph.all.tsv.gz

       18.98 real        18.61 user         0.27 sys


In [55]:
!$kypher \
-i "$OUT"/Q2685.graph.alias.tsv.gz --as Q2685aliases \
-i "$OUT"/Q2685.graph.label.tsv.gz --as Q2685labels \
-i "$OUT"/Q2685.graph.description.tsv.gz --as Q2685descriptions \
--limit 2

[2021-09-28 09:39:11 sqlstore]: DROP graph data table graph_40 from Q2685aliases
[2021-09-28 09:39:11 sqlstore]: IMPORT graph directly into table graph_40 from /Users/pedroszekely/Downloads/kypher/kgtk/Q2685.graph.alias.tsv.gz ...
[2021-09-28 09:39:12 sqlstore]: DROP graph data table graph_41 from Q2685labels
[2021-09-28 09:39:12 sqlstore]: IMPORT graph directly into table graph_41 from /Users/pedroszekely/Downloads/kypher/kgtk/Q2685.graph.label.tsv.gz ...
[2021-09-28 09:39:12 sqlstore]: DROP graph data table graph_42 from Q2685descriptions
[2021-09-28 09:39:12 sqlstore]: IMPORT graph directly into table graph_42 from /Users/pedroszekely/Downloads/kypher/kgtk/Q2685.graph.description.tsv.gz ...
[2021-09-28 09:39:13 query]: SQL Translation:
---------------------------------------------
  SELECT *
     FROM graph_40 AS graph_40_c1
     LIMIT ?
  PARAS: [2]
---------------------------------------------
node1	label	node2	id
P1001	alias	'belongs to jurisdiction'@en	P1001-alias-en-0dd7ce-0
P1

In [56]:
!$kypher -i Q2685base --match '(n)-[]->()' --return 'count(distinct n)'

[2021-09-28 09:39:14 query]: SQL Translation:
---------------------------------------------
  SELECT count(DISTINCT graph_37_c1."node1")
     FROM graph_37 AS graph_37_c1
  PARAS: []
---------------------------------------------
count(DISTINCT graph_37_c1."node1")
512883
        2.56 real         2.12 user         0.39 sys


In [57]:
!$kypher -i Q2685labels --match '(n)-[]->()' --return 'count(distinct n)'

[2021-09-28 09:39:17 query]: SQL Translation:
---------------------------------------------
  SELECT count(DISTINCT graph_41_c1."node1")
     FROM graph_41 AS graph_41_c1
  PARAS: []
---------------------------------------------
count(DISTINCT graph_41_c1."node1")
126830
        1.19 real         0.98 user         0.18 sys
