In [1]:
!pip install shexer

Collecting shexer
  Downloading shexer-2.5.10-py3-none-any.whl.metadata (29 kB)
Collecting Flask-Cors (from shexer)
  Downloading flask_cors-5.0.1-py3-none-any.whl.metadata (961 bytes)
Collecting rdflib (from shexer)
  Downloading rdflib-7.1.4-py3-none-any.whl.metadata (11 kB)
Collecting SPARQLWrapper (from shexer)
  Downloading SPARQLWrapper-2.0.0-py3-none-any.whl.metadata (2.0 kB)
Collecting wlighter (from shexer)
  Downloading wlighter-1.0.1.tar.gz (5.1 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting plantuml (from shexer)
  Downloading plantuml-0.3.0-py3-none-any.whl.metadata (2.5 kB)
Collecting python-xz (from shexer)
  Downloading python_xz-0.5.0-py3-none-any.whl.metadata (8.5 kB)
Downloading shexer-2.5.10-py3-none-any.whl (175 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m175.3/175.3 kB[0m [31m6.4 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading flask_cors-5.0.1-py3-none-any.whl (11 kB)
Downloading plantuml-0.3.0-py3-none-any.whl (5.8 kB)

In [3]:
from shexer.shaper import Shaper
from shexer.consts import TURTLE
from shexer.model.federated_source import FederatedSource

_S = 0
_O = 2

namespaces_dict = {
   "http://www.w3.org/1999/02/22-rdf-syntax-ns#": "rdf",
   "http://example.org/": "ex",
   "http://weso.es/shapes/": "shape",
   "http://www.w3.org/2001/XMLSchema#": "xsd",
   "http://www.w3.org/2002/07/owl#": "owl",
   "https://biodatafuse.org/example/" : "graph",
   "http://purl.obolibrary.org/obo/": "obo",
}
# Toy RDF graph in Turtle syntax, used as the input of every demo in this
# file. It describes two cities and two countries in the ex: namespace. Each
# ex: node is the SUBJECT of a schema:sameAs triple pointing to a Wikidata
# item (wd:), and the OBJECT of a schema:sameAs triple coming from a DBpedia
# resource (dbr:).
toy_data_example = """
@prefix ex: <http://example.org/> .
@prefix schema: <http://schema.org/> .
@prefix wd: <http://www.wikidata.org/entity/> .
@prefix dbr: <http://dbpedia.org/resource/> .
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .

ex:Tokyo a ex:City ;
         rdfs:label "Tokyo" ;
	     schema:sameAs wd:Q1490 ;
		 ex:country ex:Japan .

dbr:Tokyo schema:sameAs ex:Tokyo .

ex:Oviedo a ex:City ;
          rdfs:label "Oviedo" ;
	      schema:sameAs wd:Q14317 ;
          ex:country ex:Spain .

dbr:Oviedo schema:sameAs ex:Oviedo .

ex:Japan a ex:Country ;
         rdfs:label "Japan";
         schema:sameAs wd:Q17 .

dbr:Japan schema:sameAs ex:Japan .

ex:Spain a ex:Country ;
         rdfs:label "Spain";
         schema:sameAs wd:Q29 .

dbr:Spain schema:sameAs ex:Spain .
"""


def wikidata(origin_position_in_triple=_S, link_in_federated_source=False):
    """Build the FederatedSource that describes Wikidata.

    A federated source is represented by a FederatedSource object
    (shexer.model.federated_source.FederatedSource). It is configured with:

    endpoint_url
        SPARQL endpoint used to query the federated source.
    property_link
        Property used to express synonymity between two URIs.
    link_in_federated_source
        bool. Tells whether the synonymity triples are expected to be found
        in the SPARQL endpoint of the federated source (True) or in sheXer's
        original input (False). The input data of this file contains triples
        such as (A schema:sameAs B), so the default here is False. If those
        triples were expected to be found at Wikidata's endpoint, it should
        be True.
    alias
        A "label" used to refer to this source. It is used in comments and
        to generate the URIs of the extracted federated shapes.
    origin_position_in_triple
        Integer position in a triple: 0 --> subject, 1 --> predicate,
        2 --> object. Only 0 (subject) and 2 (object) are valid here. It is
        the position, within the synonymity triple, of the node we are
        trying to locate synonyms for. The input data contains triples such
        as (ex:Tokyo schema:sameAs wd:Q1490): the node we want synonyms
        for, ex:Tokyo, is the subject, and so is every node linking to a
        Wikidata item, hence the default of 0 (subject). The DBpedia source
        of this file faces triples such as (dbr:Tokyo schema:sameAs
        ex:Tokyo), so it uses 2 (object) instead.

    Args:
        origin_position_in_triple: forwarded as-is to FederatedSource.
        link_in_federated_source: forwarded as-is to FederatedSource.

    Returns:
        A FederatedSource aliased "Wikidata".
    """
    wikidata_settings = {
        "endpoint_url": "https://query.wikidata.org/sparql",
        "property_link": "http://schema.org/sameAs",
        "link_in_federated_source": link_in_federated_source,
        "alias": "Wikidata",
        "origin_position_in_triple": origin_position_in_triple,
    }
    return FederatedSource(**wikidata_settings)

def dbpedia(origin_position_in_triple=_O, link_in_federated_source=False):
    """Build the FederatedSource that describes DBpedia.

    Args:
        origin_position_in_triple: position (0 subject, 2 object) of the node
            we want synonyms for within the schema:sameAs triple. Defaults to
            the object position, since the input data of this file contains
            triples such as (dbr:Tokyo schema:sameAs ex:Tokyo).
        link_in_federated_source: whether the schema:sameAs triples are
            expected at the federated endpoint (True) or in the input given
            to sheXer (False, the default).

    Returns:
        A FederatedSource aliased "DBpedia".
    """
    dbpedia_settings = {
        "endpoint_url": "https://dbpedia.org/sparql",
        "property_link": "http://schema.org/sameAs",
        "link_in_federated_source": link_in_federated_source,
        "alias": "DBpedia",
        "origin_position_in_triple": origin_position_in_triple,
    }
    return FederatedSource(**dbpedia_settings)

def _print_federated_shex(raw_graph, federated_sources, **extra_shaper_options):
    """Extract shapes from raw_graph using federated sources and print them.

    Private helper shared by the run_* demos, which only differ in the
    federated sources they use and in the extra flags they pass to Shaper.

    Args:
        raw_graph: RDF content in Turtle syntax, given as a string.
        federated_sources: list of FederatedSource objects handed to Shaper.
        **extra_shaper_options: additional keyword arguments forwarded
            unchanged to Shaper (e.g. wikidata_annotation=True).

    Returns:
        None. The resulting ShEx is written to standard output.
    """
    shaper = Shaper(
        all_classes_mode=True,
        raw_graph=raw_graph,
        input_format=TURTLE,
        namespaces_dict=namespaces_dict,
        disable_or_statements=False,
        disable_comments=False,
        # federated_sources is the parameter that enables the extraction of
        # federated shapes.
        federated_sources=federated_sources,
        **extra_shaper_options,
    )
    print(shaper.shex_graph(string_output=True))


def run_single_direct(raw_graph):
    """Print the shapes of raw_graph using Wikidata as the only federated source.

    Uses wikidata() with its defaults, i.e. the nodes to find synonyms for
    are the subjects of the synonymity triples. Unlike the other two demos,
    no wikidata_annotation flag is passed to Shaper here.
    """
    _print_federated_shex(raw_graph, [wikidata()])


def run_single_inverse(raw_graph):
    """Print the shapes of raw_graph using DBpedia as the only federated source.

    Uses dbpedia() with its defaults, i.e. the nodes to find synonyms for are
    the objects of the synonymity triples. Shaper is created with
    wikidata_annotation=True.
    """
    _print_federated_shex(raw_graph, [dbpedia()], wikidata_annotation=True)


def run_double_fed(raw_graph):
    """Print the shapes of raw_graph using both Wikidata and DBpedia.

    Both federated sources are built with their defaults and passed in the
    order [Wikidata, DBpedia]. Shaper is created with
    wikidata_annotation=True.
    """
    _print_federated_shex(raw_graph,
                          [wikidata(), dbpedia()],
                          wikidata_annotation=True)

if __name__ == "__main__":
    # Run the three demos in order on the toy graph, printing the same
    # separator after each one, then a final completion message.
    separator = "\n\n\n\n----------------------\n\n\n\n"
    for demo in (run_single_direct, run_single_inverse, run_double_fed):
        demo(toy_data_example)
        print(separator)
    print("Done!")

[1;30;43mSe han truncado las últimas 5000 líneas del flujo de salida.[0m
   <http://www.wikidata.org/prop/P37>  IRI  *;
            # 50.0 % obj: IRI. Cardinality: +
            # 25.0 % obj: IRI. Cardinality: {5}
            # 25.0 % obj: IRI. Cardinality: {1}
            # Constraint only observed in Wikidata
   <http://www.wikidata.org/prop/direct/P10326>  xsd:string  ?;
            # 50.0 % obj: xsd:string. Cardinality: {1}
            # Constraint only observed in Wikidata
   <http://www.wikidata.org/prop/P17>  IRI  ?;
            # 50.0 % obj: IRI. Cardinality: {1}
            # Constraint only observed in Wikidata
   <http://www.wikidata.org/prop/direct/P832>  IRI  *;
            # 50.0 % obj: IRI. Cardinality: +
            # 25.0 % obj: IRI. Cardinality: {9}
            # 25.0 % obj: IRI. Cardinality: {15}
            # Constraint only observed in Wikidata
   <http://www.wikidata.org/prop/P6344>  IRI  *;
            # 50.0 % obj: IRI. Cardinality: {4}
            # Constrain

JSONDecodeError: Expecting value: line 1 column 1 (char 0)