In [1]:
!pip install shexer

Collecting shexer
  Downloading shexer-2.6.1-py3-none-any.whl.metadata (30 kB)
Collecting Flask-Cors (from shexer)
  Downloading flask_cors-5.0.1-py3-none-any.whl.metadata (961 bytes)
Collecting rdflib (from shexer)
  Downloading rdflib-7.1.4-py3-none-any.whl.metadata (11 kB)
Collecting SPARQLWrapper (from shexer)
  Downloading SPARQLWrapper-2.0.0-py3-none-any.whl.metadata (2.0 kB)
Collecting wlighter (from shexer)
  Downloading wlighter-1.0.1.tar.gz (5.1 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting plantuml (from shexer)
  Downloading plantuml-0.3.0-py3-none-any.whl.metadata (2.5 kB)
Collecting python-xz (from shexer)
  Downloading python_xz-0.5.0-py3-none-any.whl.metadata (8.5 kB)
Downloading shexer-2.6.1-py3-none-any.whl (180 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m180.7/180.7 kB[0m [31m7.0 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading flask_cors-5.0.1-py3-none-any.whl (11 kB)
Downloading plantuml-0.3.0-py3-none-any.whl (5.8 kB)
D

# Defining target shapes/nodes

Three ways of defining target shapes:
* all classes --> each class with at least an instance will get a shape.
* target classes --> only classes in a white list will get a shape.
* shape maps. This is explained in a standalone notebook. Link to [that notebook](https://github.com/weso/shexer/blob/master/doc/shape_maps_to_define_target_nodes_and_shapes.ipynb).

Two ways of restricting the nodes used to extract target shapes:
* Through shape maps (see the mentioned notebook).
* Sampling.



#  Adding, filtering or tuning constraints

* Enabling inverse paths
* Setting an acceptance threshold
* Constraining possible cardinalities to *, +, ? and 1
* Enabling/disabling the OR operator
* Accepting non-universally met constraints.


Examples of all those features (except for the shape map-related ones):

In [11]:
from shexer.shaper import Shaper
from shexer.consts import TURTLE_ITER, ALL_EXAMPLES, TURTLE
import requests

def remote_to_local(url, local_path, timeout=30):
    """Download the text content found at ``url`` and save it to ``local_path``.

    Args:
        url: Address of the remote resource, fetched with an HTTP GET.
        local_path: Path of the local file to (over)write, encoded as UTF-8.
        timeout: Seconds to wait for the server before giving up, so that the
            notebook cannot hang forever on an unresponsive host.

    Raises:
        requests.HTTPError: If the server answers with an error status.
            Failing here is clearer than silently writing nothing and then
            stumbling on a missing (or stale) file in a later cell.
        requests.RequestException: On connection problems or on timeout.
    """
    response = requests.get(url, timeout=timeout)
    # Surface failed downloads immediately instead of skipping the write.
    response.raise_for_status()
    with open(local_path, "w", encoding="utf-8") as out_stream:
        out_stream.write(response.text)

# Local path where the downloaded example graph is stored; the cells below pass
# it to Shaper as `graph_file_input`.
# NOTE(review): the name misspells "GRAPH"; it is kept as-is because the rest
# of the notebook references it under this exact name.
INPUT_GRPAH_PATH = "local_file.ttl"
def default_namespaces():
    """Build the namespace-to-prefix mapping shared by the examples.

    Returns:
        dict: A new dictionary on every call. Keys are namespace URIs and
        values are their prefixes, always in the same insertion order.
    """
    namespace_prefix_pairs = (
        ("http://example.org/", "ex"),
        ("http://www.w3.org/XML/1998/namespace/", "xml"),
        ("http://www.w3.org/1999/02/22-rdf-syntax-ns#", "rdf"),
        ("http://www.w3.org/2000/01/rdf-schema#", "rdfs"),
        ("http://www.w3.org/2001/XMLSchema#", "xsd"),
        ("http://xmlns.com/foaf/0.1/", "foaf"),
    )
    return dict(namespace_prefix_pairs)

# Fetch the example graph and keep a local copy for the cells below.
remote_to_local("https://raw.githubusercontent.com/weso/shexer/refs/heads/master/test/t_files/t_graph_1.ttl",
                INPUT_GRPAH_PATH)

print("# We will work with this graph:\n")

# Read the copy back as UTF-8 (the encoding it was written with) instead of
# relying on the platform default encoding.
with open(INPUT_GRPAH_PATH, encoding="utf-8") as in_stream:
    print(in_stream.read())

# We will work with this graph:

@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
@prefix ex: <http://example.org/> .
@prefix foaf: <http://xmlns.com/foaf/0.1/> .
@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .

ex:Jimmy a foaf:Person ;  # Complete
	foaf:age "23"^^xsd:integer ;
	foaf:name "Jimmy" ;
	foaf:familyName "Jones" .

ex:Sarah a foaf:Person ;  # Complete implicit type for age
	foaf:age 22 ;
	foaf:name "Sarah" ;
	foaf:familyName "Salem" .

ex:Bella a foaf:Person ;  # Missing familyName
	foaf:age "56"^^xsd:integer ;
	foaf:name "Isabella" .

ex:David a foaf:Person ;  # Missing age and use knows
	foaf:name "David" ;
	foaf:familyName "Doulofeau" ;
	foaf:knows ex:Sarah .

ex:HumanLike foaf:name "Person" ;  # foaf properties, but not explicit type.
	foaf:familyName "Maybe" ;
	foaf:age 99 ;
	foaf:knows ex:David .

ex:x1 rdf:type foaf:Document ;
	foaf:depiction "A thing that is nice" ;
	foaf:title "A nice thing" .




In [None]:
# Every class with at least one instance gets its own shape.

shaper = Shaper(
    all_classes_mode=True,  # True -> one shape per class
    graph_file_input=INPUT_GRPAH_PATH,
    input_format=TURTLE_ITER,
    namespaces_dict=default_namespaces(),
)

str_result = shaper.shex_graph(string_output=True)
print(str_result)

PREFIX ex: <http://example.org/>
PREFIX xml: <http://www.w3.org/XML/1998/namespace/>
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>
PREFIX foaf: <http://xmlns.com/foaf/0.1/>
PREFIX : <http://weso.es/shapes/>

:Person
{
   rdf:type  [foaf:Person]  ;                                  # 100.0 %
   foaf:name  xsd:string  ;                                    # 100.0 %
   foaf:age  xsd:integer  ?;
            # 75.0 % obj: xsd:integer. Cardinality: {1}
   foaf:familyName  xsd:string  ?;
            # 75.0 % obj: xsd:string. Cardinality: {1}
   foaf:knows  @:Person  ?
            # 25.0 % obj: @:Person. Cardinality: {1}
}


:Document
{
   rdf:type  [foaf:Document]  ;                                # 100.0 %
   foaf:title  xsd:string  ;                                   # 100.0 %
   foaf:depiction  xsd:string  ?
            # 50.0 % obj: xsd:string. Cardinality: {1}
}





In [None]:
# Only the classes in a white list get a shape.

shaper = Shaper(
    graph_file_input=INPUT_GRPAH_PATH,
    input_format=TURTLE_ITER,
    namespaces_dict=default_namespaces(),
    # List of classes that will get a shape. Prefixed URIs are recognized
    # as long as their prefix is defined in the namespaces dict.
    target_classes=["foaf:Person"],
)

str_result = shaper.shex_graph(string_output=True)
print(str_result)

PREFIX ex: <http://example.org/>
PREFIX xml: <http://www.w3.org/XML/1998/namespace/>
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>
PREFIX foaf: <http://xmlns.com/foaf/0.1/>
PREFIX : <http://weso.es/shapes/>

:Person
{
   rdf:type  [foaf:Person]  ;                                  # 100.0 %
   foaf:name  xsd:string  ;                                    # 100.0 %
   foaf:age  xsd:integer  ?;
            # 75.0 % obj: xsd:integer. Cardinality: {1}
   foaf:familyName  xsd:string  ?;
            # 75.0 % obj: xsd:string. Cardinality: {1}
   foaf:knows  @:Person  ?
            # 25.0 % obj: @:Person. Cardinality: {1}
}





In [None]:
# All classes get a shape, BUT ONLY 1 INSTANCE per class is used to extract
# its features. This is extremely useful to extract shapes from large sources
# or when consuming SPARQL endpoints.

shaper = Shaper(
    graph_file_input=INPUT_GRPAH_PATH,
    input_format=TURTLE_ITER,
    namespaces_dict=default_namespaces(),
    all_classes_mode=True,  # True -> one shape per class
    # Limit on the number of instances used to extract each shape.
    instances_cap=1,
    # Not necessary: it is set so the data of the instance that was used can
    # be checked in the output.
    examples_mode=ALL_EXAMPLES,
)

str_result = shaper.shex_graph(string_output=True)
print(str_result)

PREFIX ex: <http://example.org/>
PREFIX xml: <http://www.w3.org/XML/1998/namespace/>
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>
PREFIX foaf: <http://xmlns.com/foaf/0.1/>
PREFIX : <http://weso.es/shapes/>

:Person
{
   rdf:type  [foaf:Person]  ;                                  # 100.0 %
   foaf:age  xsd:integer  ;                                    # 100.0 %
            // rdfs:comment "23" ;
   foaf:name  xsd:string  ;                                    # 100.0 %
            // rdfs:comment "Jimmy" ;
   foaf:familyName  xsd:string                                 # 100.0 %
            // rdfs:comment "Jones" ;
} // rdfs:comment ex:Jimmy


:Document
{
   rdf:type  [foaf:Document]  ;                                # 100.0 %
   foaf:depiction  xsd:string  ;                               # 100.0 %
            // rdfs:comment "A thing that is nice" ;
   foaf:title  xsd:string 

In [None]:
# sheXer can extract inverse constraints, i.e., those in which the node to be
# validated acts as the object of a triple instead of as its subject.

shaper = Shaper(
    inverse_paths=True,  # True -> inverse constraints are extracted as well
    graph_file_input=INPUT_GRPAH_PATH,
    input_format=TURTLE_ITER,
    namespaces_dict=default_namespaces(),
    all_classes_mode=True,
)

str_result = shaper.shex_graph(string_output=True)
print(str_result)

PREFIX ex: <http://example.org/>
PREFIX xml: <http://www.w3.org/XML/1998/namespace/>
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>
PREFIX foaf: <http://xmlns.com/foaf/0.1/>
PREFIX : <http://weso.es/shapes/>

:Person
{
   rdf:type  [foaf:Person]  ;                                  # 100.0 %
   foaf:name  xsd:string  ;                                    # 100.0 %
   foaf:age  xsd:integer  ?;
            # 75.0 % obj: xsd:integer. Cardinality: {1}
   foaf:familyName  xsd:string  ?;
            # 75.0 % obj: xsd:string. Cardinality: {1}
   ^  foaf:knows  IRI  ?;
            # 50.0 % obj: IRI. Cardinality: {1}
            # 25.0 % obj: @:Person. Cardinality: {1}
   foaf:knows  @:Person  ?
            # 25.0 % obj: @:Person. Cardinality: {1}
}


:Document
{
   rdf:type  [foaf:Document]  ;                                # 100.0 %
   foaf:title  xsd:string  ;                        

In [7]:
# You can establish an acceptance threshold. This value in [0, 1] indicates
# the ratio of instances in which a certain feature must be observed in order
# to be part of the result.

# Here you can see some examples of extractions over the same input using
# different acceptance thresholds:
#   0    -> the default value. With this, every feature, even if it is
#           observed only once, will make the results (with a cardinality
#           including 0 occurrences).
#   0.30 -> at least 30% of the instances supporting the feature.
#   0.60 -> at least 60% of the instances supporting the feature.
#   1    -> EVERY instance supporting the feature.

for threshold in (0, 0.30, 0.60, 1):
    # A fresh Shaper is built for every extraction.
    # NOTE(review): kept from the original cell, which rebuilt it each time;
    # confirm whether one Shaper can be reused across shex_graph() calls.
    shaper = Shaper(
                graph_file_input=INPUT_GRPAH_PATH,
                namespaces_dict=default_namespaces(),
                all_classes_mode=True,
                input_format=TURTLE_ITER,
                inverse_paths=True)

    str_result = shaper.shex_graph(string_output=True,
                                   acceptance_threshold=threshold)
    # The banners read "Threshold 0", "0.3", "0.6" and "1".
    print(f"--------------Threshold {threshold}---------------")
    print(str_result)


--------------Threshold 0---------------
PREFIX ex: <http://example.org/>
PREFIX xml: <http://www.w3.org/XML/1998/namespace/>
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>
PREFIX foaf: <http://xmlns.com/foaf/0.1/>
PREFIX : <http://weso.es/shapes/>

:Person
{
   rdf:type  [foaf:Person]  ;                                  # 100.0 %
   foaf:name  xsd:string  ;                                    # 100.0 %
   foaf:age  xsd:integer  ?;
            # 75.0 % obj: xsd:integer. Cardinality: {1}
   foaf:familyName  xsd:string  ?;
            # 75.0 % obj: xsd:string. Cardinality: {1}
   ^  foaf:knows  IRI  ?;
            # 50.0 % obj: IRI. Cardinality: {1}
            # 25.0 % obj: @:Person. Cardinality: {1}
   foaf:knows  @:Person  ?
            # 25.0 % obj: @:Person. Cardinality: {1}
}


:Document
{
   rdf:type  [foaf:Document]  ;                                # 100.0 %
   foaf:tit

In [22]:
# sheXer allows for the generation of constraints with the OR operator.
# For this example, we will work with a different graph:
# two people, one owns a cat and the other owns a dog.



raw_graph = """
@prefix ex: <http://example.org/> .
@prefix foaf: <http://xmlns.com/foaf/0.1/> .
@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .

ex:Jimmy a foaf:Person ;
	foaf:age "23"^^xsd:integer ;
	foaf:name "Jimmy" ;
	foaf:familyName "Jones" ;
	ex:owns ex:catty .

ex:Sarah a foaf:Person ;
	foaf:age 22 ;
	foaf:name "Sarah" ;
	foaf:familyName "Salem" ;
	ex:owns ex:doggy .

ex:catty a ex:Cat ;
    a ex:Pet ;
		ex:color  ex:orange .

ex:doggy a ex:Dog ;
    a ex:Pet ;
		ex:color  ex:brown .

"""

# Out of the box, sheXer does not try to generate OR constraints:

shaper = Shaper(
    raw_graph=raw_graph,
    input_format=TURTLE_ITER,
    namespaces_dict=default_namespaces(),
    all_classes_mode=True,
)

str_result = shaper.shex_graph(string_output=True)
print(str_result)

PREFIX ex: <http://example.org/>
PREFIX xml: <http://www.w3.org/XML/1998/namespace/>
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>
PREFIX foaf: <http://xmlns.com/foaf/0.1/>
PREFIX : <http://weso.es/shapes/>

:Person
{
   rdf:type  [foaf:Person]  ;                                  # 100.0 %
   foaf:age  xsd:integer  ;                                    # 100.0 %
   foaf:name  xsd:string  ;                                    # 100.0 %
   foaf:familyName  xsd:string  ;                              # 100.0 %
   ex:owns  @:Pet                                              # 100.0 %
            # 50.0 % obj: @:Cat. Cardinality: {1}
            # 50.0 % obj: @:Dog. Cardinality: {1}
}


:Cat
{
   rdf:type  [ex:Cat]  ;                                       # 100.0 %
   rdf:type  [ex:Pet]  ;                                       # 100.0 %
   ex:color  IRI                               

In [23]:
# ... but sheXer will try if you ask for it.

shaper = Shaper(
    raw_graph=raw_graph,
    input_format=TURTLE_ITER,
    namespaces_dict=default_namespaces(),
    all_classes_mode=True,
    # False -> there is a chance of getting OR constraints.
    disable_or_statements=False,
)

str_result = shaper.shex_graph(string_output=True)
print(str_result)

PREFIX ex: <http://example.org/>
PREFIX xml: <http://www.w3.org/XML/1998/namespace/>
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>
PREFIX foaf: <http://xmlns.com/foaf/0.1/>
PREFIX : <http://weso.es/shapes/>

:Person
{
   rdf:type  [foaf:Person]  ;                                  # 100.0 %
   foaf:age  xsd:integer  ;                                    # 100.0 %
   foaf:name  xsd:string  ;                                    # 100.0 %
   foaf:familyName  xsd:string  ;                              # 100.0 %
   ex:owns  @:Pet  OR  @:Cat  OR  @:Dog  
            # 100.0 % obj: @:Pet. Cardinality: {1}
            # 50.0 % obj: @:Cat. Cardinality: {1}
            # 50.0 % obj: @:Dog. Cardinality: {1}
}


:Cat
{
   rdf:type  [ex:Cat]  ;                                       # 100.0 %
   rdf:type  [ex:Pet]  ;                                       # 100.0 %
   ex:color  IRI           

In [24]:
# OR constraints generated by sheXer are always meant to be "redundant".
# When exploring instances, sheXer does not annotate which entity is pointing
# to which; it only annotates the fact that some entity is pointing to another
# one which conforms with a certain shape.
# If all people are linked with :Pet then, necessarily, if some people are
# linked with Cats or Dogs, Pet must subsume cats and dogs.

# See what happens when we remove the type ex:Pet from the example graph:

raw_graph = """
@prefix ex: <http://example.org/> .
@prefix foaf: <http://xmlns.com/foaf/0.1/> .
@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .

ex:Jimmy a foaf:Person ;
	foaf:age "23"^^xsd:integer ;
	foaf:name "Jimmy" ;
	foaf:familyName "Jones" ;
	ex:owns ex:catty .

ex:Sarah a foaf:Person ;
	foaf:age 22 ;
	foaf:name "Sarah" ;
	foaf:familyName "Salem" ;
	ex:owns ex:doggy .

ex:catty a ex:Cat ;
		ex:color  ex:orange .

ex:doggy a ex:Dog ;
		ex:color  ex:brown .

"""



shaper = Shaper(
    raw_graph=raw_graph,
    input_format=TURTLE_ITER,
    namespaces_dict=default_namespaces(),
    all_classes_mode=True,
    disable_or_statements=False,
)

str_result = shaper.shex_graph(string_output=True)
print(str_result)

# The OR is gone. sheXer knows that 50% of the people own a dog and 50% own a
# cat, but it is not sure whether there is an intersection between those two
# sets of people. So it produces an IRI node kind and reports in comments what
# it actually knows.


PREFIX ex: <http://example.org/>
PREFIX xml: <http://www.w3.org/XML/1998/namespace/>
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>
PREFIX foaf: <http://xmlns.com/foaf/0.1/>
PREFIX : <http://weso.es/shapes/>

:Person
{
   rdf:type  [foaf:Person]  ;                                  # 100.0 %
   foaf:age  xsd:integer  ;                                    # 100.0 %
   foaf:name  xsd:string  ;                                    # 100.0 %
   foaf:familyName  xsd:string  ;                              # 100.0 %
   ex:owns  IRI                                                # 100.0 %
            # 50.0 % obj: @:Cat. Cardinality: {1}
            # 50.0 % obj: @:Dog. Cardinality: {1}
}


:Cat
{
   rdf:type  [ex:Cat]  ;                                       # 100.0 %
   ex:color  IRI                                               # 100.0 %
}


:Dog
{
   rdf:type  [ex:Dog]  ;            

In [25]:
# There is a way, anyhow, to make Dog and Cat part of the actual shapes
# rather than a statistical note in a comment:

shaper = Shaper(
    raw_graph=raw_graph,
    input_format=TURTLE_ITER,
    namespaces_dict=default_namespaces(),
    all_classes_mode=True,
    disable_or_statements=False,
    # True -> OR statements that include the macro IRI are allowed.
    allow_redundant_or=True,
)

str_result = shaper.shex_graph(string_output=True)
print(str_result)


# In this case, the node constraint is IRI or :Dog or :Cat. sheXer is still
# not sure about whether there is an intersection between dog owners and cat
# owners, but it does know that everyone owns something, which is an IRI.
# IRI subsumes Cat and Dog, same as Pet did, and the node constraint IRI alone
# would have been enough for validation purposes. But, for data description,
# even if this choice is redundant, it is also more informative.

PREFIX ex: <http://example.org/>
PREFIX xml: <http://www.w3.org/XML/1998/namespace/>
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>
PREFIX foaf: <http://xmlns.com/foaf/0.1/>
PREFIX : <http://weso.es/shapes/>

:Person
{
   rdf:type  [foaf:Person]  ;                                  # 100.0 %
   foaf:age  xsd:integer  ;                                    # 100.0 %
   foaf:name  xsd:string  ;                                    # 100.0 %
   foaf:familyName  xsd:string  ;                              # 100.0 %
   ex:owns  IRI  OR  @:Cat  OR  @:Dog  
            # 50.0 % obj: @:Cat. Cardinality: {1}
            # 50.0 % obj: @:Dog. Cardinality: {1}
}


:Cat
{
   rdf:type  [ex:Cat]  ;                                       # 100.0 %
   ex:color  IRI                                               # 100.0 %
}


:Dog
{
   rdf:type  [ex:Dog]  ;                                       # 100.

In [26]:
# sheXer allows restricting potential cardinalities to the usual values:
# +, *, ? and {1}.
# We'll use a different graph in this example: two people, each owns two pets.

raw_graph = """
@prefix ex: <http://example.org/> .
@prefix foaf: <http://xmlns.com/foaf/0.1/> .
@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .

ex:Jimmy a foaf:Person ;
	foaf:age "23"^^xsd:integer ;
	foaf:name "Jimmy" ;
	foaf:familyName "Jones" ;
  ex:owns ex:doggy ;
	ex:owns ex:catty .

ex:Sarah a foaf:Person ;
	foaf:age 22 ;
	foaf:name "Sarah" ;
	foaf:familyName "Salem" ;
  ex:owns ex:catty ;
	ex:owns ex:doggy .

ex:catty a ex:Pet ;
		ex:color  ex:orange .

ex:doggy a ex:Pet ;
		ex:color  ex:brown .

"""

shaper = Shaper(
    raw_graph=raw_graph,
    input_format=TURTLE_ITER,
    namespaces_dict=default_namespaces(),
    all_classes_mode=True,
)

str_result = shaper.shex_graph(string_output=True)
print(str_result)

# As you can check, the constraint ex:owns @:Pet gets a cardinality of exactly
# 2, which is what is observed in the data. Such exact cardinalities are
# sometimes legit (e.g., everyone may have exactly the same number of
# chromosomes), but they are frequently produced by small samples that share
# some features by chance.


PREFIX ex: <http://example.org/>
PREFIX xml: <http://www.w3.org/XML/1998/namespace/>
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>
PREFIX foaf: <http://xmlns.com/foaf/0.1/>
PREFIX : <http://weso.es/shapes/>

:Person
{
   rdf:type  [foaf:Person]  ;                                  # 100.0 %
   foaf:age  xsd:integer  ;                                    # 100.0 %
   foaf:name  xsd:string  ;                                    # 100.0 %
   foaf:familyName  xsd:string  ;                              # 100.0 %
   ex:owns  @:Pet  {2}                                         # 100.0 %
}


:Pet
{
   rdf:type  [ex:Pet]  ;                                       # 100.0 %
   ex:color  IRI                                               # 100.0 %
}





In [29]:
# In this case, people can probably have a number of pets which is not exactly
# 2. We can configure sheXer to avoid being "that" precise.

shaper = Shaper(
    raw_graph=raw_graph,
    input_format=TURTLE_ITER,
    namespaces_dict=default_namespaces(),
    all_classes_mode=True,
    # True -> exact cardinalities are avoided.
    disable_exact_cardinality=True,
)

str_result = shaper.shex_graph(string_output=True)
print(str_result)


PREFIX ex: <http://example.org/>
PREFIX xml: <http://www.w3.org/XML/1998/namespace/>
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>
PREFIX foaf: <http://xmlns.com/foaf/0.1/>
PREFIX : <http://weso.es/shapes/>

:Person
{
   rdf:type  [foaf:Person]  ;                                  # 100.0 %
   foaf:age  xsd:integer  ;                                    # 100.0 %
   foaf:name  xsd:string  ;                                    # 100.0 %
   foaf:familyName  xsd:string  ;                              # 100.0 %
   ex:owns  @:Pet  +                                           # 100.0 %
}


:Pet
{
   rdf:type  [ex:Pet]  ;                                       # 100.0 %
   ex:color  IRI                                               # 100.0 %
}





In [33]:
# This example uses yet another graph:
# three people, two of whom own a pet.

raw_graph = """
@prefix ex: <http://example.org/> .
@prefix foaf: <http://xmlns.com/foaf/0.1/> .
@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .

ex:Jimmy a foaf:Person ;
	foaf:age "23"^^xsd:integer ;
	foaf:name "Jimmy" ;
	foaf:familyName "Jones" ;
	ex:owns ex:catty .

ex:Sarah a foaf:Person ;
	foaf:age 22 ;
	foaf:name "Sarah" ;
	foaf:familyName "Salem" ;
	ex:owns ex:doggy .

ex:Lionel a foaf:Person ;
	foaf:age 22 ;
	foaf:name "Lionel" ;
	foaf:familyName "Lonely" .

ex:catty a ex:Pet ;
		ex:color  ex:orange .

ex:doggy a ex:Pet ;
		ex:color  ex:brown .

"""

shaper = Shaper(
    raw_graph=raw_graph,
    input_format=TURTLE_ITER,
    namespaces_dict=default_namespaces(),
    all_classes_mode=True,
    # Unrelated to the case: number of decimals shown in the stats
    # (floats could get ugly otherwise).
    decimals=2,
)

str_result = shaper.shex_graph(string_output=True)
print(str_result)



PREFIX ex: <http://example.org/>
PREFIX xml: <http://www.w3.org/XML/1998/namespace/>
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>
PREFIX foaf: <http://xmlns.com/foaf/0.1/>
PREFIX : <http://weso.es/shapes/>

:Person
{
   rdf:type  [foaf:Person]  ;                                  # 100.00 %
   foaf:age  xsd:integer  ;                                    # 100.00 %
   foaf:name  xsd:string  ;                                    # 100.00 %
   foaf:familyName  xsd:string  ;                              # 100.00 %
   ex:owns  @:Pet  ?
            # 66.67 % obj: @:Pet. Cardinality: {1}
}


:Pet
{
   rdf:type  [ex:Pet]  ;                                       # 100.00 %
   ex:color  IRI                                               # 100.00 %
}





In [34]:
# With the previous setting, the cardinality for owning pets is '?' (optional),
# as there is one instance which does not own a pet. But we can force sheXer
# to avoid cardinalities that include zero occurrences, even if this is not
# supported by the data.
# To accept constraints that are not universally met by the explored
# instances, you have to handle the acceptance_threshold and an extra
# parameter.


shaper = Shaper(
    raw_graph=raw_graph,
    input_format=TURTLE_ITER,
    namespaces_dict=default_namespaces(),
    all_classes_mode=True,
    decimals=2,
    # False -> cardinalities with zero occurrences are avoided.
    all_instances_are_compliant_mode=False,
)

str_result = shaper.shex_graph(
    string_output=True,
    # Minimum support required to accept a constraint.
    acceptance_threshold=0.6,
)
print(str_result)

# As you can see, even if the constraint is included in the shape, you still
# get statistical information, so you can decide later whether to keep it.

PREFIX ex: <http://example.org/>
PREFIX xml: <http://www.w3.org/XML/1998/namespace/>
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>
PREFIX foaf: <http://xmlns.com/foaf/0.1/>
PREFIX : <http://weso.es/shapes/>

:Person
{
   rdf:type  [foaf:Person]  ;                                  # 100.00 %
   foaf:age  xsd:integer  ;                                    # 100.00 %
   foaf:name  xsd:string  ;                                    # 100.00 %
   foaf:familyName  xsd:string  ;                              # 100.00 %
   ex:owns  @:Pet                                              # 66.67 %
}


:Pet
{
   rdf:type  [ex:Pet]  ;                                       # 100.00 %
   ex:color  IRI                                               # 100.00 %
}



