In [1]:
import sys
!{sys.executable} -m pip install rdflib



In [45]:
import sys
!{sys.executable} -m pip install pandas numpy

Collecting pandas
  Using cached pandas-2.2.3-cp312-cp312-win_amd64.whl.metadata (19 kB)
Collecting numpy
  Downloading numpy-2.2.3-cp312-cp312-win_amd64.whl.metadata (60 kB)
Collecting pytz>=2020.1 (from pandas)
  Downloading pytz-2025.1-py2.py3-none-any.whl.metadata (22 kB)
Collecting tzdata>=2022.7 (from pandas)
  Downloading tzdata-2025.1-py2.py3-none-any.whl.metadata (1.4 kB)
Using cached pandas-2.2.3-cp312-cp312-win_amd64.whl (11.5 MB)
Downloading numpy-2.2.3-cp312-cp312-win_amd64.whl (12.6 MB)
   ---------------------------------------- 0.0/12.6 MB ? eta -:--:--
   ---------------- ----------------------- 5.2/12.6 MB 26.6 MB/s eta 0:00:01
   ------------------------------------ --- 11.5/12.6 MB 27.8 MB/s eta 0:00:01
   ---------------------------------------- 12.6/12.6 MB 25.5 MB/s eta 0:00:00
Downloading pytz-2025.1-py2.py3-none-any.whl (507 kB)
Downloading tzdata-2025.1-py2.py3-none-any.whl (346 kB)
Installing collected packages: pytz, tzdata, numpy, pandas
Successfully instal

In [79]:
from rdflib import Graph
from rdflib import URIRef
from rdflib.namespace import RDF, RDFS, OWL

from pandas import DataFrame

In [80]:
#namespaces
maSMP_ns = "https://discovery.biothings.io/view/maSMP/"
schema_ns = "http://schema.org/" #double check whether http or https is used in the source file

#properties used for filtering triples
rdfs_class = URIRef("http://www.w3.org/2000/01/rdf-schema#Class")
schema_domain = URIRef(schema_ns + "domainIncludes")
schema_range = URIRef(schema_ns + "rangeIncludes")

In [75]:
#load graph
g = Graph()
g.parse("https://raw.githubusercontent.com/zbmed-semtec/maSMPs/refs/heads/main/schema/maSMP_schema_v2/v2.1.0/types/maSMP.jsonld")

<Graph identifier=N2e3cdd3701f04feea36ff7321d0dcd22 (<class 'rdflib.graph.Graph'>)>

## Create an index page for Types

In [76]:
#Create a generic table listing all the types and their descriptions

table = "<table>\n" + "<tr><th>Type</th><th>Description</th></tr>\n"

#iterate over all classes in the maSMP namespace to get type and description
for s in g.subjects(object=rdfs_class, unique=True) :
    type_name = str(s).split('/')[-1]    
    if maSMP_ns in s :
        description = g.value(subject=s, predicate=RDFS.comment)
        table += "<tr><td><a href='./" + type_name + "'>" + type_name + "</a></td><td>" + description + "</td></tr>\n\n"

table += "</table>\n"

In [77]:
# Create the Types page
intro_text = """
<h1>maSMP Types</h1>\n
maSMP types are extensions to the schema.org vocabulary. 
All types and properties for the maSMP metadata schema can also be found at the [maSMP DDE namespace](https://discovery.biothings.io/ns/maSMP){:target="_blank"}.
\n\n
"""

with open("../docs/Types/index.md", 'w') as file:
    file.write(intro_text)
    file.write(table)

## Create individual pages, one per type

In [112]:
def_value = maSMP_ns
def convert_to_link(url, maSMP_ns=def_value, label=None, md=False) :
    if label is None:
        label = str(url).split('/')[-1]
        
    if maSMP_ns in url:
        return "maSMP:" + label
    else :
        if md :
            return "[{}]({})".format(label, url) + "{:target='_blank'}"
        else :
            return "<a href='{}' target='_blank'>{}</a>".format(url, label)

print(convert_to_link(maSMP_ns+"Text"))
print(convert_to_link(maSMP_ns+"Text", md=True))
print(convert_to_link(maSMP_ns+"Text", label="myText"))
print(convert_to_link(maSMP_ns+"Text", label="myText", md=True))

print(convert_to_link("http://schema.org/Text"))
print(convert_to_link("http://schema.org/Text", md=True))
print(convert_to_link("http://schema.org/Text", label="myText"))
print(convert_to_link("http://schema.org/Text", label="myText", md=True))

convert_to_link("http://schema.org/Action", md=True)

maSMP:Text
maSMP:Text
maSMP:myText
maSMP:myText
<a href='http://schema.org/Text' target='_blank'>Text</a>
[Text](http://schema.org/Text){:target='_blank'}
<a href='http://schema.org/Text' target='_blank'>myText</a>
[myText](http://schema.org/Text){:target='_blank'}


"[Action](http://schema.org/Action){:target='_blank'}"

In [121]:
#Create individual pages per type
#iterate over all classes in the maSMP namespace to get properties
for s in g.subjects(object=rdfs_class, unique=True) :
    type_name = str(s).split('/')[-1]   
    
    if maSMP_ns in s :
        description = g.value(subject=s, predicate=RDFS.comment)
        parent = g.value(subject=s, predicate=RDFS.subClassOf) 

        #info about type, hierarchy, and description
        intro_text = "(parent type) " + convert_to_link(parent, md=True) + " - (type) maSMP:" + type_name + "\n\n"
        intro_text += description + "\n\n"

        #get all properties that have this type as domain
        get_props_query = "SELECT DISTINCT ?prop ?label ?desc ?range WHERE { "
        get_props_query += "?prop <" + schema_ns + "domainIncludes> <" + s + "> . " 
        get_props_query += "?prop <" + RDFS.label + "> ?label . " 
        get_props_query += "?prop <" + RDFS.comment + "> ?desc . "
        get_props_query += "?prop <" + schema_ns + "rangeIncludes> ?range . " 
        get_props_query += " }"
        qres = g.query(get_props_query)

        table = "<table>\n" + "<tr><th>Property</th><th>Expected Type</th><th>Description</th></tr>\n"
        
        df = DataFrame(qres, columns=['prop', 'label', 'desc', 'range'])
        df['range'] = df['range'].apply(convert_to_link) # convert to link all the expected values
        df = df.groupby(['prop']).agg({'label' : 'first', 'desc' : 'first', 'range' : ' or '.join}) #groups by property so, only one row per property
        for index, row  in df.iterrows() :
            table += "<tr>"
            #property name with external links
            table += "<td>" + convert_to_link(index, label=row['label'], md=False) + "</td>\n"
            #expected types with external links (links and joint before the loop)
            table += "<td>" + row['range']+ "</td>\n"
            #description
            table += "<td>" + row['desc'] + "</td>\n"
            table += "</tr>\n"
        table += "</table>\n"

        with open("../docs/Types/" + type_name + ".md", 'w') as file:
            file.write(intro_text)
            file.write(table)
        

?prop <http://schema.org/domainIncludes> <https://discovery.biothings.io/view/maSMP/SoftwareRunAction does not look like a valid URI, trying to serialize this will break.
?prop <http://schema.org/domainIncludes> <https://discovery.biothings.io/view/maSMP/SoftwareRunAction> .  does not look like a valid URI, trying to serialize this will break.
SELECT DISTINCT ?prop ?label ?desc ?range WHERE { ?prop <http://schema.org/domainIncludes> <https://discovery.biothings.io/view/maSMP/SoftwareRunAction> .  does not look like a valid URI, trying to serialize this will break.
?prop <http://www.w3.org/2000/01/rdf-schema#label does not look like a valid URI, trying to serialize this will break.
?prop <http://www.w3.org/2000/01/rdf-schema#label> ?label .  does not look like a valid URI, trying to serialize this will break.
SELECT DISTINCT ?prop ?label ?desc ?range WHERE { ?prop <http://schema.org/domainIncludes> <https://discovery.biothings.io/view/maSMP/SoftwareRunAction> . ?prop <http://www.w3.org/