In [1]:
import sys
!{sys.executable} -m pip install rdflib



In [45]:
import sys
!{sys.executable} -m pip install pandas numpy

Collecting pandas
  Using cached pandas-2.2.3-cp312-cp312-win_amd64.whl.metadata (19 kB)
Collecting numpy
  Downloading numpy-2.2.3-cp312-cp312-win_amd64.whl.metadata (60 kB)
Collecting pytz>=2020.1 (from pandas)
  Downloading pytz-2025.1-py2.py3-none-any.whl.metadata (22 kB)
Collecting tzdata>=2022.7 (from pandas)
  Downloading tzdata-2025.1-py2.py3-none-any.whl.metadata (1.4 kB)
Using cached pandas-2.2.3-cp312-cp312-win_amd64.whl (11.5 MB)
Downloading numpy-2.2.3-cp312-cp312-win_amd64.whl (12.6 MB)
   ---------------------------------------- 0.0/12.6 MB ? eta -:--:--
   ---------------- ----------------------- 5.2/12.6 MB 26.6 MB/s eta 0:00:01
   ------------------------------------ --- 11.5/12.6 MB 27.8 MB/s eta 0:00:01
   ---------------------------------------- 12.6/12.6 MB 25.5 MB/s eta 0:00:00
Downloading pytz-2025.1-py2.py3-none-any.whl (507 kB)
Downloading tzdata-2025.1-py2.py3-none-any.whl (346 kB)
Installing collected packages: pytz, tzdata, numpy, pandas
Successfully instal

In [19]:
#namespaces
# Base IRIs, kept as plain strings so they can be concatenated with a term name
# or tested as a substring of a subject IRI.
# maSMP_ns:    prefix of the maSMP types namespace
maSMP_ns = "https://discovery.biothings.io/view/maSMP/"
# maSMPPro_ns: prefix of the maSMP profiles namespace
maSMPPro_ns = "https://discovery.biothings.io/view/maSMPProfiles/"
# schema_ns:   prefix used to build schema.org term IRIs
schema_ns = "http://schema.org/" #double check whether http or https is used in the source file

## Create an index page for Profiles

In [9]:
from rdflib import Graph
from rdflib import URIRef
from rdflib.namespace import RDF, RDFS, OWL

from pandas import DataFrame

In [20]:
#terms used for filtering triples
# rdfs:Class comes straight from rdflib's RDFS namespace; the two schema.org
# predicates are built from the schema_ns prefix.
rdfs_class = RDFS.Class
schema_domain = URIRef(f"{schema_ns}domainIncludes")
schema_range = URIRef(f"{schema_ns}rangeIncludes")

In [3]:
#load the maSMP profiles document into an rdflib graph
profiles_source = "https://raw.githubusercontent.com/zbmed-semtec/maSMPs/refs/heads/main/schema/maSMP_schema_v2/v2.1.0/profiles/maSMP_profiles_v2.jsonld"
g = Graph()
# parse() is left as the last expression so the cell still displays the graph
g.parse(profiles_source)

<Graph identifier=Neee7e70c9e834b508a746680eae04e5b (<class 'rdflib.graph.Graph'>)>

In [12]:
#Create a generic table listing all the profiles and their descriptions

profile_suffix = "Profile"
table = "<table>\n" + "<tr><th>Profile</th><th>Description</th></tr>\n"

# Collect every subject typed as rdfs:Class that lives in the maSMP profiles namespace.
# Sorting by IRI keeps the generated page stable between runs instead of depending on
# the iteration order of the graph store.
profile_subjects = sorted(
    (s for s in g.subjects(predicate=RDF.type, object=rdfs_class, unique=True) if maSMPPro_ns in s),
    key=str,
)

for s in profile_subjects :
    type_name = str(s).split('/')[-1]
    # Link target is the profile name without its suffix; only strip the suffix when it is
    # really there, otherwise slicing would chop arbitrary characters off the name.
    page_name = type_name[:-len(profile_suffix)] if type_name.endswith(profile_suffix) else type_name
    # g.value() returns None when the class has no rdfs:comment; fall back to an empty
    # cell rather than failing on str + None.
    description = str(g.value(subject=s, predicate=RDFS.comment) or "")
    table += "<tr><td><a href='./" + page_name + "'>" + type_name + "</a></td><td>" + description + "</td></tr>\n\n"

table += "</table>\n"

In [13]:
# Create the Profiles page: a fixed introduction followed by the HTML table built above
# NOTE(review): the [maSMP](./Types/index.md) link is relative to the Profiles folder, while
# convert_to_link points at ../../Types/ — confirm which location of Types is the right one.
intro_text = """
<h1>maSMP Profiles</h1>\n
maSMP profiles are recommendations of use corresponding to [maSMP](./Types/index.md) and [schema.org](https://schema.org) types and properties. 
Recommendations relate to the cardinality (one, many) and marginality (minimum, recommended, optional). 
The json.schema corresponding to the maSMP profiles can be found at the [maSMP DDE namespace](https://discovery.biothings.io/ns/maSMPProfiles){:target="_blank"}.
\n\n
To avoid conflict/confusion between types and profiles, all the profile names end with the suffix 'Profile'
\n\n
"""

# Write as UTF-8 explicitly: without it open() uses the platform locale encoding (a legacy
# code page on Windows), which can fail on, or mis-encode, non-ASCII text in the descriptions.
with open("../docs/Profiles/index.md", 'w', encoding="utf-8") as file:
    file.write(intro_text)
    file.write(table)

## Create individual pages, one per profile

In [15]:
import urllib.request, json

In [35]:
# NOTE(review): def_value is not read by any other visible cell — confirm whether it is still needed.
def_value = maSMP_ns
def convert_to_link(url, label=None, md=False) :
    """Render ``url`` as a hyperlink, either as an HTML anchor or as markdown.

    Identifiers starting with ``maSMP:`` are treated as local: the prefix is replaced by
    the relative path ``../../Types/``, the link text is prefixed with ``maSMP:`` and the
    link opens in the same tab. Any other URL is treated as external and opens in a new
    tab (``target='_blank'``).

    Parameters
    ----------
    url : str
        Either a ``maSMP:``-prefixed identifier or a full URL.
    label : str, optional
        Link text. Defaults to the last ``/``-separated segment of the rewritten URL.
    md : bool, optional
        When true return markdown (``[text](url)``), otherwise an ``<a>`` element.

    Returns
    -------
    str
        The formatted link.
    """
    is_local = url.startswith("maSMP:")
    target = url.replace("maSMP:", "../../Types/")

    text = str(target).split('/')[-1] if label is None else label
    if is_local :
        text = "maSMP:" + text

    if md :
        md_link = "[{}]({})".format(text, target)
        # external markdown links carry an attribute list so they open in a new tab
        return md_link if is_local else md_link + "{:target='_blank'}"

    if is_local :
        return "<a href='{}'>{}</a>".format(target, text)
    return "<a href='{}' target='_blank'>{}</a>".format(target, text)

# Quick check: a maSMP-prefixed identifier should come back as a relative link into ../../Types/
print(convert_to_link("maSMP:SoftwareRunAction"))

<a href='../../Types/SoftwareRunAction'>maSMP:SoftwareRunAction</a>


In [62]:
# Download the profiles document and keep it as plain Python dicts/lists
origin = "https://raw.githubusercontent.com/zbmed-semtec/maSMPs/refs/heads/main/schema/maSMP_schema_v2/v2.1.0/profiles/maSMP_profiles_v2.jsonld"
with urllib.request.urlopen(origin) as prof_file:
    raw_bytes = prof_file.read()
    prof_data = json.loads(raw_bytes)

In [63]:
def get_prop_list(elem, level) :
    """Build the table rows for one marginality level of a profile.

    Parameters
    ----------
    elem : dict
        A profile entry; its ``"$validation"`` mapping is expected to hold one list of
        property names per level plus a ``"properties"`` mapping with per-property details.
    level : str
        Key of the property list inside ``"$validation"`` (the notebook passes
        ``"required"``, ``"recommended"`` and ``"optional"``).

    Returns
    -------
    list of dict
        One row per property with the keys ``Property``, ``Expected Type``,
        ``Description`` and ``Cardinality``. ``Expected Type`` is always left empty.
        A profile that does not define ``level`` yields an empty list, and a property
        without a description or cardinality gets an empty string for that column,
        instead of raising ``KeyError``.
    """
    validation = elem.get("$validation") or {}
    properties = validation.get("properties") or {}

    lst = []
    for item in validation.get(level) or [] :
        details = properties.get(item) or {}
        lst.append({
            "Property": item,
            "Expected Type": "",
            "Description": details.get("description", ""),
            "Cardinality": details.get("owl:cardinality", ""),
        })
    return lst

In [66]:
# Write one page per profile: heading, parent type, description and three property tables
# (minimum / recommended / optional).

# Fixed column order, so a level without any property still renders a header row.
prop_columns = ["Property", "Expected Type", "Description", "Cardinality"]
profile_suffix = "Profile"

lst_min = []
lst_rec = []
lst_opt = []

for elem in prof_data["@graph"] :
    # only class entries describe a profile
    if elem["@type"] != "rdfs:Class" :
        continue

    label = elem["rdfs:label"]
    intro_text = "<h1>" + label + "</h1>\n\n"
    intro_text += label + " is profile for " + convert_to_link(elem["rdfs:subClassOf"]["@id"]) + "\n\n"
    intro_text += elem["rdfs:comment"] + "\n\n"

    lst_min = get_prop_list(elem, "required")
    lst_rec = get_prop_list(elem, "recommended")
    lst_opt = get_prop_list(elem, "optional")

    # The page is named after the profile without its suffix; strip it only when present,
    # otherwise slicing would chop arbitrary characters off the label.
    page_name = label[:-len(profile_suffix)] if label.endswith(profile_suffix) else label

    # UTF-8 is set explicitly: the platform default (a legacy code page on Windows) can fail
    # on, or mis-encode, non-ASCII text in the descriptions.
    with open("../docs/Profiles/" + page_name + ".md", 'w', encoding="utf-8") as file:
        file.write(intro_text)

        # Tables are emitted as HTML (like the index page) because DataFrame.to_string()
        # produces a fixed-width console dump that does not render as a table.
        # Headings need a space after the hashes and blank lines around them.
        file.write("## Minimum properties\n\n")
        df_min = DataFrame(lst_min, columns=prop_columns)
        file.write(df_min.to_html(index=False))

        file.write("\n\n## Recommended properties\n\n")
        df_rec = DataFrame(lst_rec, columns=prop_columns)
        file.write(df_rec.to_html(index=False))

        file.write("\n\n## Optional properties\n\n")
        df_opt = DataFrame(lst_opt, columns=prop_columns)
        file.write(df_opt.to_html(index=False))
        file.write("\n")
        