In [23]:
#Import library 
import html

import IPython
import numpy as np
import pandas as pd
import sparqldataframe
from SPARQLWrapper import SPARQLWrapper, JSON

In [2]:
# SPARQL endpoint configuration: URL of the query service used by every query in this notebook.
endpointURL = "http://localhost:3030/biotoolsEdam/query"
# RDF serialization name; NOTE(review): not referenced by any cell visible here - confirm it is still needed.
rdfFormat = "turtle"

# Namespace prefix declarations; this string is prepended to each SPARQL query (prefixes+query).
prefixes = """
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX rdfs:<http://www.w3.org/2000/01/rdf-schema#>
PREFIX owl: <http://www.w3.org/2002/07/owl#>
PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>
PREFIX dc: <http://purl.org/dc/elements/1.1/>
PREFIX dcterms: <http://purl.org/dc/terms/>
PREFIX skos: <http://www.w3.org/2004/02/skos/core#>
PREFIX foaf: <http://xmlns.com/foaf/0.1/>
PREFIX oboInOwl: <http://www.geneontology.org/formats/oboInOwl#>

PREFIX bt: <https://bio.tools/>
PREFIX biotools: <https://bio.tools/ontology/>
PREFIX bsc: <http://bioschemas.org/>
PREFIX bsct: <http://bioschemas.org/types/>
PREFIX edam: <http://edamontology.org/>
PREFIX sc: <http://schema.org/>
PREFIX schema: <https://schema.org/>

"""

# Base URIs of the bio.tools entries, the bio.tools ontology and the EDAM ontology
# (same values as the bt:, biotools: and edam: prefixes above).
biotoolsURI = "https://bio.tools/"
biotoolsOntologyURI = "https://bio.tools/ontology/"
edamURI = "http://edamontology.org/"

In [24]:
def displaySparqlResults(results):
    '''
    Displays as HTML the result of a SPARQLWrapper query in a Jupyter notebook.

    Variable names and bound values are HTML-escaped before being inserted in
    the table, so that values containing '<', '>' or '&' are shown literally
    instead of being interpreted as markup. A variable that is unbound in a
    row is rendered as a non-breaking space.

        Parameters:
            results (dictionary): the result of a call to SPARQLWrapper.query().convert()
    '''
    variableNames = results['head']['vars']
    headerCells = '</th><th>'.join(html.escape(vName, quote=False) for vName in variableNames)
    bodyRows = '</tr><tr>'.join(
        '<td>{}</td>'.format('</td><td>'.join(
            # OPTIONAL patterns can leave a variable unbound in a given row
            html.escape(row[vName]['value'], quote=False) if vName in row else "&nbsp;"
            for vName in variableNames
        ))
        for row in results["results"]["bindings"]
    )
    tableCode = '<table><tr><th>{}</th></tr><tr>{}</tr></table>'.format(headerCells, bodyRows)
    IPython.display.display(IPython.display.HTML(tableCode))

In [3]:
#Functions for querying topics, operations and associated labels for a tool. 
def getToolLabel(toolURI):
    """Return the label of a tool (or empty string if no label is present).

    The empty string is also returned when the query yields no binding at
    all, i.e. when toolURI does not designate a resource typed as
    sc:SoftwareApplication in the endpoint.

    Keyword arguments:
    toolURI -- the URI for the tool (full URI starting with "http", or a prefixed name)
    """

    # Full URIs must be enclosed in angle brackets; prefixed names (e.g. bt:xyz) are used as is.
    if toolURI.startswith("http"):
        toolURI = "<" + toolURI + ">"
    query = """
SELECT DISTINCT ?tool ?toolLabel
WHERE {
  VALUES ?tool { """ + toolURI + """ }

  ?tool rdf:type sc:SoftwareApplication .
  OPTIONAL { ?tool sc:name ?tLabel }
  BIND(COALESCE(?tLabel, "") AS ?toolLabel)
}
"""
    sparql = SPARQLWrapper(endpointURL)
    sparql.setQuery(prefixes+query)
    sparql.setReturnFormat(JSON)
    results = sparql.queryAndConvert()
    bindings = results["results"]["bindings"]
    # No binding means the tool is unknown (or not a SoftwareApplication): avoid an IndexError.
    return bindings[0]["toolLabel"]["value"] if bindings else ""


def getToolURIByLabel(toolLabel):
    """Return the URI of a tool designated by its label (or None).

    The label is matched exactly against sc:name. Backslashes, double quotes
    and line breaks in the label are escaped so that it always forms a valid
    SPARQL string literal.

    Keyword arguments:
    toolLabel -- the label for the tool
    """

    # Escape the backslash first so that the escapes added afterwards are not doubled.
    escapedLabel = (
        toolLabel
        .replace("\\", "\\\\")
        .replace('"', '\\"')
        .replace("\n", "\\n")
        .replace("\r", "\\r")
    )
    query = """
SELECT DISTINCT ?tool ?toolLabel
WHERE {
  VALUES ?toolLabel { \"""" + escapedLabel + """\" }

  ?tool rdf:type sc:SoftwareApplication .
  ?tool sc:name ?toolLabel .
}
"""
    sparql = SPARQLWrapper(endpointURL)
    sparql.setQuery(prefixes+query)
    sparql.setReturnFormat(JSON)
    results = sparql.queryAndConvert()
    bindings = results["results"]["bindings"]
    # When several tools share the label, the first binding returned by the endpoint wins.
    return None if len(bindings) == 0 else bindings[0]["tool"]["value"]


def getToolTopics(toolURI, transitive=False):
    """Return the (URI, label) tuples of the non-deprecated topics annotating a tool.

    The label is the empty string for a topic that has no rdfs:label.

    Keyword arguments:
    toolURI -- the URI for the tool
    transitive -- also consider the ancestors of the topics directly associated to the tool (default: False)
    """

    # Full URIs go between angle brackets, anything else is passed through unchanged.
    toolTerm = "<" + toolURI + ">" if toolURI.startswith("http") else toolURI
    # Optional property-path suffix that climbs the rdfs:subClassOf hierarchy.
    ancestorPath = "/(rdfs:subClassOf*)" if transitive else ""
    queryTemplate = """
SELECT DISTINCT ?tool ?topic ?topicLabel
WHERE {
  VALUES ?tool { %s }

  ?tool sc:applicationSubCategory%s ?topic .
  ?topic rdf:type owl:Class .
  FILTER NOT EXISTS { ?topic rdfs:subClassOf? owl:DeprecatedClass }
  OPTIONAL { ?topic rdfs:label ?tLabel }
  BIND(COALESCE(?tLabel, "") AS ?topicLabel)
}
"""
    query = queryTemplate % (toolTerm, ancestorPath)

    sparql = SPARQLWrapper(endpointURL)
    sparql.setQuery(prefixes+query)
    sparql.setReturnFormat(JSON)
    bindings = sparql.queryAndConvert()["results"]["bindings"]

    toolTopics = []
    for binding in bindings:
        toolTopics.append((binding["topic"]["value"], binding["topicLabel"]["value"]))
    return toolTopics


def getToolOperations(toolURI, transitive=False):
    """Return the (URI, label) tuples of the non-deprecated operations annotating a tool.

    The label is the empty string for an operation that has no rdfs:label.

    Keyword arguments:
    toolURI -- the URI for the tool
    transitive -- also consider the ancestors of the operations directly associated to the tool (default: False)
    """

    # Full URIs go between angle brackets, anything else is passed through unchanged.
    toolTerm = "<" + toolURI + ">" if toolURI.startswith("http") else toolURI
    # Optional property-path suffix that climbs the rdfs:subClassOf hierarchy.
    ancestorPath = "/(rdfs:subClassOf*)" if transitive else ""
    queryTemplate = """
SELECT DISTINCT ?tool ?operation ?operationLabel
WHERE {
  VALUES ?tool { %s }

  ?tool sc:featureList%s ?operation .
  ?operation rdf:type owl:Class .
  FILTER NOT EXISTS { ?operation rdfs:subClassOf? owl:DeprecatedClass }
  OPTIONAL { ?operation rdfs:label ?oLabel }
  BIND(COALESCE(?oLabel, "") AS ?operationLabel)
}
"""
    query = queryTemplate % (toolTerm, ancestorPath)

    sparql = SPARQLWrapper(endpointURL)
    sparql.setQuery(prefixes+query)
    sparql.setReturnFormat(JSON)
    bindings = sparql.queryAndConvert()["results"]["bindings"]

    toolOperations = []
    for binding in bindings:
        toolOperations.append((binding["operation"]["value"], binding["operationLabel"]["value"]))
    return toolOperations

## Metrics on bio.tools : Tools with topic and operation

### Tools with SoftwareApplication class

In [5]:
query = """
# tools

SELECT DISTINCT ?tool ?toolLabel
WHERE {
  ?tool rdf:type sc:SoftwareApplication .
  ?tool sc:name ?toolLabel .
}
"""

# One row per distinct (tool, toolLabel) binding returned by the endpoint.
dfTool = sparqldataframe.query(endpointURL, prefixes+query)
# Row count of dfTool; reused below as the denominator of the coverage percentages.
nbTools = len(dfTool)
print(f"Number tools (SoftwareApplication) in bio.tools extraction : {nbTools}")
# Uncomment to (re)generate the dfTool.tsv.bz2 snapshot:
#dfTool.to_csv("../Dataframes/dfTool.tsv.bz2", sep="\t", index=False)

Number tools (SoftwareApplication) in bio.tools extraction : 30025


### Number of tools with at least one topic

In [6]:
query = """
# nb of tools with topic

SELECT (COUNT(DISTINCT ?tool) AS ?nbTools)
WHERE {
  ?tool rdf:type sc:SoftwareApplication .
  ?tool sc:applicationSubCategory ?topic .
  ?topic rdf:type owl:Class .
  FILTER NOT EXISTS { ?topic rdfs:subClassOf? owl:DeprecatedClass }
}
"""

sparql = SPARQLWrapper(endpointURL)
sparql.setReturnFormat(JSON)
sparql.setQuery(prefixes+query)
results = sparql.query().convert()

# The aggregate query yields a single row holding the count as a string.
countRow = results["results"]["bindings"][0]
nbToolsWithTopic = int(countRow["nbTools"]["value"])
topicCoverage = nbToolsWithTopic*100/nbTools
print("Nb tools with topic: {} ({:.2f}% of the {} tools)".format(nbToolsWithTopic, topicCoverage, nbTools))

Nb tools with topic: 29616 (98.64% of the 30025 tools)


### Number of tools with at least one operation

In [7]:
query = """
# nb of tools with operation

SELECT (COUNT(DISTINCT ?tool) AS ?nbTools)
WHERE {
  ?tool rdf:type sc:SoftwareApplication .
  ?tool sc:featureList ?operation .
  ?operation rdf:type owl:Class .
  FILTER NOT EXISTS { ?operation rdfs:subClassOf? owl:DeprecatedClass }
}
"""

sparql = SPARQLWrapper(endpointURL)
sparql.setReturnFormat(JSON)
sparql.setQuery(prefixes+query)
results = sparql.query().convert()

# The aggregate query yields a single row holding the count as a string.
countRow = results["results"]["bindings"][0]
nbToolsWithOperation = int(countRow["nbTools"]["value"])
operationCoverage = nbToolsWithOperation*100/nbTools
print("Nb tools with operation: {} ({:.2f}% of the {} tools)".format(nbToolsWithOperation, operationCoverage, nbTools))

Nb tools with operation: 28299 (94.25% of the 30025 tools)


### List of topics directly assigned to tools

In [8]:
# Topics directly assigned to tools (sc:applicationSubCategory), restricted to
# owl:Class topics that are neither owl:DeprecatedClass nor a direct subclass of it.
query = """
# tools topics

SELECT DISTINCT ?tool ?topic ?topicLabel
WHERE {
  ?tool rdf:type sc:SoftwareApplication .
  ?tool sc:applicationSubCategory ?topic .
  ?topic rdf:type owl:Class . # for avoiding deprecated topics
  FILTER NOT EXISTS { ?topic rdfs:subClassOf? owl:DeprecatedClass }
  OPTIONAL { ?topic rdfs:label ?tLabel }
  BIND(COALESCE(?tLabel, "") AS ?topicLabel)
}
"""

# One row per distinct (tool, topic, topicLabel) binding; topicLabel is "" when the topic has no rdfs:label.
dfToolTopic = sparqldataframe.query(endpointURL, prefixes+query)
dfToolTopic.head()

Unnamed: 0,tool,topic,topicLabel
0,https://bio.tools/pmirkb,http://edamontology.org/topic_0780,Plant biology
1,https://bio.tools/pmirkb,http://edamontology.org/topic_2885,DNA polymorphism
2,https://bio.tools/pmirkb,http://edamontology.org/topic_0659,"Functional, regulatory and non-coding RNA"
3,https://bio.tools/pmirkb,http://edamontology.org/topic_0203,Gene expression
4,https://bio.tools/pmirkb,http://edamontology.org/topic_0199,Genetic variation


### List of topics assigned to tools, directly or by inheritance (ancestor topics)

In [9]:
# Topics assigned to tools either directly or through inheritance: the property path
# sc:applicationSubCategory/(rdfs:subClassOf*) also reaches every ancestor of a direct topic.
query = """
# tools topics (and ancestors)

SELECT DISTINCT ?tool ?topic ?topicLabel
WHERE {
  ?tool rdf:type sc:SoftwareApplication .
  ?tool sc:applicationSubCategory/(rdfs:subClassOf*) ?topic .
  ?topic rdf:type owl:Class . # for avoiding deprecated topics and owl:Restriction
  FILTER NOT EXISTS { ?topic rdfs:subClassOf? owl:DeprecatedClass }
  OPTIONAL { ?topic rdfs:label ?tLabel }
  BIND(COALESCE(?tLabel, "") AS ?topicLabel)
}
"""

# One row per distinct (tool, topic, topicLabel) binding; topicLabel is "" when the topic has no rdfs:label.
dfToolTopicTransitive = sparqldataframe.query(endpointURL, prefixes+query)
dfToolTopicTransitive.head()

Unnamed: 0,tool,topic,topicLabel
0,https://bio.tools/pmirkb,http://edamontology.org/topic_0780,Plant biology
1,https://bio.tools/pmirkb,http://edamontology.org/topic_3070,Biology
2,https://bio.tools/pmirkb,http://edamontology.org/topic_0003,Topic
3,https://bio.tools/pmirkb,http://edamontology.org/topic_2885,DNA polymorphism
4,https://bio.tools/pmirkb,http://edamontology.org/topic_0199,Genetic variation


### List of operations directly assigned to tools

In [10]:
# Operations directly assigned to tools (sc:featureList), restricted to owl:Class
# operations that are neither owl:DeprecatedClass nor a direct subclass of it.
query = """
# tools operations

SELECT DISTINCT ?tool ?operation ?operationLabel
WHERE {
  ?tool rdf:type sc:SoftwareApplication .
  ?tool sc:featureList ?operation .
  ?operation rdf:type owl:Class . # for avoiding deprecated operations
  FILTER NOT EXISTS { ?operation rdfs:subClassOf? owl:DeprecatedClass }
  OPTIONAL { ?operation rdfs:label ?oLabel }
  BIND(COALESCE(?oLabel, "") AS ?operationLabel)
}
"""

# One row per distinct (tool, operation, operationLabel) binding; operationLabel is "" when there is no rdfs:label.
dfToolOperation = sparqldataframe.query(endpointURL, prefixes+query)
dfToolOperation.head()

Unnamed: 0,tool,operation,operationLabel
0,https://bio.tools/pmirkb,http://edamontology.org/operation_2428,Validation
1,https://bio.tools/pmirkb,http://edamontology.org/operation_0484,SNP detection
2,https://bio.tools/pmirkb,http://edamontology.org/operation_0308,PCR primer design
3,https://bio.tools/pmirkb,http://edamontology.org/operation_2409,Data handling
4,https://bio.tools/pmirkb,http://edamontology.org/operation_0463,miRNA target prediction


### List of operations assigned to tools, directly or by inheritance (ancestor operations)

In [11]:
# Operations assigned to tools either directly or through inheritance: the property path
# sc:featureList/(rdfs:subClassOf*) also reaches every ancestor of a direct operation.
query = """
# tools operations (and ancestors)

SELECT DISTINCT ?tool ?operation ?operationLabel
WHERE {
  ?tool rdf:type sc:SoftwareApplication .
  ?tool sc:featureList/(rdfs:subClassOf*) ?operation . # for avoiding deprecated operations and owl:Restriction
  ?operation rdf:type owl:Class .
  FILTER NOT EXISTS { ?operation rdfs:subClassOf? owl:DeprecatedClass }
  OPTIONAL { ?operation rdfs:label ?oLabel }
  BIND(COALESCE(?oLabel, "") AS ?operationLabel)
}
"""

# One row per distinct (tool, operation, operationLabel) binding; operationLabel is "" when there is no rdfs:label.
dfToolOperationTransitive = sparqldataframe.query(endpointURL, prefixes+query)
dfToolOperationTransitive.head()

Unnamed: 0,tool,operation,operationLabel
0,https://bio.tools/pmirkb,http://edamontology.org/operation_2428,Validation
1,https://bio.tools/pmirkb,http://edamontology.org/operation_0004,Operation
2,https://bio.tools/pmirkb,http://edamontology.org/operation_0484,SNP detection
3,https://bio.tools/pmirkb,http://edamontology.org/operation_3227,Variant calling
4,https://bio.tools/pmirkb,http://edamontology.org/operation_2478,Nucleic acid sequence analysis


## Create Dataframe for Figure 2 : dfTool with directly assigned annotations and inherited annotations

#### Create a dataframe with the number of directly assigned topics and operations per tool

In [12]:
# Number of directly assigned topics / operations per tool (one row per annotated tool).
dfToolNbTopics = dfToolTopic.groupby(by='tool').size().reset_index(name='nbTopics').sort_values(by="nbTopics", ascending=False)
dfToolNbOperations = dfToolOperation.groupby(by='tool').size().reset_index(name='nbOperations').sort_values(by="nbOperations", ascending=False)

# Start from the tool list already in memory instead of re-reading
# ../Dataframes/dfTool.tsv.bz2: that file is only produced by a commented-out line,
# so reading it breaks a fresh "Restart & Run All" and may not match the endpoint data.
# Selecting only the base columns keeps this cell idempotent when it is re-run.
dfTool_NT = (
    dfTool[['tool', 'toolLabel']]
    .join(dfToolNbTopics.set_index('tool'), on='tool')
    .join(dfToolNbOperations.set_index('tool'), on='tool')
    # Tools absent from the count tables have no annotation of that kind
    .fillna({'nbTopics': 0, 'nbOperations': 0})
    .astype({'nbTopics': 'int', 'nbOperations': 'int'})
)
# dfTool is kept as an alias of the result, as before.
dfTool = dfTool_NT
dfTool_NT.head()

Unnamed: 0,tool,toolLabel,nbTopics,nbOperations
0,https://bio.tools/pmirkb,PmiRKB,5,5
1,https://bio.tools/PCA-PAM50,PCA-PAM50,3,2
2,https://bio.tools/CEthreader,CEthreader,5,5
3,https://bio.tools/ipath,iPATH,2,0
4,https://bio.tools/nidm,NIDM,5,4


#### Create a dataframe with the number of topics and operations per tool, including inherited annotations

In [14]:
# Number of topics / operations per tool, ancestors included (one row per annotated tool).
dfToolNbTopics = dfToolTopicTransitive.groupby(by='tool').size().reset_index(name='nbTopics').sort_values(by="nbTopics", ascending=False)
dfToolNbOperations = dfToolOperationTransitive.groupby(by='tool').size().reset_index(name='nbOperations').sort_values(by="nbOperations", ascending=False)

# Start from the tool list already in memory instead of re-reading
# ../Dataframes/dfTool.tsv.bz2: that file is only produced by a commented-out line,
# so reading it breaks a fresh "Restart & Run All" and may not match the endpoint data.
# Selecting only the base columns drops counts added by a previous cell or a previous run.
dfTool_T = (
    dfTool[['tool', 'toolLabel']]
    .join(dfToolNbTopics.set_index('tool'), on='tool')
    .join(dfToolNbOperations.set_index('tool'), on='tool')
    # Tools absent from the count tables have no annotation of that kind
    .fillna({'nbTopics': 0, 'nbOperations': 0})
    .astype({'nbTopics': 'int', 'nbOperations': 'int'})
)
# dfTool is kept as an alias of the result, as before.
dfTool = dfTool_T
dfTool_T.head()

Unnamed: 0,tool,toolLabel,nbTopics,nbOperations
0,https://bio.tools/pmirkb,PmiRKB,14,18
1,https://bio.tools/PCA-PAM50,PCA-PAM50,7,11
2,https://bio.tools/CEthreader,CEthreader,12,23
3,https://bio.tools/ipath,iPATH,6,0
4,https://bio.tools/nidm,NIDM,12,12


## Create Dataframe for Figure 3 : dfToolallmetrics for inherited annotations

### Inherited Topics, metrics and compute of IC and entropy

In [15]:
# Number of distinct topics used, ancestors included.
nbTopicsTransitive = dfToolTopicTransitive['topic'].nunique()
print("Nb topics (incl. ancestors): {}".format(nbTopicsTransitive))

# Number of rows (tool annotations) per topic, most frequent first.
dfTopicNbTools = (
    dfToolTopicTransitive
    .groupby('topic')
    .size()
    .reset_index(name='nbTools')
    .sort_values('nbTools', ascending=False)
)

# One row per (topic, label) with its usage frequency among annotated tools,
# its information content (-log2 of the frequency) and frequency * IC.
dfTopic = (
    dfToolTopicTransitive[['topic', 'topicLabel']]
    .drop_duplicates()
    .reset_index(drop=True)
    .join(dfTopicNbTools.set_index('topic'), on='topic')
    .assign(
        frequence=lambda topics: topics['nbTools'] / nbToolsWithTopic,
        IC=lambda topics: -np.log2(topics['frequence']),
        entropy=lambda topics: topics['frequence'] * topics['IC'],
    )
)
dfTopic.head()

Nb topics (incl. ancestors): 258


Unnamed: 0,topic,topicLabel,nbTools,frequence,IC,entropy
0,http://edamontology.org/topic_0780,Plant biology,882,0.029781,5.069454,0.150974
1,http://edamontology.org/topic_3070,Biology,18273,0.616998,0.696663,0.42984
2,http://edamontology.org/topic_0003,Topic,29616,1.0,-0.0,-0.0
3,http://edamontology.org/topic_2885,DNA polymorphism,1202,0.040586,4.622868,0.187625
4,http://edamontology.org/topic_0199,Genetic variation,2907,0.098156,3.348774,0.328704


### Inherited Operations, metrics and compute of IC and entropy

In [16]:
# Number of distinct operations used, ancestors included.
nbOperationsTransitive = dfToolOperationTransitive['operation'].nunique()
print("Nb operations (incl. ancestors): {}".format(nbOperationsTransitive))

# Number of rows (tool annotations) per operation, most frequent first.
dfOperationNbTools = (
    dfToolOperationTransitive
    .groupby('operation')
    .size()
    .reset_index(name='nbTools')
    .sort_values('nbTools', ascending=False)
)

# One row per (operation, label) with its usage frequency among annotated tools,
# its information content (-log2 of the frequency) and frequency * IC.
dfOperation = (
    dfToolOperationTransitive[['operation', 'operationLabel']]
    .drop_duplicates()
    .reset_index(drop=True)
    .join(dfOperationNbTools.set_index('operation'), on='operation')
    .assign(
        frequence=lambda operations: operations['nbTools'] / nbToolsWithOperation,
        IC=lambda operations: -np.log2(operations['frequence']),
        entropy=lambda operations: operations['frequence'] * operations['IC'],
    )
)
dfOperation.head()

Nb operations (incl. ancestors): 527


Unnamed: 0,operation,operationLabel,nbTools,frequence,IC,entropy
0,http://edamontology.org/operation_2428,Validation,1307,0.046185,4.43642,0.204898
1,http://edamontology.org/operation_0004,Operation,28299,1.0,-0.0,-0.0
2,http://edamontology.org/operation_0484,SNP detection,330,0.011661,6.422141,0.07489
3,http://edamontology.org/operation_3227,Variant calling,1267,0.044772,4.481263,0.200635
4,http://edamontology.org/operation_2478,Nucleic acid sequence analysis,7821,0.27637,1.855326,0.512757


### Compute IC and entropy for tools with inherited annotations, creation of dfToolallmetrics

In [17]:
# Per-tool sums of IC and entropy over the tool's topics (ancestors included).
dfToolTopicMetrics = (
    dfToolTopicTransitive
    .join(dfTopic[['topic', 'IC', 'entropy']].set_index('topic'), on='topic')
    .groupby(by='tool')[['IC', 'entropy']]
    .sum()
    .rename(columns={'IC': 'topicScore', 'entropy': 'topicEntropy'})
)

# Per-tool sums of IC and entropy over the tool's operations (ancestors included).
dfToolOperationMetrics = (
    dfToolOperationTransitive
    .join(dfOperation[['operation', 'IC', 'entropy']].set_index('operation'), on='operation')
    .groupby(by='tool')[['IC', 'entropy']]
    .sum()
    .rename(columns={'IC': 'operationScore', 'entropy': 'operationEntropy'})
)

# Start from dfTool_T (inherited annotation counts) rather than from the mutable
# dfTool, so that re-running this cell does not fail on already-present columns.
dfToolallmetrics = (
    dfTool_T
    .join(dfToolTopicMetrics, on='tool')
    .join(dfToolOperationMetrics, on='tool')
    # Tools without any topic (resp. operation) get a score and an entropy of 0
    .fillna({'topicScore': 0, 'operationScore': 0, 'topicEntropy': 0, 'operationEntropy': 0})
    .assign(
        score=lambda tools: tools['topicScore'] + tools['operationScore'],
        entropy=lambda tools: tools['topicEntropy'] + tools['operationEntropy'],
    )
)
# Same column order as before: scores first, then entropies.
dfToolallmetrics = dfToolallmetrics[
    list(dfTool_T.columns)
    + ['topicScore', 'operationScore', 'score', 'topicEntropy', 'operationEntropy', 'entropy']
]
# dfTool is kept as an alias of the result, as before.
dfTool = dfToolallmetrics

#Can generate dfToolallmetrics.tsv.bz2 : 
#dfToolallmetrics.to_csv("../Dataframes/dfToolallmetrics.tsv.bz2", sep="\t", index=False)

dfToolallmetrics.head()


Unnamed: 0,tool,toolLabel,nbTopics,nbOperations,topicScore,operationScore,score,topicEntropy,operationEntropy,entropy
0,https://bio.tools/pmirkb,PmiRKB,14,18,35.012864,70.687577,105.700441,4.743019,4.103735,8.846754
1,https://bio.tools/PCA-PAM50,PCA-PAM50,7,11,19.41886,32.575948,51.994807,1.906146,3.104192,5.010339
2,https://bio.tools/CEthreader,CEthreader,12,23,37.561111,92.221264,129.782375,3.41299,5.632084,9.045074
3,https://bio.tools/ipath,iPATH,6,0,14.724993,0.0,14.724993,1.719522,0.0,1.719522
4,https://bio.tools/nidm,NIDM,12,12,41.797858,42.837473,84.635331,3.539804,2.743541,6.283344


### Explanation of calculated scores
- topicScore: sum of the IC (Information Content, i.e. -log2 of the usage frequency) of all the topics associated with a tool, inherited ancestors included.
- operationScore: sum of the IC of all the operations associated with a tool, inherited ancestors included.
- score: sum of topicScore and operationScore.
- topicEntropy: sum of the entropy terms (frequency × IC) of the topics associated with a tool.
- operationEntropy: sum of the entropy terms (frequency × IC) of the operations associated with a tool.
- entropy: sum of topicEntropy and operationEntropy.

## Redundancy of tools 

### Topic redundancy

In [18]:
# Counts the distinct tools carrying a redundant topic annotation: two directly assigned,
# non-deprecated topics where one (?redundantDirectTopic) is a strict ancestor
# (rdfs:subClassOf+) of the other (?directTopic).
# The label variables are bound but not projected by the SELECT clause.
redundancyQuery = """
SELECT (COUNT(DISTINCT ?tool) AS ?nbToolsWithRedundantTopics)
WHERE {
  ?tool sc:applicationSubCategory ?redundantDirectTopic .
  ?redundantDirectTopic rdf:type owl:Class .
  FILTER NOT EXISTS { ?redundantDirectTopic rdfs:subClassOf? owl:DeprecatedClass }
  
  ?tool sc:applicationSubCategory ?directTopic .
  ?directTopic rdf:type owl:Class .
  FILTER NOT EXISTS { ?directTopic rdfs:subClassOf? owl:DeprecatedClass }
  
  ?directTopic rdfs:subClassOf+ ?redundantDirectTopic .
  
  OPTIONAL { ?directTopic rdfs:label ?tLabel }
  BIND(COALESCE(?tLabel, "") AS ?directTopicLabel)
  
  OPTIONAL { ?redundantDirectTopic rdfs:label ?rtLabel }
  BIND(COALESCE(?rtLabel, "") AS ?redundantDirectTopicLabel)
}
#LIMIT 10
"""

sparql = SPARQLWrapper(endpointURL)
sparql.setQuery(prefixes+redundancyQuery)
sparql.setReturnFormat(JSON)
results = sparql.query().convert()
# Single aggregated row; the count is displayed as the string returned by the endpoint.
results["results"]["bindings"][0]["nbToolsWithRedundantTopics"]["value"]

'3405'

### Operation redundancy

In [19]:
# Counts the distinct tools carrying a redundant operation annotation: two directly assigned,
# non-deprecated operations where one (?redundantDirectOperation) is a strict ancestor
# (rdfs:subClassOf+) of the other (?directOperation).
# The label variables are bound but not projected by the SELECT clause.
redundancyQuery = """
SELECT (COUNT(DISTINCT ?tool) AS ?nbToolsWithRedundantOperations)
WHERE {
  ?tool sc:featureList ?redundantDirectOperation .
  ?redundantDirectOperation rdf:type owl:Class .
  FILTER NOT EXISTS { ?redundantDirectOperation rdfs:subClassOf? owl:DeprecatedClass }
  
  ?tool sc:featureList ?directOperation .
  ?directOperation rdf:type owl:Class .
  FILTER NOT EXISTS { ?directOperation rdfs:subClassOf? owl:DeprecatedClass }
  
  ?directOperation rdfs:subClassOf+ ?redundantDirectOperation .
  
  OPTIONAL { ?directOperation rdfs:label ?dLabel }
  BIND(COALESCE(?dLabel, "") AS ?directOperationLabel)
  
  OPTIONAL { ?redundantDirectOperation rdfs:label ?rdLabel }
  BIND(COALESCE(?rdLabel, "") AS ?redundantDirectOperationLabel)
}
"""

sparql = SPARQLWrapper(endpointURL)
sparql.setQuery(prefixes+redundancyQuery)
sparql.setReturnFormat(JSON)
results = sparql.query().convert()
# Single aggregated row; the count is displayed as the string returned by the endpoint.
results["results"]["bindings"][0]["nbToolsWithRedundantOperations"]["value"]

'2055'

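3405 of the 30 025 tools have a redundant topic annotation (a topic that is an ancestor of another topic assigned to the same tool).

2055 of the 30 025 tools have a redundant operation annotation (an operation that is an ancestor of another operation assigned to the same tool).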

## Deprecated annotations

In [25]:
# Counts the distinct items flagged as deprecated in the endpoint, i.e. items that are a
# direct subclass of owl:DeprecatedClass or that carry owl:deprecated with the boolean
# true or one of the plain strings "true" / "True".
# NOTE(review): ORDER BY ?deprecatedItem applies to a single aggregated row and looks like
# a leftover of the commented-out "SELECT DISTINCT ?deprecatedItem" variant - confirm.
query = """
# deprecated items

SELECT (COUNT(DISTINCT ?deprecatedItem) AS ?nbDeprecatedItem) #?topic
#SELECT DISTINCT ?deprecatedItem
WHERE {
  { ?deprecatedItem rdfs:subClassOf owl:DeprecatedClass }
  UNION
  { ?deprecatedItem owl:deprecated true . }
  UNION
  { ?deprecatedItem owl:deprecated "true" . }
  UNION 
  { ?deprecatedItem owl:deprecated "True" . }
  
}
ORDER BY ?deprecatedItem
"""

# Disabled alternative: load the result as a DataFrame instead of rendering an HTML table.
#df = sparqldataframe.query(endpointURL, prefixes+query)
#df

sparql = SPARQLWrapper(endpointURL)
sparql.setQuery(prefixes+query)
sparql.setReturnFormat(JSON)
results = sparql.query().convert()
# Render the converted result with the notebook's displaySparqlResults helper.
displaySparqlResults(results)

nbDeprecatedItem
1114
