## Overview of the Expansion/Reduction process

In [None]:
# set logging level (suppress INFO and DEBUG)
import logging
# Called without a name, getLogger() hands back the root logger.
logger = logging.getLogger()
# Only records at ERROR severity or above are let through from here on.
logger.setLevel(level=logging.ERROR)

In [None]:
from ipyradiant import FileManager, PathLoader
import rdflib

# File manager whose loader is pointed at the relative "data" directory.
lw = FileManager(loader=PathLoader(path="data"))
# here we hard set what we want the file to be, but ideally a user can choose a file to work with.
# NOTE(review): presumably selecting the file is what populates `lw.graph`, so this
# assignment has to happen before `lw.graph` is read below -- confirm against ipyradiant.
lw.loader.file_picker.value = lw.loader.file_picker.options["starwars.ttl"]
# NOTE: `rdf_graph` is re-bound to an empty graph in the "Tracking Graphs" cell further
# down, so this binding only holds until that cell runs.
rdf_graph = lw.graph
# CONSTRUCT every triple whose subject is listed in the VALUES clause. Only human:1 is
# active; the remaining candidates are commented out inside the query text itself.
qres = lw.graph.query(
    """
    PREFIX planet: <https://swapi.co/resource/planet/>
    PREFIX human: <https://swapi.co/resource/human/>
    PREFIX film: <https://swapi.co/resource/film/>
    
    CONSTRUCT {
        ?s ?p ?o .
    }
    WHERE {
        ?s ?p ?o .
        
        VALUES (?s) {
            (human:1)
#            (human:4)
#            (human:5)
#            (film:1)
#            (film:4)
#            (planet:1)
#            (planet:2)
        }
    }
    """
)

# Round-trip the CONSTRUCT result through RDF/XML to obtain a standalone graph.
# The format is passed to parse() explicitly: the default used when no format is given
# differs between rdflib releases (RDF/XML in 5.x, Turtle in 6.x), and relying on it
# makes the XML payload fail to parse on newer versions.
simple_graph = rdflib.graph.Graph().parse(
    data=qres.serialize(format="xml"), format="xml"
)

In [None]:
import ipycytoscape as cyto
import ipywidgets as W
import traitlets as T
import rdflib

from typing import List
from IPython.display import display, JSON
from pandas import DataFrame
from ipyradiant.query.api import SPARQLQueryFramer, build_values
from ipyradiant.visualization.improved_cytoscape import CytoscapeViewer
from ipyradiant.visualization.explore.interactive_exploration import add_cyto_class, remove_cyto_class
from ipyradiant.visualization.explore import GraphExploreSelectMultiple
from ipyradiant.rdf2nx import RDF2NX

#### Tracking Graphs
The visualization widgets will have variables for the rdf and networkx graphs. These should be more adept at managing information, but for demonstration, we will keep our own instances and update them. 

In [None]:
# Start tracking from an empty RDF graph; the expansion/reduction cells re-bind this name.
rdf_graph = rdflib.Graph()
# No `nx_graph` is tracked here for now: the networkx graph is built inside the
# visualization by the RDF2NX converter, which lets the handling of annotation
# properties (e.g. rdfs:label) be standardized in one place.

#### Empty Visualization
We will populate **new** instances of the visualization as we update the graphs. 

> Note: the current version of the ipycytoscape library prevents us from updating within the same instance. As the stability of the library improves we can fold in this capability as a set of widget observer methods.

In [None]:
# Create a viewer without assigning any graph to it.
cv0 = CytoscapeViewer()
# A bare name as the last expression of the cell displays the widget.
cv0

#### JSON Visualization
Often, we want to inspect networkx (LPG) nodes for their data attributes. Custom visualization classes have this capability built into the widgets, but for now we will define an external class for this capability.

In [None]:
class JSONInspector(W.VBox):
    """Vertical box pairing an interactive graph widget with a JSON view of a clicked node.

    Parameters
    ----------
    *args, **kwargs
        Forwarded to ``ipywidgets.VBox``. Any ``children`` passed in are replaced by
        ``[interactive_widget, json_output]``.
    interactive_widget
        Required keyword argument. Must expose a ``cytoscape_widget`` attribute with an
        ``on(kind, event, callback)`` method; node clicks on it drive the JSON view.

    Raises
    ------
    KeyError
        If ``interactive_widget`` is not supplied.
    """

    # Keys stripped from a node's data before it is rendered.
    _HIDDEN_KEYS = ("_label", "_attrs")

    def __init__(self, *args, **kwargs):
        # Take our own keyword out before delegating: ``interactive_widget`` is not a
        # trait of VBox, and forwarding unknown keywords to the traitlets constructor
        # is deprecated (it currently surfaces as a DeprecationWarning).
        interactive_widget = kwargs.pop("interactive_widget")
        super().__init__(*args, **kwargs)
        self.json_output = W.Output()
        self.interactive_widget = interactive_widget
        self.children = [self.interactive_widget, self.json_output]
        self.interactive_widget.cytoscape_widget.on("node", "click", self.load_json)

    def load_json(self, node):
        """Render the clicked node's data (minus internal keys) in the output area.

        ``node`` is the click event payload; only ``node["data"]`` is read. A filtered
        copy is displayed so the payload handed to us is left untouched.
        """
        data = {
            key: value
            for key, value in node["data"].items()
            if key not in self._HIDDEN_KEYS
        }
        with self.json_output:
            # Replace whatever was shown for the previously clicked node.
            self.json_output.clear_output()
            display(JSON(data))

### Expand/Reduce
The goal is to allow a graph (both RDF and its networkx representation) to be expanded/reduced based on the addition of one or more nodes. 

#### Expansion
To expand the graph, we want to query the RDF graph for all desired nodes and return all triples with the desired nodes as the `subject`. For the LPG representation, we also want to query and return all data associated with any nodes in the `object` position (only `Literal` data).

> Note, we use the same query and process to populate the initial set of nodes to display

In [None]:
class ConstructExpansionMeta(type):
    """Metaclass exposing the expansion CONSTRUCT query as a class-level ``sparql`` property.

    The query text is rebuilt from ``_sparql`` and the current ``values`` on every
    access, so assigning a new ``values`` mapping on a class that uses this metaclass
    changes the query that class reports.
    """

    # Query template passed to ``build_values``. It keeps every triple whose subject is
    # ``?iri`` and, optionally, for objects that have an rdf:type, that type together
    # with the object's literal-valued properties.
    # NOTE(review): the doubled braces and the two ``{}`` slots in the VALUES clause look
    # like str.format-style placeholders filled in by ``build_values`` -- confirm
    # against the ipyradiant implementation.
    _sparql = """
        CONSTRUCT {{
            ?iri ?predicate ?object .
        
            ?object a ?type;
                ?secondary_predicate ?secondary_object .
        }}
        WHERE {{
            ?iri ?predicate ?object .
            
            OPTIONAL {{
                ?object a ?type ;
                    ?secondary_predicate ?secondary_object .
                
                FILTER (isLiteral(?secondary_object))
            }}

            VALUES ({}) {{
                {}
            }}
        }}
    """
    # Fallback when a class using this metaclass has not been given any values.
    values = None

    @property
    def sparql(cls):
        """Return the result of ``build_values`` for the template and the current ``cls.values``."""
        return build_values(cls._sparql, cls.values)


class ConstructExpansion(SPARQLQueryFramer, metaclass=ConstructExpansionMeta):
    """Expansion query class; assign ``values`` on the class before calling ``run_query``."""

    # Mapping of query variable name to a list of terms, e.g. {"iri": [URIRef(...)]};
    # None until a cell assigns it.
    values = None

In [None]:
# set values (manually here, but ideally via widget/interaction)
# Seed subject for the first expansion (hard-coded here; a widget/interaction would
# normally supply it).
initial_subject = rdflib.term.URIRef("https://swapi.co/resource/human/1")
ConstructExpansion.values = {"iri": [initial_subject]}

# Run the expansion query against the full source graph and preview the result.
qres = ConstructExpansion.run_query(lw.graph)
qres.head()

The basic manipulation of the tracking graphs.

In [None]:
# update the rdf graph
# note, we do this by adding a newly created graph
# Gather the query rows (one triple per row) into a fresh graph ...
addition_graph = rdflib.graph.Graph()
for subj, pred, obj in qres.values:
    addition_graph.add((subj, pred, obj))

# ... and merge that graph into the tracking graph.
rdf_graph = rdf_graph + addition_graph

In [None]:
# new visualization instance
# note: you can click the nodes to inspect their data
# Fresh viewer showing the current state of the tracking graph.
cv1 = CytoscapeViewer()
cv1.graph = rdf_graph

# Wrap it so that clicking a node shows that node's data underneath the graph.
inspector1 = JSONInspector(interactive_widget=cv1)
inspector1

In [None]:
# Expand the graph (i.e. user double-clicked "imperial speeder bike")
# Expand the graph as if the user had double-clicked the vehicle/14 node.
# NOTE(review): this node was originally described as "imperial speeder bike", but the
# notes after the next visualization refer to `Snowspeeder` -- confirm the label.
subject_to_expand = rdflib.term.URIRef("https://swapi.co/resource/vehicle/14")

# Point the expansion query at that subject (manually here; ideally via widget/interaction).
ConstructExpansion.values = {"iri": [subject_to_expand]}
qres = ConstructExpansion.run_query(lw.graph)

# Gather the returned triples into a fresh graph ...
addition_graph = rdflib.graph.Graph()
for subj, pred, obj in qres.values:
    addition_graph.add((subj, pred, obj))

# ... and merge that graph into the tracking graph.
rdf_graph = rdf_graph + addition_graph

In [None]:
# new visualization instance
# note: you can click the nodes to inspect their data
# Fresh viewer (cola layout) showing the tracking graph after the expansion.
cv2 = CytoscapeViewer(cyto_layout="cola")
cv2.graph = rdf_graph

# Wrap it so that clicking a node shows that node's data underneath the graph.
inspector2 = JSONInspector(interactive_widget=cv2)
inspector2

> Note 1: Now we have a bi-directional edge between `Snowspeeder` and `Luke Skywalker`. This edge existed before, but we only visualize outgoing edges from expanded nodes in order to simplify the visualization.

> Note 2: In order to determine proper object nodes from an expanded subject, their `rdf:type` is included in the expansion. This means the `objects` of any expanded node triples will also have an outgoing edge to their `rdf:type` if it existed in the graph prior to expansion. You can see this in the graph above via the connection from `Wedge Antilles` to `Human`. 

In [None]:
# lets expand once more on `Wedge Antilles`
# Let's expand once more, this time on `Wedge Antilles`.
subject_to_expand = rdflib.term.URIRef("https://swapi.co/resource/human/18")

# Point the expansion query at that subject (manually here; ideally via widget/interaction).
ConstructExpansion.values = {"iri": [subject_to_expand]}
qres = ConstructExpansion.run_query(lw.graph)

# Gather the returned triples into a fresh graph ...
addition_graph = rdflib.graph.Graph()
for subj, pred, obj in qres.values:
    addition_graph.add((subj, pred, obj))

# ... and merge that graph into the tracking graph.
rdf_graph = rdf_graph + addition_graph

In [None]:
# new visualization instance
# note: you can click the nodes to inspect their data
# Fresh viewer (cola layout) showing the tracking graph after the second expansion.
cv3 = CytoscapeViewer(cyto_layout="cola")
cv3.graph = rdf_graph

# Wrap it so that clicking a node shows that node's data underneath the graph.
inspector3 = JSONInspector(interactive_widget=cv3)
inspector3

#### Reduction
Reducing the graph is a bit more complex and opinionated. While we want the simplicity of removing nodes from the `ipycytoscape.CytoscapeWidget.Graph` because it will clean up all adjacent nodes/edges, we can't leverage this capability currently. We need the state of our RDF graph in order to perform an expansion after a reduction (remember, we add to the `rdf_graph`, which means we need it to be current). 

Therefore, we must make the reduction on the `rdf_graph` and let our transformation process recreate a cytoscape graph.

As a narrative story point, imagine we see the expansion of `Wedge Antilles` and decide that the expansion did not provide any valuable information; rather, it unnecessarily cluttered our visualization. We can reduce the graph by removing the node and any outgoing edges. 

In [None]:
class ConstructReductionMeta(type):
    """Metaclass exposing the reduction CONSTRUCT query as a class-level ``sparql`` property.

    The query text is rebuilt from ``_sparql`` and the current ``values`` on every
    access, so assigning a new ``values`` mapping on a class that uses this metaclass
    changes the query that class reports.
    """

    # Query template passed to ``build_values``. It selects every triple whose subject
    # is ``?iri``; unlike the expansion query it pulls in nothing about the objects.
    # NOTE(review): the doubled braces and the two ``{}`` slots in the VALUES clause look
    # like str.format-style placeholders filled in by ``build_values`` -- confirm
    # against the ipyradiant implementation.
    _sparql = """
        CONSTRUCT {{
            ?iri ?predicate ?object .
        }}
        WHERE {{
            ?iri ?predicate ?object .

            VALUES ({}) {{
                {}
            }}
        }}
    """
    # Fallback when a class using this metaclass has not been given any values.
    values = None

    @property
    def sparql(cls):
        """Return the result of ``build_values`` for the template and the current ``cls.values``."""
        return build_values(cls._sparql, cls.values)


class ConstructReduction(SPARQLQueryFramer, metaclass=ConstructReductionMeta):
    """Reduction query class; assign ``values`` on the class before calling ``run_query``."""

    # Mapping of query variable name to a list of terms, e.g. {"iri": [URIRef(...)]};
    # None until a cell assigns it.
    values = None

In [None]:
# lets reduce the graph on `Wedge Antilles`
# Let's reduce the graph on `Wedge Antilles`.
subject_to_reduce = rdflib.term.URIRef("https://swapi.co/resource/human/18")

# Point the reduction query at that subject (manually here; ideally via widget/interaction).
# The query runs against the full source graph, not the tracking graph.
ConstructReduction.values = {"iri": [subject_to_reduce]}
qres = ConstructReduction.run_query(lw.graph)

# Gather the triples to drop into a fresh graph ...
subtraction_graph = rdflib.graph.Graph()
for subj, pred, obj in qres.values:
    subtraction_graph.add((subj, pred, obj))

# ... and subtract that graph from the tracking graph.
rdf_graph = rdf_graph - subtraction_graph

In [None]:
# new visualization instance
# note: you can click the nodes to inspect their data
# Fresh viewer (cola layout) showing the tracking graph after the reduction.
cv4 = CytoscapeViewer(cyto_layout="cola")
cv4.graph = rdf_graph

# Wrap it so that clicking a node shows that node's data underneath the graph.
inspector4 = JSONInspector(interactive_widget=cv4)
inspector4

> Note 1: The reduction only removed the subject node and its edges; it does not remove other subjects that were created as part of the process (e.g. `Corellia`). We would have to run another reduction on this node to clean up. 

TODO could we implement a `clear_disconnected_nodes` somehow? (would probably have to be on the networkx/ipycytoscape side)
TODO this brings back up the need for a cached `undo` button that stores the previous ("n"?) states and can undo the expansions more completely.

> Note 2: The RDF graph also has some loose edges floating around. We removed the `Wedge Antilles` node, but there was an edge going from `Snowspeeder`->`Wedge Antilles`. This edge is still in the `rdf_graph` (as well as several other floating edges), but it is pruned during the conversion to the `nx_graph` internal to the visualization.