Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Model operations performance improvements #347

Merged
merged 10 commits into from Nov 25, 2022
Expand Up @@ -24,6 +24,10 @@ public class BlankNodeFilteringGraph implements Graph {

private Graph graph;

/**
 * Exposes the graph held by this instance.
 *
 * @return the graph stored in the {@code graph} field
 */
public Graph getInnerGraph() {
    return this.graph;
}

/**
 * Stores the supplied graph in the {@code graph} field; no copy is made and
 * the argument is not validated.
 *
 * @param graph the graph to hold
 */
public BlankNodeFilteringGraph(Graph graph) {
this.graph = graph;
}
Expand Down
Expand Up @@ -103,7 +103,7 @@ public Model openModel(String name, boolean strict) {
/**
 * Empties the model registered under {@code name}.
 *
 * <p>Only the statements of the model returned by {@code getModel(name)} are
 * removed; nothing in this method unregisters the model itself.
 *
 * @param name the name passed to {@code getModel} to look the model up
 */
@Override
public void removeModel(String name) {
    Model m = getModel(name);
    // A single removeAll() empties the model; the former wildcard
    // removeAll(null, null, null) that preceded it cleared the same
    // statements a second time.
    m.removeAll();
}

@Override
Expand Down
@@ -0,0 +1,17 @@
package edu.cornell.mannlib.vitro.webapp.rdfservice.adapters;

import org.apache.jena.graph.Triple;
import org.apache.jena.mem.GraphMem;

/**
 * {@link GraphMem} subclass exposing add/delete entry points that call the
 * inherited {@code checkOpen()} and {@code performAdd}/{@code performDelete}
 * hooks directly.
 *
 * <p>As the method names indicate, these are meant for bulk callers that send
 * a single notification for the whole batch themselves; the bodies below issue
 * no listener notification of their own.
 */
public class BulkGraphMem extends GraphMem {

    /**
     * Adds one triple via {@code performAdd} after verifying the graph is open.
     *
     * @param t the triple to add
     */
    public void addWithoutNotify(Triple t) {
        this.checkOpen();
        this.performAdd(t);
    }

    /**
     * Deletes one triple via {@code performDelete} after verifying the graph is open.
     *
     * @param t the triple to delete
     */
    public final void deleteWithoutNotify(Triple t) {
        this.checkOpen();
        this.performDelete(t);
    }
}
@@ -0,0 +1,34 @@
package edu.cornell.mannlib.vitro.webapp.rdfservice.adapters;

import org.apache.jena.graph.Graph;
import org.apache.jena.rdf.model.Model;
import org.apache.jena.rdf.model.impl.ModelCom;

/**
 * {@link ModelCom} whose whole-model {@link #add(Model)} and
 * {@link #remove(Model)} are routed through {@link GraphUtils} when the
 * backing graph is a {@link BulkGraphMem}.
 */
public class BulkModelCom extends ModelCom {

    /**
     * @param graph the graph handed to the {@link ModelCom} constructor
     */
    public BulkModelCom(Graph graph) {
        super(graph);
    }

    /**
     * Removes the statements of {@code m} from this model.
     *
     * <p>The backing graph is first passed through
     * {@link GraphUtils#unwrapUnionGraphs(Graph)}. If the result is a
     * {@link BulkGraphMem} the deletion is done by
     * {@code GraphUtils.deleteFrom}; any other graph type is handled by the
     * superclass implementation.
     *
     * @param m the model whose statements are removed
     * @return this model
     */
    @Override
    public Model remove(Model m) {
        Graph target = GraphUtils.unwrapUnionGraphs(graph);
        if (!(target instanceof BulkGraphMem)) {
            super.remove(m);
            return this;
        }
        GraphUtils.deleteFrom((BulkGraphMem) target, m.getGraph());
        return this;
    }

    /**
     * Adds the statements of {@code m} to this model.
     *
     * <p>Uses {@code GraphUtils.addInto} when the unwrapped backing graph is a
     * {@link BulkGraphMem}; otherwise defers to the superclass implementation.
     *
     * @param m the model whose statements are added
     * @return this model
     */
    @Override
    public Model add(Model m) {
        Graph target = GraphUtils.unwrapUnionGraphs(graph);
        if (!(target instanceof BulkGraphMem)) {
            super.add(m);
            return this;
        }
        GraphUtils.addInto((BulkGraphMem) target, m.getGraph());
        return this;
    }
}
@@ -0,0 +1,39 @@
package edu.cornell.mannlib.vitro.webapp.rdfservice.adapters;

import org.apache.jena.graph.Graph;
import org.apache.jena.ontology.OntModelSpec;
import org.apache.jena.ontology.impl.OntModelImpl;
import org.apache.jena.rdf.model.Model;

/**
 * {@link OntModelImpl} whose whole-model {@link #add(Model)} and
 * {@link #remove(Model)} are routed through {@link GraphUtils} when the
 * backing graph is a {@link BulkGraphMem}.
 */
public class BulkOntModelImpl extends OntModelImpl {

    /**
     * @param spec the specification handed to the {@link OntModelImpl} constructor
     */
    public BulkOntModelImpl(OntModelSpec spec) {
        super(spec);
    }

    /**
     * @param owlMem the specification handed to the {@link OntModelImpl} constructor
     * @param bareModel the model handed to the {@link OntModelImpl} constructor
     */
    public BulkOntModelImpl(OntModelSpec owlMem, Model bareModel) {
        super(owlMem, bareModel);
    }

    /**
     * Removes the statements of {@code m} from this model.
     *
     * <p>The {@code graph} field is first passed through
     * {@link GraphUtils#unwrapUnionGraphs(Graph)}. If the result is a
     * {@link BulkGraphMem} the deletion is done by
     * {@code GraphUtils.deleteFrom}; any other graph type is handled by the
     * superclass implementation.
     *
     * @param m the model whose statements are removed
     * @return this model
     */
    @Override
    public Model remove(Model m) {
        Graph target = GraphUtils.unwrapUnionGraphs(graph);
        boolean bulkCapable = target instanceof BulkGraphMem;
        if (bulkCapable) {
            GraphUtils.deleteFrom((BulkGraphMem) target, m.getGraph());
            return this;
        }
        super.remove(m);
        return this;
    }

    /**
     * Adds the statements of {@code m} to this model.
     *
     * <p>Uses {@code GraphUtils.addInto} when the unwrapped {@code graph} field
     * is a {@link BulkGraphMem}; otherwise defers to the superclass
     * implementation.
     *
     * @param m the model whose statements are added
     * @return this model
     */
    @Override
    public Model add(Model m) {
        Graph target = GraphUtils.unwrapUnionGraphs(graph);
        boolean bulkCapable = target instanceof BulkGraphMem;
        if (bulkCapable) {
            GraphUtils.addInto((BulkGraphMem) target, m.getGraph());
            return this;
        }
        super.add(m);
        return this;
    }
}
Expand Up @@ -4,6 +4,8 @@

import edu.cornell.mannlib.vitro.webapp.dao.jena.RDFServiceGraph;
import edu.cornell.mannlib.vitro.webapp.dao.jena.SparqlGraph;
import edu.cornell.mannlib.vitro.webapp.rdfservice.adapters.VitroModelFactory.BulkUpdatingUnion;

import org.apache.jena.graph.Graph;
import org.apache.jena.rdf.model.Model;
import org.apache.jena.rdf.model.ModelFactory;
Expand All @@ -22,30 +24,20 @@

public class BulkUpdatingModel extends AbstractModelDecorator {
private static final RDFReaderF readerFactory = new RDFReaderFImpl();
private AbstractBulkUpdater updater;
protected AbstractBulkUpdater updater;

protected BulkUpdatingModel(Model m) {
public BulkUpdatingModel(Model m) {
super(m);
if (m instanceof BulkUpdatingModel) {
Graph graph = GraphUtils.unwrapUnionGraphs(m.getGraph());
if(graph instanceof BulkUpdatingUnion){
updater = new RDFServiceBulkUnionUpdater((BulkUpdatingUnion) graph);
return;
}
if (m instanceof BulkUpdatingOntModel) {
this.updater = ((BulkUpdatingOntModel) m).updater;
} else if (m instanceof BulkUpdatingModel) {
this.updater = ((BulkUpdatingModel) m).updater;
} else {
Graph graph = GraphUtils.unwrapUnionGraphs(m.getGraph());
if (graph instanceof RDFServiceGraph) {
updater = new RDFServiceBulkUpdater((RDFServiceGraph) graph);
} else if (graph instanceof SparqlGraph) {
updater = new SparqlBulkUpdater((SparqlGraph) graph);
} else {
updater = null;
}
}
}

protected BulkUpdatingModel(Model m, Model baseModel) {
super(m);
if (baseModel instanceof BulkUpdatingModel) {
this.updater = ((BulkUpdatingModel) baseModel).updater;
} else {
Graph graph = GraphUtils.unwrapUnionGraphs(baseModel.getGraph());
if (graph instanceof RDFServiceGraph) {
updater = new RDFServiceBulkUpdater((RDFServiceGraph) graph);
} else if (graph instanceof SparqlGraph) {
Expand Down
Expand Up @@ -4,6 +4,8 @@

import edu.cornell.mannlib.vitro.webapp.dao.jena.RDFServiceGraph;
import edu.cornell.mannlib.vitro.webapp.dao.jena.SparqlGraph;
import edu.cornell.mannlib.vitro.webapp.rdfservice.adapters.VitroModelFactory.BulkUpdatingUnion;

import org.apache.jena.graph.Graph;
import org.apache.jena.ontology.OntModel;
import org.apache.jena.rdf.model.Model;
Expand All @@ -23,30 +25,20 @@

public class BulkUpdatingOntModel extends AbstractOntModelDecorator {
private static final RDFReaderF readerFactory = new RDFReaderFImpl();
private AbstractBulkUpdater updater;
protected AbstractBulkUpdater updater;

protected BulkUpdatingOntModel(OntModel m) {
super(m);
Graph graph = GraphUtils.unwrapUnionGraphs(m.getGraph());
if(graph instanceof BulkUpdatingUnion){
updater = new RDFServiceBulkUnionUpdater((BulkUpdatingUnion) graph);
return;
}
if (m instanceof BulkUpdatingOntModel) {
this.updater = ((BulkUpdatingOntModel) m).updater;
} else if (m instanceof BulkUpdatingModel) {
this.updater = ((BulkUpdatingModel) m).updater;
} else {
Graph graph = GraphUtils.unwrapUnionGraphs(m.getGraph());
if (graph instanceof RDFServiceGraph) {
updater = new RDFServiceBulkUpdater((RDFServiceGraph) graph);
} else if (graph instanceof SparqlGraph) {
updater = new SparqlBulkUpdater((SparqlGraph) graph);
} else {
updater = null;
}
}
}

protected BulkUpdatingOntModel(OntModel m, OntModel baseModel) {
super(m);
if (baseModel instanceof BulkUpdatingOntModel) {
this.updater = ((BulkUpdatingOntModel) baseModel).updater;
} else {
Graph graph = GraphUtils.unwrapUnionGraphs(baseModel.getGraph());
if (graph instanceof RDFServiceGraph) {
updater = new RDFServiceBulkUpdater((RDFServiceGraph) graph);
} else if (graph instanceof SparqlGraph) {
Expand Down
Expand Up @@ -2,20 +2,128 @@

package edu.cornell.mannlib.vitro.webapp.rdfservice.adapters;

import org.apache.jena.atlas.iterator.Iter;
import org.apache.jena.graph.Graph;
import org.apache.jena.graph.Triple;
import org.apache.jena.graph.compose.MultiUnion;
import org.apache.jena.util.IteratorCollection;
import org.apache.jena.util.iterator.ExtendedIterator;

import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;

/**
 * Static helpers for unwrapping union graphs and for adding or deleting the
 * whole content of one graph to/from a {@link BulkGraphMem}.
 *
 * <p>The bulk operations modify the target through
 * {@code addWithoutNotify}/{@code deleteWithoutNotify} and then send a single
 * graph-level notification to the target's event manager.
 */
public final class GraphUtils {

    private static final int CMP_GREATER = 1;
    private static final int CMP_EQUAL = 0;
    private static final int CMP_LESS = -1;

    /** Sources with at most this many triples are always deleted by iterating the source. */
    private static final int MIN_SRC_SIZE = 1000;

    // If source and destination are large, limit the search for the best way round
    // to "deleteFrom"
    private static final int DST_SRC_RATIO = 2;

    /**
     * Returns the base graph of a {@link MultiUnion} that has no sub-graphs.
     *
     * @param graph the graph to inspect; may be {@code null}
     * @return the union's base graph when {@code graph} is a {@link MultiUnion}
     *         whose sub-graph list is {@code null} or empty; otherwise
     *         {@code graph} itself (including {@code null})
     */
    public static Graph unwrapUnionGraphs(Graph graph) {
        // instanceof is false for null, so no separate null check is needed.
        if (graph instanceof MultiUnion) {
            MultiUnion union = (MultiUnion) graph;
            List<Graph> subGraphs = union.getSubGraphs();
            if (subGraphs == null || subGraphs.isEmpty()) {
                return union.getBaseGraph();
            }
        }
        return graph;
    }

    /**
     * Deletes the triples of {@code srcGraph} from {@code bulkGraphMem}.
     *
     * <p>If both arguments are the same object and nothing is listening on the
     * target's event manager, the target is simply cleared. Otherwise the
     * deletion iterates over whichever side
     * {@link #decideHowtoExecuteBySizeStep} selects and finishes with one
     * {@code notifyDeleteGraph} call.
     *
     * @param bulkGraphMem the graph to delete from
     * @param srcGraph the graph whose triples are to be deleted
     */
    public static void deleteFrom(BulkGraphMem bulkGraphMem, Graph srcGraph) {
        boolean events = bulkGraphMem.getEventManager().listening();
        if (bulkGraphMem == srcGraph && !events) {
            bulkGraphMem.clear();
            return;
        }
        if (decideHowtoExecuteBySizeStep(bulkGraphMem, srcGraph)) {
            deleteLoopSrc(bulkGraphMem, srcGraph);
        } else {
            deleteLoopDst(bulkGraphMem, srcGraph);
        }
    }

    /**
     * Adds the triples and namespace prefixes of {@code srcGraph} to
     * {@code bulkGraphMem}, then sends one {@code notifyAddGraph} call.
     *
     * <p>Does nothing when both arguments are the same object and nothing is
     * listening on the target's event manager.
     *
     * @param bulkGraphMem the graph to add into
     * @param srcGraph the graph whose content is added
     */
    public static void addInto(BulkGraphMem bulkGraphMem, Graph srcGraph) {
        if (bulkGraphMem == srcGraph && !bulkGraphMem.getEventManager().listening()) {
            return;
        }
        bulkGraphMem.getPrefixMapping().setNsPrefixes(srcGraph.getPrefixMapping());
        addIteratorWorker(bulkGraphMem, findAll(srcGraph));
        bulkGraphMem.getEventManager().notifyAddGraph(bulkGraphMem, srcGraph);
    }

    /** Returns an iterator over every triple of {@code g}. */
    private static ExtendedIterator<Triple> findAll(Graph g) {
        return g.find();
    }

    /**
     * Copies the iterator into a list before adding, so the source is fully
     * read before the target is modified (source and target may be the same graph).
     */
    private static void addIteratorWorker(BulkGraphMem bulkGraphMem, Iterator<Triple> it) {
        List<Triple> snapshot = IteratorCollection.iteratorToList(it);
        addIteratorWorkerDirect(bulkGraphMem, snapshot.iterator());
    }

    /** Adds every remaining triple of {@code it} via {@code addWithoutNotify}. */
    private static void addIteratorWorkerDirect(BulkGraphMem bulkGraphMem, Iterator<Triple> it) {
        it.forEachRemaining(bulkGraphMem::addWithoutNotify);
    }

    /** Deletes by walking the source graph, then notifies once. */
    private static void deleteLoopSrc(BulkGraphMem bulkGraphMem, Graph srcGraph) {
        deleteIteratorWorker(bulkGraphMem, findAll(srcGraph));
        bulkGraphMem.getEventManager().notifyDeleteGraph(bulkGraphMem, srcGraph);
    }

    /**
     * Deletes by walking the target graph and keeping the triples the source
     * contains, then notifies once.
     */
    private static void deleteLoopDst(BulkGraphMem bulkGraphMem, Graph srcGraph) {
        // Size the list to avoid reallocation on growth.
        int dstSize = bulkGraphMem.size();
        List<Triple> toBeDeleted = new ArrayList<>(dstSize);

        ExtendedIterator<Triple> iter = findAll(bulkGraphMem);
        try {
            while (iter.hasNext()) {
                Triple t = iter.next();
                if (srcGraph.contains(t)) {
                    toBeDeleted.add(t);
                }
            }
        } finally {
            // Release the iterator even if contains() throws part-way through.
            iter.close();
        }
        deleteIteratorWorkerDirect(bulkGraphMem, toBeDeleted.iterator());
        bulkGraphMem.getEventManager().notifyDeleteGraph(bulkGraphMem, srcGraph);
    }

    /**
     * Copies the iterator into a list before deleting, so the source is fully
     * read before the target is modified (source and target may be the same graph).
     */
    private static void deleteIteratorWorker(BulkGraphMem bulkGraphMem, Iterator<Triple> it) {
        List<Triple> snapshot = IteratorCollection.iteratorToList(it);
        deleteIteratorWorkerDirect(bulkGraphMem, snapshot.iterator());
    }

    /** Deletes every remaining triple of {@code it} via {@code deleteWithoutNotify}. */
    private static void deleteIteratorWorkerDirect(BulkGraphMem bulkGraphMem, Iterator<Triple> it) {
        it.forEachRemaining(bulkGraphMem::deleteWithoutNotify);
    }

    /**
     * Chooses which graph {@link #deleteFrom} iterates over.
     *
     * @return {@code true} to iterate the source: when it has at most
     *         {@code MIN_SRC_SIZE} triples, or when the target holds more than
     *         {@code DST_SRC_RATIO} times as many triples as the source;
     *         {@code false} to iterate the target
     */
    private static boolean decideHowtoExecuteBySizeStep(BulkGraphMem bulkGraphMem, Graph srcGraph) {
        int srcSize = srcGraph.size();
        if (srcSize <= MIN_SRC_SIZE) {
            return true;
        }
        // Multiply in long and saturate so a very large source cannot wrap
        // the threshold around to a negative int.
        int threshold = (int) Math.min((long) DST_SRC_RATIO * srcSize, Integer.MAX_VALUE);
        return compareSizeTo(bulkGraphMem, threshold) == CMP_GREATER;
    }

    /**
     * Compares the number of triples in {@code graph} with {@code size} by
     * stepping an iterator at most {@code size} times instead of asking the
     * graph for its size.
     *
     * @return {@code CMP_LESS}, {@code CMP_EQUAL} or {@code CMP_GREATER}
     */
    private static int compareSizeTo(Graph graph, int size) {
        ExtendedIterator<Triple> it = graph.find();
        try {
            int stepsTaken = Iter.step(it, size);
            if (stepsTaken < size) {
                // Iterator ran out.
                return CMP_LESS;
            }
            if (!it.hasNext()) {
                // Finished at the same time.
                return CMP_EQUAL;
            }
            // Still more to go
            return CMP_GREATER;
        } finally {
            it.close();
        }
    }
}