Skip to content

Commit

Permalink
Refactored code to allow usage of the JobManager API with custom pers…
Browse files Browse the repository at this point in the history
…istence logic, and added persistence classes for RAM (in-memory data structures) and MongoDB.
  • Loading branch information
EdDuarte committed Jul 1, 2016
1 parent 601ef27 commit 06d751e
Show file tree
Hide file tree
Showing 34 changed files with 1,433 additions and 1,502 deletions.
29 changes: 19 additions & 10 deletions vokter-core/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -178,6 +178,15 @@

<dependencies>

<!-- dropwizard (included here so we have -->
<!-- access to the MediaType.java class -->
<dependency>
<groupId>io.dropwizard</groupId>
<artifactId>dropwizard-core</artifactId>
<version>${dropwizard.version}</version>
</dependency>


<!-- mutable string -->
<dependency>
<groupId>it.unimi.dsi</groupId>
Expand All @@ -200,16 +209,16 @@


<!-- MediaType class -->
<dependency>
<groupId>com.fasterxml.jackson.core</groupId>
<artifactId>jackson-databind</artifactId>
<version>${jackson.version}</version>
</dependency>
<dependency>
<groupId>org.glassfish.jersey.containers</groupId>
<artifactId>jersey-container-servlet</artifactId>
<version>${jersey.version}</version>
</dependency>
<!--<dependency>-->
<!--<groupId>com.fasterxml.jackson.core</groupId>-->
<!--<artifactId>jackson-databind</artifactId>-->
<!--<version>${jackson.version}</version>-->
<!--</dependency>-->
<!--<dependency>-->
<!--<groupId>org.glassfish.jersey.containers</groupId>-->
<!--<artifactId>jersey-container-servlet</artifactId>-->
<!--<version>${jersey.version}</version>-->
<!--</dependency>-->


<!-- sentence splitting parsing -->
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@

package com.edduarte.vokter.diff;

import com.edduarte.vokter.persistence.Diff;
import com.edduarte.vokter.persistence.Document;
import com.edduarte.vokter.similarity.JaccardStringSimilarity;
import com.edduarte.vokter.similarity.LSHSimilarity;
Expand All @@ -35,28 +34,24 @@
* @version 1.3.2
* @since 1.0.0
*/
public class DiffDetector implements Callable<List<Diff>> {
public class DiffDetector implements Callable<List<DiffDetector.Result>> {

private static final Logger logger = LoggerFactory.getLogger(DiffDetector.class);

private final Document oldSnapshot;

private final Document newSnapshot;

private final Class<? extends Diff> diffClass;


public DiffDetector(final Document oldSnapshot,
final Document newSnapshot,
final Class<? extends Diff> diffClass) {
final Document newSnapshot) {
this.oldSnapshot = oldSnapshot;
this.newSnapshot = newSnapshot;
this.diffClass = diffClass;
}


@Override
public List<Diff> call() {
public List<Result> call() {
Stopwatch sw = Stopwatch.createStarted();

int[] oldBands = oldSnapshot.getBands();
Expand Down Expand Up @@ -87,20 +82,9 @@ public List<Diff> call() {
LinkedList<DiffMatchPatch.Diff> diffs = dmp.diff_main(original, revision);
dmp.diff_cleanupSemantic(diffs);

List<Diff> retrievedDiffs = diffs.parallelStream()
List<Result> retrievedDiffs = diffs.parallelStream()
.filter(diff -> !diff.getOperation().equals(DiffEvent.nothing))
.map(diff -> {
try {
return (Diff) diffClass.getConstructor(
DiffEvent.class,
String.class,
int.class
).newInstance(diff.action, diff.text, diff.startIndex);
} catch (ReflectiveOperationException e) {
logger.error(e.getMessage(), e);
return null;
}
})
.map(Result::new)
.filter(diff -> diff != null)
.collect(Collectors.toList());

Expand All @@ -109,4 +93,36 @@ public List<Diff> call() {
newSnapshot.getUrl(), sw.toString());
return retrievedDiffs;
}


/**
 * Read-only holder for one difference produced by the detector. Its three
 * values are copied from a {@code DiffMatchPatch.Diff} at construction time
 * and are exposed through getters only.
 */
public static class Result {

    /** Value taken from the {@code action} field of the originating diff. */
    private final DiffEvent event;

    /** Value taken from the {@code text} field of the originating diff. */
    private final String text;

    /** Value taken from the {@code startIndex} field of the originating diff. */
    private final int startIndex;


    /**
     * Copies the action, text and start index out of the given diff.
     * Private, so instances can only be created from within the
     * enclosing class.
     *
     * @param source the diff whose fields are copied into this result
     */
    private Result(DiffMatchPatch.Diff source) {
        event = source.action;
        text = source.text;
        startIndex = source.startIndex;
    }


    /**
     * @return the event copied from the originating diff
     */
    public DiffEvent getEvent() {
        return this.event;
    }


    /**
     * @return the text copied from the originating diff
     */
    public String getText() {
        return this.text;
    }


    /**
     * @return the start index copied from the originating diff
     */
    public int getStartIndex() {
        return this.startIndex;
    }
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ public final class DocumentBuilder {
/**
* Flag that sets usage of stopword filtering.
*/
private boolean isStoppingEnabled = false;
private boolean filterStopwords = false;

// /**
// * Flag that sets usage of a porter stemmer.
Expand Down Expand Up @@ -140,8 +140,8 @@ public static DocumentBuilder fromString(final String url,
// }


public DocumentBuilder withStopwords() {
this.isStoppingEnabled = true;
public DocumentBuilder filterStopwords() {
this.filterStopwords = true;
return this;
}

Expand Down Expand Up @@ -226,7 +226,7 @@ public Document build(LanguageDetector langDetector,

// flag that sets that stopwords will be filtered during
// k-shingling
isStoppingEnabled,
filterStopwords,

// flag that forces the document to be in lower case, so that
// during difference matching, every match will be case
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,8 @@ public class DiffMatcherJob implements InterruptableJob {

public static final String CLIENT_CONTENT_TYPE = "client_content_type";

public static final String CLIENT_TOKEN = "client_token";

public final static String KEYWORDS = "keywords";

public final static String EVENTS = "events";
Expand Down Expand Up @@ -87,6 +89,7 @@ public void execute(JobExecutionContext context)
String documentContentType = dataMap.getString(DOCUMENT_CONTENT_TYPE);
String clientUrl = dataMap.getString(CLIENT_URL);
String clientContentType = dataMap.getString(CLIENT_CONTENT_TYPE);
String clientToken = dataMap.getString(CLIENT_TOKEN);

try {
ObjectMapper mapper = new ObjectMapper();
Expand Down Expand Up @@ -124,7 +127,7 @@ public void execute(JobExecutionContext context)
if (!results.isEmpty()) {
boolean wasSuccessful = manager.sendNotificationToClient(
documentUrl, documentContentType,
clientUrl, clientContentType,
clientUrl, clientContentType, clientToken,
results
);
// TODO: Add fault tolerance so that, if failed 10 times,
Expand Down
Loading

0 comments on commit 06d751e

Please sign in to comment.