Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion src/main/java/co/zeroae/gate/AnnotationSetExporter.java
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@
import javax.xml.stream.XMLOutputFactory;
import javax.xml.stream.XMLStreamException;
import javax.xml.stream.XMLStreamWriter;

import java.io.IOException;
import java.io.OutputStream;

Expand Down Expand Up @@ -47,6 +46,7 @@ static class GATEFastInfoset extends AnnotationSetExporter {
/**
 * Creates the FastInfoset exporter by forwarding three fixed strings to the
 * AnnotationSetExporter constructor.
 * NOTE(review): the arguments look like (display name, default file extension,
 * MIME type) — confirm against the superclass constructor.
 */
public GATEFastInfoset() {
super("AnnotationSet FastInfoset", "finf", "application/fastinfoset; includeText=no");
}

@Override
public void export(Document doc, OutputStream out, FeatureMap options) throws IOException {
try {
Expand All @@ -60,6 +60,7 @@ public void export(Document doc, OutputStream out, FeatureMap options) throws IO

static class GateXML extends AnnotationSetExporter {
private static final XMLOutputFactory outputFactory = XMLOutputFactory.newInstance();

/**
* Creates a new exporter instance for a given file type with default
* extension.
Expand Down
76 changes: 36 additions & 40 deletions src/main/java/co/zeroae/gate/App.java
Original file line number Diff line number Diff line change
Expand Up @@ -5,21 +5,20 @@
import com.amazonaws.services.lambda.runtime.RequestHandler;
import com.amazonaws.services.lambda.runtime.events.APIGatewayProxyRequestEvent;
import com.amazonaws.services.lambda.runtime.events.APIGatewayProxyResponseEvent;

import com.amazonaws.util.Base64;
import com.amazonaws.xray.AWSXRay;

import com.fasterxml.jackson.core.JsonProcessingException;
import com.fasterxml.jackson.databind.ObjectMapper;
import gate.*;
import gate.corpora.DocumentImpl;
import gate.util.GateException;
import gate.util.persistence.PersistenceManager;

import org.apache.log4j.LogManager;
import org.apache.log4j.Logger;

import java.io.*;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URL;
import java.net.URLStreamHandler;
Expand All @@ -44,20 +43,43 @@ public class App implements RequestHandler<APIGatewayProxyRequestEvent, APIGatew

// Static configuration and singletons for the Lambda handler. Java runs these
// initializers in textual order, so the order below is significant.

// Name of the GATE application to serve; null when the GATE_APP_NAME environment variable is unset.
private static final String GATE_APP_NAME = System.getenv("GATE_APP_NAME");
// Directory handed to the document cache: the CACHE_DIR_PREFIX environment variable when set,
// otherwise "/tmp/lru/" followed by GATE_APP_NAME.
// NOTE(review): with GATE_APP_NAME unset the default concatenates to "/tmp/lru/null" — confirm this is acceptable.
private static final String CACHE_DIR = System.getenv().getOrDefault(
"CACHE_DIR_PREFIX", "/tmp/lru/" + GATE_APP_NAME );
"CACHE_DIR_PREFIX", "/tmp/lru/" + GATE_APP_NAME);
// Usage fraction passed to DocumentLRUCache together with CACHE_DIR.
private static final double CACHE_DIR_USAGE = .9;
// Random value generated once per class initialization; it is appended to the string that is
// digested for each request, so digests are not comparable across separately initialized instances.
private static final String DIGEST_SALT = UUID.randomUUID().toString();

private static final Logger logger = LogManager.getLogger(App.class);
// The GATE application is loaded during class initialization, inside an X-Ray segment named "Gate Load".
private static final CorpusController application = AWSXRay.createSegment(
"Gate Load", App::loadApplication);
// Service metadata assembled by loadMetadata().
private static final AppMetadata metadata = loadMetadata();

// Document cache, created inside an X-Ray segment named "Cache Init".
private static final DocumentLRUCache cache = AWSXRay.createSegment("Cache Init",
() -> new DocumentLRUCache(App.CACHE_DIR, App.CACHE_DIR_USAGE));

// Shared instance of the b64 URLStreamHandler.
private static final URLStreamHandler b64Handler = new Handler();

/**
 * Assembles the service metadata from environment variables.
 *
 * <ul>
 *   <li>{@code GATE_APP_COST_PER_REQUEST}: parsed as a signed int, "0" when unset.</li>
 *   <li>{@code GATE_APP_DAILY_QUOTA}: parsed as an unsigned int, "0" when unset.</li>
 *   <li>{@code GATE_APP_DEFAULT_ANNOTATIONS} / {@code GATE_APP_ADDITIONAL_ANNOTATIONS}:
 *       copied verbatim, {@code null} when unset.</li>
 * </ul>
 *
 * @return a freshly populated AppMetadata whose {@code name} is left {@code null}
 * @throws NumberFormatException if either numeric variable is set to a non-numeric value
 */
private static AppMetadata loadMetadata() {
    final AppMetadata result = new AppMetadata();
    final String costSetting = System.getenv().getOrDefault("GATE_APP_COST_PER_REQUEST", "0");
    final String quotaSetting = System.getenv().getOrDefault("GATE_APP_DAILY_QUOTA", "0");

    // TODO: Load metadata/metadata.xml if it exists, and set as default values
    result.name = null;
    result.costPerRequest = Integer.parseInt(costSetting);
    result.dailyQuota = Integer.parseUnsignedInt(quotaSetting);
    result.defaultAnnotations = System.getenv("GATE_APP_DEFAULT_ANNOTATIONS");
    result.additionalAnnotations = System.getenv("GATE_APP_ADDITIONAL_ANNOTATIONS");
    return result;
}

/**
 * Loads the GATE application named by {@code GATE_APP_NAME} from the classpath
 * resource {@code <GATE_APP_NAME>/application.xgapp} and attaches a fresh corpus
 * called "Lambda Corpus" to it.
 *
 * @return the loaded controller with its corpus set
 * @throws RuntimeException wrapping any failure (resource missing from the classpath,
 *                          resource URL not convertible to a file, GATE persistence errors)
 */
private static CorpusController loadApplication() {
    final String gappResourcePath = GATE_APP_NAME + "/application.xgapp";
    try {
        final URL gappUrl = Objects.requireNonNull(
                App.class.getClassLoader().getResource(gappResourcePath),
                "GATE application not found on the classpath: " + gappResourcePath);
        // URL#getFile() leaves percent-escapes (e.g. %20 for a space) in the path, which then
        // names a file that does not exist; converting through a URI decodes them.
        final File gappFile = new File(gappUrl.toURI());
        final CorpusController rv =
                (CorpusController) PersistenceManager.loadObjectFromFile(gappFile);
        final Corpus corpus = Factory.newCorpus("Lambda Corpus");
        rv.setCorpus(corpus);
        return rv;
    } catch (Exception e) {
        // Keep the unchecked type callers already see, but say which application failed.
        throw new RuntimeException("Unable to load GATE application '" + gappResourcePath + "'", e);
    }
}

public APIGatewayProxyResponseEvent handleRequest(APIGatewayProxyRequestEvent input, final Context context) {
final String path = input.getPath();
if (path.matches("^/([^/]*)/?$"))
Expand All @@ -84,7 +106,7 @@ public APIGatewayProxyResponseEvent handleMetadata(APIGatewayProxyRequestEvent i

public APIGatewayProxyResponseEvent handleExecute(APIGatewayProxyRequestEvent input, final Context context) {
final APIGatewayProxyResponseEvent response = new APIGatewayProxyResponseEvent()
.withHeaders(new HashMap<>());
.withHeaders(new HashMap<>());
final Map<String, String> headers = input.getHeaders();
final Map<String, String> queryStringParams = Optional.ofNullable(
input.getQueryStringParameters()).orElse(new HashMap<>());
Expand All @@ -101,7 +123,7 @@ public APIGatewayProxyResponseEvent handleExecute(APIGatewayProxyRequestEvent in
"nextAnnotationId", "0"));
final String contentType = Utils.ensureValidRequestContentType(headers.getOrDefault(
"Content-Type", "text/plain"));
final String contentDigest = AWSXRay.createSubsegment("Message Digest",() -> {
final String contentDigest = AWSXRay.createSubsegment("Message Digest", () -> {
String rv = Utils.computeMessageDigest(contentType + input.getBody() + nextAnnotationId + DIGEST_SALT);
AWSXRay.getCurrentSubsegment().putMetadata("SHA256", rv);
return rv;
Expand Down Expand Up @@ -176,7 +198,7 @@ private Document execute(FeatureMap docFeatureMap) throws GateException {

// Note: The DocumentImpl API does not conform to JavaBeans for the nextAnnotationId method.
// Paragraphs may be annotated right away, so we need to handle that issue.
final int nextAnnotationId = (Integer)docFeatureMap.get("nextAnnotationId");
final int nextAnnotationId = (Integer) docFeatureMap.get("nextAnnotationId");
docFeatureMap.remove("nextAnnotationId");
rvImpl = (DocumentImpl) Factory.createResource("gate.corpora.DocumentImpl", docFeatureMap);
rvImpl.setNextAnnotationId(Math.max(nextAnnotationId, rvImpl.getNextAnnotationId()));
Expand All @@ -194,10 +216,10 @@ private Document execute(FeatureMap docFeatureMap) throws GateException {
}

/**
* @param exporter The document exporter
* @param doc an instance of gate.Document
* @param exporter The document exporter
* @param doc an instance of gate.Document
* @param annotationSelector the List of AnnotationTypes to return
* @param response The response where we put the exported Document as body
* @param response The response where we put the exported Document as body
* @return the modified response
*/
private APIGatewayProxyResponseEvent export(
Expand Down Expand Up @@ -240,30 +262,4 @@ private APIGatewayProxyResponseEvent export(
}
return response;
}

/**
 * Assembles the service metadata from environment variables.
 *
 * <ul>
 *   <li>{@code GATE_APP_COST_PER_REQUEST}: parsed as a signed int, "0" when unset.</li>
 *   <li>{@code GATE_APP_DAILY_QUOTA}: parsed as an unsigned int, "0" when unset.</li>
 *   <li>{@code GATE_APP_DEFAULT_ANNOTATIONS} / {@code GATE_APP_ADDITIONAL_ANNOTATIONS}:
 *       copied verbatim, {@code null} when unset.</li>
 * </ul>
 *
 * @return a freshly populated AppMetadata whose {@code name} is left {@code null}
 * @throws NumberFormatException if either numeric variable is set to a non-numeric value
 */
private static AppMetadata loadMetadata() {
    final AppMetadata result = new AppMetadata();
    final String costSetting = System.getenv().getOrDefault("GATE_APP_COST_PER_REQUEST", "0");
    final String quotaSetting = System.getenv().getOrDefault("GATE_APP_DAILY_QUOTA", "0");

    // TODO: Load metadata/metadata.xml if it exists, and set as default values
    result.name = null;
    result.costPerRequest = Integer.parseInt(costSetting);
    result.dailyQuota = Integer.parseUnsignedInt(quotaSetting);
    result.defaultAnnotations = System.getenv("GATE_APP_DEFAULT_ANNOTATIONS");
    result.additionalAnnotations = System.getenv("GATE_APP_ADDITIONAL_ANNOTATIONS");
    return result;
}

/**
 * Loads the GATE application named by {@code GATE_APP_NAME} from the classpath
 * resource {@code <GATE_APP_NAME>/application.xgapp} and attaches a fresh corpus
 * called "Lambda Corpus" to it.
 *
 * @return the loaded controller with its corpus set
 * @throws RuntimeException wrapping any failure (resource missing from the classpath,
 *                          resource URL not convertible to a file, GATE persistence errors)
 */
private static CorpusController loadApplication() {
    final String gappResourcePath = GATE_APP_NAME + "/application.xgapp";
    try {
        final URL gappUrl = Objects.requireNonNull(
                App.class.getClassLoader().getResource(gappResourcePath),
                "GATE application not found on the classpath: " + gappResourcePath);
        // URL#getFile() leaves percent-escapes (e.g. %20 for a space) in the path, which then
        // names a file that does not exist; converting through a URI decodes them.
        final File gappFile = new File(gappUrl.toURI());
        final CorpusController rv =
                (CorpusController) PersistenceManager.loadObjectFromFile(gappFile);
        final Corpus corpus = Factory.newCorpus("Lambda Corpus");
        rv.setCorpus(corpus);
        return rv;
    } catch (Exception e) {
        // Keep the unchecked type callers already see, but say which application failed.
        throw new RuntimeException("Unable to load GATE application '" + gappResourcePath + "'", e);
    }
}
}
1 change: 1 addition & 0 deletions src/main/java/co/zeroae/gate/AppMetadata.java
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
package co.zeroae.gate;

/**
* This class structure must match https://github.com/GateNLP/cloud-client/blob/master/library/src/main/java/uk/ac/gate/cloud/online/ServiceMetadata.java
*/
Expand Down
28 changes: 14 additions & 14 deletions src/main/java/co/zeroae/gate/DocumentLRUCache.java
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,20 @@ class DocumentLRUCache {
cache = initializeCache(cacheDir, maxUsage);
}

/**
 * Prepares the cache directory and opens a DiskLruCache on it.
 *
 * <p>The directory is created when missing, its direct children are deleted, and the
 * cache is opened with a size limit of {@code usableSpace * maxUsage} bytes, where
 * {@code usableSpace} is what the file system reports for the directory.
 *
 * @param cacheDir path of the directory that will hold the cache
 * @param maxUsage fraction of the directory's usable space the cache may occupy
 * @return the opened cache
 * @throws RuntimeException if the directory cannot be created or listed, or if opening
 *                          the cache fails with an IOException
 */
private static DiskLruCache initializeCache(String cacheDir, double maxUsage) {
    final File cacheDirPath = new File(cacheDir);
    if (!cacheDirPath.exists() && !cacheDirPath.mkdirs()) {
        // Report the whole path: the last segment alone does not identify the directory.
        throw new RuntimeException("Unable to create cache directory '" + cacheDirPath.getPath() + "'.");
    }
    // listFiles() returns null when the path is not a directory or cannot be read;
    // fail with an explicit message instead of a bare NullPointerException.
    final File[] staleEntries = cacheDirPath.listFiles();
    if (staleEntries == null) {
        throw new RuntimeException("Unable to list cache directory '" + cacheDirPath.getPath() + "'.");
    }
    for (File entry : staleEntries) {
        // Best effort: the result is deliberately ignored, as File#delete() will not
        // remove a non-empty sub-directory and a leftover entry is not treated as fatal.
        entry.delete();
    }
    try {
        final long usableSpace = (long) (cacheDirPath.getUsableSpace() * maxUsage);
        return DiskLruCache.open(cacheDirPath, VERSION, VALUE_COUNT, usableSpace);
    } catch (IOException e) {
        throw new RuntimeException(e);
    }
}

public Document computeIfNull(String key, Utils.GATESupplier<Document> supplier) throws GateException {
Document rv = get(key);
if (rv == null) {
Expand Down Expand Up @@ -74,18 +88,4 @@ public void put(String key, Document doc) {
AWSXRay.endSubsegment();
}
}

/**
 * Prepares the cache directory and opens a DiskLruCache on it.
 *
 * <p>The directory is created when missing, its direct children are deleted, and the
 * cache is opened with a size limit of {@code usableSpace * maxUsage} bytes, where
 * {@code usableSpace} is what the file system reports for the directory.
 *
 * @param cacheDir path of the directory that will hold the cache
 * @param maxUsage fraction of the directory's usable space the cache may occupy
 * @return the opened cache
 * @throws RuntimeException if the directory cannot be created or listed, or if opening
 *                          the cache fails with an IOException
 */
private static DiskLruCache initializeCache(String cacheDir, double maxUsage) {
    final File cacheDirPath = new File(cacheDir);
    if (!cacheDirPath.exists() && !cacheDirPath.mkdirs()) {
        // Report the whole path: the last segment alone does not identify the directory.
        throw new RuntimeException("Unable to create cache directory '" + cacheDirPath.getPath() + "'.");
    }
    // listFiles() returns null when the path is not a directory or cannot be read;
    // fail with an explicit message instead of a bare NullPointerException.
    final File[] staleEntries = cacheDirPath.listFiles();
    if (staleEntries == null) {
        throw new RuntimeException("Unable to list cache directory '" + cacheDirPath.getPath() + "'.");
    }
    for (File entry : staleEntries) {
        // Best effort: the result is deliberately ignored, as File#delete() will not
        // remove a non-empty sub-directory and a leftover entry is not treated as fatal.
        entry.delete();
    }
    try {
        final long usableSpace = (long) (cacheDirPath.getUsableSpace() * maxUsage);
        return DiskLruCache.open(cacheDirPath, VERSION, VALUE_COUNT, usableSpace);
    } catch (IOException e) {
        throw new RuntimeException(e);
    }
}
}
59 changes: 31 additions & 28 deletions src/main/java/co/zeroae/gate/Utils.java
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,9 @@
import gate.util.GateException;
import org.codehaus.httpcache4j.util.Hex;

import javax.xml.stream.*;
import javax.xml.stream.XMLInputFactory;
import javax.xml.stream.XMLStreamException;
import javax.xml.stream.XMLStreamReader;
import java.io.Reader;
import java.net.MalformedURLException;
import java.security.MessageDigest;
Expand Down Expand Up @@ -42,7 +44,7 @@ static String ensureValidRequestContentType(String contentType) throws GateExcep
.map((type) -> type.equals("text/json") ? "application/json" : type)
.sorted()
.toArray())
);
);
}
return rv;
}
Expand All @@ -58,46 +60,23 @@ static String ensureValidResponseType(String acceptHeader) throws GateException
Arrays.toString(exporters.keySet().stream().sorted().toArray()));
}

/**
 * A supplier of a value whose {@link #get()} is allowed to throw the checked
 * GateException, so GATE calls can be passed as lambdas.
 *
 * @param <T> the type of the supplied value
 */
@FunctionalInterface
interface GATESupplier<T> {
/**
 * @return the supplied value
 * @throws GateException if producing the value fails
 */
T get() throws GateException;
}

/**
*
* @param gateXMLReader a Reader with GateXML content.
* @return The parsed Document.
* @throws ResourceInstantiationException if the Factory fails to create an empty Document.
* @throws XMLStreamException if the reader has invalid XML content.
* @throws XMLStreamException if the reader has invalid XML content.
*/
static Document xmlToDocument(Reader gateXMLReader) throws ResourceInstantiationException, XMLStreamException {
final Document doc = Factory.newDocument("");
XMLStreamReader reader;
reader = XMLInputFactory.newFactory().createXMLStreamReader(gateXMLReader);
do {
reader.next();
} while(reader.getEventType() != XMLStreamReader.START_ELEMENT);
} while (reader.getEventType() != XMLStreamReader.START_ELEMENT);
gate.corpora.DocumentStaxUtils.readGateXmlDocument(reader, doc);
return doc;
}

/**
 * A Plugin.Component whose hash code comes from the wrapped resource class rather
 * than from the plugin itself.
 *
 * <p>Per the original author, Plugin.Component misbehaves when the baseUrl fails to
 * resolve uniquely; hashing by the resource class is the workaround.
 * Only {@code hashCode()} is overridden here; {@code equals} is inherited unchanged.
 */
private static class UniqueHashComponent extends Plugin.Component {
    private final int resourceClassHash;

    public UniqueHashComponent(Class<? extends Resource> resourceClass) throws MalformedURLException {
        super(resourceClass);
        this.resourceClassHash = resourceClass.hashCode();
    }

    /** @return the hash code of the resource class captured at construction time. */
    @Override
    public int hashCode() {
        return resourceClassHash;
    }
}

static void loadDocumentFormats() {
try {
final Set<Class<? extends gate.Resource>> classes = new HashSet<>();
Expand All @@ -108,7 +87,7 @@ static void loadDocumentFormats() {
classes.add(JSONTweetFormat.class);
classes.add(MediaWikiDocumentFormat.class);
classes.add(PubmedTextDocumentFormat.class);
for (Class<? extends gate.Resource> clazz: classes) {
for (Class<? extends gate.Resource> clazz : classes) {
Gate.getCreoleRegister().registerPlugin(new UniqueHashComponent(clazz));
}
} catch (GateException | MalformedURLException e) {
Expand All @@ -118,6 +97,7 @@ static void loadDocumentFormats() {

/**
* Loads all exporters that we support
*
* @return an UnmodifiableMap of the supported exporters.
*/
static Map<String, DocumentExporter> loadExporters() {
Expand Down Expand Up @@ -150,4 +130,27 @@ static String computeMessageDigest(String message) {
throw new RuntimeException(e);
}
}

/**
 * A supplier of a value whose {@link #get()} is allowed to throw the checked
 * GateException, so GATE calls can be passed as lambdas.
 *
 * @param <T> the type of the supplied value
 */
@FunctionalInterface
interface GATESupplier<T> {
/**
 * @return the supplied value
 * @throws GateException if producing the value fails
 */
T get() throws GateException;
}

/**
 * A Plugin.Component whose hash code comes from the wrapped resource class rather
 * than from the plugin itself.
 *
 * <p>Per the original author, Plugin.Component misbehaves when the baseUrl fails to
 * resolve uniquely; hashing by the resource class is the workaround.
 * Only {@code hashCode()} is overridden here; {@code equals} is inherited unchanged.
 */
private static class UniqueHashComponent extends Plugin.Component {
    private final int resourceClassHash;

    public UniqueHashComponent(Class<? extends Resource> resourceClass) throws MalformedURLException {
        super(resourceClass);
        this.resourceClassHash = resourceClass.hashCode();
    }

    /** @return the hash code of the resource class captured at construction time. */
    @Override
    public int hashCode() {
        return resourceClassHash;
    }
}
}
11 changes: 7 additions & 4 deletions src/main/java/co/zeroae/gate/b64/Handler.java
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,12 @@

public class Handler extends URLStreamHandler {
static final public Map<String, String> paths = Collections.synchronizedMap(new WeakHashMap<>());

/**
 * Wraps the URL in a new {@link Connection}. Nothing is opened or read here;
 * Connection is a non-static inner class, so the instance is tied to this handler.
 *
 * @param u the URL to create a connection for
 * @return a new, unconnected Connection for {@code u}
 */
@Override
protected URLConnection openConnection(URL u) {
return new Connection(u);
}

private class Connection extends URLConnection {
/**
* Constructs a URL connection to the specified URL. A connection to
Expand All @@ -25,6 +31,7 @@ private class Connection extends URLConnection {
protected Connection(URL url) {
super(url);
}

/**
 * No-op: this connection performs no work when asked to connect.
 */
@Override
public void connect() {
}
Expand All @@ -41,8 +48,4 @@ public InputStream getInputStream() throws IOException {
));
}
}
/**
 * Wraps the URL in a new {@link Connection}. Nothing is opened or read here;
 * Connection is a non-static inner class, so the instance is tied to this handler.
 *
 * @param u the URL to create a connection for
 * @return a new, unconnected Connection for {@code u}
 */
@Override
protected URLConnection openConnection(URL u) {
return new Connection(u);
}
}