Skip to content

Commit

Permalink
feat(ZNTA-2275): JSON parser for glossary
Browse files Browse the repository at this point in the history
Allow the import and export of a glossary in json format.
  • Loading branch information
djansen-redhat committed Feb 21, 2018
1 parent 1a74f3a commit 64c62a1
Show file tree
Hide file tree
Showing 24 changed files with 720 additions and 39 deletions.
Expand Up @@ -47,8 +47,9 @@
**/
@XmlRootElement(name = "glossaryEntry")
@XmlType(name = "glossaryEntryType", propOrder = { "id", "pos",
"description", "sourceReference", "glossaryTerms", "termsCount", "qualifiedName" })
@JsonPropertyOrder({ "id", "pos", "description", "srcLang", "sourceReference", "glossaryTerms", "termsCount", "qualifiedName" })
"description", "externalId", "sourceReference", "glossaryTerms", "termsCount", "qualifiedName" })
@JsonPropertyOrder({ "id", "pos", "description", "externalId", "srcLang", "sourceReference",
"glossaryTerms", "termsCount", "qualifiedName" })
@JsonSerialize(include = JsonSerialize.Inclusion.NON_NULL)
@Label("Glossary Entry")
public class GlossaryEntry implements Serializable, HasMediaType {
Expand All @@ -63,6 +64,8 @@ public class GlossaryEntry implements Serializable, HasMediaType {

private String description;

private String externalId;

private List<GlossaryTerm> glossaryTerms;

private LocaleId srcLang;
Expand Down Expand Up @@ -119,6 +122,20 @@ public void setDescription(String description) {
this.description = description;
}

/**
* Returns the identifier used to maintain this entry in external tools.
*
* @return the external identifier, or {@code null} if none has been set
*/
@XmlElement(name = "externalId", namespace = Namespaces.ZANATA_OLD)
@JsonProperty("externalId")
@DocumentationExample(value = "myterm-verb")
public String getExternalId() {
return this.externalId;
}

/**
* Sets the identifier used to maintain this entry in external tools.
*
* @param externalId the external identifier to store; stored as given,
*                   without validation
*/
public void setExternalId(String externalId) {
this.externalId = externalId;
}

/**
* Number of translated terms. A term is the glossary entry's representation
* for a specific locale
Expand Down
Expand Up @@ -226,7 +226,7 @@ Response getDetails(
/**
* Download all glossary entries as a file
*
* @param fileType 'po' or 'csv' (case insensitive) are currently supported
* @param fileType 'po', 'json' or 'csv' (case insensitive) are currently supported
* @param locales optional comma separated list of languages required.
* @param qualifiedName
* Qualified name of glossary, default to {@link #GLOBAL_QUALIFIED_NAME}
Expand Down Expand Up @@ -271,7 +271,7 @@ public Response post(List<GlossaryEntry> glossaryEntries,
@DefaultValue(GLOBAL_QUALIFIED_NAME) @QueryParam("qualifiedName") String qualifiedName);

/**
* Upload glossary file (currently supported formats: po, csv)
* Upload glossary file (currently supported formats: po, csv, json)
*
*
* @param form Multi-part form with the following named parts: <br>
Expand Down
6 changes: 5 additions & 1 deletion common/pom.xml
Expand Up @@ -92,7 +92,11 @@
<groupId>junit</groupId>
<artifactId>junit</artifactId>
</dependency>

<dependency>
<groupId>org.json</groupId>
<artifactId>json</artifactId>
<version>20160810</version>
</dependency>
</dependencies>

<modules>
Expand Down
@@ -0,0 +1,164 @@
/*
* Copyright 2018, Red Hat, Inc. and individual contributors
* as indicated by the @author tags. See the copyright.txt file in the
* distribution for a full listing of individual contributors.
*
* This is free software; you can redistribute it and/or modify it
* under the terms of the GNU Lesser General Public License as
* published by the Free Software Foundation; either version 2.1 of
* the License, or (at your option) any later version.
*
* This software is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this software; if not, write to the Free
* Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
* 02110-1301 USA, or see the FSF site: http://www.fsf.org.
*/
package org.zanata.adapter.glossary;

import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import org.apache.commons.lang3.StringUtils;
import org.json.JSONArray;
import org.json.JSONException;
import org.json.JSONObject;
import org.zanata.common.LocaleId;
import org.zanata.rest.dto.GlossaryEntry;
import org.zanata.rest.dto.GlossaryTerm;
import org.zanata.rest.dto.QualifiedName;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.Reader;
import java.io.UncheckedIOException;
import java.util.Iterator;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.stream.Collectors;

import static org.apache.commons.lang3.ObjectUtils.firstNonNull;
import static org.apache.commons.lang3.StringUtils.isBlank;

/**
 * Reads a glossary from a JSON document and converts each term into a
 * {@link GlossaryEntry} whose source locale is the one given at construction.
 *
 * @author Damian Jansen <a href="mailto:djansen@redhat.com">djansen@redhat.com</a>
 */
public class GlossaryJsonReader {
    /** Root key holding the array of term objects. */
    private static final String TERMS = "terms";
    /** Key of the source-language term; objects without it are skipped. */
    private static final String TERM = "term";
    /** Key of the object mapping locale identifiers to translated terms. */
    private static final String TRANSLATIONS = "translations";
    /** Accepted keys for the part of speech, in lookup order. */
    private static final String[] POS_SYNONYMS =
            {"pos", "partofspeech", "part of speech"};
    /** Accepted keys for the description, in lookup order. */
    private static final String[] DESC_SYNONYMS =
            {"desc", "description", "definition"};
    /** Accepted keys for the external identifier, in lookup order. */
    private static final String[] EXTERNAL_ID_SYNONYMS =
            {"id", "externalid", "external id"};

    private final LocaleId srcLang;

    /**
     * @param srcLang locale assigned to every entry and to its source term
     */
    public GlossaryJsonReader(LocaleId srcLang) {
        this.srcLang = srcLang;
    }

    /**
     * Extract a glossary from a representative json file.
     * The format of the glossary should be:
     * <pre>
     * {"terms": [
     *   {
     *     "term": "hello",
     *     "id": "hello-verb",
     *     "desc": "testing of hello json",
     *     "pos": "verb",
     *     "translations": { "es": "Hola", "zh": "您好" }
     *   },
     *   ...
     * ]}
     * </pre>
     * Elements of "terms" that are not objects, or that have no "term" key,
     * are skipped. Keys other than the ones read here are ignored.
     * Translations whose value is JSON null are skipped.
     * The reader is always closed before this method returns.
     *
     * @param reader input source for the json content
     * @param qualifiedName name for the glossary, e.g. global, projectname
     * @return a map from the source locale to the entries read; the map is
     *         empty when no entry was read
     * @throws IOException if the content cannot be read or the reader
     *         cannot be closed
     * @throws RuntimeException if the content is not a valid JSON glossary
     */
    public Map<LocaleId, List<GlossaryEntry>> extractGlossary(Reader reader,
            String qualifiedName) throws IOException {
        String content = readAll(reader);
        List<GlossaryEntry> srcEntries = Lists.newArrayList();

        try {
            JSONArray termsArray = new JSONObject(content).getJSONArray(TERMS);
            for (int index = 0; index < termsArray.length(); ++index) {
                Object candidate = termsArray.get(index);
                if (candidate instanceof JSONObject
                        && ((JSONObject) candidate).has(TERM)) {
                    srcEntries.add(
                            toGlossaryEntry((JSONObject) candidate, qualifiedName));
                }
            }
        } catch (ClassCastException | JSONException exception) {
            // String concatenation tolerates a null message (concat() would
            // throw NullPointerException); keep the cause for diagnosis.
            throw new RuntimeException("Invalid JSON glossary file: "
                    + exception.getMessage(), exception);
        }

        Map<LocaleId, List<GlossaryEntry>> results = Maps.newHashMap();
        if (!srcEntries.isEmpty()) {
            results.put(srcLang, srcEntries);
        }
        return results;
    }

    /*
     * Read the whole input, joining lines without a separator, and close the
     * reader even when reading fails.
     */
    private static String readAll(Reader reader) throws IOException {
        try (BufferedReader bufferedReader = new BufferedReader(reader)) {
            return bufferedReader.lines().collect(Collectors.joining());
        } catch (UncheckedIOException exception) {
            // lines() wraps read failures; surface them as the declared type
            throw exception.getCause();
        }
    }

    /*
     * Convert one term object (already known to contain TERM) to an entry.
     */
    private GlossaryEntry toGlossaryEntry(JSONObject entry,
            String qualifiedName) {
        String srcTerm = entry.getString(TERM);
        GlossaryEntry glossaryEntry = new GlossaryEntry();

        String description = getValueOf(DESC_SYNONYMS, entry);
        if (!isBlank(description)) {
            glossaryEntry.setDescription(description);
        }
        String pos = getValueOf(POS_SYNONYMS, entry);
        if (!isBlank(pos)) {
            glossaryEntry.setPos(pos);
        }
        glossaryEntry.setQualifiedName(new QualifiedName(qualifiedName));
        glossaryEntry.setSrcLang(srcLang);
        // Set unconditionally: an absent id is stored as the empty string
        glossaryEntry.setExternalId(getValueOf(EXTERNAL_ID_SYNONYMS, entry));
        glossaryEntry.getGlossaryTerms().add(newTerm(srcLang, srcTerm));

        // A missing or non-object "translations" value means no translations
        JSONObject translations = entry.optJSONObject(TRANSLATIONS);
        if (translations != null) {
            Iterator<?> localeKeys = translations.keys();
            while (localeKeys.hasNext()) {
                String locale = (String) localeKeys.next();
                // getString() never returns null; it throws on JSON null,
                // so null translations have to be filtered explicitly.
                if (translations.isNull(locale)) {
                    continue;
                }
                glossaryEntry.getGlossaryTerms().add(newTerm(
                        new LocaleId(locale), translations.getString(locale)));
            }
        }
        return glossaryEntry;
    }

    /*
     * Create a term with the given locale and content.
     */
    private static GlossaryTerm newTerm(LocaleId locale, String content) {
        GlossaryTerm term = new GlossaryTerm();
        term.setLocale(locale);
        term.setContent(content);
        return term;
    }

    /*
     * Attempt to return a value from the json data based on a key synonym.
     * Each synonym is tried as written and then in upper case; the empty
     * string is returned when none is present.
     */
    private String getValueOf(String[] synonyms, JSONObject data) {
        for (String option : synonyms) {
            if (data.has(option)) {
                return data.getString(option);
            }
            // Locale.ROOT keeps the mapping stable (e.g. "id" -> "ID")
            // regardless of the JVM default locale.
            String upperCased = option.toUpperCase(Locale.ROOT);
            if (data.has(upperCased)) {
                return data.getString(upperCased);
            }
        }
        return StringUtils.EMPTY;
    }
}
@@ -0,0 +1,95 @@
/*
* Copyright 2017, Red Hat, Inc. and individual contributors
* as indicated by the @author tags. See the copyright.txt file in the
* distribution for a full listing of individual contributors.
*
* This is free software; you can redistribute it and/or modify it
* under the terms of the GNU Lesser General Public License as
* published by the Free Software Foundation; either version 2.1 of
* the License, or (at your option) any later version.
*
* This software is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this software; if not, write to the Free
* Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
* 02110-1301 USA, or see the FSF site: http://www.fsf.org.
*/
package org.zanata.adapter.glossary;

import com.google.common.base.Charsets;
import org.json.JSONArray;
import org.json.JSONObject;
import org.zanata.common.LocaleId;
import org.zanata.rest.dto.GlossaryEntry;
import org.zanata.rest.dto.GlossaryTerm;

import javax.annotation.Nonnull;
import java.io.IOException;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.io.Writer;
import java.util.List;

/**
 * Writes glossary entries as a JSON document of the form
 * <code>{"terms": [ {"id", "term", "description", "pos", "translations"}, ... ]}</code>.
 */
public class GlossaryJsonWriter extends AbstractGlossaryPullWriter {

    public GlossaryJsonWriter() {
    }

    /**
     * Writes the entries to the stream as UTF-8 encoded JSON and closes the
     * stream's writer.
     *
     * @see #write(Writer, List, LocaleId, List)
     */
    public void write(@Nonnull OutputStream stream,
            @Nonnull final List<GlossaryEntry> entries,
            @Nonnull final LocaleId srcLocale,
            @Nonnull final List<LocaleId> transLocales) throws IOException {
        OutputStreamWriter osWriter =
                new OutputStreamWriter(stream, Charsets.UTF_8);
        write(osWriter, entries, srcLocale, transLocales);
    }

    /**
     * This outputs a json file of given <code>transLocales</code>, indented
     * by two spaces. The writer is always closed before this method returns.
     * Nothing is written if building the document fails.
     *
     * @param fileWriter destination; closed by this method
     * @param entries glossary entries to export
     * @param srcLocale locale whose term is written under "term"
     * @param transLocales locales written under "translations"; locales for
     *        which an entry has no term are omitted for that entry
     * @throws IOException if writing or closing the writer fails
     */
    public void write(@Nonnull final Writer fileWriter,
            @Nonnull final List<GlossaryEntry> entries,
            @Nonnull final LocaleId srcLocale,
            @Nonnull final List<LocaleId> transLocales) throws IOException {
        // try-with-resources closes the writer even when building or
        // writing fails, and keeps a close failure as a suppressed exception
        try (Writer out = fileWriter) {
            JSONObject root = new JSONObject();
            root.put("terms", toJsonArray(entries, srcLocale, transLocales));
            out.write(root.toString(2));
        }
    }

    /*
     * Build the "terms" array, one object per glossary entry.
     */
    private JSONArray toJsonArray(List<GlossaryEntry> entries,
            LocaleId srcLocale, List<LocaleId> transLocales) {
        JSONArray entriesOut = new JSONArray();
        for (GlossaryEntry entry : entries) {
            entriesOut.put(toJsonObject(entry, srcLocale, transLocales));
        }
        return entriesOut;
    }

    /*
     * Build the JSON object for a single entry.
     */
    private JSONObject toJsonObject(GlossaryEntry entry, LocaleId srcLocale,
            List<LocaleId> transLocales) {
        GlossaryTerm srcTerm =
                getGlossaryTerm(entry.getGlossaryTerms(), srcLocale);

        JSONObject newEntry = new JSONObject();
        newEntry.put("id", entry.getExternalId());
        // The translation lookup below is null-checked, so the source lookup
        // is treated the same way: an entry without a source term is written
        // without a "term" key instead of failing the whole export.
        if (srcTerm != null) {
            newEntry.put("term", srcTerm.getContent());
        }
        newEntry.put("description", entry.getDescription());
        newEntry.put("pos", entry.getPos());

        JSONObject translations = new JSONObject();
        for (LocaleId transLocale : transLocales) {
            GlossaryTerm transTerm =
                    getGlossaryTerm(entry.getGlossaryTerms(), transLocale);
            if (transTerm != null) {
                // NOTE(review): the key is the locale's Java-style name;
                // confirm the JSON reader's LocaleId parsing accepts this
                // form when the file is imported again.
                translations.put(transTerm.getLocale().toJavaName(),
                        transTerm.getContent());
            }
        }
        newEntry.put("translations", translations);
        return newEntry;
    }

}

0 comments on commit 64c62a1

Please sign in to comment.