Skip to content

Commit

Permalink
feat(glossary): JSON parser for glossary
Browse files Browse the repository at this point in the history
Allow the import/export of a glossary in json format.
  • Loading branch information
djansen-redhat committed Nov 17, 2017
1 parent 4be8963 commit 4f261f0
Show file tree
Hide file tree
Showing 24 changed files with 652 additions and 37 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -47,8 +47,9 @@
**/
@XmlRootElement(name = "glossaryEntry")
@XmlType(name = "glossaryEntryType", propOrder = { "id", "pos",
"description", "sourceReference", "glossaryTerms", "termsCount", "qualifiedName" })
@JsonPropertyOrder({ "id", "pos", "description", "srcLang", "sourceReference", "glossaryTerms", "termsCount", "qualifiedName" })
"description", "externalId", "sourceReference", "glossaryTerms", "termsCount", "qualifiedName" })
@JsonPropertyOrder({ "id", "pos", "description", "externalId", "srcLang", "sourceReference",
"glossaryTerms", "termsCount", "qualifiedName" })
@JsonSerialize(include = JsonSerialize.Inclusion.NON_NULL)
@Label("Glossary Entry")
public class GlossaryEntry implements Serializable, HasMediaType {
Expand All @@ -63,6 +64,8 @@ public class GlossaryEntry implements Serializable, HasMediaType {

private String description;

private String externalId;

private List<GlossaryTerm> glossaryTerms;

private LocaleId srcLang;
Expand Down Expand Up @@ -119,6 +122,20 @@ public void setDescription(String description) {
this.description = description;
}

/**
 * Identifier used to maintain this entry in external tools.
 *
 * @return the external identifier, or {@code null} if none has been set
 */
@XmlElement(name = "externalId", namespace = Namespaces.ZANATA_OLD)
@JsonProperty("externalId")
@DocumentationExample(value = "myterm-verb")
public String getExternalId() {
    return this.externalId;
}

/**
 * Sets the identifier used to maintain this entry in external tools.
 *
 * @param externalId the external identifier to store on this entry
 */
public void setExternalId(String externalId) {
this.externalId = externalId;
}

/**
* Number of translated terms. A term is the glossary entry's representation
* for a specific locale
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -226,7 +226,7 @@ Response getDetails(
/**
* Download all glossary entries as a file
*
* @param fileType 'po' or 'csv' (case insensitive) are currently supported
* @param fileType 'po', 'json' or 'csv' (case insensitive) are currently supported
* @param locales optional comma separated list of languages required.
* @param qualifiedName
* Qualified name of glossary, default to {@link #GLOBAL_QUALIFIED_NAME}
Expand Down Expand Up @@ -271,7 +271,7 @@ public Response post(List<GlossaryEntry> glossaryEntries,
@DefaultValue(GLOBAL_QUALIFIED_NAME) @QueryParam("qualifiedName") String qualifiedName);

/**
* Upload glossary file (currently supported formats: po, csv)
* Upload glossary file (currently supported formats: po, csv, json)
*
*
* @param form Multi-part form with the following named parts: <br>
Expand Down
6 changes: 5 additions & 1 deletion common/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,11 @@
<groupId>junit</groupId>
<artifactId>junit</artifactId>
</dependency>

<dependency>
<groupId>org.json</groupId>
<artifactId>json</artifactId>
<version>20160810</version>
</dependency>
</dependencies>

<modules>
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,158 @@
/*
* Copyright 2017, Red Hat, Inc. and individual contributors
* as indicated by the @author tags. See the copyright.txt file in the
* distribution for a full listing of individual contributors.
*
* This is free software; you can redistribute it and/or modify it
* under the terms of the GNU Lesser General Public License as
* published by the Free Software Foundation; either version 2.1 of
* the License, or (at your option) any later version.
*
* This software is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this software; if not, write to the Free
* Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
* 02110-1301 USA, or see the FSF site: http://www.fsf.org.
*/
package org.zanata.adapter.glossary;

import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import org.json.JSONArray;
import org.json.JSONException;
import org.json.JSONObject;
import org.zanata.common.LocaleId;
import org.zanata.rest.dto.GlossaryEntry;
import org.zanata.rest.dto.GlossaryTerm;
import org.zanata.rest.dto.QualifiedName;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.Reader;
import java.io.UncheckedIOException;
import java.util.Iterator;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.stream.Collectors;

import static org.apache.commons.lang3.ObjectUtils.firstNonNull;
import static org.apache.commons.lang3.StringUtils.isBlank;

/**
 * Reads a glossary from a JSON document and converts it into
 * {@link GlossaryEntry} objects keyed by the source locale.
 *
 * @author Damian Jansen <a href="mailto:djansen@redhat.com">djansen@redhat.com</a>
 **/
public class GlossaryJsonReader {
    private static final String TERMS = "terms";
    private static final String TERM = "term";
    private static final String TRANSLATIONS = "translations";
    private static final String[] POS_SYNONYMS =
            { "pos", "partofspeech", "part of speech" };
    private static final String[] DESC_SYNONYMS =
            { "desc", "description", "definition" };
    private static final String[] EXTERNAL_ID_SYNONYMS =
            { "id", "externalid", "external id" };

    private final LocaleId srcLang;

    /**
     * @param srcLang locale assigned to every entry and to its source term
     */
    public GlossaryJsonReader(LocaleId srcLang) {
        this.srcLang = srcLang;
    }

    /**
     * Extract a glossary from a representative json file. The format of the
     * glossary should be:
     * <pre>
     * {"terms": [
     *   {
     *     "term": "hello",
     *     "id": "hello-verb",
     *     "desc": "testing of hello json",
     *     "pos": "verb",
     *     "translations": { "es": "Hola", "zh": "您好" },
     *     "synonyms": "Hi",
     *     ...
     *   },
     *   term2...
     * ]}
     * </pre>
     * Elements of "terms" that are not JSON objects, or that have no "term"
     * key, are skipped. Translations whose value is JSON {@code null} are
     * skipped as well. The reader is always closed, whether or not reading
     * succeeds.
     *
     * @param reader input source for the json content
     * @param qualifiedName name for the glossary, e.g. global, projectname
     * @return a map holding the parsed entries under the source locale; empty
     *         when no usable entry was found
     * @throws IOException if the content cannot be read or the reader cannot
     *         be closed
     * @throws RuntimeException if the content is not valid JSON or does not
     *         have the expected structure
     */
    public Map<LocaleId, List<GlossaryEntry>> extractGlossary(Reader reader,
            String qualifiedName) throws IOException {
        String content = readAll(reader);
        Map<LocaleId, List<GlossaryEntry>> results = Maps.newHashMap();

        try {
            JSONArray termsArray = new JSONObject(content).getJSONArray(TERMS);
            List<GlossaryEntry> entries = Lists.newArrayList();
            for (int index = 0; index < termsArray.length(); index++) {
                Object element = termsArray.get(index);
                if (!(element instanceof JSONObject)) {
                    continue;
                }
                JSONObject json = (JSONObject) element;
                if (!json.has(TERM)) {
                    continue;
                }
                entries.add(toGlossaryEntry(json, qualifiedName));
            }
            // Only register the source locale when something was parsed, so
            // an empty glossary yields an empty map.
            if (!entries.isEmpty()) {
                results.put(srcLang, entries);
            }
        } catch (ClassCastException | JSONException exception) {
            // Keep the cause for diagnostics; fall back to toString() so the
            // message never ends in "null".
            throw new RuntimeException("Invalid JSON glossary file: "
                    + firstNonNull(exception.getMessage(),
                            exception.toString()), exception);
        }
        return results;
    }

    /*
     * Read the whole input as one string (line terminators are dropped) and
     * close the reader, even when reading fails.
     */
    private static String readAll(Reader reader) throws IOException {
        try (BufferedReader buffered = new BufferedReader(reader)) {
            return buffered.lines().collect(Collectors.joining());
        } catch (UncheckedIOException e) {
            // BufferedReader.lines() wraps read failures; unwrap so callers
            // see the IOException this class declares.
            throw e.getCause();
        }
    }

    /*
     * Build one glossary entry (source term plus translations) from the json
     * object of a single term.
     */
    private GlossaryEntry toGlossaryEntry(JSONObject json,
            String qualifiedName) {
        String srcTerm = json.getString(TERM);
        GlossaryEntry glossaryEntry = new GlossaryEntry();

        String description = getValueOf(DESC_SYNONYMS, json);
        if (!isBlank(description)) {
            glossaryEntry.setDescription(description);
        }
        String pos = getValueOf(POS_SYNONYMS, json);
        if (!isBlank(pos)) {
            glossaryEntry.setPos(pos);
        }
        glossaryEntry.setQualifiedName(new QualifiedName(qualifiedName));
        glossaryEntry.setSrcLang(srcLang);
        // NOTE(review): unlike description and pos, the external id is set
        // even when absent (as ""); kept as is because callers may rely on it.
        glossaryEntry.setExternalId(getValueOf(EXTERNAL_ID_SYNONYMS, json));

        GlossaryTerm sourceTerm = new GlossaryTerm();
        sourceTerm.setLocale(srcLang);
        sourceTerm.setContent(srcTerm);
        glossaryEntry.getGlossaryTerms().add(sourceTerm);

        // optJSONObject returns null when the key is missing or its value is
        // not a json object; translations are optional.
        JSONObject translations = json.optJSONObject(TRANSLATIONS);
        if (translations != null) {
            Iterator<?> localeKeys = translations.keys();
            while (localeKeys.hasNext()) {
                String locale = (String) localeKeys.next();
                // getString throws on JSON null, so test for null explicitly
                // and skip the locale instead of rejecting the whole file.
                if (translations.isNull(locale)) {
                    continue;
                }
                GlossaryTerm transTerm = new GlossaryTerm();
                transTerm.setLocale(new LocaleId(locale));
                transTerm.setContent(translations.getString(locale));
                glossaryEntry.getGlossaryTerms().add(transTerm);
            }
        }
        return glossaryEntry;
    }

    /*
     * Attempt to return a value from the json data based on a key synonym.
     * Each synonym is tried as written and then upper-cased; the first key
     * present wins. Returns "" when none of the synonyms is present.
     */
    private String getValueOf(String[] synonyms, JSONObject data) {
        for (String option : synonyms) {
            if (data.has(option)) {
                return data.getString(option);
            }
            // Locale.ROOT keeps the upper-casing independent of the JVM
            // default locale (e.g. Turkish dotted/dotless i).
            String upper = option.toUpperCase(Locale.ROOT);
            if (data.has(upper)) {
                return data.getString(upper);
            }
        }
        return "";
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
/*
* Copyright 2017, Red Hat, Inc. and individual contributors
* as indicated by the @author tags. See the copyright.txt file in the
* distribution for a full listing of individual contributors.
*
* This is free software; you can redistribute it and/or modify it
* under the terms of the GNU Lesser General Public License as
* published by the Free Software Foundation; either version 2.1 of
* the License, or (at your option) any later version.
*
* This software is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this software; if not, write to the Free
* Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
* 02110-1301 USA, or see the FSF site: http://www.fsf.org.
*/
package org.zanata.adapter.glossary;

import com.google.common.base.Charsets;
import org.json.JSONArray;
import org.json.JSONObject;
import org.zanata.common.LocaleId;
import org.zanata.rest.dto.GlossaryEntry;
import org.zanata.rest.dto.GlossaryTerm;

import javax.annotation.Nonnull;
import java.io.IOException;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.io.Writer;
import java.util.List;

/**
 * Writes glossary entries as a JSON document of the form
 * <code>{"terms": [ {"id", "term", "description", "pos", "translations"}, ... ]}</code>.
 */
public class GlossaryJsonWriter extends AbstractGlossaryPullWriter {

    public GlossaryJsonWriter() {
    }

    /**
     * Writes the entries to the stream as UTF-8 encoded JSON. The stream is
     * closed when writing finishes.
     *
     * @see #write(Writer, List, LocaleId, List)
     */
    public void write(@Nonnull OutputStream stream,
            @Nonnull final List<GlossaryEntry> entries,
            @Nonnull final LocaleId srcLocale,
            @Nonnull final List<LocaleId> transLocales) throws IOException {
        OutputStreamWriter osWriter =
                new OutputStreamWriter(stream, Charsets.UTF_8);
        write(osWriter, entries, srcLocale, transLocales);
    }

    /**
     * This outputs a json file of given <code>transLocales</code>.
     * <p>
     * The document is built completely before anything is written, so a
     * failure while building does not emit a partial document. The writer is
     * always closed, whether or not writing succeeds.
     *
     * @param fileWriter destination; closed by this method
     * @param entries glossary entries to export
     * @param srcLocale locale whose term becomes the "term" value
     * @param transLocales locales to include under "translations"; a locale
     *        with no term in an entry is omitted for that entry
     * @throws IOException if writing to or closing the writer fails
     */
    public void write(@Nonnull final Writer fileWriter,
            @Nonnull final List<GlossaryEntry> entries,
            @Nonnull final LocaleId srcLocale,
            @Nonnull final List<LocaleId> transLocales) throws IOException {
        try (Writer out = fileWriter) {
            JSONArray entriesOut = new JSONArray();
            for (GlossaryEntry entry : entries) {
                entriesOut.put(toJson(entry, srcLocale, transLocales));
            }
            JSONObject root = new JSONObject();
            root.put("terms", entriesOut);
            out.write(root.toString(2));
        }
    }

    /*
     * Convert one glossary entry to its json form. JSONObject.put drops a key
     * whose value is null, so unset fields are left out of the output.
     */
    private JSONObject toJson(GlossaryEntry entry, LocaleId srcLocale,
            List<LocaleId> transLocales) {
        GlossaryTerm srcTerm =
                getGlossaryTerm(entry.getGlossaryTerms(), srcLocale);
        // Presumably getGlossaryTerm returns null when the entry has no term
        // for the locale (the translation loop below checks for that); treat
        // a missing source term like any other unset field.
        String srcContent = srcTerm == null ? null : srcTerm.getContent();

        JSONObject newEntry = new JSONObject();
        newEntry.put("id", entry.getExternalId());
        newEntry.put("term", srcContent);
        newEntry.put("description", entry.getDescription());
        newEntry.put("pos", entry.getPos());

        JSONObject translations = new JSONObject();
        for (LocaleId transLocale : transLocales) {
            GlossaryTerm transTerm =
                    getGlossaryTerm(entry.getGlossaryTerms(), transLocale);
            if (transTerm != null) {
                // NOTE(review): keys use toJavaName(); confirm that
                // GlossaryJsonReader's new LocaleId(key) accepts this form so
                // exported files can be re-imported.
                translations.put(transTerm.getLocale().toJavaName(),
                        transTerm.getContent());
            }
        }
        newEntry.put("translations", translations);
        return newEntry;
    }

}
Loading

0 comments on commit 4f261f0

Please sign in to comment.