Skip to content

Commit

Permalink
fix(ZNTA-2247): Remove magic numbers for glossary csv columns (#592)
Browse files Browse the repository at this point in the history
* fix(ZNTA-2247): Remove magic numbers for glossary csv columns
Still expects column 0 to be the source locale, but checks that the
PoS and Description columns are named. This prevents garbage in
said fields.
This change also prevents repeatedly setting these fields, only
doing so once per row.
* fix(ZNTA-2247): Allow several synonyms for data fields
Fix invalid test, contradicting the specified behaviour.
* fix(ZNTA-2247): Add docs and improve code as per review
  • Loading branch information
djansen-redhat committed Jan 11, 2018
1 parent 33802a3 commit 89a9521
Show file tree
Hide file tree
Showing 5 changed files with 78 additions and 45 deletions.
Expand Up @@ -22,6 +22,7 @@

import java.io.IOException;
import java.io.Reader;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
Expand All @@ -45,9 +46,15 @@
*
**/
public class GlossaryCSVReader extends AbstractGlossaryPushReader {

private final LocaleId srcLang;
private final static String POS = "POS";
private final static String[] POSSYNONYMS =
{"POS", "PARTOFSPEECH", "PART OF SPEECH"};
private final static String DESC = "DESCRIPTION";
private final static String[] DESCSYNONYMS =
{"DESCRIPTION", "DESC", "DEFINITION"};
private final static int TERM = 0;

/**
* This class will close the reader
Expand All @@ -68,7 +75,7 @@ public Map<LocaleId, List<GlossaryEntry>> extractGlossary(Reader reader,
Map<Integer, LocaleId> localeColMap =
setupLocalesMap(records, descriptionMap);

LocaleId srcLocale = localeColMap.get(0);
LocaleId srcLocale = localeColMap.get(TERM);

if (!srcLang.equals(srcLocale)) {
throw new RuntimeException("input source language '" + srcLang
Expand All @@ -77,20 +84,23 @@ public Map<LocaleId, List<GlossaryEntry>> extractGlossary(Reader reader,
}
Map<LocaleId, List<GlossaryEntry>> results = Maps.newHashMap();

for (int i = 1; i < records.size(); i++) {
CSVRecord row = records.get(i);
for (int x = 1; x < row.size()
&& localeColMap.containsKey(x); x++) {

GlossaryEntry entry = new GlossaryEntry();
entry.setSrcLang(srcLocale);
for (CSVRecord row : records.subList(1, records.size())) {
GlossaryEntry entry = new GlossaryEntry();
entry.setSrcLang(srcLocale);
if (descriptionMap.containsKey(POS)) {
entry.setPos(row.get(descriptionMap.get(POS)));
}
if (descriptionMap.containsKey(DESC)) {
entry.setDescription(row.get(descriptionMap.get(DESC)));
entry.setQualifiedName(new QualifiedName(qualifiedName));
}
entry.setQualifiedName(new QualifiedName(qualifiedName));

for (int x = 1; x < row.size()
&& localeColMap.containsKey(x); x++) {

GlossaryTerm srcTerm = new GlossaryTerm();
srcTerm.setLocale(srcLocale);
srcTerm.setContent(row.get(0));
srcTerm.setContent(row.get(TERM));

entry.getGlossaryTerms().add(srcTerm);

Expand All @@ -100,8 +110,8 @@ public Map<LocaleId, List<GlossaryEntry>> extractGlossary(Reader reader,
GlossaryTerm transTerm = new GlossaryTerm();
transTerm.setLocale(transLocaleId);
transTerm.setContent(transContent);

entry.getGlossaryTerms().add(transTerm);

List<GlossaryEntry> list = results.get(transLocaleId);
if (list == null) {
list = Lists.newArrayList();
Expand All @@ -117,8 +127,9 @@ public Map<LocaleId, List<GlossaryEntry>> extractGlossary(Reader reader,
}

/**
* Basic validation of CVS file format - At least 2 rows in the CVS file -
* Empty content validation - All row must have the same column count
* Basic validation of CSV file format - At least 2 rows in the CSV file -
* Empty content validation - All rows must have the same column count.
* @param records list of records representing a csv file
*/
private void validateCSVEntries(@Nonnull List<CSVRecord> records) {
if (records.isEmpty()) {
Expand All @@ -138,33 +149,46 @@ private void validateCSVEntries(@Nonnull List<CSVRecord> records) {
}

/**
* Parser reads from all from first row and exclude column from description
* map. Format of CVS: {source locale},{locale},{locale}...,pos,description
* Parser reads from all from first row and up to first recognised
* non-locale column mapping.
* Format of CSV: {source locale},{locale},{locale}...,pos,description
* @param records list of records representing a csv file
* @param descriptionMap header locations of non-locale columns
*/
private Map<Integer, LocaleId> setupLocalesMap(List<CSVRecord> records,
Map<String, Integer> descriptionMap) {
Map<Integer, LocaleId> localeColMap = new HashMap<Integer, LocaleId>();
Map<Integer, LocaleId> localeColMap = new HashMap<>();
CSVRecord headerRow = records.get(0);
for (int row = 0; row <= headerRow.size()
&& !descriptionMap.containsValue(row); row++) {
for (int column = 0; column < headerRow.size()
&& !descriptionMap.containsValue(column); column++) {
LocaleId locale =
new LocaleId(StringUtils.trim(headerRow.get(row)));
localeColMap.put(row, locale);
new LocaleId(StringUtils.trim(headerRow.get(column)));
localeColMap.put(column, locale);
}
return localeColMap;
}

/**
* Read last 2 columns in CSV:
* {source locale},{locale},{locale}...,pos,description
* Locate trailing data columns in CSV:
* {source locale},{locale},{locale}...,pos,description,...
*
* @param records
* @param records list of records representing a csv file
*/
private Map<String, Integer> setupDescMap(List<CSVRecord> records) {
Map<String, Integer> descMap = new HashMap<String, Integer>();
Map<String, Integer> descMap = new HashMap<>();
CSVRecord headerRow = records.get(0);
descMap.put(POS, headerRow.size() - 2);
descMap.put(DESC, headerRow.size() - 1);
for (int position = 1; position < headerRow.size(); position++) {
String headerItem = headerRow.get(position);
if (headerItem.trim().length() == 0) {
continue;
}
if (Arrays.asList(POSSYNONYMS).contains(headerItem.toUpperCase())) {
descMap.put(POS, position);
} else if (Arrays.asList(DESCSYNONYMS)
.contains(headerItem.toUpperCase())) {
descMap.put(DESC, position);
}
}
return descMap;
}
}
Expand Up @@ -57,20 +57,18 @@ public void testNotMatchingSource() throws IOException {
}

@Test
public void extractGlossaryTest1() throws IOException {

public void extractGlossary() throws IOException {
GlossaryCSVReader reader = new GlossaryCSVReader(LocaleId.EN_US);
int entryPerLocale = 2;

File sourceFile =
new File("src/test/resources/glossary/translate1.csv");

Reader inputStreamReader =
new InputStreamReader(new FileInputStream(sourceFile), "UTF-8");
BufferedReader br = new BufferedReader(inputStreamReader);
BufferedReader bufferedReader = new BufferedReader(
new InputStreamReader(new FileInputStream(sourceFile), "UTF-8"));

Map<LocaleId, List<GlossaryEntry>> glossaries =
reader.extractGlossary(br,
reader.extractGlossary(bufferedReader,
GlossaryResource.GLOBAL_QUALIFIED_NAME);

assertThat(glossaries.keySet()).hasSize(2);
Expand All @@ -79,29 +77,40 @@ public void extractGlossaryTest1() throws IOException {
for (Map.Entry<LocaleId, List<GlossaryEntry>> entry : glossaries
.entrySet()) {
assertThat(entry.getValue()).hasSize(entryPerLocale);
assertEntry(entry);
}
}

@Test
public void extractGlossaryTest2() throws IOException {
public void extractGlossaryWithOtherHeaders() throws IOException {
GlossaryCSVReader reader = new GlossaryCSVReader(LocaleId.EN_US);
int entryPerLocale = 2;

File sourceFile =
new File("src/test/resources/glossary/translate2.csv");

Reader inputStreamReader =
new InputStreamReader(new FileInputStream(sourceFile), "UTF-8");
BufferedReader br = new BufferedReader(inputStreamReader);
BufferedReader bufferedReader = new BufferedReader(
new InputStreamReader(new FileInputStream(sourceFile), "UTF-8"));

Map<LocaleId, List<GlossaryEntry>> glossaries =
reader.extractGlossary(br, GlossaryResource.GLOBAL_QUALIFIED_NAME);
Map<LocaleId, List<GlossaryEntry>> glossaries = reader.extractGlossary(
bufferedReader, GlossaryResource.GLOBAL_QUALIFIED_NAME);

assertThat(glossaries.keySet()).hasSize(2);
assertThat(glossaries.keySet()).contains(LocaleId.ES, new LocaleId("zh"));

for (Map.Entry<LocaleId, List<GlossaryEntry>> entry : glossaries
.entrySet()) {
assertThat(entry.getValue()).hasSize(entryPerLocale);
assertEntry(entry);
}
}

private void assertEntry(Map.Entry<LocaleId, List<GlossaryEntry>> entry) {
assertThat(entry.getValue().get(0).getDescription()
.startsWith("testing of hello"));
assertThat(entry.getValue().get(1).getDescription()
.startsWith("testing of morning"));
assertThat(entry.getValue().get(0).getPos()).isEqualTo("verb");
assertThat(entry.getValue().get(1).getPos()).isEqualTo("noun");
}
}
@@ -1,3 +1,3 @@
"en-US","es","zh","pos","description"
"Hello","Hola","您好","noun","testing of hello csv"
"Hello","Hola","您好","verb","testing of hello csv"
"Morning","mañana","上午","noun","testing of morning csv"
@@ -1,3 +1,3 @@
"en-US","es","zh","description1","description2"
"Hello","Hola","您好","testing of hello csv1","testing of hello csv2"
"Morning","mañana","上午","testing of morning csv1","testing of morning csv2"
"en-US","es","zh","definition","synonyms","partofspeech"
"Hello","Hola","您好","testing of hello csv","Hi","verb"
"Morning","mañana","上午","testing of morning csv","Dawn,AM","noun"
6 changes: 3 additions & 3 deletions docs/user-guide/glossary/upload-glossaries.md
Expand Up @@ -10,8 +10,8 @@ See [glossary roles and permission](/user-guide/glossary/glossary-roles-permissi
##### header row

1. locale columns: The locale header column must contain a valid locale code. At least one locale code is required for the glossary. First locale column will be used as source.
1. part-of-speech column (optional): The part-of-speech header column, when included, should contain only one value: pos.
1. description column (optional): The description header column, when included, should contain only one value: description.
1. part-of-speech column (optional): The part-of-speech header column, when included, should be only one of: `pos`, `partofspeech` or `part of speech`.
1. description column (optional): The description header column, when included, should be only one of: `desc`, `description` or `definition`.


##### data rows
Expand All @@ -27,7 +27,7 @@ See [glossary roles and permission](/user-guide/glossary/glossary-roles-permissi
### Upload via Web UI

1. Login into Zanata
1. To upload to the **system glossary**, click `Glossary` menu.
1. To upload to the **system glossary**, click `Glossary` menu.
1. To upload to a **project glossary**, navigate to the project page, click on "Glossary" in the project page.
1. Click on `Import Glossary` on top right corner of the page.
<figure>
Expand Down

0 comments on commit 89a9521

Please sign in to comment.