Skip to content

Commit

Permalink
Set of fixes from Viznet + bump a couple of dependencies + CLI tweaks
Browse files Browse the repository at this point in the history
  • Loading branch information
tsegall committed May 18, 2023
1 parent 4b1d384 commit b3ea759
Show file tree
Hide file tree
Showing 14 changed files with 232 additions and 72 deletions.
9 changes: 9 additions & 0 deletions ChangeLog.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,15 @@

## Changes ##

### 14.3.1
- BUG: Fix issue with late switches from DOUBLE_GROUPING to SIGNED_DOUBLE_GROUPING
- BUG: Fix issue with switching from SIGNED_DOUBLE_TRAILING to SIGNED_DOUBLE_TRAILING_GROUPING
- BUG: Fix issue with 0000 as a year (format: yyyy)
- ENH: Bump jackson
- ENH: Bump google phonenumber
- CLI: Defend against null option files
- CLI: Continue processing even if one of the files provided has fatal errors

### 14.3.0
- ENH: I18N - Improve support for COORDINATE.LATITUDE_DECIMAL and COORDINATE.LONGITUDE_DECIMAL (nl-NL)
- ENH: Fix up test suite to support compiling with Java 17
Expand Down
5 changes: 5 additions & 0 deletions cli/src/main/java/com/cobber/fta/driver/Driver.java
Original file line number Diff line number Diff line change
Expand Up @@ -267,6 +267,11 @@ else if (logical instanceof LogicalTypeInfinite)
final Locale activeLocale = cmdLineOptions.locale != null ? cmdLineOptions.locale : Locale.getDefault();
error.printf("ERROR: Unsupported Locale: %s, error: %s%n", activeLocale.toLanguageTag(), e.getMessage());
System.exit(1);
} catch (FTAProcessingException e) {
error.printf("ERROR: Filename: %s, error: %s%n", e.getFilename(), e.getMessage());
} catch (Throwable t) {
error.printf("ERROR: '%s' error: %s%n", filename, t.getMessage());
t.printStackTrace(error);
}
}
}
Expand Down
5 changes: 4 additions & 1 deletion cli/src/main/java/com/cobber/fta/driver/DriverOptions.java
Original file line number Diff line number Diff line change
Expand Up @@ -177,7 +177,10 @@ public void apply(final TextAnalyzer analyzer) throws IOException {
}

public void addFromFile(final String filePath) throws IOException {
addFromStringArray(Files.readString(Paths.get(filePath)).trim().split("[ \n]"));
final String args = Files.readString(Paths.get(filePath)).trim();
if (args.isEmpty())
return;
addFromStringArray(args.split("[ \n]"));
}

private int nextIntegerArg(final String[] args, final int index) {
Expand Down
88 changes: 45 additions & 43 deletions cli/src/main/java/com/cobber/fta/driver/FileProcessor.java
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
import java.io.PrintStream;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.text.MessageFormat;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
Expand Down Expand Up @@ -55,7 +56,7 @@ class FileProcessor {
this.options = new DriverOptions(cmdLineOptions);
}

protected void process() throws IOException, FTAPluginException, FTAUnsupportedLocaleException {
protected void process() throws IOException, FTAPluginException, FTAUnsupportedLocaleException, FTAProcessingException {
if (Files.exists(Paths.get(filename + ".options"))) {
options.addFromFile(filename + ".options");
}
Expand Down Expand Up @@ -86,16 +87,20 @@ protected void process() throws IOException, FTAPluginException, FTAUnsupportedL

settings.setMaxColumns(options.xMaxColumns);

if (options.bulk)
processBulk(settings);
else
processAllFields(settings);

if (options.output)
output.close();
try {
if (options.bulk)
processBulk(settings);
else
processAllFields(settings);
}
catch (Exception e) {
if (options.output)
output.close();
throw e;
}
}

private void processBulk(final CsvParserSettings settings) throws IOException, FTAPluginException, FTAUnsupportedLocaleException {
private void processBulk(final CsvParserSettings settings) throws IOException, FTAPluginException, FTAUnsupportedLocaleException, FTAProcessingException {
String[] header;
int numFields;
TextAnalyzer analyzer;
Expand All @@ -112,10 +117,10 @@ private void processBulk(final CsvParserSettings settings) throws IOException, F

header = parser.getRecordMetadata().headers();

if (header.length != 4) {
error.printf("ERROR: Expected input with four columns (key,fieldName,fieldValue,fieldCount). %d field(s) in input.%n", header.length);
System.exit(1);
}
if (header.length != 4)
throw new FTAProcessingException(filename,
MessageFormat.format("Expected input with four columns (key,fieldName,fieldValue,fieldCount). {0} field(s) in input",
header.length));

numFields = header.length;

Expand Down Expand Up @@ -214,37 +219,36 @@ boolean isFull() {
}
}

private void processAllFields(final CsvParserSettings settings) throws IOException, FTAPluginException, FTAUnsupportedLocaleException {
private void processAllFields(final CsvParserSettings settings) throws IOException, FTAPluginException, FTAUnsupportedLocaleException, FTAProcessingException {
final long startTime = System.currentTimeMillis();
long initializedTime = -1;
long consumedTime = -1;
long resultsTime = -1;
Processor processor = null;
String[] header = null;
int numFields = 0;
long thisRecord = 0;
long rawRecordIndex = 0;

try (BufferedReader in = new BufferedReader(new InputStreamReader(new FileInputStream(new File(filename)), options.charset))) {

// Skip the first <n> lines if requested
if (options.skip != 0) {
for (int i = 0; i < options.skip; i++)
in.readLine();
rawRecordIndex += options.skip;
}

final CsvParser parser = new CsvParser(settings);
parser.beginParsing(in);

header = parser.getRecordMetadata().headers();
if (header == null) {
error.printf("ERROR: Cannot parse header for file '%s'%n", filename);
System.exit(1);
}
if (header == null)
throw new FTAProcessingException(filename, "Cannot parse header");

numFields = header.length;
if (options.col > numFields) {
error.printf("ERROR: Column %d does not exist. Only %d field(s) in input.%n", options.col, numFields);
System.exit(1);
}
if (options.col > numFields)
throw new FTAProcessingException(filename, MessageFormat.format("Column {0} does not exist. Only {1} field(s) in input.", options.col, numFields));

for (int i = 0; i < numFields; i++) {
if ((options.col == -1 || options.col == i) && options.verbose != 0 && options.noAnalysis)
System.out.println(header[i]);
Expand All @@ -256,33 +260,33 @@ private void processAllFields(final CsvParserSettings settings) throws IOExcepti
CircularBuffer buffer = new CircularBuffer(options.trailer + 1);

String[] row;
int processedRecords = 0;

while ((row = parser.parseNext()) != null) {
rawRecordIndex++;
if (row.length != numFields) {
error.printf("ERROR: File: '%s', record %d has %d fields, expected %d, skipping%n",
filename, thisRecord, row.length, numFields);
filename, rawRecordIndex, row.length, numFields);
continue;
}
buffer.add(row);
if (!buffer.isFull())
continue;
row = buffer.get();
thisRecord++;
processedRecords++;
processor.consume(row);
if (thisRecord == options.recordsToProcess) {
if (processedRecords == options.recordsToProcess) {
parser.stopParsing();
break;
}
}
consumedTime = System.currentTimeMillis();
}
catch (FileNotFoundException e) {
error.printf("ERROR: Filename '%s' not found.%n", filename);
System.exit(1);
throw new FTAProcessingException(filename, "File not found");
}
catch (TextParsingException|ArrayIndexOutOfBoundsException e) {
error.printf("ERROR: Filename '%s' Univocity exception. %s%n", filename, e.getMessage());
System.exit(1);
throw new FTAProcessingException(filename, "Univocity exception", e);
}

if (options.noAnalysis)
Expand All @@ -309,11 +313,11 @@ private void processAllFields(final CsvParserSettings settings) throws IOExcepti
if (options.col == -1 || options.col == i)
patterns[i] = Pattern.compile(results[i].getRegExp());

thisRecord = 0;
rawRecordIndex = 0;
String[] row;

while ((row = parser.parseNext()) != null) {
thisRecord++;
rawRecordIndex++;
if (row.length != numFields)
continue;

Expand All @@ -330,7 +334,7 @@ else if (options.verbose != 0)
failures.add(value);
}
}
if (thisRecord == options.recordsToProcess) {
if (rawRecordIndex == options.recordsToProcess) {
parser.stopParsing();
break;
}
Expand All @@ -348,8 +352,8 @@ else if (options.verbose != 0)
for (int i = 0; i < numFields; i++) {
if (options.col == -1 || options.col == i) {
final TextAnalyzer analyzer = processor.getAnalyzer(i);
if (thisRecord != options.recordsToProcess)
analyzer.setTotalCount(thisRecord);
if (rawRecordIndex != options.recordsToProcess)
analyzer.setTotalCount(rawRecordIndex);

result = results[i];
if (options.json) {
Expand All @@ -369,8 +373,7 @@ else if (options.verbose != 0)
try {
output.printf("%s%n", writer.writeValueAsString(pluginDefinition));
} catch (JsonProcessingException e) {
error.printf("ERROR: JsonProcessing exception. %s%n", filename, e.getMessage());
System.exit(1);
throw new FTAProcessingException(filename, "JsonProcessing exception", e);
}
}
}
Expand All @@ -382,12 +385,11 @@ else if (options.verbose != 0)
// Check the counts if we are validating
if (options.validate >= 1) {
final String ret = result.checkCounts();
if (ret != null) {
System.err.printf("Composite: %s, field: %s (%d), failed count validation - %s\n",
if (ret != null)
throw new FTAProcessingException(filename,
MessageFormat.format("Composite: {0}, field: {1} ({2}), failed count validation - {}",
analyzer.getContext().getCompositeName(), analyzer.getContext().getStreamName(),
analyzer.getContext().getStreamIndex(), ret);
System.exit(1);
}
analyzer.getContext().getStreamIndex(), ret));
}

if (options.validate == 2 && matched[i] != result.getMatchCount()) {
Expand Down Expand Up @@ -427,7 +429,7 @@ else if (options.verbose != 0)
sampleCount, confidence*100, usedMemory);
}
error.printf("Execution time (#fields: %d, #records: %d): initialization: %dms, consumption: %dms, results: %dms, total: %dms%n",
numFields, thisRecord, initializedTime - startTime, consumedTime - initializedTime, resultsTime - consumedTime, durationTime);
numFields, rawRecordIndex, initializedTime - startTime, consumedTime - initializedTime, resultsTime - consumedTime, durationTime);
}
}
}
3 changes: 3 additions & 0 deletions core/src/main/java/com/cobber/fta/Keywords.java
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,9 @@ public String get(final String keyTag) {
* @return An integer (1-100) indicating how well the input 'matches' the supplied tag.
*/
public int match(final String input, final String keyTag, MatchStyle matchStyle) {
if (input == null)
return 0;

final String lower = input.trim().toLowerCase(locale);
if (lower.isEmpty())
return 0;
Expand Down
Loading

0 comments on commit b3ea759

Please sign in to comment.