Skip to content

Commit

Permalink
Option to ignore extraneous input characters
Browse files Browse the repository at this point in the history
  • Loading branch information
MikeEdgar committed Oct 6, 2020
1 parent 0281246 commit a3c7f20
Show file tree
Hide file tree
Showing 11 changed files with 179 additions and 33 deletions.
2 changes: 1 addition & 1 deletion pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@

<groupId>io.xlate</groupId>
<artifactId>staedi</artifactId>
<version>1.12.1-SNAPSHOT</version>
<version>1.13.0-SNAPSHOT</version>

<name>StAEDI : Streaming API for EDI for Java</name>
<description>Streaming API for EDI for Java</description>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ public StaEDIInputFactory() {
supportedProperties.add(EDI_VALIDATE_CONTROL_STRUCTURE);
supportedProperties.add(EDI_VALIDATE_CONTROL_CODE_VALUES);
supportedProperties.add(XML_DECLARE_TRANSACTION_XMLNS);
supportedProperties.add(EDI_IGNORE_EXTRANEOUS_CHARACTERS);
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ public StaEDIStreamReader(
this.properties = new HashMap<>(properties);
this.reporter = reporter;
this.proxy = new ProxyEventHandler(location, this.controlSchema);
this.lexer = new Lexer(stream, charset, proxy, location);
this.lexer = new Lexer(stream, charset, proxy, location, ignoreExtraneousCharacters());
}

private void ensureOpen() {
Expand Down Expand Up @@ -472,6 +472,10 @@ boolean useInternalControlSchema() {
return getBooleanProperty(EDIInputFactory.EDI_VALIDATE_CONTROL_STRUCTURE, true);
}

/**
 * Looks up the {@link EDIInputFactory#EDI_IGNORE_EXTRANEOUS_CHARACTERS}
 * property for this reader.
 *
 * @return the value resolved by {@code getBooleanProperty}, with
 *         {@code false} supplied as the default
 */
boolean ignoreExtraneousCharacters() {
    final boolean defaultSetting = false;
    return getBooleanProperty(EDIInputFactory.EDI_IGNORE_EXTRANEOUS_CHARACTERS, defaultSetting);
}

boolean getBooleanProperty(String propertyName, boolean defaultValue) {
Object property = properties.get(propertyName);

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -174,10 +174,16 @@ public class CharacterSet {

private final CharacterClass[] list;
private final Map<Integer, CharacterClass> auxilary;
private final boolean extraneousIgnored;

public CharacterSet() {
list = Arrays.copyOf(prototype, prototype.length);
auxilary = new TreeMap<>();
this(false);
}

/**
 * Creates a character set whose class table starts as a copy of the
 * prototype table and whose auxiliary map starts empty.
 *
 * @param extraneousIgnored the result {@link #isIgnored(int)} reports for
 *        characters classified as CONTROL, INVALID, or WHITESPACE
 */
public CharacterSet(boolean extraneousIgnored) {
    this.extraneousIgnored = extraneousIgnored;
    this.auxilary = new TreeMap<>();
    // Private copy so per-instance changes never touch the prototype.
    this.list = prototype.clone();
}

public CharacterClass getClass(int character) {
Expand Down Expand Up @@ -209,6 +215,17 @@ public boolean isDelimiter(int character) {
}
}

/**
 * Tells whether the given character should be dropped from the input.
 * Only characters classified as CONTROL, INVALID, or WHITESPACE are
 * candidates, and they are dropped only when this set was created with
 * {@code extraneousIgnored} enabled.
 *
 * @param character the character to classify
 * @return {@code true} if the character is a candidate and ignoring is
 *         enabled, otherwise {@code false}
 */
public boolean isIgnored(int character) {
    final CharacterClass clazz = getClass(character);
    final boolean candidate;

    switch (clazz) {
    case CONTROL:
    case INVALID:
    case WHITESPACE:
        candidate = true;
        break;
    default:
        candidate = false;
        break;
    }

    return candidate && extraneousIgnored;
}

public boolean isCharacterClass(int character, CharacterClass clazz) {
return getClass(character).equals(clazz);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -146,17 +146,25 @@ public String[] getVersion() {

@Override
public boolean appendHeader(CharacterSet characters, char value) {
boolean proceed = true;

if (++index == 0) {
header = new StringBuilder();
}

header.append(value);

if (UNB.equals(headerTag)) {
return processInterchangeHeader(characters, value);
if (characters.isIgnored(value)) {
index--;
} else {
header.append(value);
proceed = processInterchangeHeader(characters, value);
}
} else {
header.append(value);
proceed = processServiceStringAdvice(characters, value);
}

return processServiceStringAdvice(characters, value);
return proceed;
}

boolean processInterchangeHeader(CharacterSet characters, char value) {
Expand All @@ -177,6 +185,8 @@ boolean processInterchangeHeader(CharacterSet characters, char value) {
}

boolean processServiceStringAdvice(CharacterSet characters, char value) {
boolean proceed = true;

switch (index) {
case 3:
componentDelimiter = value;
Expand Down Expand Up @@ -206,10 +216,12 @@ boolean processServiceStringAdvice(CharacterSet characters, char value) {
}

if (index > EDIFACT_UNA_LENGTH) {
if (unbStart > -1 && (index - unbStart) > 3) {
if (characters.isIgnored(value)) {
header.deleteCharAt(index--);
} else if (isIndexBeyondUNBFirstElement()) {
if (value == elementDelimiter) {
rejected = !initialize(characters);
return isConfirmed();
proceed = isConfirmed();
}
} else if (value == 'B') {
CharSequence un = header.subSequence(index - 2, index);
Expand All @@ -218,15 +230,23 @@ boolean processServiceStringAdvice(CharacterSet characters, char value) {
unbStart = index - 2;
} else {
// Some other segment / element?
return false;
proceed = false;
}
} else if (unbStart < 0 && value == elementDelimiter) {
} else if (isUnexpectedSegmentDetected(value)) {
// Some other segment / element?
return false;
proceed = false;
}
}

return true;
return proceed;
}

/**
 * Checks whether a UNB start position has been recorded and the current
 * index lies more than three positions past it.
 *
 * @return {@code true} when {@code unbStart} is non-negative and
 *         {@code index - unbStart} exceeds 3
 */
boolean isIndexBeyondUNBFirstElement() {
    if (unbStart < 0) {
        // No UNB start has been recorded.
        return false;
    }

    return (index - unbStart) > 3;
}

/**
 * Checks whether an element delimiter was read before any UNB start
 * position was recorded.
 *
 * @param value the character just read
 * @return {@code true} when {@code unbStart} is negative and
 *         {@code value} equals the element delimiter
 */
boolean isUnexpectedSegmentDetected(int value) {
    if (unbStart >= 0) {
        return false;
    }

    return value == elementDelimiter;
}

void setCharacterClass(CharacterSet characters, CharacterClass charClass, char value, boolean allowSpace) {
Expand Down
25 changes: 16 additions & 9 deletions src/main/java/io/xlate/edi/internal/stream/tokenization/Lexer.java
Original file line number Diff line number Diff line change
Expand Up @@ -62,8 +62,7 @@ private interface Notifier {
private ByteBuffer readByteBuf = ByteBuffer.allocate(4);

private final StaEDIStreamLocation location;

private CharacterSet characters = new CharacterSet();
private final CharacterSet characters;
private CharBuffer buffer = CharBuffer.allocate(4096);
private Dialect dialect;

Expand All @@ -79,7 +78,7 @@ private interface Notifier {
private Notifier en;
private Notifier bn;

public Lexer(InputStream stream, Charset charset, EventHandler handler, StaEDIStreamLocation location) {
public Lexer(InputStream stream, Charset charset, EventHandler handler, StaEDIStreamLocation location, boolean extraneousIgnored) {
if (stream.markSupported()) {
this.stream = stream;
} else {
Expand All @@ -89,6 +88,7 @@ public Lexer(InputStream stream, Charset charset, EventHandler handler, StaEDISt
this.decoder = charset.newDecoder();

this.location = location;
this.characters = new CharacterSet(extraneousIgnored);

isn = (notifyState, start, length) -> {
handler.interchangeBegin(dialect);
Expand Down Expand Up @@ -206,13 +206,17 @@ public void parse() throws IOException, EDIException {
case TRAILER_TAG_N:
case TRAILER_TAG_Z:
case ELEMENT_DATA:
case ELEMENT_INVALID_DATA:
case TRAILER_ELEMENT_DATA:
buffer.put((char) input);
break;
case HEADER_TAG_1:
case HEADER_TAG_2:
case HEADER_TAG_3:
case ELEMENT_INVALID_DATA:
if (!characters.isIgnored(input)) {
buffer.put((char) input);
}
break;
case HEADER_TAG_1: // U - When UNA is present
case HEADER_TAG_2: // N - When UNA is present
case HEADER_TAG_3: // B - When UNA is present
handleStateHeaderTag(input);
break;
case DATA_RELEASE:
Expand All @@ -222,6 +226,7 @@ public void parse() throws IOException, EDIException {
handleStateElementDataBinary();
break;
case INTERCHANGE_CANDIDATE:
// ISA, UNA, or UNB was found
handleStateInterchangeCandidate(input);
break;
case HEADER_DATA:
Expand Down Expand Up @@ -272,7 +277,9 @@ public void parse() throws IOException, EDIException {
eventsReady = nextEvent();
break;
default:
if (clazz != CharacterClass.INVALID) {
if (characters.isIgnored(input)) {
state = previous;
} else if (clazz != CharacterClass.INVALID) {
throw invalidStateError();
} else {
throw error(EDIException.INVALID_CHARACTER);
Expand Down Expand Up @@ -380,7 +387,7 @@ void handleStateHeaderData(int input) throws EDIException {
case RELEASE_CHARACTER:
break;
default:
if (dialect.getDecimalMark() != input) {
if (dialect.getDecimalMark() != input && !characters.isIgnored(input)) {
buffer.put((char) input);
}
break;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -157,7 +157,16 @@ public boolean appendHeader(CharacterSet characters, char value) {
elementDelimiter = value;
characters.setClass(elementDelimiter, CharacterClass.ELEMENT_DELIMITER);
break;
case X12_REPEAT_OFFSET:
case X12_COMPONENT_OFFSET:
case X12_SEGMENT_OFFSET:
break;
default:
if (characters.isIgnored(value)) {
// Discard control character if not used as a delimiter
index--;
return true;
}
break;
}

Expand Down
8 changes: 8 additions & 0 deletions src/main/java/io/xlate/edi/stream/EDIInputFactory.java
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,14 @@ public abstract class EDIInputFactory extends PropertySupport {
*/
public static final String XML_DECLARE_TRANSACTION_XMLNS = "io.xlate.edi.stream.XML_DECLARE_TRANSACTION_XMLNS";

/**
 * When set to true, non-graphical control characters will be ignored in the
 * EDI input stream. This includes characters ranging from 0x00 through 0x1F
 * and 0x7F. When the property is not set, the stream reader uses
 * {@code false}.
 *
 * @since 1.13
 */
public static final String EDI_IGNORE_EXTRANEOUS_CHARACTERS = "io.xlate.edi.stream.EDI_IGNORE_EXTRANEOUS_CHARACTERS";

/**
* Create a new instance of the factory. This static method creates a new
* factory instance.
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
/*******************************************************************************
* Copyright 2017 xlate.io LLC, http://www.xlate.io
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
* use this file except in compliance with the License. You may obtain a copy of
* the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations under
* the License.
******************************************************************************/
package io.xlate.edi.internal.stream;

import static org.junit.jupiter.api.Assertions.assertEquals;

import java.util.ArrayList;
import java.util.List;
import java.util.logging.Level;
import java.util.logging.Logger;

import org.junit.jupiter.api.Test;

import io.xlate.edi.stream.EDIInputFactory;
import io.xlate.edi.stream.EDIStreamReader;

/**
 * Reads a sample EDI resource with
 * {@link EDIInputFactory#EDI_IGNORE_EXTRANEOUS_CHARACTERS} enabled and
 * checks that no validation errors or exceptions are produced.
 */
class StaEDIStreamReaderExtraneousCharsTest {

    // A logger is shared, immutable state: declare it as a true constant.
    private static final Logger LOGGER = Logger.getGlobal();

    /**
     * Original issue: https://github.com/xlate/staedi/issues/128
     *
     * NOTE(review): the method name mentions Issue122 while the linked
     * issue and the resource path both refer to issue 128; the name is
     * kept unchanged so existing test filters and reports still match.
     *
     * @throws Exception if closing the reader fails
     */
    @Test
    void testValidatorLookAhead_Issue122() throws Exception {
        EDIInputFactory factory = EDIInputFactory.newFactory();
        factory.setProperty(EDIInputFactory.EDI_IGNORE_EXTRANEOUS_CHARACTERS, "true");
        EDIStreamReader reader = factory.createEDIStreamReader(getClass().getResourceAsStream("/x12/issue128/ts210_80char.edi"));
        List<Object> unexpected = new ArrayList<>();

        try {
            while (reader.hasNext()) {
                switch (reader.next()) {
                case SEGMENT_ERROR:
                case ELEMENT_OCCURRENCE_ERROR:
                case ELEMENT_DATA_ERROR:
                    LOGGER.log(Level.WARNING, () -> reader.getErrorType() + ", " + reader.getLocation() + ", data: [" + reader.getText() + "]");
                    unexpected.add(reader.getErrorType());
                    break;
                default:
                    break;
                }
            }
        } catch (Exception e) {
            // Record the failure so the assertion below reports it, and log
            // the stack trace through the logger rather than stderr.
            unexpected.add(e);
            LOGGER.log(Level.SEVERE, "Unexpected exception while reading the EDI stream", e);
        } finally {
            reader.close();
        }

        assertEquals(0, unexpected.size(), () -> "Expected none, but got: " + unexpected);
    }

}
Loading

0 comments on commit a3c7f20

Please sign in to comment.