diff --git a/src/main/java/io/xlate/edi/internal/stream/StaEDIStreamWriter.java b/src/main/java/io/xlate/edi/internal/stream/StaEDIStreamWriter.java index d2c901c2..46af46ae 100644 --- a/src/main/java/io/xlate/edi/internal/stream/StaEDIStreamWriter.java +++ b/src/main/java/io/xlate/edi/internal/stream/StaEDIStreamWriter.java @@ -315,7 +315,7 @@ private void write(int output) throws EDIStreamException { throw new EDIStreamException(String.format("Invalid character: 0x%04X", output), location); } - state = state.transition(clazz); + state = State.transition(state, dialect, clazz); switch (state) { case HEADER_X12_I: // I(SA) diff --git a/src/main/java/io/xlate/edi/internal/stream/tokenization/Dialect.java b/src/main/java/io/xlate/edi/internal/stream/tokenization/Dialect.java index 8bf7e9e4..5cdae9c5 100644 --- a/src/main/java/io/xlate/edi/internal/stream/tokenization/Dialect.java +++ b/src/main/java/io/xlate/edi/internal/stream/tokenization/Dialect.java @@ -19,6 +19,9 @@ public abstract class Dialect { + protected final String[] transactionVersion; + protected final int dialectStateCode; + protected char segmentDelimiter; protected char segmentTagTerminator = '\0'; protected char elementDelimiter; @@ -28,16 +31,28 @@ public abstract class Dialect { protected char elementRepeater; protected boolean initialized; - protected boolean rejected; + protected String rejectionMessage; protected String transactionType; - protected final String[] transactionVersion; protected String transactionVersionString; - protected Dialect(String[] initialTransactionVersion) { + protected Dialect(int dialectStateCode, String[] initialTransactionVersion) { + this.dialectStateCode = dialectStateCode; this.transactionVersion = initialTransactionVersion; } + public static String getStandard(Dialect dialect) { + return dialect != null ? dialect.getStandard() : "UNKNOWN"; + } + + public int getDialectStateCode() { + return dialectStateCode; + } + + public State getTagSearchState() { + return State.TAG_SEARCH; + } + public char getComponentElementSeparator() { return componentDelimiter; } @@ -75,7 +90,11 @@ public boolean isConfirmed() { } public boolean isRejected() { - return rejected; + return rejectionMessage != null; + } + + public String getRejectionMessage() { + return rejectionMessage; } /** diff --git a/src/main/java/io/xlate/edi/internal/stream/tokenization/EDIFACTDialect.java b/src/main/java/io/xlate/edi/internal/stream/tokenization/EDIFACTDialect.java index 037161be..cb04d0e0 100644 --- a/src/main/java/io/xlate/edi/internal/stream/tokenization/EDIFACTDialect.java +++ b/src/main/java/io/xlate/edi/internal/stream/tokenization/EDIFACTDialect.java @@ -46,7 +46,7 @@ public class EDIFACTDialect extends Dialect { private static final int TX_ASSIGNED_CODE = 3; EDIFACTDialect(String headerTag) { - super(new String[4]); + super(State.DialectCode.EDIFACT, new String[4]); componentDelimiter = DFLT_COMPONENT_ELEMENT_SEPARATOR; elementDelimiter = DFLT_DATA_ELEMENT_SEPARATOR; decimalMark = DFLT_DECIMAL_MARK; @@ -100,6 +100,7 @@ boolean initialize(CharacterSet characters) { characters.setClass(segmentDelimiter, CharacterClass.SEGMENT_DELIMITER); initialized = true; } else { + rejectionMessage = "Unable to obtain version from EDIFACT header segment"; initialized = false; } @@ -138,6 +139,14 @@ public boolean isServiceAdviceSegment(CharSequence tag) { return UNA.contentEquals(tag); } + @Override + public State getTagSearchState() { + if (isServiceAdviceSegment(this.headerTag)) { + return State.HEADER_EDIFACT_UNB_SEARCH; + } + return State.TAG_SEARCH; + } + @Override public String getStandard() { return Standards.EDIFACT; @@ -181,7 +190,7 @@ boolean processInterchangeHeader(CharacterSet characters, char value) { */ characters.setClass(elementDelimiter, CharacterClass.ELEMENT_DELIMITER); } else if (segmentDelimiter == value) { - rejected = !initialize(characters); + initialize(characters); return isConfirmed(); } @@ -224,7 +233,7 @@ boolean processServiceStringAdvice(CharacterSet characters, char value) { header.deleteCharAt(index--); } else if (isIndexBeyondUNBFirstElement()) { if (value == elementDelimiter || value == segmentDelimiter) { - rejected = !initialize(characters); + initialize(characters); proceed = isConfirmed(); } } else if (value == 'B') { diff --git a/src/main/java/io/xlate/edi/internal/stream/tokenization/Lexer.java b/src/main/java/io/xlate/edi/internal/stream/tokenization/Lexer.java index 7d667280..c0f2b808 100644 --- a/src/main/java/io/xlate/edi/internal/stream/tokenization/Lexer.java +++ b/src/main/java/io/xlate/edi/internal/stream/tokenization/Lexer.java @@ -184,13 +184,13 @@ public void parse() throws IOException, EDIException { CharacterClass clazz = characters.getClass(input); previous = state; - state = state.transition(clazz); - LOGGER.finer(() -> "State " + previous + "(" + clazz + ") -> " + state); + state = State.transition(state, dialect, clazz); + LOGGER.finer(() -> String.format("%s + (%s, '%s', %s) -> %s", previous, Dialect.getStandard(dialect), (char) input, clazz, state)); switch (state) { case INITIAL: case TAG_SEARCH: - case HEADER_TAG_SEARCH: + case HEADER_EDIFACT_UNB_SEARCH: break; case HEADER_X12_I: case HEADER_X12_S: @@ -219,11 +219,12 @@ public void parse() throws IOException, EDIException { buffer.put((char) input); } break; - case HEADER_TAG_1: // U - When UNA is present - case HEADER_TAG_2: // N - When UNA is present - case HEADER_TAG_3: // B - When UNA is present + case HEADER_EDIFACT_UNB_1: // U - When UNA is present + case HEADER_EDIFACT_UNB_2: // N - When UNA is present + case HEADER_EDIFACT_UNB_3: // B - When UNA is present handleStateHeaderTag(input); break; + case HEADER_RELEASE: case DATA_RELEASE: // Skip this character - next character will be literal value break; @@ -384,7 +385,7 @@ void handleStateHeaderData(int input) throws EDIException { switch (characters.getClass(input)) { case SEGMENT_DELIMITER: closeSegment(); - state = State.HEADER_TAG_SEARCH; + state = dialect.getTagSearchState(); break; case SEGMENT_TAG_DELIMITER: case ELEMENT_DELIMITER: @@ -400,6 +401,15 @@ void handleStateHeaderData(int input) throws EDIException { } } + /** + * Determine if the input text has been confirmed by the dialect as being + * initially accepted. If so, transition to the state given by the + * confirmed parameter. + * + * @param confirmed the state to transition to if the dialect is confirmed. + * @return true if the dialect is confirmed, otherwise false. + * @throws EDIException when the input text has been rejected by the dialect. + */ private boolean dialectConfirmed(State confirmed) throws EDIException { if (dialect.isConfirmed()) { state = confirmed; @@ -408,9 +418,10 @@ private boolean dialectConfirmed(State confirmed) throws EDIException { } else if (dialect.isRejected()) { buffer.clear(); clearQueues(); + String rejectionMessage = dialect.getRejectionMessage(); dialect = null; state = State.INITIAL; - throw error(EDIException.INVALID_STATE, "Invalid header segment"); + throw error(EDIException.INVALID_STATE, rejectionMessage); } return false; diff --git a/src/main/java/io/xlate/edi/internal/stream/tokenization/State.java b/src/main/java/io/xlate/edi/internal/stream/tokenization/State.java index c034f0c6..62ff9bc4 100644 --- a/src/main/java/io/xlate/edi/internal/stream/tokenization/State.java +++ b/src/main/java/io/xlate/edi/internal/stream/tokenization/State.java @@ -15,72 +15,111 @@ ******************************************************************************/ package io.xlate.edi.internal.stream.tokenization; +import java.util.Objects; + /* * Processing states. */ public enum State { - INVALID(-1), - - INITIAL(0), - INTERCHANGE_END(0), - - HEADER_X12_I(1), - HEADER_X12_S(2), - TRAILER_X12_I(3), - TRAILER_X12_E(4), - TRAILER_X12_A(5), - - HEADER_EDIFACT_U(6), - HEADER_EDIFACT_N(7), - TRAILER_EDIFACT_U(8), - TRAILER_EDIFACT_N(9), - TRAILER_EDIFACT_Z(10), - - HEADER_TRADACOMS_S(11), - HEADER_TRADACOMS_T(12), - TRAILER_TRADACOMS_E(13), - TRAILER_TRADACOMS_N(14), - TRAILER_TRADACOMS_D(15), - - // start at last header_tag + 1 - INTERCHANGE_CANDIDATE(16), // IC - HEADER_DATA(16), // HD - HEADER_SEGMENT_BEGIN(16), - HEADER_INVALID_DATA(16), // HV - HEADER_COMPONENT_END(16), // HC - HEADER_ELEMENT_END(16), // HE - HEADER_SEGMENT_END(16), - - HEADER_TAG_SEARCH(17), - HEADER_TAG_1(18), - HEADER_TAG_2(19), - HEADER_TAG_3(20), - - TAG_SEARCH(21), - SEGMENT_END(21), - SEGMENT_EMPTY(21), - TAG_1(22), - // TODO: Clear ELEMT for TAG_2 and TAG_3 states for TRADACOMS (requires dialect-specific transition tables) - TAG_2(23), - TAG_3(24), - - SEGMENT_BEGIN(25), - ELEMENT_DATA(25), - ELEMENT_INVALID_DATA(25), - COMPONENT_END(25), - ELEMENT_REPEAT(25), - ELEMENT_END(25), - - // TODO: Data Release (DR) needs a header equivalent - DATA_RELEASE(26), - - ELEMENT_DATA_BINARY(27), - ELEMENT_END_BINARY(28), - - TRAILER_BEGIN(29), - TRAILER_ELEMENT_DATA(29), - TRAILER_ELEMENT_END(29); + // Initial States + INVALID(DialectCode.UNKNOWN, Category.INVALID), + INITIAL(DialectCode.UNKNOWN, Category.INITIAL), + INTERCHANGE_END(DialectCode.UNKNOWN, Category.INITIAL), + HEADER_EDIFACT_U(DialectCode.UNKNOWN, Category.EDIFACT_1), + HEADER_EDIFACT_N(DialectCode.UNKNOWN, Category.EDIFACT_2), + HEADER_TRADACOMS_S(DialectCode.UNKNOWN, Category.TRADACOMS_1), + HEADER_TRADACOMS_T(DialectCode.UNKNOWN, Category.TRADACOMS_2), + HEADER_X12_I(DialectCode.UNKNOWN, Category.X12_1), + HEADER_X12_S(DialectCode.UNKNOWN, Category.X12_2), + + // Common States (shared among dialects) + INTERCHANGE_CANDIDATE(Category.HEADER), // IC + HEADER_DATA(Category.HEADER), // HD + HEADER_SEGMENT_BEGIN(Category.HEADER), + HEADER_INVALID_DATA(Category.HEADER), // HV + HEADER_COMPONENT_END(Category.HEADER), // HC + HEADER_ELEMENT_END(Category.HEADER), // HE + HEADER_SEGMENT_END(Category.HEADER), + HEADER_RELEASE(Category.HEADER_RELEASE), // HR + TAG_SEARCH(Category.TAG_SEARCH), + SEGMENT_END(Category.TAG_SEARCH), + SEGMENT_EMPTY(Category.TAG_SEARCH), + TAG_1(Category.TAG_1), + TAG_2(Category.TAG_2), + TAG_3(Category.TAG_3), + SEGMENT_BEGIN(Category.ELEMENT_PROCESS), + ELEMENT_DATA(Category.ELEMENT_PROCESS), + ELEMENT_INVALID_DATA(Category.ELEMENT_PROCESS), + COMPONENT_END(Category.ELEMENT_PROCESS), + ELEMENT_REPEAT(Category.ELEMENT_PROCESS), + ELEMENT_END(Category.ELEMENT_PROCESS), + DATA_RELEASE(Category.DATA_RELEASE), + ELEMENT_DATA_BINARY(Category.DATA_BINARY), + ELEMENT_END_BINARY(Category.DATA_BINARY_END), + TRAILER_BEGIN(Category.TRAILER), + TRAILER_ELEMENT_DATA(Category.TRAILER), + TRAILER_ELEMENT_END(Category.TRAILER), + + // EDIFACT + TRAILER_EDIFACT_U(Category.TERM_7), + TRAILER_EDIFACT_N(Category.TERM_8), + TRAILER_EDIFACT_Z(Category.TERM_9), + HEADER_EDIFACT_UNB_SEARCH(Category.EDIFACT_UNB_0), // EDIFACT UNA -> UNB Only + HEADER_EDIFACT_UNB_1(Category.EDIFACT_UNB_1), // EDIFACT UNA -> UNB Only + HEADER_EDIFACT_UNB_2(Category.EDIFACT_UNB_2), // EDIFACT UNA -> UNB Only + HEADER_EDIFACT_UNB_3(Category.EDIFACT_UNB_3), // EDIFACT UNA -> UNB Only + + // TRADACOMS + TRAILER_TRADACOMS_E(Category.TERM_7), + TRAILER_TRADACOMS_N(Category.TERM_8), + TRAILER_TRADACOMS_D(Category.TERM_9), + + // X12 + TRAILER_X12_I(Category.TERM_7), + TRAILER_X12_E(Category.TERM_8), + TRAILER_X12_A(Category.TERM_9); + + public static final class DialectCode { + private DialectCode() {} + public static final int UNKNOWN = 0; + public static final int EDIFACT = 1; + public static final int TRADACOMS = 2; + public static final int X12 = 3; + } + + private static final class Category { + // Initial + static final int INVALID = -1; + static final int INITIAL = 0; + static final int EDIFACT_1 = 1; + static final int EDIFACT_2 = 2; + static final int TRADACOMS_1 = 3; + static final int TRADACOMS_2 = 4; + static final int X12_1 = 5; + static final int X12_2 = 6; + + // Common (placed in dialect-specific tables) + static final int HEADER = 0; + static final int HEADER_RELEASE = 1; + static final int TAG_1 = 2; + static final int TAG_2 = 3; // Common for EDIFACT & X12, overridden TRADACOMS + static final int TAG_3 = 4; // Common for EDIFACT & X12, overridden TRADACOMS + static final int ELEMENT_PROCESS = 5; + static final int DATA_RELEASE = 6; + static final int DATA_BINARY = 7; + static final int DATA_BINARY_END = 8; + static final int TRAILER = 9; + // Dialect-Specific + static final int TAG_SEARCH = 10; // Each dialect has their own version to support transition to interchange end segments + static final int TERM_7 = 11; // EDIFACT Unz, TRADACOMS End, X12 Iea + static final int TERM_8 = 12; // EDIFACT uNz, TRADACOMS eNd, X12 iEa + static final int TERM_9 = 13; // EDIFACT unZ, TRADACOMS enD, X12 ieA + static final int EDIFACT_UNB_0 = 14; + static final int EDIFACT_UNB_1 = 15; + static final int EDIFACT_UNB_2 = 16; + static final int EDIFACT_UNB_3 = 17; + } private static final State __ = State.INVALID; @@ -107,15 +146,16 @@ public enum State { private static final State IC = State.INTERCHANGE_CANDIDATE; private static final State HD = State.HEADER_DATA; + private static final State HR = State.HEADER_RELEASE; private static final State HV = State.HEADER_INVALID_DATA; private static final State HC = State.HEADER_COMPONENT_END; private static final State HE = State.HEADER_ELEMENT_END; private static final State HZ = State.HEADER_SEGMENT_END; - private static final State B0 = State.HEADER_TAG_SEARCH; - private static final State B1 = State.HEADER_TAG_1; - private static final State B2 = State.HEADER_TAG_2; - private static final State B3 = State.HEADER_TAG_3; + private static final State B0 = State.HEADER_EDIFACT_UNB_SEARCH; + private static final State B1 = State.HEADER_EDIFACT_UNB_1; + private static final State B2 = State.HEADER_EDIFACT_UNB_2; + private static final State B3 = State.HEADER_EDIFACT_UNB_3; private static final State BB = State.HEADER_SEGMENT_BEGIN; private static final State TS = State.TAG_SEARCH; @@ -149,60 +189,172 @@ public enum State { * of the text the state is initial and if the mode list is empty. */ // @formatter:off - private static final State[][] TRANSITION_TABLE = { - /*- - * SPACE SEGMT CMPST RELSE CNTRL INVLD * - * | A B D E I N S T U X Z | | | | | * - * | | | | | | | | | | | | ALNUM | ELEMT | RPEAT | WHITE | OTHER | SEGTG * - * | | | | | | | | | | | | | | | | | | | | | | | * - * | | | | | | | | | | | | | | | | | | | | | | | */ - /* II | IE Initial */ { II, __, __, __, __, X1, __, C1, __, U1, __, __, __, __, __, __, __, __, II, II, __, __, __ }, - - /* X1 (ISA / I) */ { __, __, __, __, __, __, __, X2, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __ }, - /* X2 (ISA / S) */ { __, IC, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __ }, - /* X7 (IEA / I) */ { __, T2, T2, T2, X8, T2, T2, T2, T2, T2, T2, T2, T2, __, __, __, __, __, __, __, __, __, __ }, - /* X8 (IEA / E) */ { __, X9, T3, T3, T3, T3, T3, T3, T3, T3, T3, T3, T3, __, SB, __, __, __, __, __, __, __, __ }, - /* X9 (IEA / A) */ { __, __, __, __, __, __, __, __, __, __, __, __, __, __, TB, __, __, __, __, __, __, __, __ }, - /* ^ 5 */ - /* U1 (UNB / U) */ { __, __, __, __, __, __, U2, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __ }, - /* U2 (UNB / N) */ { __, IC, IC, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __ }, - /* U7 (UNZ / U) */ { __, T2, T2, T2, T2, T2, U8, T2, T2, T2, T2, T2, T2, __, __, __, __, __, __, __, __, __, __ }, - /* U8 (UNZ / N) */ { __, T3, T3, T3, T3, T3, T3, T3, T3, T3, T3, U9, T3, __, SB, __, __, __, __, __, __, __, __ }, - /* U9 (UNZ / Z) */ { __, __, __, __, __, __, __, __, __, __, __, __, __, __, TB, __, __, __, __, __, __, __, __ }, - /* ^ 10 */ - /* C1 (STX / S) */ { __, __, __, __, __, __, __, __, C2, __, __, __, __, __, __, __, __, __, __, __, __, __, __ }, - /* C2 (STX / T) */ { __, __, __, __, __, __, __, __, __, __, IC, __, __, __, __, __, __, __, __, __, __, __, __ }, - /* C7 (END / E) */ { __, T2, T2, T2, T2, T2, C8, T2, T2, T2, T2, T2, T2, __, __, __, __, __, __, __, __, __, __ }, - /* C8 (END / N) */ { __, T3, T3, C9, T3, T3, T3, T3, T3, T3, T3, T3, T3, __, __, __, __, __, __, __, __, __, SB }, - /* C9 (END / D) */ { __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, TB }, - /* ^ 15 */ - /* IC | HD */ { HD, HD, HD, HD, HD, HD, HD, HD, HD, HD, HD, HD, HD, HZ, HE, HC, __, DR, HD, HD, HD, HV, HE }, - /* B0 (Header Search*/ { B0, __, __, __, __, __, __, __, B1, B1, __, __, __, __, __, __, __, __, B0, __, __, __, __ }, - /* B1 (UNB / U) */ { __, __, __, __, __, __, B2, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __ }, - /* B2 (UNB / N) */ { __, __, B3, __, __, __, __, __, __, __, __, __, __, __, BB, __, __, __, __, __, __, __, __ }, - /* B3 (UNB / B) */ { __, __, __, __, __, __, __, __, __, __, __, __, __, __, BB, __, __, __, __, __, __, __, __ }, - /* ^ 20 */ - /* SE+TS Tag Search */ { TS, T1, T1, C7, C7, X7, T1, T1, T1, U7, T1, T1, T1, __, __, __, __, __, TS, __, __, __, __ }, - /* T1 Tag Char 1 * */ { __, T2, T2, T2, T2, T2, T2, T2, T2, T2, T2, T2, T2, __, __, __, __, __, __, __, __, __, __ }, - /* T2 Tag Char 2 * */ { __, T3, T3, T3, T3, T3, T3, T3, T3, T3, T3, T3, T3, SY, SB, __, __, __, __, __, __, __, SB }, - /* T3 Tag Char 3 * */ { __, __, __, __, __, __, __, __, __, __, __, __, __, SY, SB, __, __, __, __, __, __, __, SB }, - /* Element Process */ { ED, ED, ED, ED, ED, ED, ED, ED, ED, ED, ED, ED, ED, SE, EE, CE, ER, DR, EI, EI, ED, EI, __ }, - /* ^ 25 */ - /* Data Release */ { ED, ED, ED, ED, ED, ED, ED, ED, ED, ED, ED, ED, ED, ED, ED, ED, ED, ED, EI, EI, ED, EI, ED }, - /* Binary Data */ { BD, BD, BD, BD, BD, BD, BD, BD, BD, BD, BD, BD, BD, BD, BD, BD, BD, BD, BD, BD, BD, BD, BD }, - /* Binary Data End */ { __, __, __, __, __, __, __, __, __, __, __, __, __, SE, EE, __, __, __, __, __, __, __, __ }, - /* TB | TD | TE IEA */ { TD, TD, TD, TD, TD, TD, TD, TD, TD, TD, TD, TD, TD, IE, TE, __, __, __, __, __, TD, __, __ } - /* ^ 29 */ - }; + /*- + * SPACE SEGMT CMPST RELSE CNTRL INVLD * + * | A B D E I N S T U X Z | | | | | * + * | | | | | | | | | | | | ALNUM | ELEMT | RPEAT | WHITE | OTHER | SEGTG * + * | | | | | | | | | | | | | | | | | | | | | | | * + * | | | | | | | | | | | | | | | | | | | | | | | */ + /******************* Initial */ + private static final State[] FROM_INITIAL = { II, __, __, __, __, X1, __, C1, __, U1, __, __, __, __, __, __, __, __, II, II, __, __, __ }; + /* ^ 0 */ + private static final State[] FROM_EDIFACT_1 = { __, __, __, __, __, __, U2, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __ }; + private static final State[] FROM_EDIFACT_2 = { __, IC, IC, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __ }; + private static final State[] FROM_TRADACOMS_1 = { __, __, __, __, __, __, __, __, C2, __, __, __, __, __, __, __, __, __, __, __, __, __, __ }; + private static final State[] FROM_TRADACOMS_2 = { __, __, __, __, __, __, __, __, __, __, IC, __, __, __, __, __, __, __, __, __, __, __, __ }; + private static final State[] FROM_X12_1 = { __, __, __, __, __, __, __, X2, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __ }; + /* ^ 5 */ + private static final State[] FROM_X12_2 = { __, IC, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __ }; + + /******************* Common */ + private static final State[] FROM_HEADER = { HD, HD, HD, HD, HD, HD, HD, HD, HD, HD, HD, HD, HD, HZ, HE, HC, __, HR, HD, HD, HD, HV, HE }; + /* ^ 0 */ + private static final State[] FROM_HEADER_RELEASE = { HD, HD, HD, HD, HD, HD, HD, HD, HD, HD, HD, HD, HD, HD, HD, HD, HD, HD, HV, HV, HD, HV, HD }; + private static final State[] FROM_TAG_1 = { __, T2, T2, T2, T2, T2, T2, T2, T2, T2, T2, T2, T2, __, __, __, __, __, __, __, __, __, __ }; + private static final State[] FROM_TAG_2 = { __, T3, T3, T3, T3, T3, T3, T3, T3, T3, T3, T3, T3, SY, SB, __, __, __, __, __, __, __, __ }; + private static final State[] FROM_TAG_3 = { __, __, __, __, __, __, __, __, __, __, __, __, __, SY, SB, __, __, __, __, __, __, __, __ }; + private static final State[] FROM_ED = { ED, ED, ED, ED, ED, ED, ED, ED, ED, ED, ED, ED, ED, SE, EE, CE, ER, DR, EI, EI, ED, EI, __ }; + /* ^ 5 */ + private static final State[] FROM_DR = { ED, ED, ED, ED, ED, ED, ED, ED, ED, ED, ED, ED, ED, ED, ED, ED, ED, ED, EI, EI, ED, EI, ED }; + private static final State[] FROM_BD = { BD, BD, BD, BD, BD, BD, BD, BD, BD, BD, BD, BD, BD, BD, BD, BD, BD, BD, BD, BD, BD, BD, BD }; + private static final State[] FROM_BE = { __, __, __, __, __, __, __, __, __, __, __, __, __, SE, EE, __, __, __, __, __, __, __, __ }; + private static final State[] FROM_TRAILER = { TD, TD, TD, TD, TD, TD, TD, TD, TD, TD, TD, TD, TD, IE, TE, __, __, __, __, __, TD, __, __ }; + /* ^ 9 */ + + /******************* EDIFACT */ + private static final State[] FROM_TS_EDIFACT = { TS, T1, T1, T1, T1, T1, T1, T1, T1, U7, T1, T1, T1, __, __, __, __, __, TS, __, __, __, __ }; + /* ^ 10 (follows common) */ + private static final State[] FROM_EDIFACT_7 = { __, T2, T2, T2, T2, T2, U8, T2, T2, T2, T2, T2, T2, __, __, __, __, __, __, __, __, __, __ }; + private static final State[] FROM_EDIFACT_8 = { __, T3, T3, T3, T3, T3, T3, T3, T3, T3, T3, U9, T3, __, SB, __, __, __, __, __, __, __, __ }; + private static final State[] FROM_EDIFACT_9 = { __, __, __, __, __, __, __, __, __, __, __, __, __, __, TB, __, __, __, __, __, __, __, __ }; + private static final State[] FROM_EDIFACT_UNB_0 = { B0, __, __, __, __, __, __, __, B1, B1, __, __, __, __, __, __, __, __, B0, __, __, __, __ }; + private static final State[] FROM_EDIFACT_UNB_1 = { __, __, __, __, __, __, B2, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __ }; + private static final State[] FROM_EDIFACT_UNB_2 = { __, __, B3, __, __, __, __, __, __, __, __, __, __, __, BB, __, __, __, __, __, __, __, __ }; + private static final State[] FROM_EDIFACT_UNB_3 = { __, __, __, __, __, __, __, __, __, __, __, __, __, __, BB, __, __, __, __, __, __, __, __ }; + + /******************* TRADACOMS */ + private static final State[] FROM_TS_TRADACOMS = { TS, T1, T1, T1, C7, T1, T1, T1, T1, T1, T1, T1, T1, __, __, __, __, __, TS, __, __, __, __ }; + /* ^ 10 (follows common) */ + private static final State[] FROM_TRADACOMS_7 = { __, T2, T2, T2, T2, T2, C8, T2, T2, T2, T2, T2, T2, __, __, __, __, __, __, __, __, __, __ }; + private static final State[] FROM_TRADACOMS_8 = { __, T3, T3, C9, T3, T3, T3, T3, T3, T3, T3, T3, T3, __, __, __, __, __, __, __, __, __, SB }; + private static final State[] FROM_TRADACOMS_9 = { __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, TB }; + private static final State[] FROM_TAG_2_TRADACOMS = { __, T3, T3, T3, T3, T3, T3, T3, T3, T3, T3, T3, T3, SY, __, __, __, __, __, __, __, __, SB }; + private static final State[] FROM_TAG_3_TRADACOMS = { __, __, __, __, __, __, __, __, __, __, __, __, __, SY, __, __, __, __, __, __, __, __, SB }; + + /******************* X12 */ + private static final State[] FROM_TS_X12 = { TS, T1, T1, T1, T1, X7, T1, T1, T1, T1, T1, T1, T1, __, __, __, __, __, TS, __, __, __, __ }; + /* ^ 10 (follows common) */ + private static final State[] FROM_X12_7 = { __, T2, T2, T2, X8, T2, T2, T2, T2, T2, T2, T2, T2, __, __, __, __, __, __, __, __, __, __ }; + private static final State[] FROM_X12_8 = { __, X9, T3, T3, T3, T3, T3, T3, T3, T3, T3, T3, T3, __, SB, __, __, __, __, __, __, __, __ }; + private static final State[] FROM_X12_9 = { __, __, __, __, __, __, __, __, __, __, __, __, __, __, TB, __, __, __, __, __, __, __, __ }; + + private static final State[][] TRANSITION_INITIAL = { + FROM_INITIAL, + FROM_EDIFACT_1, + FROM_EDIFACT_2, + FROM_TRADACOMS_1, + FROM_TRADACOMS_2, + FROM_X12_1, + FROM_X12_2 + }; + + private static final State[][] TRANSITION_EDIFACT = { + // Common + FROM_HEADER, + FROM_HEADER_RELEASE, + FROM_TAG_1, + FROM_TAG_2, + FROM_TAG_3, + FROM_ED, + FROM_DR, + FROM_BD, + FROM_BE, + FROM_TRAILER, + // Dialect-specific + FROM_TS_EDIFACT, + FROM_EDIFACT_7, + FROM_EDIFACT_8, + FROM_EDIFACT_9, + FROM_EDIFACT_UNB_0, + FROM_EDIFACT_UNB_1, + FROM_EDIFACT_UNB_2, + FROM_EDIFACT_UNB_3 + }; + + private static final State[][] TRANSITION_TRADACOMS = { + // Common + FROM_HEADER, + FROM_HEADER_RELEASE, + FROM_TAG_1, + FROM_TAG_2_TRADACOMS, // Overrides common transitions + FROM_TAG_3_TRADACOMS, // Overrides common transitions + FROM_ED, + FROM_DR, + FROM_BD, + FROM_BE, + FROM_TRAILER, + // Dialect-specific + FROM_TS_TRADACOMS, + FROM_TRADACOMS_7, + FROM_TRADACOMS_8, + FROM_TRADACOMS_9 + }; + + private static final State[][] TRANSITION_X12 = { + // Common + FROM_HEADER, + FROM_HEADER_RELEASE, + FROM_TAG_1, + FROM_TAG_2, + FROM_TAG_3, + FROM_ED, + FROM_DR, + FROM_BD, + FROM_BE, + FROM_TRAILER, + // Dialect-specific + FROM_TS_X12, + FROM_X12_7, + FROM_X12_8, + FROM_X12_9 + }; + + private static final State[][][] TRANSITIONS = { + TRANSITION_INITIAL, + TRANSITION_EDIFACT, + TRANSITION_TRADACOMS, + TRANSITION_X12 + }; // @formatter:on - private int code; + private final int table; + private final int code; - State(int code) { + State(int table, int code) { + this.table = table; this.code = code; } - public State transition(CharacterClass clazz) { - return TRANSITION_TABLE[code][clazz.code]; + State(int code) { + this(-1, code); } + + public static State transition(State state, Dialect dialect, CharacterClass clazz) { + if (state.table != -1) { + /* + * A state's table is set to force transition to another table. For example, + * end of interchange states transition back to the unknown dialect transition + * table. + */ + return state.transition(state.table, clazz); + } + + Objects.requireNonNull(dialect, "dialect was unexpectedly null"); + return state.transition(dialect.getDialectStateCode(), clazz); + } + + public State transition(int dialect, CharacterClass clazz) { + return TRANSITIONS[dialect][code][clazz.code]; + } + } diff --git a/src/main/java/io/xlate/edi/internal/stream/tokenization/TradacomsDialect.java b/src/main/java/io/xlate/edi/internal/stream/tokenization/TradacomsDialect.java index fb5ed8f3..b8ce9bad 100644 --- a/src/main/java/io/xlate/edi/internal/stream/tokenization/TradacomsDialect.java +++ b/src/main/java/io/xlate/edi/internal/stream/tokenization/TradacomsDialect.java @@ -24,6 +24,7 @@ public class TradacomsDialect extends Dialect { public static final String MHD = "MHD"; private static final String[] EMPTY = new String[0]; + private static final int TRADACOMS_ELEMENT_OFFSET = 3; static final char DFLT_SEGMENT_TERMINATOR = '\''; static final char DFLT_DATA_ELEMENT_SEPARATOR = '+'; @@ -37,7 +38,7 @@ public class TradacomsDialect extends Dialect { private static final int TX_VERSION = 0; TradacomsDialect() { - super(new String[1]); + super(State.DialectCode.TRADACOMS, new String[1]); componentDelimiter = DFLT_COMPONENT_ELEMENT_SEPARATOR; elementDelimiter = DFLT_DATA_ELEMENT_SEPARATOR; decimalMark = 0; @@ -62,6 +63,7 @@ boolean initialize(CharacterSet characters) { initialized = true; characters.setClass(segmentDelimiter, CharacterClass.SEGMENT_DELIMITER); } else { + rejectionMessage = "Unable to obtain version from TRADACOMS header segment"; initialized = false; } @@ -97,7 +99,11 @@ public boolean appendHeader(CharacterSet characters, char value) { case 0: header = new StringBuilder(); break; - case 3: + case TRADACOMS_ELEMENT_OFFSET: + if (value != segmentTagTerminator) { + rejectionMessage = String.format("Expected TRADACOMS segment tag delimiter '%s', but found '%s'", segmentTagTerminator, value); + return false; + } /* * TRADACOMS delimiters are fixed. Do not set the element delimiter * until after the segment tag has been passed to prevent triggering @@ -124,7 +130,7 @@ public boolean appendHeader(CharacterSet characters, char value) { boolean processInterchangeHeader(CharacterSet characters, char value) { if (segmentDelimiter == value) { - rejected = !initialize(characters); + initialize(characters); return isConfirmed(); } diff --git a/src/main/java/io/xlate/edi/internal/stream/tokenization/X12Dialect.java b/src/main/java/io/xlate/edi/internal/stream/tokenization/X12Dialect.java index 2e2ed491..66025aa7 100644 --- a/src/main/java/io/xlate/edi/internal/stream/tokenization/X12Dialect.java +++ b/src/main/java/io/xlate/edi/internal/stream/tokenization/X12Dialect.java @@ -53,7 +53,7 @@ public class X12Dialect extends Dialect { private String groupVersion; X12Dialect() { - super(new String[2]); + super(State.DialectCode.X12, new String[2]); segmentDelimiter = DFLT_SEGMENT_TERMINATOR; elementDelimiter = DFLT_DATA_ELEMENT_SEPARATOR; decimalMark = '.'; @@ -80,6 +80,7 @@ boolean initialize(CharacterSet characters) { for (int i = 0, m = X12_ISA_LENGTH; i < m; i++) { if (ELEMENT == header[i] && X12_ISA_TOKENS[e++] != i) { + rejectionMessage = String.format("Unexpected element delimiter value '%s' in X12 header position %d", ELEMENT, i + 1); return false; } } @@ -150,7 +151,7 @@ public boolean appendHeader(CharacterSet characters, char value) { boolean proceed = true; if (index == X12_SEGMENT_OFFSET) { - rejected = !initialize(characters); + initialize(characters); proceed = isConfirmed(); } diff --git a/src/test/java/io/xlate/edi/internal/stream/StaEDIStreamReaderTest.java b/src/test/java/io/xlate/edi/internal/stream/StaEDIStreamReaderTest.java index 218687bf..86c043ba 100644 --- a/src/test/java/io/xlate/edi/internal/stream/StaEDIStreamReaderTest.java +++ b/src/test/java/io/xlate/edi/internal/stream/StaEDIStreamReaderTest.java @@ -1939,4 +1939,47 @@ void testDecimalScaleAvailableFromSchema() throws EDISchemaException, IOExceptio assertEquals(new BigDecimal("2554.38"), tds01); } + + /** + * Original issue: https://github.com/xlate/staedi/issues/174 + * + * @throws Exception + */ + @Test + void testOtherDialectTerminalSegmentsIgnored_Issue174() throws Exception { + EDIInputFactory factory = EDIInputFactory.newFactory(); + factory.setProperty(EDIInputFactory.EDI_VALIDATE_CONTROL_STRUCTURE, true); + factory.setProperty(EDIInputFactory.EDI_VALIDATE_CONTROL_CODE_VALUES, false); + EDIStreamReader reader = factory.createEDIStreamReader(getClass().getResourceAsStream("/EDIFACT/issue174/other_dialect_term_segments.edi")); + List unexpected = new ArrayList<>(); + + try { + while (reader.hasNext()) { + switch (reader.next()) { + case SEGMENT_ERROR: + case ELEMENT_OCCURRENCE_ERROR: + case ELEMENT_DATA_ERROR: + unexpected.add(reader.getErrorType()); + break; + default: + break; + } + } + } catch (Exception e) { + unexpected.add(e); + } finally { + reader.close(); + } + + assertEquals(0, unexpected.size()); + } + + @Test + void testTRADACOMS_IncorrectSegmentTagDelimiterIsInvalid() throws Exception { + EDIInputFactory factory = EDIInputFactory.newFactory(); + EDIStreamReader reader = factory.createEDIStreamReader(getClass().getResourceAsStream("/TRADACOMS/order-wrong-segment-tag-delimiter.edi")); + List unexpected = new ArrayList<>(); + EDIStreamException thrown = assertThrows(EDIStreamException.class, () -> reader.next()); + assertTrue(thrown.getMessage().contains("Expected TRADACOMS segment tag delimiter")); + } } diff --git a/src/test/resources/EDIFACT/issue174/other_dialect_term_segments.edi b/src/test/resources/EDIFACT/issue174/other_dialect_term_segments.edi new file mode 100644 index 00000000..f3ea5489 --- /dev/null +++ b/src/test/resources/EDIFACT/issue174/other_dialect_term_segments.edi @@ -0,0 +1,6 @@ +UNB+UNOA:3+005435656:1+006415160:1+210220:1605+00000000000001' +UNH+00000000000001+CUSTOM:D:97B:UN' +IEA+1:525' +END+1:525' +UNT+24+00000000000001' +UNZ+1+00000000000001' diff --git a/src/test/resources/TRADACOMS/order-wrong-segment-tag-delimiter.edi b/src/test/resources/TRADACOMS/order-wrong-segment-tag-delimiter.edi new file mode 100644 index 00000000..56a119aa --- /dev/null +++ b/src/test/resources/TRADACOMS/order-wrong-segment-tag-delimiter.edi @@ -0,0 +1,18 @@ +STX+ANA:1+5000000000000:SOME STORES LTD+5010000000000:SUPPLIER UK LTD+070315:130233+000007+PASSW+ORDHDR+B' +MHD+1+ORDHDR:9' +TYP+0430+NEW-ORDERS' +SDT+5010000000000:000030034' +CDT+5000000000000' +FIL+1630+1+070315' +MTR+6' +MHD+2+ORDERS:9' +CLO+5000000000283:89828+EAST SOMEWHERE DEPOT' +ORD+70970::070315' +DIN+070321++0000' +OLD+1+5010210000000++:00893592+12+60++++CRUSTY ROLLS:4 PACK' +OTR+1' +MTR+7' +MHD+3+ORDTLR:9' +OFT+1' +MTR+3' +END+3'