From a7fb90c98955ebf6a78d80a9aa24579eff2a7981 Mon Sep 17 00:00:00 2001 From: Markus Fisch Date: Thu, 12 Oct 2023 16:41:46 +0200 Subject: [PATCH] DataMatrix: use charset for encoding (#628) Support non-ISO8859_1 encodings for DataMatrix generation. This fixes #592. --- core/src/MultiFormatWriter.cpp | 8 +++++++- core/src/datamatrix/DMHighLevelEncoder.cpp | 10 +++++++--- core/src/datamatrix/DMHighLevelEncoder.h | 4 +++- core/src/datamatrix/DMWriter.cpp | 6 ++++-- core/src/datamatrix/DMWriter.h | 7 +++++++ test/unit/datamatrix/DMHighLevelEncodeTest.cpp | 3 ++- 6 files changed, 30 insertions(+), 8 deletions(-) diff --git a/core/src/MultiFormatWriter.cpp b/core/src/MultiFormatWriter.cpp index b278d848dd..e8d48471cc 100644 --- a/core/src/MultiFormatWriter.cpp +++ b/core/src/MultiFormatWriter.cpp @@ -49,9 +49,15 @@ MultiFormatWriter::encode(const std::wstring& contents, int width, int height) c return exec0(std::move(writer)); }; + auto exec2 = [&](auto&& writer) { + if (_encoding != CharacterSet::Unknown) + writer.setEncoding(_encoding); + return exec0(std::move(writer)); + }; + switch (_format) { case BarcodeFormat::Aztec: return exec1(Aztec::Writer(), AztecEccLevel); - case BarcodeFormat::DataMatrix: return exec0(DataMatrix::Writer()); + case BarcodeFormat::DataMatrix: return exec2(DataMatrix::Writer()); case BarcodeFormat::PDF417: return exec1(Pdf417::Writer(), Pdf417EccLevel); case BarcodeFormat::QRCode: return exec1(QRCode::Writer(), QRCodeEccLevel); case BarcodeFormat::Codabar: return exec0(OneD::CodabarWriter()); diff --git a/core/src/datamatrix/DMHighLevelEncoder.cpp b/core/src/datamatrix/DMHighLevelEncoder.cpp index 9741de5473..db0859347f 100644 --- a/core/src/datamatrix/DMHighLevelEncoder.cpp +++ b/core/src/datamatrix/DMHighLevelEncoder.cpp @@ -857,7 +857,7 @@ static bool EndsWith(const std::wstring& s, const std::wstring& ss) ByteArray Encode(const std::wstring& msg) { - return Encode(msg, SymbolShape::NONE, -1, -1, -1, -1); + return Encode(msg, CharacterSet::ISO8859_1, SymbolShape::NONE, -1, -1, -1, -1); } /** @@ -871,7 +871,7 @@ ByteArray Encode(const std::wstring& msg) * @param maxSize the maximum symbol size constraint or null for no constraint * @return the encoded message (the char values range from 0 to 255) */ -ByteArray Encode(const std::wstring& msg, SymbolShape shape, int minWidth, int minHeight, int maxWidth, int maxHeight) +ByteArray Encode(const std::wstring& msg, CharacterSet charset, SymbolShape shape, int minWidth, int minHeight, int maxWidth, int maxHeight) { //the codewords 0..255 are encoded as Unicode characters //Encoder[] encoders = { @@ -879,7 +879,11 @@ ByteArray Encode(const std::wstring& msg, SymbolShape shape, int minWidth, int m // new X12Encoder(), new EdifactEncoder(), new Base256Encoder() //}; - EncoderContext context(TextEncoder::FromUnicode(msg, CharacterSet::ISO8859_1)); + if (charset == CharacterSet::Unknown) { + charset = CharacterSet::ISO8859_1; + } + + EncoderContext context(TextEncoder::FromUnicode(msg, charset)); context.setSymbolShape(shape); context.setSizeConstraints(minWidth, minHeight, maxWidth, maxHeight); diff --git a/core/src/datamatrix/DMHighLevelEncoder.h b/core/src/datamatrix/DMHighLevelEncoder.h index 1a17587300..8f1e589707 100644 --- a/core/src/datamatrix/DMHighLevelEncoder.h +++ b/core/src/datamatrix/DMHighLevelEncoder.h @@ -6,6 +6,8 @@ #pragma once +#include "CharacterSet.h" + #include namespace ZXing { @@ -21,7 +23,7 @@ enum class SymbolShape; * annex S. */ ByteArray Encode(const std::wstring& msg); -ByteArray Encode(const std::wstring& msg, SymbolShape shape, int minWidth, int minHeight, int maxWidth, int maxHeight); +ByteArray Encode(const std::wstring& msg, CharacterSet encoding, SymbolShape shape, int minWidth, int minHeight, int maxWidth, int maxHeight); } // DataMatrix } // ZXing diff --git a/core/src/datamatrix/DMWriter.cpp b/core/src/datamatrix/DMWriter.cpp index e3ab758de3..a6723843c3 100644 --- a/core/src/datamatrix/DMWriter.cpp +++ b/core/src/datamatrix/DMWriter.cpp @@ -8,6 +8,7 @@ #include "BitMatrix.h" #include "ByteArray.h" +#include "CharacterSet.h" #include "DMBitLayout.h" #include "DMECEncoder.h" #include "DMHighLevelEncoder.h" @@ -75,7 +76,8 @@ static BitMatrix EncodeLowLevel(const BitMatrix& placement, const SymbolInfo& sy } Writer::Writer() : - _shapeHint(SymbolShape::NONE) + _shapeHint(SymbolShape::NONE), + _encoding(CharacterSet::Unknown) { } @@ -91,7 +93,7 @@ Writer::encode(const std::wstring& contents, int width, int height) const } //1. step: Data encodation - auto encoded = Encode(contents, _shapeHint, _minWidth, _minHeight, _maxWidth, _maxHeight); + auto encoded = Encode(contents, _encoding, _shapeHint, _minWidth, _minHeight, _maxWidth, _maxHeight); const SymbolInfo* symbolInfo = SymbolInfo::Lookup(Size(encoded), _shapeHint, _minWidth, _minHeight, _maxWidth, _maxHeight); if (symbolInfo == nullptr) { throw std::invalid_argument("Can't find a symbol arrangement that matches the message. Data codewords: " + std::to_string(encoded.size())); diff --git a/core/src/datamatrix/DMWriter.h b/core/src/datamatrix/DMWriter.h index c0e3a43ee4..896f0036e8 100644 --- a/core/src/datamatrix/DMWriter.h +++ b/core/src/datamatrix/DMWriter.h @@ -6,6 +6,7 @@ #pragma once +#include "CharacterSet.h" #include "DMSymbolShape.h" #include @@ -43,12 +44,18 @@ class Writer return *this; } + Writer& setEncoding(CharacterSet encoding) { + _encoding = encoding; + return *this; + } + BitMatrix encode(const std::wstring& contents, int width, int height) const; BitMatrix encode(const std::string& contents, int width, int height) const; private: SymbolShape _shapeHint; int _quietZone = 1, _minWidth = -1, _minHeight = -1, _maxWidth = -1, _maxHeight = -1; + CharacterSet _encoding; }; } // DataMatrix diff --git a/test/unit/datamatrix/DMHighLevelEncodeTest.cpp b/test/unit/datamatrix/DMHighLevelEncodeTest.cpp index cf03ef4e62..fd436b8e8b 100644 --- a/test/unit/datamatrix/DMHighLevelEncodeTest.cpp +++ b/test/unit/datamatrix/DMHighLevelEncodeTest.cpp @@ -5,6 +5,7 @@ // SPDX-License-Identifier: Apache-2.0 #include "ByteArray.h" +#include "CharacterSet.h" #include "ZXAlgorithms.h" #include "datamatrix/DMHighLevelEncoder.h" #include "datamatrix/DMSymbolInfo.h" @@ -346,7 +347,7 @@ TEST(DMHighLevelEncodeTest, EncodingWithStartAsX12AndLatchToEDIFACTInTheMiddle) TEST(DMHighLevelEncodeTest, EDIFACTWithEODBug) { std::string visualized = Visualize( - DataMatrix::Encode(L"abc<->ABCDE", DataMatrix::SymbolShape::SQUARE, -1, -1, -1, -1)); + DataMatrix::Encode(L"abc<->ABCDE", CharacterSet::ISO8859_1, DataMatrix::SymbolShape::SQUARE, -1, -1, -1, -1)); // switch to EDIFACT on '<', uses 10 code words + 2 padding. Buggy code introduced invalid 254 after the 5 EXPECT_EQ(visualized, "98 99 100 240 242 223 129 8 49 5 129 147"); }