Skip to content

Commit f852576

Browse files
authored
Merge pull request #3462 from Kumataro:3.4-fixc3457
text: change default char_whitelist parameter.
2 parents c60fde0 + 42fc489 commit f852576

File tree

2 files changed

+7
-3
lines changed

2 files changed

+7
-3
lines changed

modules/text/include/opencv2/text/ocr.hpp

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -153,14 +153,16 @@ class CV_EXPORTS_W OCRTesseract : public BaseOCR
153153
@param datapath the name of the parent directory of tessdata, ending with "/", or NULL to use the
154154
system's default directory.
155155
@param language an ISO 639-3 code or NULL will default to "eng".
156-
@param char_whitelist specifies the list of characters used for recognition. NULL defaults to
157-
"0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ".
156+
@param char_whitelist specifies the list of characters used for recognition. NULL defaults to ""
157+
(All characters will be used for recognition).
158158
@param oem tesseract-ocr offers different OCR Engine Modes (OEM), by default
159159
tesseract::OEM_DEFAULT is used. See the tesseract-ocr API documentation for other possible
160160
values.
161161
@param psmode tesseract-ocr offers different Page Segmentation Modes (PSM), by default tesseract::PSM_AUTO
162162
(fully automatic layout analysis) is used. See the tesseract-ocr API documentation for other
163163
possible values.
164+
165+
@note The char_whitelist default is changed after OpenCV 4.7.0/3.4.19 from "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ" to "".
164166
*/
165167
CV_WRAP static Ptr<OCRTesseract> create(const char* datapath=NULL, const char* language=NULL,
166168
const char* char_whitelist=NULL, int oem=OEM_DEFAULT, int psmode=PSM_AUTO);

modules/text/src/ocr_tesseract.cpp

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -163,10 +163,12 @@ class OCRTesseractImpl CV_FINAL : public OCRTesseract
163163
tesseract::PageSegMode pagesegmode = (tesseract::PageSegMode)psmode;
164164
tess.SetPageSegMode(pagesegmode);
165165

166+
// tessedit_char_whitelist default changes from [0-9a-zA-Z] to "".
167+
// See https://github.com/opencv/opencv_contrib/issues/3457
166168
if(char_whitelist != NULL)
167169
tess.SetVariable("tessedit_char_whitelist", char_whitelist);
168170
else
169-
tess.SetVariable("tessedit_char_whitelist", "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ");
171+
tess.SetVariable("tessedit_char_whitelist", "");
170172

171173
tess.SetVariable("save_best_choices", "T");
172174
#else

0 commit comments

Comments
 (0)