From 18d2ec67612597837b2215ef4665ed9c81891eda Mon Sep 17 00:00:00 2001
From: Benoit Pierre
Date: Mon, 11 Nov 2024 12:44:22 +0100
Subject: [PATCH] kopt: fix OCR segmentation mode (#12726)

Previously unused by `libk2pdfopt`, the `ocr_type` argument passed to `k2pdfopt_tocr_single_word` and forwarded to `ocrtess_ocrwords_from_bmp8` now has a big impact for some languages (e.g. Arabic).
---
 frontend/document/koptinterface.lua | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/frontend/document/koptinterface.lua b/frontend/document/koptinterface.lua
index 4d84748aa..f64a28a1a 100644
--- a/frontend/document/koptinterface.lua
+++ b/frontend/document/koptinterface.lua
@@ -24,7 +24,7 @@ local KoptInterface = {
     -- in `$TESSDATA_PREFIX/` on more recent versions).
     tessocr_data = not os.getenv('TESSDATA_PREFIX') and DataStorage:getDataDir().."/data/tessdata" or nil,
     ocr_lang = "eng",
-    ocr_type = 3, -- default 0, for more accuracy use 3
+    ocr_type = -1, -- default: assume a single uniform block of text.
     last_context_size = nil,
     default_context_size = 1024*1024,
 }