From 78d4f915a8d668fe90c94ff846376b6bd6b5b94a Mon Sep 17 00:00:00 2001 From: chrox Date: Thu, 24 Oct 2013 21:29:12 +0800 Subject: [PATCH 1/3] set auto detect word gap in reflowing by default --- defaults.lua | 6 +++--- frontend/document/koptinterface.lua | 1 - frontend/ui/data/koptoptions.lua | 2 +- koreader-base | 2 +- 4 files changed, 5 insertions(+), 6 deletions(-) diff --git a/defaults.lua b/defaults.lua index ddb4a1c67..9fdce7758 100644 --- a/defaults.lua +++ b/defaults.lua @@ -48,13 +48,13 @@ DKOPTREADER_CONFIG_PAGE_MARGIN = 0.10 -- range from 0.0 to 1.0 DKOPTREADER_CONFIG_LINE_SPACING = 1.2 -- range from 0.5 to 2.0 DKOPTREADER_CONFIG_RENDER_QUALITY = 1.0 -- range from 0.5 to 2.0 DKOPTREADER_CONFIG_AUTO_STRAIGHTEN = 0 -- range from 0 to 10 -DKOPTREADER_CONFIG_JUSTIFICATION = -1 -- -1 = auto, 0 = left, 1 = center, 2 = right, 3 = full +DKOPTREADER_CONFIG_JUSTIFICATION = 3 -- -1 = auto, 0 = left, 1 = center, 2 = right, 3 = full DKOPTREADER_CONFIG_MAX_COLUMNS = 2 -- range from 1 to 4 DKOPTREADER_CONFIG_CONTRAST = 1.0 -- range from 0.2 to 2.0 -- word spacing for reflow -DKOPTREADER_CONFIG_WORD_SAPCINGS = {0.05, 0.15, 0.375} -- range from 0.05 to 0.5 -DKOPTREADER_CONFIG_DEFAULT_WORD_SAPCING = 0.15 -- range from 0.05 to 0.5 +DKOPTREADER_CONFIG_WORD_SAPCINGS = {0.05, -1, 0.375} -- range from 0.05 to 0.5 +DKOPTREADER_CONFIG_DEFAULT_WORD_SAPCING = -1 -- range from 0.05 to 0.5 -- document languages for OCR DKOPTREADER_CONFIG_DOC_LANGS_TEXT = {"English", "Chinese"} DKOPTREADER_CONFIG_DOC_LANGS_CODE = {"eng", "chi_sim"} -- language code, make sure you have corresponding training data diff --git a/frontend/document/koptinterface.lua b/frontend/document/koptinterface.lua index 51e3ebfb0..bdc5f773d 100644 --- a/frontend/document/koptinterface.lua +++ b/frontend/document/koptinterface.lua @@ -58,7 +58,6 @@ function KoptInterface:createContext(doc, pageno, bbox) lang == "jpn" or lang == "kor" then kc:setCJKChar() end - DEBUG("configurable", doc.configurable) kc:setLanguage(lang) kc:setTrim(doc.configurable.trim_page) kc:setWrap(doc.configurable.text_wrap) diff --git a/frontend/ui/data/koptoptions.lua b/frontend/ui/data/koptoptions.lua index 898994e3b..c1aa6d1d0 100644 --- a/frontend/ui/data/koptoptions.lua +++ b/frontend/ui/data/koptoptions.lua @@ -182,7 +182,7 @@ local KoptOptions = { { name = "word_spacing", name_text = S.WORD_GAP, - toggle = {S.SMALL, S.MEDIUM, S.LARGE}, + toggle = {S.SMALL, S.AUTO, S.LARGE}, values = DKOPTREADER_CONFIG_WORD_SAPCINGS, default_value = DKOPTREADER_CONFIG_DEFAULT_WORD_SAPCING, }, diff --git a/koreader-base b/koreader-base index 61f28fbbd..669fb3dad 160000 --- a/koreader-base +++ b/koreader-base @@ -1 +1 @@ -Subproject commit 61f28fbbddc11c2ed0a600529d68ac91f33e00d6 +Subproject commit 669fb3dad4738f8003ddb4d9bb8da16e95f7ef25 From 90d55cbde4143d2b6da742407fe398f4af71ff61 Mon Sep 17 00:00:00 2001 From: chrox Date: Thu, 24 Oct 2013 21:29:12 +0800 Subject: [PATCH 2/3] set auto detect word gap in reflowing by default --- defaults.lua | 6 +++--- frontend/document/koptinterface.lua | 1 - frontend/ui/data/koptoptions.lua | 2 +- koreader-base | 2 +- 4 files changed, 5 insertions(+), 6 deletions(-) diff --git a/defaults.lua b/defaults.lua index ddb4a1c67..9fdce7758 100644 --- a/defaults.lua +++ b/defaults.lua @@ -48,13 +48,13 @@ DKOPTREADER_CONFIG_PAGE_MARGIN = 0.10 -- range from 0.0 to 1.0 DKOPTREADER_CONFIG_LINE_SPACING = 1.2 -- range from 0.5 to 2.0 DKOPTREADER_CONFIG_RENDER_QUALITY = 1.0 -- range from 0.5 to 2.0 DKOPTREADER_CONFIG_AUTO_STRAIGHTEN = 0 -- range from 0 to 10 -DKOPTREADER_CONFIG_JUSTIFICATION = -1 -- -1 = auto, 0 = left, 1 = center, 2 = right, 3 = full +DKOPTREADER_CONFIG_JUSTIFICATION = 3 -- -1 = auto, 0 = left, 1 = center, 2 = right, 3 = full DKOPTREADER_CONFIG_MAX_COLUMNS = 2 -- range from 1 to 4 DKOPTREADER_CONFIG_CONTRAST = 1.0 -- range from 0.2 to 2.0 -- word spacing for reflow -DKOPTREADER_CONFIG_WORD_SAPCINGS = {0.05, 0.15, 0.375} -- range from 0.05 to 0.5 -DKOPTREADER_CONFIG_DEFAULT_WORD_SAPCING = 0.15 -- range from 0.05 to 0.5 +DKOPTREADER_CONFIG_WORD_SAPCINGS = {0.05, -1, 0.375} -- range from 0.05 to 0.5 +DKOPTREADER_CONFIG_DEFAULT_WORD_SAPCING = -1 -- range from 0.05 to 0.5 -- document languages for OCR DKOPTREADER_CONFIG_DOC_LANGS_TEXT = {"English", "Chinese"} DKOPTREADER_CONFIG_DOC_LANGS_CODE = {"eng", "chi_sim"} -- language code, make sure you have corresponding training data diff --git a/frontend/document/koptinterface.lua b/frontend/document/koptinterface.lua index 51e3ebfb0..bdc5f773d 100644 --- a/frontend/document/koptinterface.lua +++ b/frontend/document/koptinterface.lua @@ -58,7 +58,6 @@ function KoptInterface:createContext(doc, pageno, bbox) lang == "jpn" or lang == "kor" then kc:setCJKChar() end - DEBUG("configurable", doc.configurable) kc:setLanguage(lang) kc:setTrim(doc.configurable.trim_page) kc:setWrap(doc.configurable.text_wrap) diff --git a/frontend/ui/data/koptoptions.lua b/frontend/ui/data/koptoptions.lua index 898994e3b..c1aa6d1d0 100644 --- a/frontend/ui/data/koptoptions.lua +++ b/frontend/ui/data/koptoptions.lua @@ -182,7 +182,7 @@ local KoptOptions = { { name = "word_spacing", name_text = S.WORD_GAP, - toggle = {S.SMALL, S.MEDIUM, S.LARGE}, + toggle = {S.SMALL, S.AUTO, S.LARGE}, values = DKOPTREADER_CONFIG_WORD_SAPCINGS, default_value = DKOPTREADER_CONFIG_DEFAULT_WORD_SAPCING, }, diff --git a/koreader-base b/koreader-base index 61f28fbbd..669fb3dad 160000 --- a/koreader-base +++ b/koreader-base @@ -1 +1 @@ -Subproject commit 61f28fbbddc11c2ed0a600529d68ac91f33e00d6 +Subproject commit 669fb3dad4738f8003ddb4d9bb8da16e95f7ef25 From 184a6f5d21a4ec95dcad23bf467c7636a229992f Mon Sep 17 00:00:00 2001 From: chrox Date: Thu, 24 Oct 2013 23:38:03 +0800 Subject: [PATCH 3/3] highlight word from scratch instead of reusing rectmaps in reflowing mode Totally revert the OCR in reflowed page to build 545. And this should fix #309. --- frontend/document/koptinterface.lua | 40 ++++++++++++++++++++++++----- koreader-base | 2 +- 2 files changed, 34 insertions(+), 8 deletions(-) diff --git a/frontend/document/koptinterface.lua b/frontend/document/koptinterface.lua index bdc5f773d..e640ad730 100644 --- a/frontend/document/koptinterface.lua +++ b/frontend/document/koptinterface.lua @@ -338,9 +338,8 @@ function KoptInterface:getTextBoxes(doc, pageno) else if doc.configurable.text_wrap == 1 then return self:getNativeTextBoxes(doc, pageno) - --return self:getTextBoxesFromScratch(doc, pageno) else - return self:getTextBoxesFromScratch(doc, pageno) + return self:getNativeTextBoxesFromScratch(doc, pageno) end end end @@ -393,12 +392,39 @@ function KoptInterface:getNativeTextBoxes(doc, pageno) end end +--[[ +get text boxes in reflowed page via optical method, +i.e. OCR pre-processing in Tesseract and Leptonica. +--]] +function KoptInterface:getReflowedTextBoxesFromScratch(doc, pageno) + local bbox = doc:getPageBBox(pageno) + local context_hash = self:getContextHash(doc, pageno, bbox) + local hash = "scratchrfpgboxes|"..context_hash + local cached = Cache:check(hash) + if not cached then + local kctx_hash = "kctx|"..context_hash + local cached = Cache:check(kctx_hash) + if cached then + local reflowed_kc = self:waitForContext(cached.kctx) + local fullwidth, fullheight = reflowed_kc:getPageDim() + local kc = self:createContext(doc, pageno) + kc:copyDestBMP(reflowed_kc) + local boxes = kc:getNativeWordBoxes(0, 0, fullwidth, fullheight) + Cache:insert(hash, CacheItem:new{ scratchrfpgboxes = boxes }) + kc:free() + return boxes + end + else + return cached.scratchrfpgboxes + end +end + --[[ get text boxes in native page via optical method, i.e. OCR pre-processing in Tesseract and Leptonica. --]] -function KoptInterface:getTextBoxesFromScratch(doc, pageno) - local hash = "pgboxes|"..doc.file.."|"..pageno +function KoptInterface:getNativeTextBoxesFromScratch(doc, pageno) + local hash = "scratchnativepgboxes|"..doc.file.."|"..pageno local cached = Cache:check(hash) if not cached then local page_size = Document.getNativePageDimensions(doc, pageno) @@ -412,12 +438,12 @@ function KoptInterface:getTextBoxesFromScratch(doc, pageno) local page = doc._document:openPage(pageno) page:getPagePix(kc) local boxes = kc:getNativeWordBoxes(0, 0, page_size.w, page_size.h) - Cache:insert(hash, CacheItem:new{ pgboxes = boxes }) + Cache:insert(hash, CacheItem:new{ scratchnativepgboxes = boxes }) page:close() kc:free() return boxes else - return cached.pgboxes + return cached.scratchnativepgboxes end end @@ -643,7 +669,7 @@ get word and word box from position in reflowed page ]]-- function KoptInterface:getWordFromReflowPosition(doc, boxes, pos) local pageno = pos.page - local reflowed_page_boxes = self:getReflowedTextBoxes(doc, pageno) + local reflowed_page_boxes = self:getReflowedTextBoxesFromScratch(doc, pageno) local reflowed_word_box = self:getWordFromBoxes(reflowed_page_boxes, pos) local reflowed_pos = reflowed_word_box.box:center() local native_pos = self:reflowToNativePosTransform(doc, pageno, reflowed_pos) diff --git a/koreader-base b/koreader-base index 669fb3dad..a2b327dfe 160000 --- a/koreader-base +++ b/koreader-base @@ -1 +1 @@ -Subproject commit 669fb3dad4738f8003ddb4d9bb8da16e95f7ef25 +Subproject commit a2b327dfe1613734eb3600f130be941fb7a1e717