mirror of
https://github.com/koreader/koreader.git
synced 2025-08-10 00:52:38 +00:00
Merge pull request #340 from chrox/master
set auto detect word gap in reflowing by default
This commit is contained in:
@@ -48,13 +48,13 @@ DKOPTREADER_CONFIG_PAGE_MARGIN = 0.10 -- range from 0.0 to 1.0
|
||||
DKOPTREADER_CONFIG_LINE_SPACING = 1.2 -- range from 0.5 to 2.0
|
||||
DKOPTREADER_CONFIG_RENDER_QUALITY = 1.0 -- range from 0.5 to 2.0
|
||||
DKOPTREADER_CONFIG_AUTO_STRAIGHTEN = 0 -- range from 0 to 10
|
||||
DKOPTREADER_CONFIG_JUSTIFICATION = -1 -- -1 = auto, 0 = left, 1 = center, 2 = right, 3 = full
|
||||
DKOPTREADER_CONFIG_JUSTIFICATION = 3 -- -1 = auto, 0 = left, 1 = center, 2 = right, 3 = full
|
||||
DKOPTREADER_CONFIG_MAX_COLUMNS = 2 -- range from 1 to 4
|
||||
DKOPTREADER_CONFIG_CONTRAST = 1.0 -- range from 0.2 to 2.0
|
||||
|
||||
-- word spacing for reflow
|
||||
DKOPTREADER_CONFIG_WORD_SAPCINGS = {0.05, 0.15, 0.375} -- range from 0.05 to 0.5
|
||||
DKOPTREADER_CONFIG_DEFAULT_WORD_SAPCING = 0.15 -- range from 0.05 to 0.5
|
||||
DKOPTREADER_CONFIG_WORD_SAPCINGS = {0.05, -1, 0.375} -- range from 0.05 to 0.5, or -1 = auto detect word gap
|
||||
DKOPTREADER_CONFIG_DEFAULT_WORD_SAPCING = -1 -- range from 0.05 to 0.5, or -1 = auto detect word gap
|
||||
-- document languages for OCR
|
||||
DKOPTREADER_CONFIG_DOC_LANGS_TEXT = {"English", "Chinese"}
|
||||
DKOPTREADER_CONFIG_DOC_LANGS_CODE = {"eng", "chi_sim"} -- language code, make sure you have corresponding training data
|
||||
|
||||
@@ -58,7 +58,6 @@ function KoptInterface:createContext(doc, pageno, bbox)
|
||||
lang == "jpn" or lang == "kor" then
|
||||
kc:setCJKChar()
|
||||
end
|
||||
DEBUG("configurable", doc.configurable)
|
||||
kc:setLanguage(lang)
|
||||
kc:setTrim(doc.configurable.trim_page)
|
||||
kc:setWrap(doc.configurable.text_wrap)
|
||||
@@ -339,9 +338,8 @@ function KoptInterface:getTextBoxes(doc, pageno)
|
||||
else
|
||||
if doc.configurable.text_wrap == 1 then
|
||||
return self:getNativeTextBoxes(doc, pageno)
|
||||
--return self:getTextBoxesFromScratch(doc, pageno)
|
||||
else
|
||||
return self:getTextBoxesFromScratch(doc, pageno)
|
||||
return self:getNativeTextBoxesFromScratch(doc, pageno)
|
||||
end
|
||||
end
|
||||
end
|
||||
@@ -394,12 +392,39 @@ function KoptInterface:getNativeTextBoxes(doc, pageno)
|
||||
end
|
||||
end
|
||||
|
||||
--[[
|
||||
get text boxes in reflowed page via optical method,
|
||||
i.e. OCR pre-processing in Tesseract and Leptonica.
|
||||
--]]
|
||||
function KoptInterface:getReflowedTextBoxesFromScratch(doc, pageno)
    local page_bbox = doc:getPageBBox(pageno)
    local context_hash = self:getContextHash(doc, pageno, page_bbox)
    local boxes_key = "scratchrfpgboxes|"..context_hash

    -- Fast path: boxes for this context were already computed and cached.
    local boxes_item = Cache:check(boxes_key)
    if boxes_item then
        return boxes_item.scratchrfpgboxes
    end

    -- The boxes are derived from the cached reflow context; when that
    -- context is not in the cache nothing is computed and nothing is returned.
    local kctx_item = Cache:check("kctx|"..context_hash)
    if not kctx_item then
        return
    end

    local reflowed_kc = self:waitForContext(kctx_item.kctx)
    local fullwidth, fullheight = reflowed_kc:getPageDim()

    -- Work on a fresh context that receives a copy of the reflowed bitmap,
    -- then query word boxes over the full reflowed page dimensions.
    local kc = self:createContext(doc, pageno)
    kc:copyDestBMP(reflowed_kc)
    local word_boxes = kc:getNativeWordBoxes(0, 0, fullwidth, fullheight)
    Cache:insert(boxes_key, CacheItem:new{ scratchrfpgboxes = word_boxes })
    kc:free()
    return word_boxes
end
|
||||
|
||||
--[[
|
||||
get text boxes in native page via optical method,
|
||||
i.e. OCR pre-processing in Tesseract and Leptonica.
|
||||
--]]
|
||||
function KoptInterface:getTextBoxesFromScratch(doc, pageno)
|
||||
local hash = "pgboxes|"..doc.file.."|"..pageno
|
||||
function KoptInterface:getNativeTextBoxesFromScratch(doc, pageno)
|
||||
local hash = "scratchnativepgboxes|"..doc.file.."|"..pageno
|
||||
local cached = Cache:check(hash)
|
||||
if not cached then
|
||||
local page_size = Document.getNativePageDimensions(doc, pageno)
|
||||
@@ -413,12 +438,12 @@ function KoptInterface:getTextBoxesFromScratch(doc, pageno)
|
||||
local page = doc._document:openPage(pageno)
|
||||
page:getPagePix(kc)
|
||||
local boxes = kc:getNativeWordBoxes(0, 0, page_size.w, page_size.h)
|
||||
Cache:insert(hash, CacheItem:new{ pgboxes = boxes })
|
||||
Cache:insert(hash, CacheItem:new{ scratchnativepgboxes = boxes })
|
||||
page:close()
|
||||
kc:free()
|
||||
return boxes
|
||||
else
|
||||
return cached.pgboxes
|
||||
return cached.scratchnativepgboxes
|
||||
end
|
||||
end
|
||||
|
||||
@@ -644,7 +669,7 @@ get word and word box from position in reflowed page
|
||||
]]--
|
||||
function KoptInterface:getWordFromReflowPosition(doc, boxes, pos)
|
||||
local pageno = pos.page
|
||||
local reflowed_page_boxes = self:getReflowedTextBoxes(doc, pageno)
|
||||
local reflowed_page_boxes = self:getReflowedTextBoxesFromScratch(doc, pageno)
|
||||
local reflowed_word_box = self:getWordFromBoxes(reflowed_page_boxes, pos)
|
||||
local reflowed_pos = reflowed_word_box.box:center()
|
||||
local native_pos = self:reflowToNativePosTransform(doc, pageno, reflowed_pos)
|
||||
|
||||
@@ -182,7 +182,7 @@ local KoptOptions = {
|
||||
{
|
||||
name = "word_spacing",
|
||||
name_text = S.WORD_GAP,
|
||||
toggle = {S.SMALL, S.MEDIUM, S.LARGE},
|
||||
toggle = {S.SMALL, S.AUTO, S.LARGE},
|
||||
values = DKOPTREADER_CONFIG_WORD_SAPCINGS,
|
||||
default_value = DKOPTREADER_CONFIG_DEFAULT_WORD_SAPCING,
|
||||
},
|
||||
|
||||
Submodule koreader-base updated: 61f28fbbdd...a2b327dfe1
Reference in New Issue
Block a user