mirror of
https://github.com/koreader/koreader.git
synced 2025-08-10 00:52:38 +00:00
synchronize highlights between reflowing and non-reflowing modes
Also, there is no need to OCR a word in reflowing mode if we can extract its text from the original page, thanks to the rect maps between the normal page and the reflowed page.
This commit is contained in:
@@ -1,7 +1,5 @@
|
||||
require "cache"
|
||||
require "ui/geometry"
|
||||
require "ui/screen"
|
||||
require "ui/device"
|
||||
require "ui/reader/readerconfig"
|
||||
require "ui/data/koptoptions"
|
||||
require "document/koptinterface"
|
||||
@@ -11,8 +9,6 @@ DjvuDocument = Document:new{
|
||||
-- libdjvulibre manages its own additional cache, default value is hard written in c module.
|
||||
djvulibre_cache_size = nil,
|
||||
dc_null = DrawContext.new(),
|
||||
screen_size = Screen:getSize(),
|
||||
screen_dpi = Device:getModel() == "KindlePaperWhite" and 212 or 167,
|
||||
options = KoptOptions,
|
||||
configurable = Configurable,
|
||||
koptinterface = KoptInterface,
|
||||
@@ -47,35 +43,6 @@ function validDjvuFile(filename)
|
||||
return true
|
||||
end
|
||||
|
||||
function DjvuDocument:getTextBoxes(pageno)
|
||||
if self.configurable.text_wrap == 1 then
|
||||
return self.koptinterface:getReflewTextBoxes(self, pageno)
|
||||
else
|
||||
local text = self._document:getPageText(pageno)
|
||||
if not text or #text == 0 then
|
||||
return self.koptinterface:getTextBoxes(self, pageno)
|
||||
else
|
||||
return text
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
function DjvuDocument:getOCRWord(pageno, rect)
|
||||
if self.configurable.text_wrap == 1 then
|
||||
return self.koptinterface:getReflewOCRWord(self, pageno, rect)
|
||||
else
|
||||
return self.koptinterface:getOCRWord(self, pageno, rect)
|
||||
end
|
||||
end
|
||||
|
||||
function DjvuDocument:getUsedBBox(pageno)
|
||||
-- djvu does not support usedbbox, so fake it.
|
||||
local used = {}
|
||||
local native_dim = self:getNativePageDimensions(pageno)
|
||||
used.x0, used.y0, used.x1, used.y1 = 0, 0, native_dim.w, native_dim.h
|
||||
return used
|
||||
end
|
||||
|
||||
function DjvuDocument:invertTextYAxel(pageno, text_table)
|
||||
local _, height = self.doc:getOriginalPageSize(pageno)
|
||||
for _,text in pairs(text_table) do
|
||||
@@ -86,44 +53,52 @@ function DjvuDocument:invertTextYAxel(pageno, text_table)
|
||||
return text_table
|
||||
end
|
||||
|
||||
-- Return whatever the underlying djvu document reports as page text
-- for page `pageno`.
function DjvuDocument:getPageTextBoxes(pageno)
    local backend = self._document
    return backend:getPageText(pageno)
end
|
||||
|
||||
-- Forward the word lookup at position `spos` to the kopt interface.
function DjvuDocument:getWordFromPosition(spos)
    local kopt = self.koptinterface
    return kopt:getWordFromPosition(self, spos)
end
|
||||
|
||||
-- Forward the text lookup between `spos0` and `spos1` to the kopt interface.
function DjvuDocument:getTextFromPositions(spos0, spos1)
    local kopt = self.koptinterface
    return kopt:getTextFromPositions(self, spos0, spos1)
end
|
||||
|
||||
-- Forward the box lookup between page positions `ppos0` and `ppos1`
-- on page `pageno` to the kopt interface.
function DjvuDocument:getPageBoxesFromPositions(pageno, ppos0, ppos1)
    local kopt = self.koptinterface
    return kopt:getPageBoxesFromPositions(self, pageno, ppos0, ppos1)
end
|
||||
|
||||
-- Forward the OCR request for `rect` on page `pageno` to the kopt interface.
function DjvuDocument:getOCRWord(pageno, rect)
    local kopt = self.koptinterface
    return kopt:getOCRWord(self, pageno, rect)
end
|
||||
|
||||
-- djvu does not support usedbbox, so fake it with a box covering the
-- whole native page.
function DjvuDocument:getUsedBBox(pageno)
    local dim = self:getNativePageDimensions(pageno)
    return { x0 = 0, y0 = 0, x1 = dim.w, y1 = dim.h }
end
|
||||
|
||||
-- Return the bbox of page `pageno`.
-- The choice between auto, semi-auto and saved manual bbox is made in
-- KoptInterface:getPageBBox. The local dispatch that used to live here
-- sent every trim_page > 0 (including semi-auto, trim_page == 2) to the
-- auto bbox finder and left the delegating return unreachable.
function DjvuDocument:getPageBBox(pageno)
    return self.koptinterface:getPageBBox(self, pageno)
end
|
||||
|
||||
-- Return the dimensions of page `pageno`.
-- KoptInterface:getPageDimensions already switches between reflowed and
-- native dimensions on configurable.text_wrap, so the duplicated local
-- dispatch (which made the delegating return unreachable) is dropped.
function DjvuDocument:getPageDimensions(pageno, zoom, rotation)
    return self.koptinterface:getPageDimensions(self, pageno, zoom, rotation)
end
|
||||
|
||||
-- Render page `pageno` through the kopt interface.
-- KoptInterface:renderPage takes gamma before render_mode and picks the
-- reflowed or the native renderer itself. The former local reflow branch
-- omitted gamma, which shifted render_mode into the gamma slot, and it
-- left the delegating return unreachable.
function DjvuDocument:renderPage(pageno, rect, zoom, rotation, gamma, render_mode)
    return self.koptinterface:renderPage(self, pageno, rect, zoom, rotation, gamma, render_mode)
end
|
||||
|
||||
-- Hint (pre-render) page `pageno` through the kopt interface.
-- KoptInterface:hintPage selects the reflowed or the native path itself;
-- hinting here as well would issue the hint twice for the same page.
function DjvuDocument:hintPage(pageno, zoom, rotation, gamma, render_mode)
    return self.koptinterface:hintPage(self, pageno, zoom, rotation, gamma, render_mode)
end
|
||||
|
||||
-- Draw page `pageno` into `target` through the kopt interface.
-- KoptInterface:drawPage selects the reflowed or the native path itself;
-- drawing here as well would blit the page twice, and the former local
-- reflow branch also omitted gamma from the argument list.
function DjvuDocument:drawPage(target, x, y, rect, pageno, zoom, rotation, gamma, render_mode)
    return self.koptinterface:drawPage(self, target, x, y, rect, pageno, zoom, rotation, gamma, render_mode)
end
|
||||
|
||||
DocumentRegistry:addProvider("djvu", "application/djvu", DjvuDocument)
|
||||
|
||||
@@ -2,6 +2,7 @@ require "dbg"
|
||||
require "cache"
|
||||
require "ui/geometry"
|
||||
require "ui/device"
|
||||
require "ui/screen"
|
||||
require "ui/reader/readerconfig"
|
||||
|
||||
KoptInterface = {
|
||||
@@ -10,6 +11,7 @@ KoptInterface = {
|
||||
ocr_type = 3, -- default 0, for more accuracy use 3
|
||||
last_context_size = nil,
|
||||
default_context_size = 1024*1024,
|
||||
screen_dpi = Screen:getDPI(),
|
||||
}
|
||||
|
||||
ContextCacheItem = CacheItem:new{}
|
||||
@@ -60,7 +62,7 @@ function KoptInterface:createContext(doc, pageno, bbox)
|
||||
kc:setRotate(doc.configurable.screen_rotation)
|
||||
kc:setColumns(doc.configurable.max_columns)
|
||||
kc:setDeviceDim(screen_size.w, screen_size.h)
|
||||
kc:setDeviceDPI(doc.screen_dpi)
|
||||
kc:setDeviceDPI(self.screen_dpi)
|
||||
kc:setStraighten(doc.configurable.auto_straighten)
|
||||
kc:setJustification(doc.configurable.justification)
|
||||
kc:setZoom(doc.configurable.font_size)
|
||||
@@ -82,6 +84,22 @@ function KoptInterface:getContextHash(doc, pageno, bbox)
|
||||
return doc.file.."|"..pageno.."|"..doc.configurable:hash("|").."|"..bbox_hash.."|"..screen_size_hash
|
||||
end
|
||||
|
||||
-- Pick the bbox source for page `pageno` from the document configurable:
-- trim_page 1 -> auto bbox, trim_page 2 -> semi-auto bbox (both only when
-- text_wrap is not 1), anything else -> the saved manual bbox.
function KoptInterface:getPageBBox(doc, pageno)
    local conf = doc.configurable
    if conf.text_wrap ~= 1 then
        if conf.trim_page == 1 then
            return self:getAutoBBox(doc, pageno)
        end
        if conf.trim_page == 2 then
            return self:getSemiAutoBBox(doc, pageno)
        end
    end
    return Document.getPageBBox(doc, pageno)
end
|
||||
|
||||
--[[
|
||||
auto detect bbox
|
||||
--]]
|
||||
function KoptInterface:getAutoBBox(doc, pageno)
|
||||
local native_size = Document.getNativePageDimensions(doc, pageno)
|
||||
local bbox = {
|
||||
@@ -97,14 +115,18 @@ function KoptInterface:getAutoBBox(doc, pageno)
|
||||
local kc = self:createContext(doc, pageno, bbox)
|
||||
bbox.x0, bbox.y0, bbox.x1, bbox.y1 = page:getAutoBBox(kc)
|
||||
DEBUG("Auto detected bbox", bbox)
|
||||
page:close()
|
||||
Cache:insert(hash, CacheItem:new{ autobbox = bbox })
|
||||
page:close()
|
||||
kc:free()
|
||||
return bbox
|
||||
else
|
||||
return cached.autobbox
|
||||
end
|
||||
end
|
||||
|
||||
--[[
|
||||
detect bbox within user restricted bbox
|
||||
--]]
|
||||
function KoptInterface:getSemiAutoBBox(doc, pageno)
|
||||
-- use manual bbox
|
||||
local bbox = Document.getPageBBox(doc, pageno)
|
||||
@@ -123,114 +145,13 @@ function KoptInterface:getSemiAutoBBox(doc, pageno)
|
||||
DEBUG("Semi-auto detected bbox", auto_bbox)
|
||||
page:close()
|
||||
Cache:insert(hash, CacheItem:new{ semiautobbox = auto_bbox })
|
||||
kc:free()
|
||||
return auto_bbox
|
||||
else
|
||||
return cached.semiautobbox
|
||||
end
|
||||
end
|
||||
|
||||
function KoptInterface:getReflewTextBoxes(doc, pageno)
|
||||
local bbox = doc:getPageBBox(pageno)
|
||||
local context_hash = self:getContextHash(doc, pageno, bbox)
|
||||
local hash = "rfpgboxes|"..context_hash
|
||||
local cached = Cache:check(hash)
|
||||
if not cached then
|
||||
local kctx_hash = "kctx|"..context_hash
|
||||
local cached = Cache:check(kctx_hash)
|
||||
if cached then
|
||||
local kc = self:waitForContext(cached.kctx)
|
||||
--kc:setDebug()
|
||||
local fullwidth, fullheight = kc:getPageDim()
|
||||
local boxes = kc:getWordBoxes(0, 0, fullwidth, fullheight)
|
||||
Cache:insert(hash, CacheItem:new{ rfpgboxes = boxes })
|
||||
return boxes
|
||||
end
|
||||
else
|
||||
return cached.rfpgboxes
|
||||
end
|
||||
end
|
||||
|
||||
function KoptInterface:getTextBoxes(doc, pageno)
|
||||
local hash = "pgboxes|"..doc.file.."|"..pageno
|
||||
local cached = Cache:check(hash)
|
||||
if not cached then
|
||||
local kc_hash = "kctx|"..doc.file.."|"..pageno
|
||||
local kc = self:createContext(doc, pageno)
|
||||
kc:setDebug()
|
||||
local page = doc._document:openPage(pageno)
|
||||
page:getPagePix(kc)
|
||||
local fullwidth, fullheight = kc:getPageDim()
|
||||
local boxes = kc:getWordBoxes(0, 0, fullwidth, fullheight)
|
||||
Cache:insert(hash, CacheItem:new{ pgboxes = boxes })
|
||||
Cache:insert(kc_hash, ContextCacheItem:new{ kctx = kc })
|
||||
return boxes
|
||||
else
|
||||
return cached.pgboxes
|
||||
end
|
||||
end
|
||||
|
||||
--[[
|
||||
get word from OCR in reflew page
|
||||
--]]
|
||||
function KoptInterface:getReflewOCRWord(doc, pageno, rect)
|
||||
local ocrengine = "ocrengine"
|
||||
if not Cache:check(ocrengine) then
|
||||
local dummy = KOPTContext.new()
|
||||
Cache:insert(ocrengine, OCREngine:new{ ocrengine = dummy })
|
||||
end
|
||||
self.ocr_lang = doc.configurable.doc_language
|
||||
local bbox = doc:getPageBBox(pageno)
|
||||
local context_hash = self:getContextHash(doc, pageno, bbox)
|
||||
local hash = "rfocrword|"..context_hash..rect.x..rect.y..rect.w..rect.h
|
||||
local cached = Cache:check(hash)
|
||||
if not cached then
|
||||
local kctx_hash = "kctx|"..context_hash
|
||||
local cached = Cache:check(kctx_hash)
|
||||
if cached then
|
||||
local kc = self:waitForContext(cached.kctx)
|
||||
local ok, word = pcall(
|
||||
kc.getTOCRWord, kc,
|
||||
rect.x, rect.y, rect.w, rect.h,
|
||||
self.tessocr_data, self.ocr_lang, self.ocr_type, 0, 1)
|
||||
Cache:insert(hash, CacheItem:new{ rfocrword = word })
|
||||
return word
|
||||
end
|
||||
else
|
||||
return cached.rfocrword
|
||||
end
|
||||
end
|
||||
|
||||
--[[
|
||||
get word from OCR in non-reflew page
|
||||
--]]
|
||||
function KoptInterface:getOCRWord(doc, pageno, rect)
|
||||
local ocrengine = "ocrengine"
|
||||
if not Cache:check(ocrengine) then
|
||||
local dummy = KOPTContext.new()
|
||||
Cache:insert(ocrengine, OCREngine:new{ ocrengine = dummy })
|
||||
end
|
||||
self.ocr_lang = doc.configurable.doc_language
|
||||
local hash = "ocrword|"..doc.file.."|"..pageno..rect.x..rect.y..rect.w..rect.h
|
||||
local cached = Cache:check(hash)
|
||||
if not cached then
|
||||
local pgboxes_hash = "pgboxes|"..doc.file.."|"..pageno
|
||||
local pgboxes_cached = Cache:check(pgboxes_hash)
|
||||
local kc_hash = "kctx|"..doc.file.."|"..pageno
|
||||
local kc_cashed = Cache:check(kc_hash)
|
||||
if pgboxes_cached and kc_cashed then
|
||||
local kc = kc_cashed.kctx
|
||||
local ok, word = pcall(
|
||||
kc.getTOCRWord, kc,
|
||||
rect.x, rect.y, rect.w, rect.h,
|
||||
self.tessocr_data, self.ocr_lang, self.ocr_type, 0, 1)
|
||||
Cache:insert(hash, CacheItem:new{ ocrword = word })
|
||||
return word
|
||||
end
|
||||
else
|
||||
return cached.ocrword
|
||||
end
|
||||
end
|
||||
|
||||
--[[
|
||||
get cached koptcontext for centain page. if context doesn't exist in cache make
|
||||
new context and reflow the src page immediatly, or wait background thread for
|
||||
@@ -271,19 +192,38 @@ function KoptInterface:getCachedContext(doc, pageno)
|
||||
end
|
||||
|
||||
--[[
get page dimensions: reflowed dimensions when text_wrap is 1,
plain document dimensions otherwise
--]]
function KoptInterface:getPageDimensions(doc, pageno, zoom, rotation)
    local reflowing = doc.configurable.text_wrap == 1
    if not reflowing then
        return Document.getPageDimensions(doc, pageno, zoom, rotation)
    end
    return self:getRFPageDimensions(doc, pageno, zoom, rotation)
end
|
||||
|
||||
--[[
get reflowed page dimensions, as reported by the cached koptcontext
of the page (zoom and rotation are not used here)
--]]
function KoptInterface:getRFPageDimensions(doc, pageno, zoom, rotation)
    local context = self:getCachedContext(doc, pageno)
    local width, height = context:getPageDim()
    return Geom:new{ w = width, h = height }
end
|
||||
|
||||
-- Render page `pageno`: the reflowed renderer when text_wrap is 1
-- (gamma is not forwarded to it), the plain document renderer otherwise.
function KoptInterface:renderPage(doc, pageno, rect, zoom, rotation, gamma, render_mode)
    local reflowing = doc.configurable.text_wrap == 1
    if not reflowing then
        return Document.renderPage(doc, pageno, rect, zoom, rotation, gamma, render_mode)
    end
    return self:renderreflowedPage(doc, pageno, rect, zoom, rotation, render_mode)
end
|
||||
|
||||
--[[
|
||||
inherited from common document interface
|
||||
render reflowed page into tile cache.
|
||||
--]]
|
||||
function KoptInterface:renderPage(doc, pageno, rect, zoom, rotation, render_mode)
|
||||
function KoptInterface:renderreflowedPage(doc, pageno, rect, zoom, rotation, render_mode)
|
||||
doc.render_mode = render_mode
|
||||
local bbox = doc:getPageBBox(pageno)
|
||||
local context_hash = self:getContextHash(doc, pageno, bbox)
|
||||
@@ -315,13 +255,21 @@ function KoptInterface:renderPage(doc, pageno, rect, zoom, rotation, render_mode
|
||||
end
|
||||
end
|
||||
|
||||
-- Hint page `pageno`: the reflowed hinting path when text_wrap is 1,
-- the plain document hinting otherwise. Nothing is returned.
function KoptInterface:hintPage(doc, pageno, zoom, rotation, gamma, render_mode)
    local reflowing = doc.configurable.text_wrap == 1
    if not reflowing then
        Document.hintPage(doc, pageno, zoom, rotation, gamma, render_mode)
        return
    end
    self:hintReflowedPage(doc, pageno, zoom, rotation, gamma, render_mode)
end
|
||||
|
||||
--[[
|
||||
inherited from common document interface render reflowed page into cache in
|
||||
background thread. this method returns immediatly leaving the precache flag on
|
||||
in context. subsequent usage of this context should wait for the precache flag
|
||||
off by calling self:waitForContext(kctx)
|
||||
--]]
|
||||
function KoptInterface:hintPage(doc, pageno, zoom, rotation, gamma, render_mode)
|
||||
function KoptInterface:hintReflowedPage(doc, pageno, zoom, rotation, gamma, render_mode)
|
||||
local bbox = doc:getPageBBox(pageno)
|
||||
local context_hash = self:getContextHash(doc, pageno, bbox)
|
||||
local kctx_hash = "kctx|"..context_hash
|
||||
@@ -341,11 +289,19 @@ function KoptInterface:hintPage(doc, pageno, zoom, rotation, gamma, render_mode)
|
||||
end
|
||||
end
|
||||
|
||||
-- Draw page `pageno` into `target`: the reflowed drawing path when
-- text_wrap is 1 (gamma is not forwarded to it), the plain document
-- drawing otherwise. Nothing is returned.
function KoptInterface:drawPage(doc, target, x, y, rect, pageno, zoom, rotation, gamma, render_mode)
    local reflowing = doc.configurable.text_wrap == 1
    if not reflowing then
        Document.drawPage(doc, target, x, y, rect, pageno, zoom, rotation, gamma, render_mode)
        return
    end
    self:drawReflowedPage(doc, target, x, y, rect, pageno, zoom, rotation, render_mode)
end
|
||||
|
||||
--[[
|
||||
inherited from common document interface
|
||||
draw cached tile pixels into target blitbuffer.
|
||||
--]]
|
||||
function KoptInterface:drawPage(doc, target, x, y, rect, pageno, zoom, rotation, render_mode)
|
||||
function KoptInterface:drawReflowedPage(doc, target, x, y, rect, pageno, zoom, rotation, render_mode)
|
||||
local tile = self:renderPage(doc, pageno, rect, zoom, rotation, render_mode)
|
||||
--DEBUG("now painting", tile, rect)
|
||||
target:blitFrom(tile.bb,
|
||||
@@ -355,6 +311,417 @@ function KoptInterface:drawPage(doc, target, x, y, rect, pageno, zoom, rotation,
|
||||
rect.w, rect.h)
|
||||
end
|
||||
|
||||
--[[
extract text boxes in a PDF/Djvu page
returned boxes are in native page coordinates zoomed at 1.0
--]]
function KoptInterface:getTextBoxes(doc, pageno)
    local page_text = doc:getPageTextBoxes(pageno)
    if page_text and #page_text > 1 then
        return page_text
    end
    -- the original page carries no usable text: reuse the native word boxes
    -- of the koptcontext in reflow mode, find boxes from scratch otherwise
    if doc.configurable.text_wrap == 1 then
        return self:getNativeTextBoxes(doc, pageno)
    end
    return self:getTextBoxesFromScratch(doc, pageno)
end
|
||||
|
||||
--[[
get text boxes in reflowed page via rectmaps in koptcontext
the result is cached under "rfpgboxes|<context hash>"; nothing is
returned when no koptcontext is cached for the page
--]]
function KoptInterface:getReflowedTextBoxes(doc, pageno)
    local bbox = doc:getPageBBox(pageno)
    local context_hash = self:getContextHash(doc, pageno, bbox)
    local boxes_key = "rfpgboxes|"..context_hash
    local boxes_item = Cache:check(boxes_key)
    if boxes_item then
        return boxes_item.rfpgboxes
    end
    local kctx_item = Cache:check("kctx|"..context_hash)
    if not kctx_item then
        return
    end
    local kc = self:waitForContext(kctx_item.kctx)
    local page_w, page_h = kc:getPageDim()
    local boxes = kc:getReflowedWordBoxes(0, 0, page_w, page_h)
    Cache:insert(boxes_key, CacheItem:new{ rfpgboxes = boxes })
    return boxes
end
|
||||
|
||||
--[[
get text boxes in native page via rectmaps in koptcontext
the result is cached under "nativepgboxes|<context hash>"; nothing is
returned when no koptcontext is cached for the page
--]]
function KoptInterface:getNativeTextBoxes(doc, pageno)
    local bbox = doc:getPageBBox(pageno)
    local context_hash = self:getContextHash(doc, pageno, bbox)
    local boxes_key = "nativepgboxes|"..context_hash
    local boxes_item = Cache:check(boxes_key)
    if boxes_item then
        return boxes_item.nativepgboxes
    end
    local kctx_item = Cache:check("kctx|"..context_hash)
    if not kctx_item then
        return
    end
    local kc = self:waitForContext(kctx_item.kctx)
    local page_w, page_h = kc:getPageDim()
    local boxes = kc:getNativeWordBoxes(0, 0, page_w, page_h)
    Cache:insert(boxes_key, CacheItem:new{ nativepgboxes = boxes })
    return boxes
end
|
||||
|
||||
--[[
get text boxes in native page via optical method,
i.e. OCR pre-processing in Tesseract and Leptonica.
the result is cached under "pgboxes|<file>|<pageno>"
--]]
function KoptInterface:getTextBoxesFromScratch(doc, pageno)
    local boxes_key = "pgboxes|"..doc.file.."|"..pageno
    local boxes_item = Cache:check(boxes_key)
    if boxes_item then
        return boxes_item.pgboxes
    end
    -- a throw-away context covering the whole native page at zoom 1.0
    local page_size = Document.getNativePageDimensions(doc, pageno)
    local whole_page = {
        x0 = 0, y0 = 0,
        x1 = page_size.w,
        y1 = page_size.h,
    }
    local kc = self:createContext(doc, pageno, whole_page)
    kc:setZoom(1.0)
    local page = doc._document:openPage(pageno)
    page:getPagePix(kc)
    local boxes = kc:getNativeWordBoxes(0, 0, page_size.w, page_size.h)
    Cache:insert(boxes_key, CacheItem:new{ pgboxes = boxes })
    page:close()
    kc:free()
    return boxes
end
|
||||
|
||||
--[[
OCR word inside the rect area of the page
rect should be in native page coordinates
returns the recognized word (cached per file, page and rect), or nil
when the OCR call raises an error
--]]
function KoptInterface:getOCRWord(doc, pageno, rect)
    local ocrengine = "ocrengine"
    if not Cache:check(ocrengine) then
        local dummy = KOPTContext.new()
        Cache:insert(ocrengine, OCREngine:new{ ocrengine = dummy })
    end
    self.ocr_lang = doc.configurable.doc_language
    -- every field gets its own separator so that rects such as
    -- (x=1, y=23) and (x=12, y=3) cannot share one cache entry
    local hash = "ocrword|"..doc.file.."|"..pageno
        .."|"..rect.x.."|"..rect.y.."|"..rect.w.."|"..rect.h
    local cached = Cache:check(hash)
    if cached then
        return cached.ocrword
    end
    local bbox = {
        x0 = rect.x,
        y0 = rect.y,
        x1 = rect.x + rect.w,
        y1 = rect.y + rect.h,
    }
    local kc = self:createContext(doc, pageno, bbox)
    kc:setZoom(1.0)
    local page = doc._document:openPage(pageno)
    page:getPagePix(kc)
    local word_w, word_h = kc:getPageDim()
    local ok, word = pcall(
        kc.getTOCRWord, kc,
        0, 0, word_w, word_h,
        self.tessocr_data, self.ocr_lang, self.ocr_type, 0, 1)
    -- release page and context whether or not OCR succeeded
    page:close()
    kc:free()
    if not ok then
        -- on failure `word` holds the error message: do not cache it and
        -- do not hand it back as if it were recognized text
        DEBUG("OCR word failed", word)
        return nil
    end
    Cache:insert(hash, CacheItem:new{ ocrword = word })
    return word
end
|
||||
|
||||
--[[
get index of nearest word box around pos
boxes is a list of lines, each line a list of word boxes with
x0, y0, x1, y1 fields; pos has x and y fields.
returns line index and word index of the first box containing pos, or
else of the box whose center is closest to pos. lines without words are
skipped; when there is no word box at all, 1, 1 is returned.
--]]
local function getWordBoxIndices(boxes, pos)
    local px, py = pos.x, pos.y
    -- 0 when pos lies inside the box, else squared distance to its center
    local function box_distance(wb)
        if wb.x0 <= px and wb.y0 <= py and wb.x1 >= px and wb.y1 >= py then
            return 0
        end
        local cx, cy = (wb.x0 + wb.x1) / 2, (wb.y0 + wb.y1) / 2
        return (px - cx)*(px - cx) + (py - cy)*(py - cy)
    end

    local m, n = 1, 1
    local best -- distance of the box at m, n; nil until a box was seen
    for i = 1, #boxes do
        local line = boxes[i]
        for j = 1, #line do
            local distance = box_distance(line[j])
            -- strict comparison keeps the earliest box on ties
            if best == nil or distance < best then
                m, n, best = i, j, distance
            end
        end
    end
    return m, n
end
|
||||
|
||||
--[[
get word and word box around pos
returns { word = ..., box = Geom } where the box spans the word
horizontally and its line vertically; returns nothing when boxes holds
no word box at the selected indices (e.g. an empty page)
--]]
function KoptInterface:getWordFromBoxes(boxes, pos)
    local i, j = getWordBoxIndices(boxes, pos)
    local lb = boxes[i]
    -- only index into the line once we know the line exists
    local wb = lb and lb[j]
    if lb and wb then
        local box = Geom:new{
            x = wb.x0, y = lb.y0,
            w = wb.x1 - wb.x0,
            h = lb.y1 - lb.y0,
        }
        return {
            word = wb.word,
            box = box,
        }
    end
end
|
||||
|
||||
--[[
get text and text boxes between pos0 and pos1
returns { text = string, boxes = list of Geom }, one Geom per selected
line: first and last line are clipped to the selected words, lines in
between span the whole line box. an empty box list yields an empty
selection.
--]]
function KoptInterface:getTextFromBoxes(boxes, pos0, pos1)
    local pieces = {}
    local line_boxes = {}
    -- nothing can be selected on a page without any line
    if #boxes == 0 then
        return {
            text = "",
            boxes = line_boxes,
        }
    end
    local i_start, j_start = getWordBoxIndices(boxes, pos0)
    local i_stop, j_stop = getWordBoxIndices(boxes, pos1)
    -- make sure the start does not come after the stop
    if i_start == i_stop and j_start > j_stop or i_start > i_stop then
        i_start, i_stop = i_stop, i_start
        j_start, j_stop = j_stop, j_start
    end
    for i = i_start, i_stop do
        local lb = boxes[i]
        if i_start == i_stop and #lb == 0 then break end
        -- insert line words
        local j0 = i > i_start and 1 or j_start
        local j1 = i < i_stop and #lb or j_stop
        for j = j0, j1 do
            local word = lb[j].word
            if word then
                -- no space after the last word of the line, nor after a word
                -- ending in a NUL or multi-byte UTF-8 character
                local space = (word:match("[%z\194-\244][\128-\191]*$") or j == j1)
                               and "" or " "
                pieces[#pieces + 1] = word..space
            end
        end
        -- insert line box: clipped to the start word on the first line and
        -- to the stop word on the last line, whole line otherwise
        local left = i == i_start and lb[j_start] or lb
        local right = i == i_stop and lb[j_stop] or lb
        table.insert(line_boxes, Geom:new{
            x = left.x0, y = lb.y0,
            w = right.x1 - left.x0,
            h = lb.y1 - lb.y0,
        })
    end
    return {
        text = table.concat(pieces),
        boxes = line_boxes,
    }
end
|
||||
|
||||
--[[
get word and word box from doc position
nothing is returned when the page has no text boxes
]]--
function KoptInterface:getWordFromPosition(doc, pos)
    local page_boxes = self:getTextBoxes(doc, pos.page)
    if not page_boxes then
        return
    end
    if doc.configurable.text_wrap == 1 then
        return self:getWordFromReflowPosition(doc, page_boxes, pos)
    end
    return self:getWordFromNativePosition(doc, page_boxes, pos)
end
|
||||
|
||||
--[[
get word and word box from position in reflowed page
the word under pos is located in the reflowed boxes, its center is
mapped back to the native page and the word is read from the native boxes
]]--
function KoptInterface:getWordFromReflowPosition(doc, boxes, pos)
    local pageno = pos.page
    local screen_boxes = self:getReflowedTextBoxes(doc, pageno)
    local screen_word = self:getWordFromBoxes(screen_boxes, pos)
    local screen_center = screen_word.box:center()
    local native_pos = self:reflowToNativePosTransform(doc, pageno, screen_center)
    local page_word = self:getWordFromBoxes(boxes, native_pos)
    return {
        word = page_word.word,
        pbox = page_word.box, -- box on page
        sbox = screen_word.box, -- box on screen
        pos = native_pos,
    }
end
|
||||
|
||||
--[[
get word and word box from position in native page
page box and screen box are the same box here
]]--
function KoptInterface:getWordFromNativePosition(doc, boxes, pos)
    DEBUG("boxes", boxes)
    local found = self:getWordFromBoxes(boxes, pos)
    return {
        word = found.word,
        pbox = found.box, -- box on page
        sbox = found.box, -- box on screen
        pos = pos,
    }
end
|
||||
|
||||
--[[
transform position in native page to reflowed page
uses the koptcontext cached under "kctx|<context hash>" for the page
]]--
function KoptInterface:nativeToReflowPosTransform(doc, pageno, pos)
    local bbox = doc:getPageBBox(pageno)
    local kctx_key = "kctx|"..self:getContextHash(doc, pageno, bbox)
    local kctx_item = Cache:check(kctx_key)
    local kc = self:waitForContext(kctx_item.kctx)
    local rx, ry = kc:nativeToReflowPosTransform(pos.x, pos.y)
    return { x = rx, y = ry }
end
|
||||
|
||||
--[[
transform position in reflowed page to native page
uses the koptcontext cached under "kctx|<context hash>" for the page
]]--
function KoptInterface:reflowToNativePosTransform(doc, pageno, pos)
    local bbox = doc:getPageBBox(pageno)
    local kctx_key = "kctx|"..self:getContextHash(doc, pageno, bbox)
    local kctx_item = Cache:check(kctx_key)
    local kc = self:waitForContext(kctx_item.kctx)
    local nx, ny = kc:reflowToNativePosTransform(pos.x, pos.y)
    return { x = nx, y = ny }
end
|
||||
|
||||
--[[
get text and text boxes from screen positions
nothing is returned when the page of pos0 has no text boxes
--]]
function KoptInterface:getTextFromPositions(doc, pos0, pos1)
    local page_boxes = self:getTextBoxes(doc, pos0.page)
    if not page_boxes then
        return
    end
    if doc.configurable.text_wrap == 1 then
        return self:getTextFromReflowPositions(doc, page_boxes, pos0, pos1)
    end
    return self:getTextFromNativePositions(doc, page_boxes, pos0, pos1)
end
|
||||
|
||||
--[[
get text and text boxes from screen positions for reflowed page
pos0 and pos1 are positions in the reflowed page. the words under them
are mapped back to native page positions, which are then used to read
text and page boxes from native_boxes.
]]--
function KoptInterface:getTextFromReflowPositions(doc, native_boxes, pos0, pos1)
    local pageno = pos0.page
    local reflowed_page_boxes = self:getReflowedTextBoxes(doc, pageno)

    local reflowed_box0 = self:getWordFromBoxes(reflowed_page_boxes, pos0)
    local reflowed_pos0 = reflowed_box0.box:center()
    local native_pos0 = self:reflowToNativePosTransform(doc, pageno, reflowed_pos0)

    local reflowed_box1 = self:getWordFromBoxes(reflowed_page_boxes, pos1)
    local reflowed_pos1 = reflowed_box1.box:center()
    local native_pos1 = self:reflowToNativePosTransform(doc, pageno, reflowed_pos1)

    local reflowed_text_boxes = self:getTextFromBoxes(reflowed_page_boxes, pos0, pos1)
    -- native boxes live in native page coordinates, so they have to be
    -- searched with the transformed positions, not the reflowed ones
    local native_text_boxes = self:getTextFromBoxes(native_boxes, native_pos0, native_pos1)
    local text_boxes = {
        text = native_text_boxes.text,
        pboxes = native_text_boxes.boxes, -- boxes on page
        sboxes = reflowed_text_boxes.boxes, -- boxes on screen
        pos0 = native_pos0,
        pos1 = native_pos1
    }
    return text_boxes
end
|
||||
|
||||
--[[
get text and text boxes from screen positions for native page
page boxes and screen boxes are the same boxes here
]]--
function KoptInterface:getTextFromNativePositions(doc, native_boxes, pos0, pos1)
    local native_text_boxes = self:getTextFromBoxes(native_boxes, pos0, pos1)
    local text_boxes = {
        -- same key as in getTextFromReflowPositions, so callers can read
        -- the selection the same way in both modes
        text = native_text_boxes.text,
        -- kept for callers that still read the former key
        word = native_text_boxes.text,
        pboxes = native_text_boxes.boxes, -- boxes on page
        sboxes = native_text_boxes.boxes, -- boxes on screen
        pos0 = pos0,
        pos1 = pos1,
    }
    return text_boxes
end
|
||||
|
||||
--[[
get text boxes from page positions
in reflow mode the page positions are first transformed to reflowed
positions and the boxes are taken from the reflowed page; nothing is
returned when either position is missing
--]]
function KoptInterface:getPageBoxesFromPositions(doc, pageno, ppos0, ppos1)
    if not (ppos0 and ppos1) then return end
    local page_boxes, from, to
    if doc.configurable.text_wrap == 1 then
        from = self:nativeToReflowPosTransform(doc, pageno, ppos0)
        to = self:nativeToReflowPosTransform(doc, pageno, ppos1)
        page_boxes = self:getReflowedTextBoxes(doc, pageno)
    else
        from, to = ppos0, ppos1
        page_boxes = self:getTextBoxes(doc, pageno)
    end
    local selection = self:getTextFromBoxes(page_boxes, from, to)
    return selection.boxes
end
|
||||
|
||||
--[[
|
||||
helper functions
|
||||
--]]
|
||||
|
||||
@@ -1,6 +1,5 @@
|
||||
require "cache"
|
||||
require "ui/geometry"
|
||||
require "ui/screen"
|
||||
require "ui/reader/readerconfig"
|
||||
require "ui/data/koptoptions"
|
||||
require "document/koptinterface"
|
||||
@@ -10,8 +9,6 @@ PdfDocument = Document:new{
|
||||
-- muPDF manages its own additional cache
|
||||
mupdf_cache_size = 5 * 1024 * 1024,
|
||||
dc_null = DrawContext.new(),
|
||||
screen_size = Screen:getSize(),
|
||||
screen_dpi = Screen:getDPI(),
|
||||
options = KoptOptions,
|
||||
configurable = Configurable,
|
||||
koptinterface = KoptInterface,
|
||||
@@ -44,27 +41,27 @@ function PdfDocument:unlock(password)
|
||||
return self:_readMetadata()
|
||||
end
|
||||
|
||||
function PdfDocument:getTextBoxes(pageno)
|
||||
if self.configurable.text_wrap == 1 then
|
||||
return self.koptinterface:getReflewTextBoxes(self, pageno)
|
||||
else
|
||||
local page = self._document:openPage(pageno)
|
||||
local text = page:getPageText()
|
||||
page:close()
|
||||
if not text or #text == 0 then
|
||||
return self.koptinterface:getTextBoxes(self, pageno)
|
||||
else
|
||||
return text
|
||||
end
|
||||
end
|
||||
-- Open page `pageno`, read its page text, close the page again and
-- return the text.
function PdfDocument:getPageTextBoxes(pageno)
    local pdf_page = self._document:openPage(pageno)
    local page_text = pdf_page:getPageText()
    pdf_page:close()
    return page_text
end
|
||||
|
||||
--[[
Look up the word at position `spos`.

Delegates to the koptinterface, passing this document as the first argument,
and returns whatever the koptinterface returns.
--]]
function PdfDocument:getWordFromPosition(spos)
    local kopt = self.koptinterface
    return kopt:getWordFromPosition(self, spos)
end
|
||||
|
||||
--[[
Look up the text between positions `spos0` and `spos1`.

Delegates to the koptinterface, passing this document as the first argument,
and returns whatever the koptinterface returns.
--]]
function PdfDocument:getTextFromPositions(spos0, spos1)
    local kopt = self.koptinterface
    return kopt:getTextFromPositions(self, spos0, spos1)
end
|
||||
|
||||
--[[
Look up the boxes on page `pageno` between positions `ppos0` and `ppos1`.

Delegates to the koptinterface, passing this document as the first argument,
and returns whatever the koptinterface returns.
--]]
function PdfDocument:getPageBoxesFromPositions(pageno, ppos0, ppos1)
    local kopt = self.koptinterface
    return kopt:getPageBoxesFromPositions(self, pageno, ppos0, ppos1)
end
|
||||
|
||||
function PdfDocument:getOCRWord(pageno, rect)
|
||||
if self.configurable.text_wrap == 1 then
|
||||
return self.koptinterface:getReflewOCRWord(self, pageno, rect)
|
||||
else
|
||||
return self.koptinterface:getOCRWord(self, pageno, rect)
|
||||
end
|
||||
return self.koptinterface:getOCRWord(self, pageno, rect)
|
||||
end
|
||||
|
||||
function PdfDocument:getUsedBBox(pageno)
|
||||
@@ -91,48 +88,23 @@ function PdfDocument:getUsedBBox(pageno)
|
||||
end
|
||||
|
||||
function PdfDocument:getPageBBox(pageno)
|
||||
if self.configurable.text_wrap ~= 1 and self.configurable.trim_page == 1 then
|
||||
-- auto bbox finding
|
||||
return self.koptinterface:getAutoBBox(self, pageno)
|
||||
elseif self.configurable.text_wrap ~= 1 and self.configurable.trim_page == 2 then
|
||||
-- semi-auto bbox finding
|
||||
return self.koptinterface:getSemiAutoBBox(self, pageno)
|
||||
else
|
||||
-- get saved manual bbox
|
||||
return Document.getPageBBox(self, pageno)
|
||||
end
|
||||
return self.koptinterface:getPageBBox(self, pageno)
|
||||
end
|
||||
|
||||
function PdfDocument:getPageDimensions(pageno, zoom, rotation)
|
||||
if self.configurable.text_wrap == 1 then
|
||||
return self.koptinterface:getPageDimensions(self, pageno, zoom, rotation)
|
||||
else
|
||||
return Document.getPageDimensions(self, pageno, zoom, rotation)
|
||||
end
|
||||
return self.koptinterface:getPageDimensions(self, pageno, zoom, rotation)
|
||||
end
|
||||
|
||||
function PdfDocument:renderPage(pageno, rect, zoom, rotation, gamma, render_mode)
|
||||
if self.configurable.text_wrap == 1 then
|
||||
return self.koptinterface:renderPage(self, pageno, rect, zoom, rotation, render_mode)
|
||||
else
|
||||
return Document.renderPage(self, pageno, rect, zoom, rotation, gamma, render_mode)
|
||||
end
|
||||
return self.koptinterface:renderPage(self, pageno, rect, zoom, rotation, gamma, render_mode)
|
||||
end
|
||||
|
||||
function PdfDocument:hintPage(pageno, zoom, rotation, gamma, render_mode)
|
||||
if self.configurable.text_wrap == 1 then
|
||||
self.koptinterface:hintPage(self, pageno, zoom, rotation, gamma, render_mode)
|
||||
else
|
||||
Document.hintPage(self, pageno, zoom, rotation, gamma, render_mode)
|
||||
end
|
||||
return self.koptinterface:hintPage(self, pageno, zoom, rotation, gamma, render_mode)
|
||||
end
|
||||
|
||||
function PdfDocument:drawPage(target, x, y, rect, pageno, zoom, rotation, gamma, render_mode)
|
||||
if self.configurable.text_wrap == 1 then
|
||||
self.koptinterface:drawPage(self, target, x, y, rect, pageno, zoom, rotation, render_mode)
|
||||
else
|
||||
Document.drawPage(self, target, x, y, rect, pageno, zoom, rotation, gamma, render_mode)
|
||||
end
|
||||
return self.koptinterface:drawPage(self, target, x, y, rect, pageno, zoom, rotation, gamma, render_mode)
|
||||
end
|
||||
|
||||
DocumentRegistry:addProvider("pdf", "application/pdf", PdfDocument)
|
||||
|
||||
@@ -295,3 +295,13 @@ function Geom:midpoint(geom)
|
||||
}
|
||||
end
|
||||
|
||||
--[[
return the center point of this geom

The result is a new Geom with zero width and height whose x/y sit at the
middle of this geom (x + w/2, y + h/2).
]]--
function Geom:center()
    local center_x = self.x + self.w / 2
    local center_y = self.y + self.h / 2
    return Geom:new{ x = center_x, y = center_y, w = 0, h = 0 }
end
|
||||
|
||||
@@ -88,32 +88,36 @@ function ReaderHighlight:onTap(arg, ges)
|
||||
local items = self.view.highlight.saved[page]
|
||||
if not items then items = {} end
|
||||
for i = 1, #items do
|
||||
for j = 1, #items[i].boxes do
|
||||
if inside_box(ges, items[i].boxes[j]) then
|
||||
DEBUG("Tap on hightlight")
|
||||
self.edit_highlight_dialog = HighlightDialog:new{
|
||||
buttons = {
|
||||
{
|
||||
local pos0, pos1 = items[i].pos0, items[i].pos1
|
||||
local boxes = self.ui.document:getPageBoxesFromPositions(page, pos0, pos1)
|
||||
if boxes then
|
||||
for index, box in pairs(boxes) do
|
||||
if inside_box(ges, box) then
|
||||
DEBUG("Tap on hightlight")
|
||||
self.edit_highlight_dialog = HighlightDialog:new{
|
||||
buttons = {
|
||||
{
|
||||
text = _("Delete"),
|
||||
callback = function()
|
||||
self:deleteHighlight(page, i)
|
||||
UIManager:close(self.edit_highlight_dialog)
|
||||
end,
|
||||
},
|
||||
{
|
||||
text = _("Edit"),
|
||||
enabled = false,
|
||||
callback = function()
|
||||
self:editHighlight()
|
||||
UIManager:close(self.edit_highlight_dialog)
|
||||
end,
|
||||
{
|
||||
text = _("Delete"),
|
||||
callback = function()
|
||||
self:deleteHighlight(page, i)
|
||||
UIManager:close(self.edit_highlight_dialog)
|
||||
end,
|
||||
},
|
||||
{
|
||||
text = _("Edit"),
|
||||
enabled = false,
|
||||
callback = function()
|
||||
self:editHighlight()
|
||||
UIManager:close(self.edit_highlight_dialog)
|
||||
end,
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
UIManager:show(self.edit_highlight_dialog)
|
||||
return true
|
||||
}
|
||||
UIManager:show(self.edit_highlight_dialog)
|
||||
return true
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
@@ -127,19 +131,12 @@ function ReaderHighlight:onHold(arg, ges)
|
||||
DEBUG("not inside page area")
|
||||
return true
|
||||
end
|
||||
self.page_boxes = self.ui.document:getTextBoxes(self.hold_pos.page)
|
||||
--DEBUG("page text", page_boxes)
|
||||
|
||||
if not self.page_boxes or #self.page_boxes == 0 then
|
||||
DEBUG("no page boxes detected")
|
||||
return true
|
||||
end
|
||||
|
||||
self.selected_word = self:getWordFromPosition(self.page_boxes, self.hold_pos)
|
||||
|
||||
self.selected_word = self.ui.document:getWordFromPosition(self.hold_pos)
|
||||
DEBUG("selected word:", self.selected_word)
|
||||
if self.selected_word then
|
||||
local boxes = {}
|
||||
table.insert(boxes, self.selected_word.box)
|
||||
table.insert(boxes, self.selected_word.sbox)
|
||||
self.view.highlight.temp[self.hold_pos.page] = boxes
|
||||
UIManager:setDirty(self.dialog, "partial")
|
||||
end
|
||||
@@ -147,19 +144,19 @@ function ReaderHighlight:onHold(arg, ges)
|
||||
end
|
||||
|
||||
function ReaderHighlight:onHoldPan(arg, ges)
|
||||
if not self.page_boxes or #self.page_boxes == 0 or self.hold_pos == nil then
|
||||
DEBUG("no page boxes detected")
|
||||
if self.hold_pos == nil then
|
||||
DEBUG("no previous hold position")
|
||||
return true
|
||||
end
|
||||
self.holdpan_pos = self.view:screenToPageTransform(ges.pos)
|
||||
DEBUG("holdpan position in page", self.holdpan_pos)
|
||||
self.selected_text = self:getTextFromPositions(self.page_boxes, self.hold_pos, self.holdpan_pos)
|
||||
--DEBUG("selected text:", self.selected_text)
|
||||
self.selected_text = self.ui.document:getTextFromPositions(self.hold_pos, self.holdpan_pos)
|
||||
DEBUG("selected text:", self.selected_text)
|
||||
if self.selected_text then
|
||||
self.view.highlight.temp[self.hold_pos.page] = self.selected_text.boxes
|
||||
self.view.highlight.temp[self.hold_pos.page] = self.selected_text.sboxes
|
||||
-- remove selected word if hold moves out of word box
|
||||
if self.selected_word and
|
||||
not self.selected_word.box:contains(self.selected_text.boxes[1]) then
|
||||
not self.selected_word.sbox:contains(self.selected_text.sboxes[1]) then
|
||||
self.selected_word = nil
|
||||
end
|
||||
UIManager:setDirty(self.dialog, "partial")
|
||||
@@ -172,11 +169,11 @@ function ReaderHighlight:lookup(selected_word)
|
||||
self.ui:handleEvent(Event:new("LookupWord", selected_word.word))
|
||||
-- or we will do OCR
|
||||
else
|
||||
local word_box = selected_word.box
|
||||
--word_box.x = word_box.x - math.floor(word_box.h * 0.1)
|
||||
--word_box.y = word_box.y - math.floor(word_box.h * 0.2)
|
||||
--word_box.w = word_box.w + math.floor(word_box.h * 0.2)
|
||||
--word_box.h = word_box.h + math.floor(word_box.h * 0.4)
|
||||
local word_box = selected_word.pbox:copy()
|
||||
word_box.x = word_box.x - math.floor(word_box.h * 0.1)
|
||||
word_box.y = word_box.y - math.floor(word_box.h * 0.1)
|
||||
word_box.w = word_box.w + math.floor(word_box.h * 0.2)
|
||||
word_box.h = word_box.h + math.floor(word_box.h * 0.2)
|
||||
local word = self.ui.document:getOCRWord(self.hold_pos.page, word_box)
|
||||
DEBUG("OCRed word:", word)
|
||||
self.ui:handleEvent(Event:new("LookupWord", word))
|
||||
@@ -188,12 +185,12 @@ function ReaderHighlight:translate(selected_text)
|
||||
self.ui:handleEvent(Event:new("LookupWord", selected_text.text))
|
||||
-- or we will do OCR
|
||||
else
|
||||
local text_box = selected_text.boxes[1]
|
||||
local text_pboxes = selected_text.pboxes[1]:copy()
|
||||
--text_box.x = text_box.x - math.floor(text_box.h * 0.1)
|
||||
text_box.y = text_box.y - math.floor(text_box.h * 0.2)
|
||||
text_pboxes.y = text_pboxes.y - math.floor(text_pboxes.h * 0.2)
|
||||
--text_box.w = text_box.w + math.floor(text_box.h * 0.2)
|
||||
text_box.h = text_box.h + math.floor(text_box.h * 0.4)
|
||||
local text = self.ui.document:getOCRWord(self.hold_pos.page, text_box)
|
||||
text_pboxes.h = text_pboxes.h + math.floor(text_pboxes.h * 0.4)
|
||||
local text = self.ui.document:getOCRWord(self.hold_pos.page, text_pboxes)
|
||||
DEBUG("OCRed text:", text)
|
||||
self.ui:handleEvent(Event:new("LookupWord", text))
|
||||
end
|
||||
@@ -319,7 +316,8 @@ function ReaderHighlight:saveHighlight()
|
||||
end
|
||||
local hl_item = {}
|
||||
hl_item["text"] = self.selected_text.text
|
||||
hl_item["boxes"] = self.selected_text.boxes
|
||||
hl_item["pos0"] = self.selected_text.pos0
|
||||
hl_item["pos1"] = self.selected_text.pos1
|
||||
hl_item["datetime"] = os.date("%Y-%m-%d %H:%M:%S"),
|
||||
table.insert(self.view.highlight.saved[page], hl_item)
|
||||
if self.selected_text.text ~= "" then
|
||||
@@ -365,125 +363,3 @@ end
|
||||
-- Placeholder for editing a highlight: the body only emits a debug message.
function ReaderHighlight:editHighlight()
|
||||
DEBUG("edit highlight")
|
||||
end
|
||||
|
||||
--[[
get index of nearest word box around pos

`boxes` is a list of lines, each line being a list of word boxes with
x0/y0/x1/y1 fields; `pos` has x/y fields. Returns the line index and the word
index of the closest word box. A box containing pos has distance 0, any other
box is ranked by the squared distance from pos to its center. On ties the
earliest box in iteration order wins. Returns 1, 1 when there is no word box.
--]]
local function getWordBoxIndices(boxes, pos)
    local px, py = pos.x, pos.y

    -- squared distance from pos to the center of wb, 0 when pos lies inside wb
    local function box_distance(wb)
        if wb.x0 <= px and wb.y0 <= py and wb.x1 >= px and wb.y1 >= py then
            return 0
        end
        local dx = px - (wb.x0 + wb.x1) / 2
        local dy = py - (wb.y0 + wb.y1) / 2
        return dx * dx + dy * dy
    end

    local m, n = 1, 1
    -- Keep the best distance in a local instead of re-reading boxes[m][n]:
    -- the initial (1, 1) candidate does not exist when the first line holds
    -- no words, and looking it up used to raise an error.
    local best = math.huge
    for i = 1, #boxes do
        local line = boxes[i]
        for j = 1, #line do
            local distance = box_distance(line[j])
            if distance < best then
                best, m, n = distance, i, j
            end
        end
    end
    return m, n
end
|
||||
|
||||
--[[
get word and word box around pos

Picks the word box nearest to `pos` via getWordBoxIndices and returns a table
with `word` (the word field of that box) and `box` (a Geom spanning the word
horizontally and its line vertically). Returns nothing when no line or word
box is found.
--]]
function ReaderHighlight:getWordFromPosition(boxes, pos)
    local i, j = getWordBoxIndices(boxes, pos)
    local lb = boxes[i]
    -- go through lb so that a missing line yields nil here instead of raising
    -- before the guard below gets a chance to run
    local wb = lb and lb[j]
    if lb and wb then
        return {
            word = wb.word,
            box = Geom:new{
                x = wb.x0, y = lb.y0,
                w = wb.x1 - wb.x0,
                h = lb.y1 - lb.y0,
            },
        }
    end
end
|
||||
|
||||
--[[
get text and text boxes between pos0 and pos1

Resolves both positions to (line, word) indices with getWordBoxIndices, swaps
them if they are in reverse order, then walks every line in the range.
Returns a table with `text` (the concatenated words) and `boxes` (one Geom per
line covering the selected part of that line).
--]]
function ReaderHighlight:getTextFromPositions(boxes, pos0, pos1)
    local i_start, j_start = getWordBoxIndices(boxes, pos0)
    local i_stop, j_stop = getWordBoxIndices(boxes, pos1)
    if i_start == i_stop and j_start > j_stop or i_start > i_stop then
        i_start, i_stop = i_stop, i_start
        j_start, j_stop = j_stop, j_start
    end

    -- collect the pieces and join once instead of growing a string per word
    local pieces = {}
    local line_boxes = {}
    for i = i_start, i_stop do
        local lb = boxes[i]
        if i_start == i_stop and #lb == 0 then break end

        -- insert line words
        local j0 = i > i_start and 1 or j_start
        local j1 = i < i_stop and #lb or j_stop
        for j = j0, j1 do
            local word = lb[j].word
            if word then
                pieces[#pieces + 1] = word
                -- append a space unless the word ends in a NUL or multi-byte
                -- UTF-8 character, or it is the last word taken from this line
                if not (word:match("[%z\194-\244][\128-\191]*$") or j == j1) then
                    pieces[#pieces + 1] = " "
                end
            end
        end

        -- insert line box: full line width by default, clipped to the first
        -- selected word on the start line and the last one on the stop line
        local left, right = lb.x0, lb.x1
        if i == i_start then left = lb[j_start].x0 end
        if i == i_stop then right = lb[j_stop].x1 end
        table.insert(line_boxes, Geom:new{
            x = left, y = lb.y0,
            w = right - left,
            h = lb.y1 - lb.y0,
        })
    end

    return {
        text = table.concat(pieces),
        boxes = line_boxes,
    }
end
|
||||
|
||||
|
||||
@@ -331,12 +331,16 @@ function ReaderView:drawSavedHighlight(bb, x, y)
|
||||
local items = self.highlight.saved[page]
|
||||
if not items then items = {} end
|
||||
for i = 1, #items do
|
||||
for j = 1, #items[i].boxes do
|
||||
local rect = self:pageToScreenTransform(page, items[i].boxes[j])
|
||||
if rect then
|
||||
self:drawHighlightRect(bb, x, y, rect, self.highlight.saved_drawer)
|
||||
end
|
||||
end -- end for each box
|
||||
local pos0, pos1 = items[i].pos0, items[i].pos1
|
||||
local boxes = self.ui.document:getPageBoxesFromPositions(page, pos0, pos1)
|
||||
if boxes then
|
||||
for _, box in pairs(boxes) do
|
||||
local rect = self:pageToScreenTransform(page, box)
|
||||
if rect then
|
||||
self:drawHighlightRect(bb, x, y, rect, self.highlight.saved_drawer)
|
||||
end
|
||||
end -- end for each box
|
||||
end -- end if boxes
|
||||
end -- end for each hightlight
|
||||
end -- end for each page
|
||||
end
|
||||
|
||||
Submodule koreader-base updated: c5ac9c236b...aba8feea83
Reference in New Issue
Block a user