mirror of
https://github.com/koreader/koreader.git
synced 2025-08-10 00:52:38 +00:00
synchronize hightlights in non-/reflowing modes
and there is no need to OCR word if we can extract text from original page in reflowing mode thanks to the rect maps between normal page and reflowed page.
This commit is contained in:
@@ -2,6 +2,7 @@ require "dbg"
|
||||
require "cache"
|
||||
require "ui/geometry"
|
||||
require "ui/device"
|
||||
require "ui/screen"
|
||||
require "ui/reader/readerconfig"
|
||||
|
||||
KoptInterface = {
|
||||
@@ -10,6 +11,7 @@ KoptInterface = {
|
||||
ocr_type = 3, -- default 0, for more accuracy use 3
|
||||
last_context_size = nil,
|
||||
default_context_size = 1024*1024,
|
||||
screen_dpi = Screen:getDPI(),
|
||||
}
|
||||
|
||||
ContextCacheItem = CacheItem:new{}
|
||||
@@ -60,7 +62,7 @@ function KoptInterface:createContext(doc, pageno, bbox)
|
||||
kc:setRotate(doc.configurable.screen_rotation)
|
||||
kc:setColumns(doc.configurable.max_columns)
|
||||
kc:setDeviceDim(screen_size.w, screen_size.h)
|
||||
kc:setDeviceDPI(doc.screen_dpi)
|
||||
kc:setDeviceDPI(self.screen_dpi)
|
||||
kc:setStraighten(doc.configurable.auto_straighten)
|
||||
kc:setJustification(doc.configurable.justification)
|
||||
kc:setZoom(doc.configurable.font_size)
|
||||
@@ -82,6 +84,22 @@ function KoptInterface:getContextHash(doc, pageno, bbox)
|
||||
return doc.file.."|"..pageno.."|"..doc.configurable:hash("|").."|"..bbox_hash.."|"..screen_size_hash
|
||||
end
|
||||
|
||||
function KoptInterface:getPageBBox(doc, pageno)
|
||||
if doc.configurable.text_wrap ~= 1 and doc.configurable.trim_page == 1 then
|
||||
-- auto bbox finding
|
||||
return self:getAutoBBox(doc, pageno)
|
||||
elseif doc.configurable.text_wrap ~= 1 and doc.configurable.trim_page == 2 then
|
||||
-- semi-auto bbox finding
|
||||
return self:getSemiAutoBBox(doc, pageno)
|
||||
else
|
||||
-- get saved manual bbox
|
||||
return Document.getPageBBox(doc, pageno)
|
||||
end
|
||||
end
|
||||
|
||||
--[[
|
||||
auto detect bbox
|
||||
--]]
|
||||
function KoptInterface:getAutoBBox(doc, pageno)
|
||||
local native_size = Document.getNativePageDimensions(doc, pageno)
|
||||
local bbox = {
|
||||
@@ -97,14 +115,18 @@ function KoptInterface:getAutoBBox(doc, pageno)
|
||||
local kc = self:createContext(doc, pageno, bbox)
|
||||
bbox.x0, bbox.y0, bbox.x1, bbox.y1 = page:getAutoBBox(kc)
|
||||
DEBUG("Auto detected bbox", bbox)
|
||||
page:close()
|
||||
Cache:insert(hash, CacheItem:new{ autobbox = bbox })
|
||||
page:close()
|
||||
kc:free()
|
||||
return bbox
|
||||
else
|
||||
return cached.autobbox
|
||||
end
|
||||
end
|
||||
|
||||
--[[
|
||||
detect bbox within user restricted bbox
|
||||
--]]
|
||||
function KoptInterface:getSemiAutoBBox(doc, pageno)
|
||||
-- use manual bbox
|
||||
local bbox = Document.getPageBBox(doc, pageno)
|
||||
@@ -123,114 +145,13 @@ function KoptInterface:getSemiAutoBBox(doc, pageno)
|
||||
DEBUG("Semi-auto detected bbox", auto_bbox)
|
||||
page:close()
|
||||
Cache:insert(hash, CacheItem:new{ semiautobbox = auto_bbox })
|
||||
kc:free()
|
||||
return auto_bbox
|
||||
else
|
||||
return cached.semiautobbox
|
||||
end
|
||||
end
|
||||
|
||||
function KoptInterface:getReflewTextBoxes(doc, pageno)
|
||||
local bbox = doc:getPageBBox(pageno)
|
||||
local context_hash = self:getContextHash(doc, pageno, bbox)
|
||||
local hash = "rfpgboxes|"..context_hash
|
||||
local cached = Cache:check(hash)
|
||||
if not cached then
|
||||
local kctx_hash = "kctx|"..context_hash
|
||||
local cached = Cache:check(kctx_hash)
|
||||
if cached then
|
||||
local kc = self:waitForContext(cached.kctx)
|
||||
--kc:setDebug()
|
||||
local fullwidth, fullheight = kc:getPageDim()
|
||||
local boxes = kc:getWordBoxes(0, 0, fullwidth, fullheight)
|
||||
Cache:insert(hash, CacheItem:new{ rfpgboxes = boxes })
|
||||
return boxes
|
||||
end
|
||||
else
|
||||
return cached.rfpgboxes
|
||||
end
|
||||
end
|
||||
|
||||
function KoptInterface:getTextBoxes(doc, pageno)
|
||||
local hash = "pgboxes|"..doc.file.."|"..pageno
|
||||
local cached = Cache:check(hash)
|
||||
if not cached then
|
||||
local kc_hash = "kctx|"..doc.file.."|"..pageno
|
||||
local kc = self:createContext(doc, pageno)
|
||||
kc:setDebug()
|
||||
local page = doc._document:openPage(pageno)
|
||||
page:getPagePix(kc)
|
||||
local fullwidth, fullheight = kc:getPageDim()
|
||||
local boxes = kc:getWordBoxes(0, 0, fullwidth, fullheight)
|
||||
Cache:insert(hash, CacheItem:new{ pgboxes = boxes })
|
||||
Cache:insert(kc_hash, ContextCacheItem:new{ kctx = kc })
|
||||
return boxes
|
||||
else
|
||||
return cached.pgboxes
|
||||
end
|
||||
end
|
||||
|
||||
--[[
|
||||
get word from OCR in reflew page
|
||||
--]]
|
||||
function KoptInterface:getReflewOCRWord(doc, pageno, rect)
|
||||
local ocrengine = "ocrengine"
|
||||
if not Cache:check(ocrengine) then
|
||||
local dummy = KOPTContext.new()
|
||||
Cache:insert(ocrengine, OCREngine:new{ ocrengine = dummy })
|
||||
end
|
||||
self.ocr_lang = doc.configurable.doc_language
|
||||
local bbox = doc:getPageBBox(pageno)
|
||||
local context_hash = self:getContextHash(doc, pageno, bbox)
|
||||
local hash = "rfocrword|"..context_hash..rect.x..rect.y..rect.w..rect.h
|
||||
local cached = Cache:check(hash)
|
||||
if not cached then
|
||||
local kctx_hash = "kctx|"..context_hash
|
||||
local cached = Cache:check(kctx_hash)
|
||||
if cached then
|
||||
local kc = self:waitForContext(cached.kctx)
|
||||
local ok, word = pcall(
|
||||
kc.getTOCRWord, kc,
|
||||
rect.x, rect.y, rect.w, rect.h,
|
||||
self.tessocr_data, self.ocr_lang, self.ocr_type, 0, 1)
|
||||
Cache:insert(hash, CacheItem:new{ rfocrword = word })
|
||||
return word
|
||||
end
|
||||
else
|
||||
return cached.rfocrword
|
||||
end
|
||||
end
|
||||
|
||||
--[[
|
||||
get word from OCR in non-reflew page
|
||||
--]]
|
||||
function KoptInterface:getOCRWord(doc, pageno, rect)
|
||||
local ocrengine = "ocrengine"
|
||||
if not Cache:check(ocrengine) then
|
||||
local dummy = KOPTContext.new()
|
||||
Cache:insert(ocrengine, OCREngine:new{ ocrengine = dummy })
|
||||
end
|
||||
self.ocr_lang = doc.configurable.doc_language
|
||||
local hash = "ocrword|"..doc.file.."|"..pageno..rect.x..rect.y..rect.w..rect.h
|
||||
local cached = Cache:check(hash)
|
||||
if not cached then
|
||||
local pgboxes_hash = "pgboxes|"..doc.file.."|"..pageno
|
||||
local pgboxes_cached = Cache:check(pgboxes_hash)
|
||||
local kc_hash = "kctx|"..doc.file.."|"..pageno
|
||||
local kc_cashed = Cache:check(kc_hash)
|
||||
if pgboxes_cached and kc_cashed then
|
||||
local kc = kc_cashed.kctx
|
||||
local ok, word = pcall(
|
||||
kc.getTOCRWord, kc,
|
||||
rect.x, rect.y, rect.w, rect.h,
|
||||
self.tessocr_data, self.ocr_lang, self.ocr_type, 0, 1)
|
||||
Cache:insert(hash, CacheItem:new{ ocrword = word })
|
||||
return word
|
||||
end
|
||||
else
|
||||
return cached.ocrword
|
||||
end
|
||||
end
|
||||
|
||||
--[[
|
||||
get cached koptcontext for centain page. if context doesn't exist in cache make
|
||||
new context and reflow the src page immediatly, or wait background thread for
|
||||
@@ -271,19 +192,38 @@ function KoptInterface:getCachedContext(doc, pageno)
|
||||
end
|
||||
|
||||
--[[
|
||||
get reflowed page dimensions
|
||||
get page dimensions
|
||||
--]]
|
||||
function KoptInterface:getPageDimensions(doc, pageno, zoom, rotation)
|
||||
if doc.configurable.text_wrap == 1 then
|
||||
return self:getRFPageDimensions(doc, pageno, zoom, rotation)
|
||||
else
|
||||
return Document.getPageDimensions(doc, pageno, zoom, rotation)
|
||||
end
|
||||
end
|
||||
|
||||
--[[
|
||||
get reflowed page dimensions
|
||||
--]]
|
||||
function KoptInterface:getRFPageDimensions(doc, pageno, zoom, rotation)
|
||||
local kc = self:getCachedContext(doc, pageno)
|
||||
local fullwidth, fullheight = kc:getPageDim()
|
||||
return Geom:new{ w = fullwidth, h = fullheight }
|
||||
end
|
||||
|
||||
function KoptInterface:renderPage(doc, pageno, rect, zoom, rotation, gamma, render_mode)
|
||||
if doc.configurable.text_wrap == 1 then
|
||||
return self:renderreflowedPage(doc, pageno, rect, zoom, rotation, render_mode)
|
||||
else
|
||||
return Document.renderPage(doc, pageno, rect, zoom, rotation, gamma, render_mode)
|
||||
end
|
||||
end
|
||||
|
||||
--[[
|
||||
inherited from common document interface
|
||||
render reflowed page into tile cache.
|
||||
--]]
|
||||
function KoptInterface:renderPage(doc, pageno, rect, zoom, rotation, render_mode)
|
||||
function KoptInterface:renderreflowedPage(doc, pageno, rect, zoom, rotation, render_mode)
|
||||
doc.render_mode = render_mode
|
||||
local bbox = doc:getPageBBox(pageno)
|
||||
local context_hash = self:getContextHash(doc, pageno, bbox)
|
||||
@@ -315,13 +255,21 @@ function KoptInterface:renderPage(doc, pageno, rect, zoom, rotation, render_mode
|
||||
end
|
||||
end
|
||||
|
||||
function KoptInterface:hintPage(doc, pageno, zoom, rotation, gamma, render_mode)
|
||||
if doc.configurable.text_wrap == 1 then
|
||||
self:hintReflowedPage(doc, pageno, zoom, rotation, gamma, render_mode)
|
||||
else
|
||||
Document.hintPage(doc, pageno, zoom, rotation, gamma, render_mode)
|
||||
end
|
||||
end
|
||||
|
||||
--[[
|
||||
inherited from common document interface render reflowed page into cache in
|
||||
background thread. this method returns immediatly leaving the precache flag on
|
||||
in context. subsequent usage of this context should wait for the precache flag
|
||||
off by calling self:waitForContext(kctx)
|
||||
--]]
|
||||
function KoptInterface:hintPage(doc, pageno, zoom, rotation, gamma, render_mode)
|
||||
function KoptInterface:hintReflowedPage(doc, pageno, zoom, rotation, gamma, render_mode)
|
||||
local bbox = doc:getPageBBox(pageno)
|
||||
local context_hash = self:getContextHash(doc, pageno, bbox)
|
||||
local kctx_hash = "kctx|"..context_hash
|
||||
@@ -341,11 +289,19 @@ function KoptInterface:hintPage(doc, pageno, zoom, rotation, gamma, render_mode)
|
||||
end
|
||||
end
|
||||
|
||||
function KoptInterface:drawPage(doc, target, x, y, rect, pageno, zoom, rotation, gamma, render_mode)
|
||||
if doc.configurable.text_wrap == 1 then
|
||||
self:drawReflowedPage(doc, target, x, y, rect, pageno, zoom, rotation, render_mode)
|
||||
else
|
||||
Document.drawPage(doc, target, x, y, rect, pageno, zoom, rotation, gamma, render_mode)
|
||||
end
|
||||
end
|
||||
|
||||
--[[
|
||||
inherited from common document interface
|
||||
draw cached tile pixels into target blitbuffer.
|
||||
--]]
|
||||
function KoptInterface:drawPage(doc, target, x, y, rect, pageno, zoom, rotation, render_mode)
|
||||
function KoptInterface:drawReflowedPage(doc, target, x, y, rect, pageno, zoom, rotation, render_mode)
|
||||
local tile = self:renderPage(doc, pageno, rect, zoom, rotation, render_mode)
|
||||
--DEBUG("now painting", tile, rect)
|
||||
target:blitFrom(tile.bb,
|
||||
@@ -355,6 +311,417 @@ function KoptInterface:drawPage(doc, target, x, y, rect, pageno, zoom, rotation,
|
||||
rect.w, rect.h)
|
||||
end
|
||||
|
||||
--[[
|
||||
extract text boxes in a PDF/Djvu page
|
||||
returned boxes are in native page coordinates zoomed at 1.0
|
||||
--]]
|
||||
function KoptInterface:getTextBoxes(doc, pageno)
|
||||
local text = doc:getPageTextBoxes(pageno)
|
||||
if text and #text > 1 then
|
||||
return text
|
||||
-- if we have no text in original page then we will reuse native word boxes
|
||||
-- in reflow mode and find text boxes from scratch in non-reflow mode
|
||||
else
|
||||
if doc.configurable.text_wrap == 1 then
|
||||
return self:getNativeTextBoxes(doc, pageno)
|
||||
--return self:getTextBoxesFromScratch(doc, pageno)
|
||||
else
|
||||
return self:getTextBoxesFromScratch(doc, pageno)
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
--[[
|
||||
get text boxes in reflowed page via rectmaps in koptcontext
|
||||
--]]
|
||||
function KoptInterface:getReflowedTextBoxes(doc, pageno)
|
||||
local bbox = doc:getPageBBox(pageno)
|
||||
local context_hash = self:getContextHash(doc, pageno, bbox)
|
||||
local hash = "rfpgboxes|"..context_hash
|
||||
local cached = Cache:check(hash)
|
||||
if not cached then
|
||||
local kctx_hash = "kctx|"..context_hash
|
||||
local cached = Cache:check(kctx_hash)
|
||||
if cached then
|
||||
local kc = self:waitForContext(cached.kctx)
|
||||
--kc:setDebug()
|
||||
local fullwidth, fullheight = kc:getPageDim()
|
||||
local boxes = kc:getReflowedWordBoxes(0, 0, fullwidth, fullheight)
|
||||
Cache:insert(hash, CacheItem:new{ rfpgboxes = boxes })
|
||||
return boxes
|
||||
end
|
||||
else
|
||||
return cached.rfpgboxes
|
||||
end
|
||||
end
|
||||
|
||||
--[[
|
||||
get text boxes in native page via rectmaps in koptcontext
|
||||
--]]
|
||||
function KoptInterface:getNativeTextBoxes(doc, pageno)
|
||||
local bbox = doc:getPageBBox(pageno)
|
||||
local context_hash = self:getContextHash(doc, pageno, bbox)
|
||||
local hash = "nativepgboxes|"..context_hash
|
||||
local cached = Cache:check(hash)
|
||||
if not cached then
|
||||
local kctx_hash = "kctx|"..context_hash
|
||||
local cached = Cache:check(kctx_hash)
|
||||
if cached then
|
||||
local kc = self:waitForContext(cached.kctx)
|
||||
--kc:setDebug()
|
||||
local fullwidth, fullheight = kc:getPageDim()
|
||||
local boxes = kc:getNativeWordBoxes(0, 0, fullwidth, fullheight)
|
||||
Cache:insert(hash, CacheItem:new{ nativepgboxes = boxes })
|
||||
return boxes
|
||||
end
|
||||
else
|
||||
return cached.nativepgboxes
|
||||
end
|
||||
end
|
||||
|
||||
--[[
|
||||
get text boxes in native page via optical method,
|
||||
i.e. OCR pre-processing in Tesseract and Leptonica.
|
||||
--]]
|
||||
function KoptInterface:getTextBoxesFromScratch(doc, pageno)
|
||||
local hash = "pgboxes|"..doc.file.."|"..pageno
|
||||
local cached = Cache:check(hash)
|
||||
if not cached then
|
||||
local page_size = Document.getNativePageDimensions(doc, pageno)
|
||||
local bbox = {
|
||||
x0 = 0, y0 = 0,
|
||||
x1 = page_size.w,
|
||||
y1 = page_size.h,
|
||||
}
|
||||
local kc = self:createContext(doc, pageno, bbox)
|
||||
kc:setZoom(1.0)
|
||||
local page = doc._document:openPage(pageno)
|
||||
page:getPagePix(kc)
|
||||
local boxes = kc:getNativeWordBoxes(0, 0, page_size.w, page_size.h)
|
||||
Cache:insert(hash, CacheItem:new{ pgboxes = boxes })
|
||||
page:close()
|
||||
kc:free()
|
||||
return boxes
|
||||
else
|
||||
return cached.pgboxes
|
||||
end
|
||||
end
|
||||
|
||||
--[[
|
||||
OCR word inside the rect area of the page
|
||||
rect should be in native page coordinates
|
||||
--]]
|
||||
function KoptInterface:getOCRWord(doc, pageno, rect)
|
||||
local ocrengine = "ocrengine"
|
||||
if not Cache:check(ocrengine) then
|
||||
local dummy = KOPTContext.new()
|
||||
Cache:insert(ocrengine, OCREngine:new{ ocrengine = dummy })
|
||||
end
|
||||
self.ocr_lang = doc.configurable.doc_language
|
||||
local hash = "ocrword|"..doc.file.."|"..pageno..rect.x..rect.y..rect.w..rect.h
|
||||
local cached = Cache:check(hash)
|
||||
if not cached then
|
||||
local bbox = {
|
||||
x0 = rect.x,
|
||||
y0 = rect.y,
|
||||
x1 = rect.x + rect.w,
|
||||
y1 = rect.y + rect.h,
|
||||
}
|
||||
local kc = self:createContext(doc, pageno, bbox)
|
||||
--kc:setZoom(30/rect.h)
|
||||
kc:setZoom(1.0)
|
||||
local page = doc._document:openPage(pageno)
|
||||
page:getPagePix(kc)
|
||||
local word_w, word_h = kc:getPageDim()
|
||||
local ok, word = pcall(
|
||||
kc.getTOCRWord, kc,
|
||||
0, 0, word_w, word_h,
|
||||
self.tessocr_data, self.ocr_lang, self.ocr_type, 0, 1)
|
||||
Cache:insert(hash, CacheItem:new{ ocrword = word })
|
||||
page:close()
|
||||
kc:free()
|
||||
return word
|
||||
else
|
||||
return cached.ocrword
|
||||
end
|
||||
end
|
||||
|
||||
--[[
|
||||
get index of nearest word box around pos
|
||||
--]]
|
||||
local function getWordBoxIndices(boxes, pos)
|
||||
local function inside_box(box)
|
||||
local x, y = pos.x, pos.y
|
||||
if box.x0 <= x and box.y0 <= y and box.x1 >= x and box.y1 >= y then
|
||||
return true
|
||||
end
|
||||
return false
|
||||
end
|
||||
local function box_distance(i, j)
|
||||
local wb = boxes[i][j]
|
||||
if inside_box(wb) then
|
||||
return 0
|
||||
else
|
||||
local x0, y0 = pos.x, pos.y
|
||||
local x1, y1 = (wb.x0 + wb.x1) / 2, (wb.y0 + wb.y1) / 2
|
||||
return (x0 - x1)*(x0 - x1) + (y0 - y1)*(y0 - y1)
|
||||
end
|
||||
end
|
||||
|
||||
local m, n = 1, 1
|
||||
for i = 1, #boxes do
|
||||
for j = 1, #boxes[i] do
|
||||
if box_distance(i, j) < box_distance(m, n) then
|
||||
m, n = i, j
|
||||
end
|
||||
end
|
||||
end
|
||||
return m, n
|
||||
end
|
||||
|
||||
--[[
|
||||
get word and word box around pos
|
||||
--]]
|
||||
function KoptInterface:getWordFromBoxes(boxes, pos)
|
||||
local i, j = getWordBoxIndices(boxes, pos)
|
||||
local lb = boxes[i]
|
||||
local wb = boxes[i][j]
|
||||
if lb and wb then
|
||||
local box = Geom:new{
|
||||
x = wb.x0, y = lb.y0,
|
||||
w = wb.x1 - wb.x0,
|
||||
h = lb.y1 - lb.y0,
|
||||
}
|
||||
return {
|
||||
word = wb.word,
|
||||
box = box,
|
||||
}
|
||||
end
|
||||
end
|
||||
|
||||
--[[
|
||||
get text and text boxes between pos0 and pos1
|
||||
--]]
|
||||
function KoptInterface:getTextFromBoxes(boxes, pos0, pos1)
|
||||
local line_text = ""
|
||||
local line_boxes = {}
|
||||
local i_start, j_start = getWordBoxIndices(boxes, pos0)
|
||||
local i_stop, j_stop = getWordBoxIndices(boxes, pos1)
|
||||
if i_start == i_stop and j_start > j_stop or i_start > i_stop then
|
||||
i_start, i_stop = i_stop, i_start
|
||||
j_start, j_stop = j_stop, j_start
|
||||
end
|
||||
for i = i_start, i_stop do
|
||||
if i_start == i_stop and #boxes[i] == 0 then break end
|
||||
-- insert line words
|
||||
local j0 = i > i_start and 1 or j_start
|
||||
local j1 = i < i_stop and #boxes[i] or j_stop
|
||||
for j = j0, j1 do
|
||||
local word = boxes[i][j].word
|
||||
if word then
|
||||
-- if last character of this word is an ascii char then append a space
|
||||
local space = (word:match("[%z\194-\244][\128-\191]*$") or j == j1)
|
||||
and "" or " "
|
||||
line_text = line_text..word..space
|
||||
end
|
||||
end
|
||||
-- insert line box
|
||||
local lb = boxes[i]
|
||||
if i > i_start and i < i_stop then
|
||||
local line_box = Geom:new{
|
||||
x = lb.x0, y = lb.y0,
|
||||
w = lb.x1 - lb.x0,
|
||||
h = lb.y1 - lb.y0,
|
||||
}
|
||||
table.insert(line_boxes, line_box)
|
||||
elseif i == i_start and i < i_stop then
|
||||
local wb = boxes[i][j_start]
|
||||
local line_box = Geom:new{
|
||||
x = wb.x0, y = lb.y0,
|
||||
w = lb.x1 - wb.x0,
|
||||
h = lb.y1 - lb.y0,
|
||||
}
|
||||
table.insert(line_boxes, line_box)
|
||||
elseif i > i_start and i == i_stop then
|
||||
local wb = boxes[i][j_stop]
|
||||
local line_box = Geom:new{
|
||||
x = lb.x0, y = lb.y0,
|
||||
w = wb.x1 - lb.x0,
|
||||
h = lb.y1 - lb.y0,
|
||||
}
|
||||
table.insert(line_boxes, line_box)
|
||||
elseif i == i_start and i == i_stop then
|
||||
local wb_start = boxes[i][j_start]
|
||||
local wb_stop = boxes[i][j_stop]
|
||||
local line_box = Geom:new{
|
||||
x = wb_start.x0, y = lb.y0,
|
||||
w = wb_stop.x1 - wb_start.x0,
|
||||
h = lb.y1 - lb.y0,
|
||||
}
|
||||
table.insert(line_boxes, line_box)
|
||||
end
|
||||
end
|
||||
return {
|
||||
text = line_text,
|
||||
boxes = line_boxes,
|
||||
}
|
||||
end
|
||||
|
||||
--[[
|
||||
get word and word box from doc position
|
||||
]]--
|
||||
function KoptInterface:getWordFromPosition(doc, pos)
|
||||
local text_boxes = self:getTextBoxes(doc, pos.page)
|
||||
if text_boxes then
|
||||
if doc.configurable.text_wrap == 1 then
|
||||
return self:getWordFromReflowPosition(doc, text_boxes, pos)
|
||||
else
|
||||
return self:getWordFromNativePosition(doc, text_boxes, pos)
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
--[[
|
||||
get word and word box from position in reflowed page
|
||||
]]--
|
||||
function KoptInterface:getWordFromReflowPosition(doc, boxes, pos)
|
||||
local pageno = pos.page
|
||||
local reflowed_page_boxes = self:getReflowedTextBoxes(doc, pageno)
|
||||
local reflowed_word_box = self:getWordFromBoxes(reflowed_page_boxes, pos)
|
||||
local reflowed_pos = reflowed_word_box.box:center()
|
||||
local native_pos = self:reflowToNativePosTransform(doc, pageno, reflowed_pos)
|
||||
local native_word_box = self:getWordFromBoxes(boxes, native_pos)
|
||||
local word_box = {
|
||||
word = native_word_box.word,
|
||||
pbox = native_word_box.box, -- box on page
|
||||
sbox = reflowed_word_box.box, -- box on screen
|
||||
pos = native_pos,
|
||||
}
|
||||
return word_box
|
||||
end
|
||||
|
||||
--[[
|
||||
get word and word box from position in native page
|
||||
]]--
|
||||
function KoptInterface:getWordFromNativePosition(doc, boxes, pos)
|
||||
DEBUG("boxes", boxes)
|
||||
local native_word_box = self:getWordFromBoxes(boxes, pos)
|
||||
local word_box = {
|
||||
word = native_word_box.word,
|
||||
pbox = native_word_box.box, -- box on page
|
||||
sbox = native_word_box.box, -- box on screen
|
||||
pos = pos,
|
||||
}
|
||||
return word_box
|
||||
end
|
||||
|
||||
--[[
|
||||
transform position in native page to reflowed page
|
||||
]]--
|
||||
function KoptInterface:nativeToReflowPosTransform(doc, pageno, pos)
|
||||
local bbox = doc:getPageBBox(pageno)
|
||||
local context_hash = self:getContextHash(doc, pageno, bbox)
|
||||
local kctx_hash = "kctx|"..context_hash
|
||||
local cached = Cache:check(kctx_hash)
|
||||
local kc = self:waitForContext(cached.kctx)
|
||||
--kc:setDebug()
|
||||
--DEBUG("transform native pos", pos)
|
||||
local rpos = {}
|
||||
rpos.x, rpos.y = kc:nativeToReflowPosTransform(pos.x, pos.y)
|
||||
--DEBUG("transformed reflowed pos", rpos)
|
||||
return rpos
|
||||
end
|
||||
|
||||
--[[
|
||||
transform position in reflowed page to native page
|
||||
]]--
|
||||
function KoptInterface:reflowToNativePosTransform(doc, pageno, pos)
|
||||
local bbox = doc:getPageBBox(pageno)
|
||||
local context_hash = self:getContextHash(doc, pageno, bbox)
|
||||
local kctx_hash = "kctx|"..context_hash
|
||||
local cached = Cache:check(kctx_hash)
|
||||
local kc = self:waitForContext(cached.kctx)
|
||||
--kc:setDebug()
|
||||
--DEBUG("transform reflowed pos", pos)
|
||||
local npos = {}
|
||||
npos.x, npos.y = kc:reflowToNativePosTransform(pos.x, pos.y)
|
||||
--DEBUG("transformed native pos", npos)
|
||||
return npos
|
||||
end
|
||||
|
||||
--[[
|
||||
get text and text boxes from screen positions
|
||||
--]]
|
||||
function KoptInterface:getTextFromPositions(doc, pos0, pos1)
|
||||
local text_boxes = self:getTextBoxes(doc, pos0.page)
|
||||
if text_boxes then
|
||||
if doc.configurable.text_wrap == 1 then
|
||||
return self:getTextFromReflowPositions(doc, text_boxes, pos0, pos1)
|
||||
else
|
||||
return self:getTextFromNativePositions(doc, text_boxes, pos0, pos1)
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
--[[
|
||||
get text and text boxes from screen positions for reflowed page
|
||||
]]--
|
||||
function KoptInterface:getTextFromReflowPositions(doc, native_boxes, pos0, pos1)
|
||||
local pageno = pos0.page
|
||||
local reflowed_page_boxes = self:getReflowedTextBoxes(doc, pageno)
|
||||
local reflowed_box0 = self:getWordFromBoxes(reflowed_page_boxes, pos0)
|
||||
local reflowed_pos0 = reflowed_box0.box:center()
|
||||
local native_pos0 = self:reflowToNativePosTransform(doc, pageno, reflowed_pos0)
|
||||
|
||||
local reflowed_box1 = self:getWordFromBoxes(reflowed_page_boxes, pos1)
|
||||
local reflowed_pos1 = reflowed_box1.box:center()
|
||||
local native_pos1 = self:reflowToNativePosTransform(doc, pageno, reflowed_pos1)
|
||||
|
||||
local reflowed_text_boxes = self:getTextFromBoxes(reflowed_page_boxes, pos0, pos1)
|
||||
local native_text_boxes = self:getTextFromBoxes(native_boxes, pos0, pos1)
|
||||
local text_boxes = {
|
||||
text = native_text_boxes.text,
|
||||
pboxes = native_text_boxes.boxes, -- boxes on page
|
||||
sboxes = reflowed_text_boxes.boxes, -- boxes on screen
|
||||
pos0 = native_pos0,
|
||||
pos1 = native_pos1
|
||||
}
|
||||
return text_boxes
|
||||
end
|
||||
|
||||
--[[
|
||||
get text and text boxes from screen positions for native page
|
||||
]]--
|
||||
function KoptInterface:getTextFromNativePositions(doc, native_boxes, pos0, pos1)
|
||||
local native_text_boxes = self:getTextFromBoxes(native_boxes, pos0, pos1)
|
||||
local text_boxes = {
|
||||
word = native_text_boxes.text,
|
||||
pboxes = native_text_boxes.boxes, -- boxes on page
|
||||
sboxes = native_text_boxes.boxes, -- boxes on screen
|
||||
pos0 = pos0,
|
||||
pos1 = pos1,
|
||||
}
|
||||
return text_boxes
|
||||
end
|
||||
|
||||
--[[
|
||||
get text boxes from page positions
|
||||
--]]
|
||||
function KoptInterface:getPageBoxesFromPositions(doc, pageno, ppos0, ppos1)
|
||||
if not ppos0 or not ppos1 then return end
|
||||
if doc.configurable.text_wrap == 1 then
|
||||
local spos0 = self:nativeToReflowPosTransform(doc, pageno, ppos0)
|
||||
local spos1 = self:nativeToReflowPosTransform(doc, pageno, ppos1)
|
||||
local page_boxes = self:getReflowedTextBoxes(doc, pageno)
|
||||
local text_boxes = self:getTextFromBoxes(page_boxes, spos0, spos1)
|
||||
return text_boxes.boxes
|
||||
else
|
||||
local page_boxes = self:getTextBoxes(doc, pageno)
|
||||
local text_boxes = self:getTextFromBoxes(page_boxes, ppos0, ppos1)
|
||||
return text_boxes.boxes
|
||||
end
|
||||
end
|
||||
|
||||
--[[
|
||||
helper functions
|
||||
--]]
|
||||
|
||||
Reference in New Issue
Block a user