mirror of
https://github.com/koreader/koreader.git
synced 2025-08-10 00:52:38 +00:00
Fulltext search: all entries in entire document (#11313)
This commit is contained in:
@@ -1097,49 +1097,60 @@ function KoptInterface:getWordFromNativePosition(doc, boxes, pos)
|
||||
return word_box
|
||||
end
|
||||
|
||||
function KoptInterface:getSelectedWordContext(word, nb_words, pos)
|
||||
local boxes = self.last_text_boxes
|
||||
if not pos or not boxes or #boxes == 0 then return end
|
||||
local i, j = getWordBoxIndices(boxes, pos)
|
||||
if boxes[i][j].word ~= word then return end
|
||||
|
||||
local li, wi = i, j
|
||||
local prev_count, next_count = 0, 0
|
||||
local prev_text, next_text = {}, {}
|
||||
local function get_prev_text(boxes, i, j, nb_words)
|
||||
local prev_count = 0
|
||||
local prev_text = {}
|
||||
while prev_count < nb_words do
|
||||
if li == 1 and wi == 1 then
|
||||
if i == 1 and j == 1 then
|
||||
break
|
||||
elseif wi == 1 then
|
||||
li = li - 1
|
||||
wi = #boxes[li]
|
||||
elseif j == 1 then
|
||||
i = i - 1
|
||||
j = #boxes[i]
|
||||
else
|
||||
wi = wi - 1
|
||||
j = j - 1
|
||||
end
|
||||
local current_word = boxes[li][wi].word
|
||||
local current_word = boxes[i][j].word
|
||||
if #current_word > 0 then
|
||||
table.insert(prev_text, 1, current_word)
|
||||
prev_count = prev_count + 1
|
||||
end
|
||||
end
|
||||
if #prev_text > 0 then
|
||||
return table.concat(prev_text, " ")
|
||||
end
|
||||
end
|
||||
|
||||
li, wi = i, j
|
||||
local function get_next_text(boxes, i, j, nb_words)
|
||||
local next_count = 0
|
||||
local next_text = {}
|
||||
while next_count < nb_words do
|
||||
if li == #boxes and wi == #boxes[li] then
|
||||
if i == #boxes and j == #boxes[i] then
|
||||
break
|
||||
elseif wi == #boxes[li] then
|
||||
li = li + 1
|
||||
wi = 1
|
||||
elseif j == #boxes[i] then
|
||||
i = i + 1
|
||||
j = 1
|
||||
else
|
||||
wi = wi + 1
|
||||
j = j + 1
|
||||
end
|
||||
local current_word = boxes[li][wi].word
|
||||
local current_word = boxes[i][j].word
|
||||
if #current_word > 0 then
|
||||
table.insert(next_text, current_word)
|
||||
next_count = next_count + 1
|
||||
end
|
||||
end
|
||||
if #prev_text == 0 and #next_text == 0 then return end
|
||||
return table.concat(prev_text, " "), table.concat(next_text, " ")
|
||||
if #next_text > 0 then
|
||||
return table.concat(next_text, " ")
|
||||
end
|
||||
end
|
||||
|
||||
function KoptInterface:getSelectedWordContext(word, nb_words, pos)
|
||||
local boxes = self.last_text_boxes
|
||||
if not pos or not boxes or #boxes == 0 then return end
|
||||
local i, j = getWordBoxIndices(boxes, pos)
|
||||
if boxes[i][j].word ~= word then return end
|
||||
local prev_text = get_prev_text(boxes, i, j, nb_words)
|
||||
local next_text = get_next_text(boxes, i, j, nb_words)
|
||||
return prev_text, next_text
|
||||
end
|
||||
|
||||
--[[--
|
||||
@@ -1336,19 +1347,23 @@ function KoptInterface:nativeToPageRectTransform(doc, pageno, rect)
|
||||
end
|
||||
end
|
||||
|
||||
local function all_matches(boxes, pattern, caseInsensitive)
|
||||
local function get_pattern_list(pattern, case_insensitive)
|
||||
-- pattern list of single words
|
||||
local plist = {}
|
||||
-- (as in util.splitToWords(), but only splitting on spaces, keeping punctuations)
|
||||
for word in util.gsplit(pattern, "%s+") do
|
||||
if util.hasCJKChar(word) then
|
||||
for char in util.gsplit(word, "[\192-\255][\128-\191]+", true) do
|
||||
table.insert(plist, caseInsensitive and Utf8Proc.lowercase(util.fixUtf8(char, "?")) or char)
|
||||
table.insert(plist, case_insensitive and Utf8Proc.lowercase(util.fixUtf8(char, "?")) or char)
|
||||
end
|
||||
else
|
||||
table.insert(plist, caseInsensitive and Utf8Proc.lowercase(util.fixUtf8(word, "?")) or word)
|
||||
table.insert(plist, case_insensitive and Utf8Proc.lowercase(util.fixUtf8(word, "?")) or word)
|
||||
end
|
||||
end
|
||||
return plist
|
||||
end
|
||||
|
||||
local function all_matches(boxes, plist, case_insensitive)
|
||||
local pnb = #plist
|
||||
-- return mached word indices from index i, j
|
||||
local function match(i, j)
|
||||
@@ -1362,7 +1377,7 @@ local function all_matches(boxes, pattern, caseInsensitive)
|
||||
end
|
||||
if i > #boxes then break end
|
||||
local box = boxes[i][j]
|
||||
local word = caseInsensitive and Utf8Proc.lowercase(util.fixUtf8(box.word, "?")) or box.word
|
||||
local word = case_insensitive and Utf8Proc.lowercase(util.fixUtf8(box.word, "?")) or box.word
|
||||
local pword = plist[pindex]
|
||||
local matched
|
||||
if pnb == 1 then -- single word in plist
|
||||
@@ -1407,11 +1422,12 @@ local function all_matches(boxes, pattern, caseInsensitive)
|
||||
end)
|
||||
end
|
||||
|
||||
function KoptInterface:findAllMatches(doc, pattern, caseInsensitive, page)
|
||||
function KoptInterface:findAllMatches(doc, pattern, case_insensitive, page)
|
||||
local text_boxes = doc:getPageTextBoxes(page)
|
||||
if not text_boxes then return end
|
||||
local plist = get_pattern_list(pattern, case_insensitive)
|
||||
local matches = {}
|
||||
for indices in all_matches(text_boxes or {}, pattern, caseInsensitive) do
|
||||
for indices in all_matches(text_boxes, plist, case_insensitive) do
|
||||
for _, index in ipairs(indices) do
|
||||
local i, j = unpack(index)
|
||||
local word = text_boxes[i][j]
|
||||
@@ -1427,8 +1443,8 @@ function KoptInterface:findAllMatches(doc, pattern, caseInsensitive, page)
|
||||
return matches
|
||||
end
|
||||
|
||||
function KoptInterface:findText(doc, pattern, origin, reverse, caseInsensitive, pageno)
|
||||
logger.dbg("Koptinterface: find text", pattern, origin, reverse, caseInsensitive, pageno)
|
||||
function KoptInterface:findText(doc, pattern, origin, reverse, case_insensitive, pageno)
|
||||
logger.dbg("Koptinterface: find text", pattern, origin, reverse, case_insensitive, pageno)
|
||||
local last_pageno = doc:getPageCount()
|
||||
local start_page, end_page
|
||||
if reverse == 1 then
|
||||
@@ -1456,7 +1472,7 @@ function KoptInterface:findText(doc, pattern, origin, reverse, caseInsensitive,
|
||||
end
|
||||
end
|
||||
for i = start_page, end_page, (reverse == 1) and -1 or 1 do
|
||||
local matches = self:findAllMatches(doc, pattern, caseInsensitive, i)
|
||||
local matches = self:findAllMatches(doc, pattern, case_insensitive, i)
|
||||
if #matches > 0 then
|
||||
matches.page = i
|
||||
return matches
|
||||
@@ -1464,6 +1480,58 @@ function KoptInterface:findText(doc, pattern, origin, reverse, caseInsensitive,
|
||||
end
|
||||
end
|
||||
|
||||
function KoptInterface:findAllText(doc, pattern, case_insensitive, nb_context_words, max_hits)
|
||||
local plist = get_pattern_list(pattern, case_insensitive)
|
||||
local res = {}
|
||||
for page = 1, doc:getPageCount() do
|
||||
local text_boxes = doc:getPageTextBoxes(page)
|
||||
if text_boxes then
|
||||
for indices in all_matches(text_boxes, plist, case_insensitive) do -- each found pattern in the page
|
||||
local res_item = { -- item of the Menu item_table
|
||||
text = nil,
|
||||
mandatory = page,
|
||||
boxes = {}, -- to draw temp highlight in onMenuSelect
|
||||
}
|
||||
local text = {}
|
||||
local i_prev, j_prev, i_next, j_next
|
||||
for ind, index in ipairs(indices) do -- each word in the pattern
|
||||
local i, j = unpack(index)
|
||||
local word = text_boxes[i][j]
|
||||
res_item.boxes[ind] = {
|
||||
x = word.x0, y = word.y0,
|
||||
w = word.x1 - word.x0,
|
||||
h = word.y1 - word.y0,
|
||||
}
|
||||
text[ind] = word.word
|
||||
if ind == 1 then
|
||||
i_prev, j_prev = i, j
|
||||
end
|
||||
if ind == #indices then
|
||||
i_next, j_next = i, j
|
||||
end
|
||||
end
|
||||
text = "【" .. table.concat(text, " ") .. "】"
|
||||
local prev_text = get_prev_text(text_boxes, i_prev, j_prev, nb_context_words)
|
||||
if prev_text then
|
||||
text = prev_text .. text
|
||||
end
|
||||
local next_text = get_next_text(text_boxes, i_next, j_next, nb_context_words)
|
||||
if next_text then
|
||||
text = text .. next_text
|
||||
end
|
||||
res_item.text = text
|
||||
table.insert(res, res_item)
|
||||
if #res == max_hits then
|
||||
return res
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
if #res > 0 then
|
||||
return res
|
||||
end
|
||||
end
|
||||
|
||||
--[[--
|
||||
Log reflow duration.
|
||||
--]]
|
||||
|
||||
Reference in New Issue
Block a user