mirror of
https://github.com/koreader/koreader.git
synced 2025-08-10 00:52:38 +00:00
PDF text selection: fix/tweak spacing between words/boxes
When selecting text, we may get multiple boxes, one for each word, and we have to add spaces between the extracted words ourselves. Previously, we only added a space if the last character of the previous word was ASCII, so spaces were missing after accented characters or Greek words. Try to do better by measuring the distances between boxes and comparing them to the box heights, with a few heuristics.
This commit is contained in:
@@ -1287,16 +1287,20 @@ function ReaderHighlight:onUnhighlight(bookmark_item)
|
||||
sel_pos0 = self.selected_text.pos0
|
||||
end
|
||||
if self.ui.document.info.has_pages then -- We can safely use page
|
||||
-- As we may have changed spaces and hyphens handling in the extracted
|
||||
-- text over the years, check text identities with them removed
|
||||
local sel_text_cleaned = sel_text:gsub("[ -]", ""):gsub("\xC2\xAD", "")
|
||||
for index = 1, #self.view.highlight.saved[page] do
|
||||
local highlight = self.view.highlight.saved[page][index]
|
||||
-- pos0 are tables and can't be compared directly, except when from
|
||||
-- DictQuickLookup where these are the same object.
|
||||
-- If bookmark_item provided, just check datetime
|
||||
if highlight.text == sel_text and (
|
||||
(datetime == nil and highlight.pos0 == sel_pos0) or
|
||||
(datetime ~= nil and highlight.datetime == datetime)) then
|
||||
idx = index
|
||||
break
|
||||
if ( (datetime == nil and highlight.pos0 == sel_pos0) or
|
||||
(datetime ~= nil and highlight.datetime == datetime) ) then
|
||||
if highlight.text:gsub("[ -]", ""):gsub("\xC2\xAD", "") == sel_text_cleaned then
|
||||
idx = index
|
||||
break
|
||||
end
|
||||
end
|
||||
end
|
||||
else -- page is a xpointer
|
||||
|
||||
Reference in New Issue
Block a user