diff --git a/frontend/apps/reader/modules/readerdictionary.lua b/frontend/apps/reader/modules/readerdictionary.lua index ccdc1f907..ab214814c 100644 --- a/frontend/apps/reader/modules/readerdictionary.lua +++ b/frontend/apps/reader/modules/readerdictionary.lua @@ -214,7 +214,8 @@ function ReaderDictionary:addToMainMenu(menu_items) os.date("%Y-%m-%d %H:%M:%S", value.time), value.word, callback = function() - self:onLookupWord(value.word) + -- Word had been cleaned before being added to history + self:onLookupWord(value.word, true) end }) end @@ -385,10 +386,10 @@ function ReaderDictionary:addToMainMenu(menu_items) end end -function ReaderDictionary:onLookupWord(word, box, highlight, link) +function ReaderDictionary:onLookupWord(word, is_sane, box, highlight, link) logger.dbg("dict lookup word:", word, box) -- escape quotes and other funny characters in word - word = self:cleanSelection(word) + word = self:cleanSelection(word, is_sane) logger.dbg("dict stripped word:", word) self.highlight = highlight @@ -609,7 +610,7 @@ local function tidyMarkup(results) return results end -function ReaderDictionary:cleanSelection(text) +function ReaderDictionary:cleanSelection(text, is_sane) -- Will be used by ReaderWikipedia too if not text then return "" @@ -618,31 +619,33 @@ function ReaderDictionary:cleanSelection(text) -- some cleanup is still needed for selection we get from other engines -- (example: pdf selection "qu’autrefois," will be cleaned to "autrefois") -- + -- Replace no-break space with regular space + text = text:gsub("\xC2\xA0", ' ') -- U+00A0 no-break space -- Trim any space at start or end text = text:gsub("^%s+", "") text = text:gsub("%s+$", "") - -- Replace extended quote (included in the general puncturation range) - -- with plain ascii quote (for french words like "aujourd’hui") - text = text:gsub("\xE2\x80\x99", "'") -- U+2019 (right single quotation mark) - -- Strip punctuation characters around selection - text = util.stripPunctuation(text) - -- Strip some common english grammatical construct - text = text:gsub("'s$", '') -- english possessive - -- Strip some common french grammatical constructs - text = text:gsub("^[LSDMNTlsdmnt]'", '') -- french l' s' t'... - text = text:gsub("^[Qq][Uu]'", '') -- french qu' - -- Replace no-break space with regular space - text = text:gsub("\xC2\xA0", ' ') -- U+00A0 no-break space - -- There may be a need to remove some (all?) diacritical marks - -- https://en.wikipedia.org/wiki/Combining_character#Unicode_ranges - -- see discussion at https://github.com/koreader/koreader/issues/1649 - -- Commented for now, will have to be checked by people who read - -- languages and texts that use them. - -- text = text:gsub("\204[\128-\191]", '') -- U+0300 to U+033F - -- text = text:gsub("\205[\128-\175]", '') -- U+0340 to U+036F - -- Trim any space now at start or end after above changes - text = text:gsub("^%s+", "") - text = text:gsub("%s+$", "") + if not is_sane then + -- Replace extended quote (included in the general puncturation range) + -- with plain ascii quote (for french words like "aujourd’hui") + text = text:gsub("\xE2\x80\x99", "'") -- U+2019 (right single quotation mark) + -- Strip punctuation characters around selection + text = util.stripPunctuation(text) + -- Strip some common english grammatical construct + text = text:gsub("'s$", '') -- english possessive + -- Strip some common french grammatical constructs + text = text:gsub("^[LSDMNTlsdmnt]'", '') -- french l' s' t'... + text = text:gsub("^[Qq][Uu]'", '') -- french qu' + -- There may be a need to remove some (all?) diacritical marks + -- https://en.wikipedia.org/wiki/Combining_character#Unicode_ranges + -- see discussion at https://github.com/koreader/koreader/issues/1649 + -- Commented for now, will have to be checked by people who read + -- languages and texts that use them. + -- text = text:gsub("\204[\128-\191]", '') -- U+0300 to U+033F + -- text = text:gsub("\205[\128-\175]", '') -- U+0340 to U+036F + -- Trim any space now at start or end after above changes + text = text:gsub("^%s+", "") + text = text:gsub("%s+$", "") + end return text end @@ -680,7 +683,8 @@ function ReaderDictionary:onShowDictionaryLookup() is_enter_default = true, callback = function() UIManager:close(self.dictionary_lookup_dialog) - self:onLookupWord(self.dictionary_lookup_dialog:getInputText()) + -- Trust that input text does not need any cleaning (allows querying for "-suffix") + self:onLookupWord(self.dictionary_lookup_dialog:getInputText(), true) end, }, } diff --git a/frontend/apps/reader/modules/readerhighlight.lua b/frontend/apps/reader/modules/readerhighlight.lua index ba32f6f76..8739c6d5c 100644 --- a/frontend/apps/reader/modules/readerhighlight.lua +++ b/frontend/apps/reader/modules/readerhighlight.lua @@ -937,14 +937,14 @@ function ReaderHighlight:lookup(selected_word, selected_link) -- if we extracted text directly if selected_word.word then local word_box = self.view:pageToScreenTransform(self.hold_pos.page, selected_word.sbox) - self.ui:handleEvent(Event:new("LookupWord", selected_word.word, word_box, self, selected_link)) + self.ui:handleEvent(Event:new("LookupWord", selected_word.word, false, word_box, self, selected_link)) -- or we will do OCR elseif selected_word.sbox and self.hold_pos then local word = self.ui.document:getOCRWord(self.hold_pos.page, selected_word) logger.dbg("OCRed word:", word) if word and word ~= "" then local word_box = self.view:pageToScreenTransform(self.hold_pos.page, selected_word.sbox) - self.ui:handleEvent(Event:new("LookupWord", word, word_box, self, selected_link)) + self.ui:handleEvent(Event:new("LookupWord", word, false, word_box, self, selected_link)) else UIManager:show(InfoMessage:new{ text = info_message_ocr_text, diff --git a/frontend/apps/reader/modules/readerlink.lua b/frontend/apps/reader/modules/readerlink.lua index e08dceb4b..7b55216b0 100644 --- a/frontend/apps/reader/modules/readerlink.lua +++ b/frontend/apps/reader/modules/readerlink.lua @@ -711,7 +711,7 @@ function ReaderLink:onGoToExternalLink(link_url) callback = function() UIManager:nextTick(function() UIManager:close(dialog) - self.ui:handleEvent(Event:new("LookupWikipedia", wiki_page, false, true, wiki_lang)) + self.ui:handleEvent(Event:new("LookupWikipedia", wiki_page, true, false, true, wiki_lang)) end) end, }) diff --git a/frontend/apps/reader/modules/readerwikipedia.lua b/frontend/apps/reader/modules/readerwikipedia.lua index fd89d2ee8..7668748f9 100644 --- a/frontend/apps/reader/modules/readerwikipedia.lua +++ b/frontend/apps/reader/modules/readerwikipedia.lua @@ -53,7 +53,8 @@ function ReaderWikipedia:lookupInput() is_enter_default = true, callback = function() UIManager:close(self.input_dialog) - self:onLookupWikipedia(self.input_dialog:getInputText()) + -- Trust that input text does not need any cleaning (allows querying for "-suffix") + self:onLookupWikipedia(self.input_dialog:getInputText(), true) end, }, } @@ -98,7 +99,8 @@ function ReaderWikipedia:addToMainMenu(menu_items) os.date("%Y-%m-%d %H:%M:%S", value.time), text, callback = function() - self:onLookupWikipedia(value.word, nil, value.page, value.lang) + -- Word had been cleaned before being added to history + self:onLookupWikipedia(value.word, true, nil, value.page, value.lang) end }) end @@ -375,16 +377,16 @@ function ReaderWikipedia:initLanguages(word) end end -function ReaderWikipedia:onLookupWikipedia(word, box, get_fullpage, forced_lang) +function ReaderWikipedia:onLookupWikipedia(word, is_sane, box, get_fullpage, forced_lang) -- Wrapped through Trapper, as we may be using Trapper:dismissableRunInSubprocess() in it Trapper:wrap(function() - self:lookupWikipedia(word, box, get_fullpage, forced_lang) + self:lookupWikipedia(word, is_sane, box, get_fullpage, forced_lang) end) return true end -function ReaderWikipedia:lookupWikipedia(word, box, get_fullpage, forced_lang) - if NetworkMgr:willRerunWhenOnline(function() self:lookupWikipedia(word, box, get_fullpage, forced_lang) end) then +function ReaderWikipedia:lookupWikipedia(word, is_sane, box, get_fullpage, forced_lang) + if NetworkMgr:willRerunWhenOnline(function() self:lookupWikipedia(word, is_sane, box, get_fullpage, forced_lang) end) then -- Not online yet, nothing more to do here, NetworkMgr will forward the callback and run it once connected! return end @@ -404,7 +406,7 @@ function ReaderWikipedia:lookupWikipedia(word, box, get_fullpage, forced_lang) -- no need to clean word if get_fullpage, as it is the exact wikipetia page title if word and not get_fullpage then -- escape quotes and other funny characters in word - word = self:cleanSelection(word) + word = self:cleanSelection(word, is_sane) -- no need to lower() word with wikipedia search end logger.dbg("stripped word:", word) diff --git a/frontend/ui/widget/dictquicklookup.lua b/frontend/ui/widget/dictquicklookup.lua index ff4f17c92..c2450d222 100644 --- a/frontend/ui/widget/dictquicklookup.lua +++ b/frontend/ui/widget/dictquicklookup.lua @@ -1102,7 +1102,8 @@ function DictQuickLookup:inputLookup() else event = "LookupWord" end - self.ui:handleEvent(Event:new(event, word)) + -- Trust that input text does not need any cleaning (allows querying for "-suffix") + self.ui:handleEvent(Event:new(event, word, true)) end end @@ -1131,18 +1132,21 @@ end function DictQuickLookup:lookupWikipedia(get_fullpage) local word + local is_sane if get_fullpage then -- we use the word of the displayed result's definition, which -- is the exact title of the full wikipedia page word = self.lookupword + is_sane = true else -- we use the original word that was querried word = self.word + is_sane = false end self:resyncWikiLanguages() - -- strange : we need to pass false instead of nil if word_box is nil, - -- otherwise get_fullpage is not passed - self.ui:handleEvent(Event:new("LookupWikipedia", word, self.word_box and self.word_box or false, get_fullpage)) + -- (With Event, we need to pass false instead of nil if word_box is nil, + -- otherwise next arguments are discarded) + self.ui:handleEvent(Event:new("LookupWikipedia", word, is_sane, self.word_box and self.word_box or false, get_fullpage)) end return DictQuickLookup