Dictionary and wikipedia enhancements (#2393)

Stardict:
- remove duplicate results
- better cleaning of selection
- append results from a 2nd query of a 2nd set of dictionaries
  in data/dict_ext/

Wikipedia:
- use the search API for better results
- allow viewing the full page content of a result in a bigger window
- allow queries for multiple languages
- available languages can be set in settings.reader.lua :
    ["wikipedia_languages"] = {"en", "fr", "it"}
- "Wikipedia lookup" added to Tools menu

For both:
- allow selection of multiple words for a new lookup (so one can
  actually browse wikipedia)
- allow continuous reading with Tap
- display "current result / total number of results"

Details in #2393
This commit is contained in:
poire-z
2016-12-06 22:15:52 +01:00
committed by Qingping Hou
parent 5040bfe4c5
commit 1708fd5e1c
5 changed files with 476 additions and 108 deletions

View File

@@ -7,12 +7,14 @@ local Screen = require("device").screen
local Device = require("device")
local JSON = require("json")
local DEBUG = require("dbg")
local util = require("util")
local _ = require("gettext")
local T = require("ffi/util").template
-- Dictionary lookup module: runs sdcv queries and shows the results
-- in a dictionary window.
local ReaderDictionary = InputContainer:new{
-- Directory passed to sdcv as --data-dir; a sibling directory named
-- data_dir .. "_ext" is also queried when it exists.
data_dir = nil,
-- Dictionary windows that have been shown; showDict appends to this list.
-- NOTE(review): this table literal lives on the prototype, so it is
-- presumably shared by every instance unless init() replaces it - confirm.
dict_window_list = {},
-- Template for the progress message shown while a lookup runs;
-- %1 is replaced with the looked-up word.
lookup_msg = _("Searching dictionary for:\n%1")
}
function ReaderDictionary:init()
@@ -40,7 +42,7 @@ function ReaderDictionary:onLookupWord(word, box, highlight)
return true
end
local function tidy_markup(results)
local function tidyMarkup(results)
local cdata_tag = "<!%[CDATA%[(.-)%]%]>"
local format_escape = "&[29Ib%+]{(.-)}"
for _, result in ipairs(results) do
@@ -64,20 +66,97 @@ local function tidy_markup(results)
return results
end
-- Clean a text selection so that it can be used as a lookup query.
-- Returns "" when text is nil or false, otherwise the cleaned string.
-- Will be used by ReaderWikipedia too.
--
-- Some replacements are applied more than once. Most of the time this is
-- overkill, but it sometimes provides better cleaning. Extreme cases that
-- explain the repeated passes:
--
-- Sample epub html: mais, qu« absolument, on ne peut décidément pas » revenir en arrière
-- Holding on "qu" or "absolument" makes crengine return: qu« absolument,
-- We only want: absolument
--
-- Sample epub html: car « létat, actuel, de notre connaissance » sy oppose
-- Holding on "état" makes crengine return: « létat,
-- We want: état
--
-- Some of these substitutions could be removed once crengine does a better
-- job at finding word boundaries.
function ReaderDictionary:cleanSelection(text)
    if not text then
        return ""
    end

    local gsub = string.gsub

    -- Removes whitespace at both ends of s.
    local function trim(s)
        s = gsub(s, "^%s+", "")
        s = gsub(s, "%s+$", "")
        return s
    end

    -- UTF-8 byte sequences to substitute, applied in order: { pattern, replacement }
    local substitutions = {
        -- Quotation marks and dashes that are simply dropped
        { "\xC2\xAB", "" },      -- U+00AB left double angle quotation mark
        { "\xC2\xBB", "" },      -- U+00BB right double angle quotation mark
        { "\xE2\x80\x9D", "" },  -- U+201D right double quotation mark
        { "\xE2\x80\x9C", "" },  -- U+201C left double quotation mark
        { "\xE2\x80\x94", "" },  -- U+2014 em dash
        { "\xE2\x80\x95", "" },  -- U+2015 horizontal bar
        { "\xC2\xA0", "" },      -- U+00A0 no-break space
        -- A meaningful quote is turned into its ascii counterpart
        { "\xE2\x80\x99", "'" }, -- U+2019 right single quotation mark
    }
    for i = 1, #substitutions do
        local entry = substitutions[i]
        text = gsub(text, entry[1], entry[2])
    end

    -- Punctuation around the selection goes next. This has to come after
    -- the utf8 substitutions above, or it would strip part of those
    -- multi-byte characters.
    text = util.stripePunctuations(text)
    text = trim(text)

    -- Drop some leading french grammatical constructs
    text = gsub(text, "^[LSDMNTlsdmnt]'", "") -- french l' s' t'
    text = gsub(text, "^[Qq][Uu]'", "") -- french qu'

    -- Trim once more, in case the removals above exposed new whitespace
    return trim(text)
end
-- Show a progress message for the lookup of `word`.
-- The message widget is kept in self.lookup_progress_msg so that
-- onLookupDone can close it later; a repaint is forced right away.
function ReaderDictionary:onLookupStarted(word)
    self.lookup_progress_msg = InfoMessage:new{
        text = T(self.lookup_msg, word),
    }
    UIManager:show(self.lookup_progress_msg)
    UIManager:forceRePaint()
end
-- Close the lookup progress message, if one is currently stored in
-- self.lookup_progress_msg, force a repaint, and forget the message.
function ReaderDictionary:onLookupDone()
    local progress_msg = self.lookup_progress_msg
    if progress_msg then
        UIManager:close(progress_msg)
        UIManager:forceRePaint()
    end
    -- Always reset the field, whether or not a message was showing
    self.lookup_progress_msg = nil
end
function ReaderDictionary:stardictLookup(word, box)
DEBUG("lookup word:", word, box)
if word then
word = require("util").stripePunctuations(word)
DEBUG("stripped word:", word)
-- escape quotes and other funny characters in word
-- escape quotes and other funny characters in word
word = self:cleanSelection(word)
DEBUG("stripped word:", word)
if word == "" then
return
end
self:onLookupStarted(word)
local final_results = {}
local seen_results = {}
-- Allow for two sdcv calls : one in the classic data/dict, and
-- another one in data/dict_ext if it exists
-- We could put in data/dict_ext dictionaries with a great number of words
-- but poor definitions as a fall back. If these were in data/dict,
-- they would prevent fuzzy searches in other dictionaries with better
-- definitions, and mask such results. This way, we can get both.
local dict_dirs = {self.data_dir}
local dict_ext = self.data_dir.."_ext"
if lfs.attributes(dict_ext, "mode") == "directory" then
table.insert(dict_dirs, dict_ext)
end
for _, dict_dir in ipairs(dict_dirs) do
local results_str = nil
if Device:isAndroid() then
local A = require("android")
results_str = A.stdout("./sdcv", "--utf8-input", "--utf8-output",
"-nj", word, "--data-dir", self.data_dir)
"-nj", word, "--data-dir", dict_dir)
else
local std_out = io.popen("./sdcv --utf8-input --utf8-output -nj "
.. ("%q"):format(word) .. " --data-dir " .. self.data_dir, "r")
.. ("%q"):format(word) .. " --data-dir " .. dict_dir, "r")
if std_out then
results_str = std_out:read("*all")
std_out:close()
@@ -86,21 +165,33 @@ function ReaderDictionary:stardictLookup(word, box)
--DEBUG("result str:", word, results_str)
local ok, results = pcall(JSON.decode, results_str)
if ok and results then
--DEBUG("lookup result table:", word, results)
self:showDict(word, tidy_markup(results), box)
-- we may get duplicates (sdcv may do multiple queries,
-- in fixed mode then in fuzzy mode), we have to remove them
local h
for _,r in ipairs(results) do
h = r.dict .. r.word .. r.definition
if seen_results[h] == nil then
table.insert(final_results, r)
seen_results[h] = true
end
end
else
DEBUG("JSON data cannot be decoded", results)
-- dummy results
results = {
{
dict = "",
word = word,
definition = _("No definition found."),
}
}
self:showDict(word, results, box)
end
end
if #final_results == 0 then
-- dummy results
final_results = {
{
dict = "",
word = word,
definition = _("No definition found."),
}
}
end
self:onLookupDone()
--DEBUG("lookup result table:", word, final_results)
self:showDict(word, tidyMarkup(final_results), box)
end
function ReaderDictionary:showDict(word, results, box)
@@ -118,7 +209,8 @@ function ReaderDictionary:showDict(word, results, box)
width = Screen:getWidth() - Screen:scaleBySize(80),
word_box = box,
-- differentiate between dict and wiki
wiki = self.wiki,
is_wiki = self.is_wiki,
wiki_languages = self.wiki_languages,
}
table.insert(self.dict_window_list, self.dict_window)
UIManager:show(self.dict_window)