mirror of
https://github.com/koreader/koreader.git
synced 2025-08-10 00:52:38 +00:00
Wikipedia: show images and allow interrupting queries
Reword some English messages
This commit is contained in:
@@ -6,6 +6,7 @@ local KeyValuePage = require("ui/widget/keyvaluepage")
|
||||
local LuaData = require("luadata")
|
||||
local NetworkMgr = require("ui/network/manager")
|
||||
local ReaderDictionary = require("apps/reader/modules/readerdictionary")
|
||||
local Trapper = require("ui/trapper")
|
||||
local Translator = require("ui/translator")
|
||||
local UIManager = require("ui/uimanager")
|
||||
local Wikipedia = require("ui/wikipedia")
|
||||
@@ -21,7 +22,6 @@ local ReaderWikipedia = ReaderDictionary:extend{
|
||||
-- identify itself
|
||||
is_wiki = true,
|
||||
wiki_languages = {},
|
||||
no_page = _("No wiki page found."),
|
||||
disable_history = G_reader_settings:isTrue("wikipedia_disable_history"),
|
||||
}
|
||||
|
||||
@@ -271,7 +271,29 @@ function ReaderWikipedia:addToMainMenu(menu_items)
|
||||
end,
|
||||
})
|
||||
end,
|
||||
}
|
||||
separator = true,
|
||||
},
|
||||
{ -- setting used in wikipedia.lua
|
||||
text = _("Show image in search results"),
|
||||
checked_func = function()
|
||||
return G_reader_settings:nilOrTrue("wikipedia_show_image")
|
||||
end,
|
||||
callback = function()
|
||||
G_reader_settings:flipNilOrTrue("wikipedia_show_image")
|
||||
end,
|
||||
},
|
||||
{ -- setting used in wikipedia.lua
|
||||
text = _("Show more images in full article"),
|
||||
enabled_func = function()
|
||||
return G_reader_settings:nilOrTrue("wikipedia_show_image")
|
||||
end,
|
||||
checked_func = function()
|
||||
return G_reader_settings:nilOrTrue("wikipedia_show_more_images") and G_reader_settings:nilOrTrue("wikipedia_show_image")
|
||||
end,
|
||||
callback = function()
|
||||
G_reader_settings:flipNilOrTrue("wikipedia_show_more_images")
|
||||
end,
|
||||
},
|
||||
}
|
||||
}
|
||||
end
|
||||
@@ -319,6 +341,14 @@ function ReaderWikipedia:initLanguages(word)
|
||||
end
|
||||
|
||||
-- Handler for the "LookupWikipedia" event.
-- All arguments are forwarded untouched to self:lookupWikipedia(), which is
-- run through Trapper:wrap() because the lookup may end up calling
-- Trapper:dismissableRunInSubprocess().
-- Always returns true.
function ReaderWikipedia:onLookupWikipedia(word, box, get_fullpage, forced_lang)
    local function run_lookup()
        self:lookupWikipedia(word, box, get_fullpage, forced_lang)
    end
    Trapper:wrap(run_lookup)
    return true
end
|
||||
|
||||
function ReaderWikipedia:lookupWikipedia(word, box, get_fullpage, forced_lang)
|
||||
if not NetworkMgr:isOnline() then
|
||||
NetworkMgr:promptWifiOn()
|
||||
return
|
||||
@@ -358,19 +388,35 @@ function ReaderWikipedia:onLookupWikipedia(word, box, get_fullpage, forced_lang)
|
||||
})
|
||||
end
|
||||
|
||||
-- Fix lookup message to include lang
|
||||
-- Fix lookup message to include lang and set appropriate error texts
|
||||
local no_result_text, req_failure_text
|
||||
if get_fullpage then
|
||||
self.lookup_msg = T(_("Getting Wikipedia %2 page:\n%1"), "%1", lang:upper())
|
||||
self.lookup_msg = T(_("Retrieving Wikipedia %2 article:\n%1"), "%1", lang:upper())
|
||||
req_failure_text = _("Failed to retrieve Wikipedia article.")
|
||||
no_result_text = _("Wikipedia article not found.")
|
||||
else
|
||||
self.lookup_msg = T(_("Searching Wikipedia %2 for:\n%1"), "%1", lang:upper())
|
||||
req_failure_text = _("Failed searching Wikipedia.")
|
||||
no_result_text = _("No Wikipedia articles matching search term.")
|
||||
end
|
||||
self:showLookupInfo(display_word)
|
||||
|
||||
local results = {}
|
||||
local ok, pages
|
||||
local lookup_cancelled = false
|
||||
Wikipedia:setTrapWidget(self.lookup_progress_msg)
|
||||
if get_fullpage then
|
||||
ok, pages = pcall(Wikipedia.wikifull, Wikipedia, word, lang)
|
||||
ok, pages = pcall(Wikipedia.getFullPage, Wikipedia, word, lang)
|
||||
else
|
||||
ok, pages = pcall(Wikipedia.wikintro, Wikipedia, word, lang)
|
||||
ok, pages = pcall(Wikipedia.searchAndGetIntros, Wikipedia, word, lang)
|
||||
end
|
||||
Wikipedia:resetTrapWidget()
|
||||
if not ok and pages and string.find(pages, Wikipedia.dismissed_error_code) then
|
||||
-- So we can display an alternate dummy result
|
||||
lookup_cancelled = true
|
||||
-- Or we could just not show anything with:
|
||||
-- self:dismissLookupInfo()
|
||||
-- return
|
||||
end
|
||||
if ok and pages then
|
||||
-- sort pages according to 'index' attribute if present (not present
|
||||
@@ -387,14 +433,14 @@ function ReaderWikipedia:onLookupWikipedia(word, box, get_fullpage, forced_lang)
|
||||
pages = sorted_pages
|
||||
end
|
||||
for pageid, page in pairs(pages) do
|
||||
local definition = page.extract or self.no_page
|
||||
local definition = page.extract or no_result_text
|
||||
if page.length then
|
||||
-- we get 'length' only for intro results
|
||||
-- let's append it to definition so we know
|
||||
-- how big/valuable the full page is
|
||||
local fullkb = math.ceil(page.length/1024)
|
||||
local more_factor = math.ceil( page.length / (1+definition:len()) ) -- +1 just in case len()=0
|
||||
definition = definition .. "\n" .. T(_("(full page : %1 kB, = %2 x this intro length)"), fullkb, more_factor)
|
||||
definition = definition .. "\n" .. T(_("(full article : %1 kB, = %2 x this intro length)"), fullkb, more_factor)
|
||||
end
|
||||
local result = {
|
||||
dict = T(_("Wikipedia %1"), lang:upper()),
|
||||
@@ -402,18 +448,27 @@ function ReaderWikipedia:onLookupWikipedia(word, box, get_fullpage, forced_lang)
|
||||
definition = definition,
|
||||
is_fullpage = get_fullpage,
|
||||
lang = lang,
|
||||
images = page.images,
|
||||
}
|
||||
table.insert(results, result)
|
||||
end
|
||||
-- logger.dbg of results will be done by ReaderDictionary:showDict()
|
||||
else
|
||||
logger.dbg("error:", pages)
|
||||
-- dummy results
|
||||
local definition
|
||||
if lookup_cancelled then
|
||||
definition = _("Wikipedia request canceled.")
|
||||
elseif ok then
|
||||
definition = no_result_text
|
||||
else
|
||||
definition = req_failure_text
|
||||
logger.dbg("error:", pages)
|
||||
end
|
||||
results = {
|
||||
{
|
||||
dict = T(_("Wikipedia %1"), lang:upper()),
|
||||
word = word,
|
||||
definition = self.no_page,
|
||||
definition = definition,
|
||||
is_fullpage = get_fullpage,
|
||||
lang = lang,
|
||||
}
|
||||
|
||||
@@ -1,8 +1,10 @@
|
||||
local JSON = require("json")
|
||||
local Screen = require("device").screen
|
||||
local ffiutil = require("ffi/util")
|
||||
local logger = require("logger")
|
||||
local util = require("ffi/util")
|
||||
local util = require("util")
|
||||
local _ = require("gettext")
|
||||
local T = require("ffi/util").template
|
||||
local T = ffiutil.template
|
||||
|
||||
--[[
|
||||
-- Query wikipedia using Wikimedia Web API.
|
||||
@@ -18,18 +20,9 @@ local T = require("ffi/util").template
|
||||
local Wikipedia = {
|
||||
wiki_server = "https://%s.wikipedia.org",
|
||||
wiki_path = "/w/api.php",
|
||||
wiki_params = {
|
||||
action = "query",
|
||||
prop = "extracts",
|
||||
format = "json",
|
||||
-- exintro = nil, -- get more than only the intro
|
||||
explaintext = "",
|
||||
redirects = "",
|
||||
-- title = nil, -- text to lookup, will be added below
|
||||
},
|
||||
default_lang = "en",
|
||||
-- Search query for better results
|
||||
-- see https://www.mediawiki.org/wiki/API:Main_page
|
||||
-- See https://www.mediawiki.org/wiki/API:Main_page for details.
|
||||
-- Search query, returns introductory texts (+ main thumbnail image)
|
||||
wiki_search_params = {
|
||||
action = "query",
|
||||
generator = "search",
|
||||
@@ -37,7 +30,7 @@ local Wikipedia = {
|
||||
-- gsrsearch = nil, -- text to lookup, will be added below
|
||||
gsrlimit = 20, -- max nb of results to get
|
||||
exlimit = "max",
|
||||
prop = "extracts|info", -- 'extracts' to get text, 'info' to get full page length
|
||||
prop = "extracts|info|pageimages", -- 'extracts' to get text, 'info' to get full page length
|
||||
format = "json",
|
||||
explaintext = "",
|
||||
exintro = "",
|
||||
@@ -45,6 +38,17 @@ local Wikipedia = {
|
||||
-- (otherwise, we get the full text for only the first result, and
|
||||
-- no text at all for the others
|
||||
},
|
||||
-- Full article, parsed to output text (+ main thumbnail image)
|
||||
wiki_full_params = {
|
||||
action = "query",
|
||||
prop = "extracts|pageimages",
|
||||
format = "json",
|
||||
-- exintro = nil, -- get more than only the intro
|
||||
explaintext = "",
|
||||
redirects = "",
|
||||
-- title = nil, -- text to lookup, will be added below
|
||||
},
|
||||
-- Full article, parsed to output HTML, for Save as EPUB
|
||||
wiki_phtml_params = {
|
||||
action = "parse",
|
||||
format = "json",
|
||||
@@ -55,32 +59,152 @@ local Wikipedia = {
|
||||
disablelimitreport = "",
|
||||
disableeditsection = "",
|
||||
},
|
||||
-- allow for disabling prettifying full page text
|
||||
-- Full article, parsed to output HTML, for images extraction
|
||||
-- (used with full article as text, if "show more images" enabled)
|
||||
wiki_images_params = { -- same as previous one, with just text html
|
||||
action = "parse",
|
||||
format = "json",
|
||||
-- we only need the following information
|
||||
prop = "text",
|
||||
-- page = nil, -- text to lookup, will be added below
|
||||
redirects = "",
|
||||
disabletoc = "", -- remove toc in html
|
||||
disablelimitreport = "",
|
||||
disableeditsection = "",
|
||||
},
|
||||
-- There is an alternative for obtaining page's images:
|
||||
-- prop=imageinfo&action=query&iiprop=url|dimensions|mime|extmetadata&generator=images&pageids=49448&iiurlwidth=100&iiextmetadatafilter=ImageDescription
|
||||
-- but it gives all images (including wikipedia icons) in any order, without
|
||||
-- any score or information that would help considering if they matter or not
|
||||
--
|
||||
|
||||
-- Allow for disabling prettifying full page text
|
||||
wiki_prettify = G_reader_settings:nilOrTrue("wikipedia_prettify"),
|
||||
|
||||
-- Can be set so HTTP requests will be done under Trapper and
|
||||
-- be interruptible
|
||||
trap_widget = nil,
|
||||
-- For actions done with Trapper:dismissable methods, we may throw
|
||||
-- an error() with this code. We make the value of this error
|
||||
-- accessible here so that caller can know it's a user dismiss.
|
||||
dismissed_error_code = "Interrupted by user",
|
||||
}
|
||||
|
||||
-- Build the base URL of the Wikipedia server for a language.
-- @param lang language code substituted into self.wiki_server
--        (self.default_lang is used when lang is nil or false)
-- @return the formatted server URL
function Wikipedia:getWikiServer(lang)
    local lang_code = lang or self.default_lang
    local server_url = string.format(self.wiki_server, lang_code)
    return server_url
end
|
||||
|
||||
-- Error codes that requester.request() may hand back to getUrlContent()
-- when a request has been aborted:
-- socket-level timeout (string comes from socket.lua)
local TIMEOUT_CODE = "timeout"
-- our own overall time limit (returned by sink_table_with_maxtime)
local MAXTIME_CODE = "maxtime reached"
|
||||
|
||||
-- Build a sink that appends every received chunk to a table, and aborts
-- (by returning nil, MAXTIME_CODE) once more than 'maxtime' seconds have
-- elapsed since the sink was created.
-- @param t table to append chunks to (a new one is created if nil)
-- @param maxtime maximum allowed number of seconds
-- @return the sink function, and the table chunks are stored into
local function sink_table_with_maxtime(t, maxtime)
    local function now()
        local secs, usecs = ffiutil.gettime()
        return secs + usecs/1000000
    end
    -- The clock starts ticking right away, at sink creation
    local started_at = now()
    t = t or {}
    local function sink(chunk, err)
        -- Elapsed time is only checked when the sink gets called
        if now() - started_at > maxtime then
            return nil, MAXTIME_CODE
        end
        if chunk then
            table.insert(t, chunk)
        end
        return 1
    end
    return sink, t
end
|
||||
|
||||
-- Fetch the content found at 'url' with a HTTP or HTTPS GET request.
-- @param url string, must start with "http://" or "https://"
-- @param timeout socket timeout in seconds (defaults to 10)
-- @param maxtime optional maximum number of seconds allowed for receiving
--        the content (no such limit when nil)
-- @return true, content on success
-- @return false, error code or message on failure (TIMEOUT_CODE or
--         MAXTIME_CODE when the request has been interrupted by these limits)
local function getUrlContent(url, timeout, maxtime)
    local socket = require('socket')
    local ltn12 = require('ltn12')
    local http = require('socket.http')
    local https = require('ssl.https')

    local requester
    if url:sub(1,7) == "http://" then
        requester = http
    elseif url:sub(1,8) == "https://" then
        requester = https
    else
        return false, "Unsupported protocol"
    end
    if not timeout then timeout = 10 end
    -- timeout needs to be set to 'http', even if we use 'https'
    http.TIMEOUT, https.TIMEOUT = timeout, timeout

    local sink = {}
    local request = {
        url = url,
        method = 'GET',
    }
    -- 'timeout' delay works on socket, and is triggered when
    -- that time has passed trying to connect, or after connection
    -- when no data has been read for this time.
    -- On a slow connection, it may not be triggered (as we could read
    -- 1 byte every 1 second, not triggering any timeout).
    -- 'maxtime' can be provided to overcome that: counting starts as soon
    -- as the sink is created just below (so, right before the request is
    -- made), but it is checked for only when data is received.
    -- Setting 'maxtime' and 'timeout' gives more chance to abort the request when
    -- it takes too much time (in the worst case: in timeout+maxtime seconds).
    -- But time taken by DNS lookup cannot easily be accounted for, so
    -- a request may (when dns lookup takes time) exceed timeout and maxtime...
    if maxtime then
        request.sink = sink_table_with_maxtime(sink, maxtime)
    else
        request.sink = ltn12.sink.table(sink)
    end

    -- The first value returned by request() is dropped by skip(1, ...)
    local code, headers, status = socket.skip(1, requester.request(request))
    local content = table.concat(sink) -- empty or content accumulated till now
    -- logger.dbg("code:", code)
    -- logger.dbg("headers:", headers)
    -- logger.dbg("status:", status)
    -- logger.dbg("#content:", #content)

    if code == TIMEOUT_CODE or code == MAXTIME_CODE then
        logger.warn("request interrupted:", code)
        return false, code
    end
    if headers == nil then
        logger.warn("No HTTP headers:", code, status)
        return false, "Network or remote server unavailable"
    end
    if not code or tostring(code):sub(1, 1) ~= "2" then -- all 200..299 HTTP codes are OK
        logger.warn("HTTP status not okay:", code, status)
        return false, "Remote server error or unavailable"
    end
    -- Check we really got the announced content size. A Content-Length
    -- header that is not a number can't be checked against: ignore it
    -- instead of wrongly rejecting the content we got.
    local content_length = tonumber(headers["content-length"])
    if content_length and #content ~= content_length then
        return false, "Incomplete content received"
    end
    return true, content
end
|
||||
|
||||
-- Remember the widget to be used as trap widget: as long as one is set,
-- loadPage() makes its HTTP request through
-- Trapper:dismissableRunInSubprocess() with it, so the request can be
-- interrupted by the user.
function Wikipedia:setTrapWidget(trap_widget)
    self.trap_widget = trap_widget
end
|
||||
|
||||
-- Forget any widget previously given to Wikipedia:setTrapWidget()
function Wikipedia:resetTrapWidget()
    self.trap_widget = nil
end
|
||||
|
||||
-- Kinds of request loadPage() can make, selected by its page_type parameter:
local WIKIPEDIA_INTRO = 1  -- search query
local WIKIPEDIA_FULL = 2   -- full page content
local WIKIPEDIA_PHTML = 3  -- page parsed to HTML
local WIKIPEDIA_IMAGES = 4 -- images found in page html
|
||||
|
||||
--[[
|
||||
-- return decoded JSON table from Wikipedia
|
||||
--]]
|
||||
function Wikipedia:loadPage(text, lang, page_type, plain)
|
||||
local socket = require('socket')
|
||||
local url = require('socket.url')
|
||||
local http = require('socket.http')
|
||||
local https = require('ssl.https')
|
||||
local ltn12 = require('ltn12')
|
||||
|
||||
local request, sink = {}, {}
|
||||
local query = ""
|
||||
|
||||
local parsed = url.parse(self:getWikiServer(lang))
|
||||
parsed.path = self.wiki_path
|
||||
if page_type == WIKIPEDIA_INTRO then -- search query
|
||||
@@ -90,8 +214,8 @@ function Wikipedia:loadPage(text, lang, page_type, plain)
|
||||
end
|
||||
parsed.query = query .. "gsrsearch=" .. url.escape(text)
|
||||
elseif page_type == WIKIPEDIA_FULL then -- full page content
|
||||
self.wiki_params.explaintext = plain and "" or nil
|
||||
for k,v in pairs(self.wiki_params) do
|
||||
self.wiki_full_params.explaintext = plain and "" or nil
|
||||
for k,v in pairs(self.wiki_full_params) do
|
||||
query = string.format("%s%s=%s&", query, k, v)
|
||||
end
|
||||
parsed.query = query .. "titles=" .. url.escape(text)
|
||||
@@ -100,66 +224,90 @@ function Wikipedia:loadPage(text, lang, page_type, plain)
|
||||
query = string.format("%s%s=%s&", query, k, v)
|
||||
end
|
||||
parsed.query = query .. "page=" .. url.escape(text)
|
||||
elseif page_type == WIKIPEDIA_IMAGES then -- images found in page html
|
||||
for k,v in pairs(self.wiki_images_params) do
|
||||
query = string.format("%s%s=%s&", query, k, v)
|
||||
end
|
||||
parsed.query = query .. "page=" .. url.escape(text)
|
||||
else
|
||||
return
|
||||
end
|
||||
|
||||
-- HTTP request
|
||||
request['url'] = url.build(parsed)
|
||||
request['method'] = 'GET'
|
||||
request['sink'] = ltn12.sink.table(sink)
|
||||
http.TIMEOUT, https.TIMEOUT = 10, 10
|
||||
local httpRequest = parsed.scheme == 'http' and http.request or https.request
|
||||
-- first argument returned by skip is code
|
||||
local _, headers, status = socket.skip(1, httpRequest(request))
|
||||
|
||||
-- raise error message when network is unavailable
|
||||
if headers == nil then
|
||||
error("Network is unreachable")
|
||||
local built_url = url.build(parsed)
|
||||
local completed, success, content
|
||||
if self.trap_widget then -- if previously set with Wikipedia:setTrapWidget()
|
||||
local Trapper = require("ui/trapper")
|
||||
local timeout, maxtime = 30, 60
|
||||
-- We use dismissableRunInSubprocess with complex return values:
|
||||
completed, success, content = Trapper:dismissableRunInSubprocess(function()
|
||||
return getUrlContent(built_url, timeout, maxtime)
|
||||
end, self.trap_widget)
|
||||
if not completed then
|
||||
error(self.dismissed_error_code) -- "Interrupted by user"
|
||||
end
|
||||
else
|
||||
local timeout, maxtime = 10, 60
|
||||
success, content = getUrlContent(built_url, timeout, maxtime)
|
||||
end
|
||||
if not success then
|
||||
error(content)
|
||||
end
|
||||
|
||||
if status ~= "HTTP/1.1 200 OK" then
|
||||
logger.warn("HTTP status not okay:", status)
|
||||
return
|
||||
end
|
||||
|
||||
local content = table.concat(sink)
|
||||
if content ~= "" and string.sub(content, 1,1) == "{" then
|
||||
local ok, result = pcall(JSON.decode, content)
|
||||
if ok and result then
|
||||
logger.dbg("wiki result", result)
|
||||
logger.dbg("wiki result json:", result)
|
||||
return result
|
||||
else
|
||||
logger.warn("wiki error:", result)
|
||||
logger.warn("wiki result json decoding error:", result)
|
||||
error("Failed decoding JSON")
|
||||
end
|
||||
else
|
||||
logger.warn("not JSON from wiki response:", content)
|
||||
logger.warn("wiki response is not json:", content)
|
||||
error("Response is not JSON")
|
||||
end
|
||||
end
|
||||
|
||||
-- search wikipedia and get intros for results
|
||||
function Wikipedia:wikintro(text, lang)
|
||||
function Wikipedia:searchAndGetIntros(text, lang)
|
||||
local result = self:loadPage(text, lang, WIKIPEDIA_INTRO, true)
|
||||
if result then
|
||||
local query = result.query
|
||||
if query then
|
||||
local show_image = G_reader_settings:nilOrTrue("wikipedia_show_image")
|
||||
-- Scale wikipedia normalized (we hope) thumbnail by 2 (adjusted
|
||||
-- to screen size/dpi) for intros (and x8 more for highres image)
|
||||
local image_size_factor = Screen:scaleBySize(200)/100.0
|
||||
if show_image then
|
||||
for pageid, page in pairs(query.pages) do
|
||||
self:addImages(page, lang, false, image_size_factor, 8)
|
||||
end
|
||||
end
|
||||
return query.pages
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
-- get full content of a wiki page
|
||||
function Wikipedia:wikifull(text, lang)
|
||||
local result = self:loadPage(text, lang, WIKIPEDIA_FULL, true)
|
||||
function Wikipedia:getFullPage(wiki_title, lang)
|
||||
local result = self:loadPage(wiki_title, lang, WIKIPEDIA_FULL, true)
|
||||
if result then
|
||||
local query = result.query
|
||||
if query then
|
||||
if self.wiki_prettify then
|
||||
-- Prettification of the plain text full page
|
||||
local show_image = G_reader_settings:nilOrTrue("wikipedia_show_image")
|
||||
local show_more_images = G_reader_settings:nilOrTrue("wikipedia_show_more_images")
|
||||
-- Scale wikipedia normalized (we hope) thumbnails by 4 (adjusted
|
||||
-- to screen size/dpi) for full page (and this *4 for highres image)
|
||||
local image_size_factor = Screen:scaleBySize(400)/100.0
|
||||
if self.wiki_prettify or show_image then
|
||||
for pageid, page in pairs(query.pages) do
|
||||
if page.extract then
|
||||
if self.wiki_prettify and page.extract then
|
||||
-- Prettification of the plain text full page
|
||||
page.extract = self:prettifyText(page.extract)
|
||||
end
|
||||
if show_image then
|
||||
self:addImages(page, lang, show_more_images, image_size_factor, 4)
|
||||
end
|
||||
end
|
||||
end
|
||||
return query.pages
|
||||
@@ -168,8 +316,8 @@ function Wikipedia:wikifull(text, lang)
|
||||
end
|
||||
|
||||
-- get parsed html content and other infos of a wiki page
|
||||
function Wikipedia:wikiphtml(text, lang)
|
||||
local result = self:loadPage(text, lang, WIKIPEDIA_PHTML, true)
|
||||
function Wikipedia:getFullPageHtml(wiki_title, lang)
|
||||
local result = self:loadPage(wiki_title, lang, WIKIPEDIA_PHTML, true)
|
||||
if result and result.parse then
|
||||
return result.parse
|
||||
end
|
||||
@@ -178,6 +326,247 @@ function Wikipedia:wikiphtml(text, lang)
|
||||
end
|
||||
end
|
||||
|
||||
-- Parse one bit of HTML containing an image (as collected by
-- Wikipedia:getFullPageImages()) into a table with the keys: source, width,
-- height, filename, caption.
-- @param thtml the bit of HTML
-- @param url the 'socket.url' module (used for unescaping the filename)
-- @param wiki_base_url prepended to a src that is a server-relative path
-- @return the image table, or nil when no <img> with a non-empty src and
--         numeric width and height is found
local function parseThumbHtml(thtml, url, wiki_base_url)
    -- We get <a href="/wiki/File:real_file_name.jpg (or /wiki/Fichier:real_file_name.jpg
    -- depending on Wikipedia lang)
    local filename = thtml:match([[<a href="/wiki/[^:]*:([^"]*)" class="image"]])
    if filename then
        filename = url.unescape(filename)
    end
    logger.dbg("found image with filename:", filename)
    -- logger.dbg(thtml)
    local timg, tremain = thtml:match([[(<img .->)(.*)]])
    if not (timg and tremain) then
        return nil
    end
    -- (Should we discard those without caption ?)
    local caption = util.htmlToPlainText(tremain)
    if caption == "" then caption = nil end
    logger.dbg(" caption:", caption)
    -- logger.dbg(timg)
    -- Attribute names are anchored on the whitespace preceding them, so
    -- that we can't pick the value of another attribute whose name merely
    -- ends the same way (ie: data-file-width="..." when looking for width)
    local src = timg:match([[%ssrc="([^"]*)"]])
    if not src or src == "" then
        return nil
    end
    if src:sub(1,2) == "//" then
        src = "https:" .. src
    elseif src:sub(1,1) == "/" then -- non absolute url
        src = wiki_base_url .. src
    end
    local width = tonumber(timg:match([[%swidth="([^"]*)"]]))
    local height = tonumber(timg:match([[%sheight="([^"]*)"]]))
    -- Ignore img without width and height, which should exclude
    -- javascript maps and other unsupported stuff
    if not (width and height) then
        return nil
    end
    -- Images in the html we got seem to be x4.5 the size of
    -- the thumbnail we get with searchAndGetIntros() or
    -- getFullPage(). Normalize them to the size of the thumbnail,
    -- so we can resize them all later with the same rules.
    width = math.ceil(width/4.5)
    height = math.ceil(height/4.5)
    -- No need to adjust width in src url here, as it will be
    -- done in addImages() anyway
    -- src = src:gsub("(.*/)%d+(px-[^/]*)", "%1"..width.."%2")
    logger.dbg(" size:", width, "x", height, "url:", src)
    return {
        source = src,
        width = width,
        height = height,
        filename = filename,
        caption = caption,
    }
end

-- Get the images found in the parsed html of a wiki page.
-- @param wiki_title title of the page, given to loadPage()
-- @param lang wikipedia language code
-- @return a list (possibly empty) of tables, each in a format similar to
--         page.thumbnail (source, width, height) with additional filename
--         and caption
-- Errors raised by loadPage() are not caught here.
function Wikipedia:getFullPageImages(wiki_title, lang)
    local images = {} -- will be returned, each in a format similar to page.thumbnail
    local result = self:loadPage(wiki_title, lang, WIKIPEDIA_IMAGES, true)
    if not (result and result.parse and result.parse.text and result.parse.text["*"]) then
        return images
    end
    local html = result.parse.text["*"] -- html content
    local url = require('socket.url')
    local wiki_base_url = self:getWikiServer(lang)

    local thumbs = {} -- bits of HTML containing an image
    -- We first try to catch images in <div class=thumbinner>, which should exclude
    -- wikipedia icons, flags... These seem to all end with a double </div>.
    for thtml in html:gmatch([[<div class="thumbinner".-</div>%s*</div>]]) do
        table.insert(thumbs, thtml)
    end
    -- We then also try to catch images in galleries (which often are less
    -- interesting than those in thumbinner) as a 2nd set.
    for thtml in html:gmatch([[<li class="gallerybox".-<div class="thumb".-</div>%s*</div>%s*<div class="gallerytext">.-</div>%s*</div>]]) do
        table.insert(thumbs, thtml)
    end
    -- We may miss some interesting images in the page's top right table, but
    -- there's no easy way to distinguish them from icons/flags in this table...

    for idx = 1, #thumbs do
        local image = parseThumbHtml(thumbs[idx], url, wiki_base_url)
        if image then
            table.insert(images, image)
        end
    end
    return images
end
|
||||
|
||||
-- Fetch and render one of the images built by Wikipedia:addImages()
-- (this function gets wrapped as the image's load_bb_func by :addImages()).
-- @param image image table: source, width and height are used for the
--        low-res version, hi_source for the high-res one
-- @param highres when true, load the high-res version
-- @return true when loading has been interrupted by the user, nothing
--         otherwise. On success, the blitbuffer is stored into image.bb
--         (low-res) or image.hi_bb (high-res); on failure, a warning is
--         logged and image is left untouched.
local function image_load_bb_func(image, highres)
    local source, trap_widget
    if highres then
        -- We need to let the user know image loading is happening,
        -- with a discreet TrapWidget
        trap_widget = _("Loading high-res image… (tap to cancel)")
        source = image.hi_source
    else
        -- We use an invisible widget that will resend the dismiss event,
        -- so that image loading in TextBoxWidget is unobtrusive and
        -- interruptible
        trap_widget = false
        source = image.source
    end
    -- Image may be big or take some time to be resized on wikipedia servers.
    -- As we use dismissableRunInSubprocess and can interrupt this loading,
    -- we can use quite high timeouts
    local timeout, maxtime = 60, 120

    logger.dbg("fetching", source)
    local Trapper = require("ui/trapper")
    local function fetch_image_data()
        local fetched, content = getUrlContent(source, timeout, maxtime)
        -- With simple string value, we're not able to return the failure
        -- reason, so log it here
        if not fetched then
            logger.warn("failed fetching image from", source, ":", content)
        end
        return fetched and content or nil
    end
    -- We use dismissableRunInSubprocess with simple string return value to
    -- avoid dump()/load() a long string of image bytes
    local completed, data = Trapper:dismissableRunInSubprocess(fetch_image_data,
        trap_widget, true) -- task_returns_simple_string=true

    if not completed then
        logger.dbg("image fetching interrupted by user")
        return true -- let caller know it was interrupted
    end
    -- Success can only be guessed from the presence of data
    if not data then
        -- log it again (on Android, log from sub-process seem to not work)
        logger.warn("failed fetching image from", source)
        return
    end
    logger.dbg(" fetched", #data)

    -- Use mupdf to render image to blitbuffer
    local mupdf = require("ffi/mupdf")
    local rendered, bb_or_error
    if highres then
        -- No need for width and height for high-res
        rendered, bb_or_error = pcall(mupdf.renderImage, data, #data)
    else
        -- For low-res, we should ensure the image we got from wikipedia is
        -- the right size, so it does not overflow our reserved area
        -- (TextBoxWidget may have adjusted image.width and height)
        rendered, bb_or_error = pcall(mupdf.renderImage, data, #data, image.width, image.height)
    end
    if not rendered then
        logger.warn("failed building image from", source, ":", bb_or_error)
        return
    end
    if highres then
        image.hi_bb = bb_or_error
    else
        image.bb = bb_or_error
    end
end
|
||||
|
||||
-- Set page.images to a list of image tables with the keys expected by
-- TextBoxWidget, built from page.thumbnail and, when 'more_images' is set,
-- from the images returned by Wikipedia:getFullPageImages().
-- @param page a page table from a wikipedia query result (page.thumbnail,
--        page.pageimage and page.title are read, page.images is written,
--        and page.thumbnail.filename is set)
-- @param lang wikipedia language code
-- @param more_images when true, a second request is made to wikipedia to
--        get the other images of the page
-- @param image_size_factor multiplier applied to wikipedia images' width and
--        height (should account for screen size/DPI)
-- @param hi_image_size_factor additional multiplier for the high
--        resolution version of each image (defaults to 4)
function Wikipedia:addImages(page, lang, more_images, image_size_factor, hi_image_size_factor)
    -- List of images, table with keys as expected by TextBoxWidget
    page.images = {}
    -- List of wikipedia images data structures (page.thumbnail and images
    -- extracted from html) made to have the same keys for common processing
    local wimages = {}

    -- We got what Wikipedia scored as the most interesting image for this
    -- page in page.thumbnail, and its filename in page.pageimage, ie:
    --  "thumbnail": {
    --       "source": "https://upload.wikimedia.org/wikipedia/commons/thumb/4/45/Reading_on_the_bus_train_or_transit.jpg/37px-Reading_on_the_bus_train_or_transit.jpg",
    --       "width": 37,
    --       "height": 50
    --  },
    --  "pageimage": "Reading_on_the_bus_train_or_transit.jpg"
    --
    local first_image_filename = nil
    local thumbnail = page.thumbnail
    if thumbnail and thumbnail.source then
        thumbnail.filename = page.pageimage
        first_image_filename = page.pageimage
        table.insert(wimages, thumbnail)
    end
    -- To get more images, we need to make a second request to wikipedia
    if more_images then
        local ok, images_or_err = pcall(Wikipedia.getFullPageImages, Wikipedia, page.title, lang)
        if ok then
            for _, wimage in ipairs(images_or_err) do
                if first_image_filename and wimage.filename == first_image_filename then
                    -- We got the same image as the thumbnail one (which is
                    -- then at index 1), but it may have a caption: replace
                    -- thumbnail one with this one
                    wimages[1] = wimage
                else
                    table.insert(wimages, wimage)
                end
            end
        else
            logger.warn("error getting more images", images_or_err)
        end
    end

    -- All wikipedia image urls like .../wikipedia/commons/A/BC/<filename>
    -- or .../wikipedia/commons/thumb/A/BC/<filename>/<width>px-<filename>
    -- can be transformed to another url with a requested new_width with the form:
    --   /wikipedia/commons/thumb/A/BC/<filename>/<new_width>px-<filename>
    -- (Additionally, the image format can be changed by appending .png,
    -- .jpg or .gif to it)
    -- The resize is so done on Wikipedia servers from the source image for
    -- the best quality.
    local width_in_url_pattern = "(.*/)%d+(px-[^/]*)"
    -- High resolution version is x 4 by default
    local hi_factor = hi_image_size_factor or 4

    -- All our wimages now have the keys: source, width, height, filename, caption
    for _, wimage in ipairs(wimages) do
        -- We trust wikipedia, and our x4.5 factor in :getFullPageImages(), for adequate
        -- and homogeneous images' sizes. We'll just scale them according to the
        -- provided 'image_size_factor' (which should account for screen size/DPI)
        local width = wimage.width or 100 -- in case we don't get any width or height
        local height = wimage.height or 100
        -- Give a little boost in size to thin images
        local is_thin = width < height / 2 or height < width / 2
        if is_thin then
            width = width * 1.3
            height = height * 1.3
        end
        width = math.ceil(width * image_size_factor)
        height = math.ceil(height * image_size_factor)
        local source = wimage.source:gsub(width_in_url_pattern, "%1"..width.."%2")
        -- We build values for a high resolution version of the image, to be displayed
        -- with ImageViewer
        local hi_width = width * hi_factor
        local hi_height = height * hi_factor
        local hi_source = wimage.source:gsub(width_in_url_pattern, "%1"..hi_width.."%2")
        local title = wimage.filename
        if title then
            title = title:gsub("_", " ")
        end
        -- As expected by TextBoxWidget (with additional source and
        -- hi_source, that will be used by load_bb_func)
        local image = {
            title = title,
            caption = wimage.caption,
            source = source,
            width = width,
            height = height,
            bb = nil, -- will be loaded and build only if needed
            hi_source = hi_source,
            hi_width = hi_width,
            hi_height = hi_height,
            hi_bb = nil, -- will be loaded and build only if needed
        }
        -- If bb or hi_bb is nil, TextBoxWidget will call a method named "load_bb_func"
        image.load_bb_func = function(highres)
            return image_load_bb_func(image, highres)
        end
        table.insert(page.images, image)
    end
end
|
||||
|
||||
-- UTF8 of unicode geometrical shapes we can use to replace
|
||||
-- the "=== title ===" of Wikipedia plaintext pages
|
||||
-- These chosen ones are available in most fonts (prettier symbols
|
||||
@@ -218,38 +607,6 @@ function Wikipedia:prettifyText(text)
|
||||
end
|
||||
|
||||
|
||||
local function getUrlContent(url, timeout)
|
||||
local socket = require('socket')
|
||||
local ltn12 = require('ltn12')
|
||||
local requester
|
||||
if url:sub(1,7) == "http://" then
|
||||
requester = require('socket.http')
|
||||
elseif url:sub(1,8) == "https://" then
|
||||
requester = require('ssl.https')
|
||||
else
|
||||
return false, "Unsupported protocol"
|
||||
end
|
||||
requester.TIMEOUT = timeout or 10
|
||||
local request = {}
|
||||
local sink = {}
|
||||
request['url'] = url
|
||||
request['method'] = 'GET'
|
||||
request['sink'] = ltn12.sink.table(sink)
|
||||
-- first argument returned by skip is code
|
||||
local _, headers, status = socket.skip(1, requester.request(request))
|
||||
|
||||
if headers == nil then
|
||||
logger.warn("No HTTP headers")
|
||||
return false, "Network unavailable"
|
||||
end
|
||||
if status ~= "HTTP/1.1 200 OK" then
|
||||
logger.warn("HTTP status not okay:", status)
|
||||
return false, "Network unavailable"
|
||||
end
|
||||
|
||||
return true, table.concat(sink)
|
||||
end
|
||||
|
||||
-- UTF8 of unicode geometrical shapes we'll prepend to wikipedia section headers,
|
||||
-- to help identify hierarchy (otherwise, only the small font size differences help).
|
||||
-- Best if identical to the ones used above for prettifying full plain text page.
|
||||
@@ -292,12 +649,12 @@ function Wikipedia:createEpub(epub_path, page, lang, with_images)
|
||||
-- Trapper:info() and Trapper:confirm() will just use logger.
|
||||
local UI = require("ui/trapper")
|
||||
|
||||
UI:info(_("Fetching Wikipedia page…"))
|
||||
local ok, phtml = pcall(self.wikiphtml, self, page, lang)
|
||||
UI:info(_("Retrieving Wikipedia article…"))
|
||||
local ok, phtml = pcall(self.getFullPageHtml, self, page, lang)
|
||||
if not ok then
|
||||
UI:info(phtml) -- display error in InfoMessage
|
||||
-- Sleep a bit to make that error seen
|
||||
util.sleep(2)
|
||||
ffiutil.sleep(2)
|
||||
UI:reset()
|
||||
return false
|
||||
end
|
||||
@@ -403,13 +760,13 @@ function Wikipedia:createEpub(epub_path, page, lang, with_images)
|
||||
if with_images then
|
||||
-- If no UI (Trapper:wrap() not called), UI:confirm() will answer true
|
||||
if #images > 0 then
|
||||
include_images = UI:confirm(T(_("The page contains %1 images.\nWould you like to download and include them in the generated EPUB file?"), #images), _("Don't include"), _("Include"))
|
||||
include_images = UI:confirm(T(_("This article contains %1 images.\nWould you like to download and include them in the generated EPUB file?"), #images), _("Don't include"), _("Include"))
|
||||
if include_images then
|
||||
use_img_2x = UI:confirm(_("Would you like to use slightly higher quality images? This will result in a bigger file size."), _("Standard quality"), _("Higher quality"))
|
||||
end
|
||||
else
|
||||
UI:info(_("The page does not contain any images."))
|
||||
util.sleep(1) -- Let the user see that
|
||||
UI:info(_("This article does not contain any images."))
|
||||
ffiutil.sleep(1) -- Let the user see that
|
||||
end
|
||||
end
|
||||
if not include_images then
|
||||
@@ -568,6 +925,10 @@ div.thumb {
|
||||
ul, ol {
|
||||
margin-left: 0em;
|
||||
}
|
||||
/* avoid a line with a standalone bullet */
|
||||
li.gallerybox {
|
||||
display: inline;
|
||||
}
|
||||
/* helps crengine to not display them as block elements */
|
||||
time, abbr, sup {
|
||||
display: inline;
|
||||
@@ -655,6 +1016,10 @@ time, abbr, sup {
|
||||
-- external link for us, so let's remove this link.
|
||||
html = html:gsub("<a[^>]*>%s*(<%s*img [^>]*>)%s*</a>", "%1")
|
||||
|
||||
-- TODO: do something for <li class="gallerybox"...> so they are no longer
|
||||
-- a <li> (crengine displays them one above the other) and can be displayed
|
||||
-- side by side
|
||||
|
||||
-- For some <div class="thumb tright"> , which include nested divs, although
|
||||
-- perfectly balanced, crengine seems to miss some closing </div> and we
|
||||
-- end up having our image bordered box including the remaining main wiki text.
|
||||
@@ -771,7 +1136,7 @@ time, abbr, sup {
|
||||
-- Process can be interrupted at this point between each image download
|
||||
-- by tapping while the InfoMessage is displayed
|
||||
-- We use the fast_refresh option from image #2 for a quicker download
|
||||
local go_on = UI:info(T(_("Fetching image %1 / %2 …"), inum, nb_images), inum >= 2)
|
||||
local go_on = UI:info(T(_("Retrieving image %1 / %2 …"), inum, nb_images), inum >= 2)
|
||||
if not go_on then
|
||||
cancelled = true
|
||||
break
|
||||
@@ -813,7 +1178,7 @@ time, abbr, sup {
|
||||
end
|
||||
epub:close()
|
||||
-- This was nearly a no-op, so sleep a bit to make that progress step seen
|
||||
util.usleep(300000)
|
||||
ffiutil.usleep(300000)
|
||||
UI:reset() -- close last InfoMessage
|
||||
|
||||
if cancelled then
|
||||
|
||||
Reference in New Issue
Block a user