Mirror of https://github.com/koreader/koreader.git (synced 2025-08-10 00:52:38 +00:00)
NewsDownloader: new option to allow EPUB volumization (#8263)
When this feature is enabled on a feed and that feed is synced, all new feed entries are collected into a single new EPUB file. This is achieved by implementing a feed history feature (downloaded feed entries are recorded as MD5 hashes in a LuaSettings file), and by introducing additional methods into epubdownloadbackend.lua that allow multiple HTML documents to be added to a single EPUB file.
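The sketch below illustrates the feed-history idea in a few lines of Lua. It is not the code from this commit: the settings file name and the helper names (seenBefore, markDownloaded) are hypothetical; md5 (ffi/sha2) and LuaSettings are KOReader helpers already used elsewhere in this changeset. The first diff below appears to be against plugins/newsdownloader.koplugin/epubdownloadbackend.lua, where EpubDownloadBackend is reworked into EpubBuilder.

local LuaSettings = require("luasettings")
local md5 = require("ffi/sha2").md5

-- Hypothetical history store; the real file name/location may differ.
local history = LuaSettings:open("news_downloader_feed_history.lua")

-- True if an entry with this title was already downloaded in an earlier sync.
local function seenBefore(item_title)
    return history:readSetting(md5(item_title)) ~= nil
end

-- Record an entry once it has been added to the volumized EPUB.
local function markDownloaded(item_title)
    history:saveSetting(md5(item_title), os.time())
    history:flush()
end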
@@ -1,15 +1,10 @@
|
||||
local NewsHelpers = require("http_utilities")
|
||||
local Version = require("version")
|
||||
local ffiutil = require("ffi/util")
|
||||
local http = require("socket.http")
|
||||
local logger = require("logger")
|
||||
local ltn12 = require("ltn12")
|
||||
local socket = require("socket")
|
||||
local socket_url = require("socket.url")
|
||||
local socketutil = require("socketutil")
|
||||
local _ = require("gettext")
|
||||
local T = ffiutil.template
|
||||
|
||||
local EpubDownloadBackend = {
|
||||
local EpubBuilder = {
|
||||
-- Can be set so HTTP requests will be done under Trapper and
|
||||
-- be interruptible
|
||||
trap_widget = nil,
|
||||
@@ -17,8 +12,89 @@ local EpubDownloadBackend = {
|
||||
-- and error() with this code. We make the value of this error
|
||||
-- accessible here so that caller can know it's a user dismiss.
|
||||
dismissed_error_code = "Interrupted by user",
|
||||
title = nil,
|
||||
ncx_toc = nil,
|
||||
ncx_manifest = nil,
|
||||
ncx_contents = nil,
|
||||
ncx_images = nil,
|
||||
}
|
||||
local max_redirects = 5; --prevent infinite redirects
|
||||
|
||||
function EpubBuilder:new(o)
|
||||
o = o or {}
|
||||
self.__index = self
|
||||
setmetatable(o, self)
|
||||
|
||||
return o
|
||||
end
|
||||
|
||||
function EpubBuilder:build(abs_output_path)
|
||||
-- Open the zip file (with .tmp for now, as crengine may still
|
||||
-- have a handle to the final epub_path, and we don't want to
|
||||
-- delete a good one if we fail/cancel later)
|
||||
local tmp_path = abs_output_path .. ".tmp"
|
||||
local ZipWriter = require("ffi/zipwriter")
|
||||
local epub = ZipWriter:new{}
|
||||
|
||||
if not epub:open(tmp_path) then
|
||||
logger.dbg("Failed to open tmp_path")
|
||||
return false
|
||||
end
|
||||
|
||||
epub:add("mimetype", "application/epub+zip")
|
||||
epub:add("META-INF/container.xml", [[
|
||||
<?xml version="1.0"?>
|
||||
<container version="1.0" xmlns="urn:oasis:names:tc:opendocument:xmlns:container">
|
||||
<rootfiles>
|
||||
<rootfile full-path="OEBPS/content.opf" media-type="application/oebps-package+xml"/>
|
||||
</rootfiles>
|
||||
</container>]])
|
||||
|
||||
-- Add the manifest.
|
||||
if not self.ncx_manifest or #self.ncx_manifest == 0 then
|
||||
error("EPUB does not contain a valid manifest.")
|
||||
end
|
||||
--logger.dbg("Adding Manifest:", self.ncx_manifest)
|
||||
epub:add("OEBPS/content.opf", table.concat(self.ncx_manifest))
|
||||
|
||||
-- Add the table of contents.
|
||||
if not self.ncx_toc or #self.ncx_toc == 0 then
|
||||
error("EPUB does not contain a valid table of contents.")
|
||||
end
|
||||
--logger.dbg("Adding TOC:", self.ncx_toc)
|
||||
epub:add("OEBPS/toc.ncx", table.concat(self.ncx_toc))
|
||||
|
||||
-- Add the contents.
|
||||
if not self.ncx_contents or #self.ncx_contents == 0 then
|
||||
error("EPUB does not contain any content.")
|
||||
end
|
||||
--logger.dbg("Adding Content:", self.ncx_contents)
|
||||
|
||||
for index, content in ipairs(self.ncx_contents) do
|
||||
epub:add("OEBPS/" .. content.filename, content.html)
|
||||
end
|
||||
|
||||
-- Add the images.
|
||||
--logger.dbg("Adding Images:", self.ncx_images)
|
||||
if self.ncx_images then
|
||||
for index, image in ipairs(self.ncx_images) do
|
||||
epub:add(
|
||||
"OEBPS/" .. image.path,
|
||||
image.content,
|
||||
image.no_compression
|
||||
)
|
||||
end
|
||||
end
|
||||
|
||||
epub:close()
|
||||
os.rename(tmp_path, abs_output_path)
|
||||
|
||||
collectgarbage()
|
||||
|
||||
end
|
||||
|
||||
function EpubBuilder:release()
|
||||
-- Stub for cleanup methods
|
||||
end
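The builder is driven from feed_source.lua further down; a minimal usage sketch follows. The chapter and image field names match what addToc, addManifest, addContents and addImages (defined below) read, while the output path and titles are hypothetical.

local md5 = require("ffi/sha2").md5
local EpubBuilder = require("epubdownloadbackend")

-- One chapter per downloaded feed entry.
local chapters = {
    {
        title = "First article",
        md5 = md5("First article"),
        html = "<html><head></head><body><p>Hello</p></body></html>",
        images = {},
    },
}

local epub = EpubBuilder:new{}
epub:setTitle("Example feed")
epub:addToc(chapters)
epub:addManifest(chapters, {})
epub:addContents(chapters)
epub:addImages({})
epub:build("/tmp/example_feed.epub") -- hypothetical output path
epub:release()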
|
||||
|
||||
-- filter HTML using CSS selector
|
||||
local function filter(text, element)
|
||||
@@ -68,79 +144,9 @@ local function filter(text, element)
|
||||
return "<!DOCTYPE html><html><head></head><body>" .. filtered .. "</body></html>"
|
||||
end
|
||||
|
||||
-- Get URL content
|
||||
local function getUrlContent(url, timeout, maxtime, redirectCount)
|
||||
logger.dbg("getUrlContent(", url, ",", timeout, ",", maxtime, ",", redirectCount, ")")
|
||||
if not redirectCount then
|
||||
redirectCount = 0
|
||||
elseif redirectCount == max_redirects then
|
||||
error("EpubDownloadBackend: reached max redirects: ", redirectCount)
|
||||
end
|
||||
|
||||
if not timeout then timeout = 10 end
|
||||
logger.dbg("timeout:", timeout)
|
||||
|
||||
local sink = {}
|
||||
local parsed = socket_url.parse(url)
|
||||
socketutil:set_timeout(timeout, maxtime or 30)
|
||||
local request = {
|
||||
url = url,
|
||||
method = "GET",
|
||||
sink = maxtime and socketutil.table_sink(sink) or ltn12.sink.table(sink),
|
||||
}
|
||||
logger.dbg("request:", request)
|
||||
local code, headers, status = socket.skip(1, http.request(request))
|
||||
socketutil:reset_timeout()
|
||||
logger.dbg("After http.request")
|
||||
local content = table.concat(sink) -- empty or content accumulated till now
|
||||
logger.dbg("type(code):", type(code))
|
||||
logger.dbg("code:", code)
|
||||
logger.dbg("headers:", headers)
|
||||
logger.dbg("status:", status)
|
||||
logger.dbg("#content:", #content)
|
||||
|
||||
if code == socketutil.TIMEOUT_CODE or
|
||||
code == socketutil.SSL_HANDSHAKE_CODE or
|
||||
code == socketutil.SINK_TIMEOUT_CODE
|
||||
then
|
||||
logger.warn("request interrupted:", code)
|
||||
return false, code
|
||||
end
|
||||
if headers == nil then
|
||||
logger.warn("No HTTP headers:", code, status)
|
||||
return false, "Network or remote server unavailable"
|
||||
end
|
||||
if not code or string.sub(code, 1, 1) ~= "2" then -- all 200..299 HTTP codes are OK
|
||||
if code and code > 299 and code < 400 and headers and headers.location then -- handle 301, 302...
|
||||
local redirected_url = headers.location
|
||||
local parsed_redirect_location = socket_url.parse(redirected_url)
|
||||
if not parsed_redirect_location.host then
|
||||
parsed_redirect_location.host = parsed.host
|
||||
parsed_redirect_location.scheme = parsed.scheme
|
||||
redirected_url = socket_url.build(parsed_redirect_location)
|
||||
end
|
||||
logger.dbg("getUrlContent: Redirecting to url: ", redirected_url)
|
||||
return getUrlContent(redirected_url, timeout, maxtime, redirectCount + 1)
|
||||
else
|
||||
error("EpubDownloadBackend: Don't know how to handle HTTP response status: ", status)
|
||||
end
|
||||
logger.warn("HTTP status not okay:", code, status)
|
||||
return false, "Remote server error or unavailable"
|
||||
end
|
||||
if headers and headers["content-length"] then
|
||||
-- Check we really got the announced content size
|
||||
local content_length = tonumber(headers["content-length"])
|
||||
if #content ~= content_length then
|
||||
return false, "Incomplete content received"
|
||||
end
|
||||
end
|
||||
logger.dbg("Returning content ok")
|
||||
return true, content
|
||||
end
|
||||
|
||||
function EpubDownloadBackend:getResponseAsString(url)
|
||||
logger.dbg("EpubDownloadBackend:getResponseAsString(", url, ")")
|
||||
local success, content = getUrlContent(url)
|
||||
function EpubBuilder:getResponseAsString(url)
|
||||
logger.dbg("EpubBuilder:getResponseAsString(", url, ")")
|
||||
local success, content = NewsHelpers:getUrlContent(url)
|
||||
if (success) then
|
||||
return content
|
||||
else
|
||||
@@ -148,38 +154,14 @@ function EpubDownloadBackend:getResponseAsString(url)
|
||||
end
|
||||
end
|
||||
|
||||
function EpubDownloadBackend:setTrapWidget(trap_widget)
|
||||
function EpubBuilder:setTrapWidget(trap_widget)
|
||||
self.trap_widget = trap_widget
|
||||
end
|
||||
|
||||
function EpubDownloadBackend:resetTrapWidget()
|
||||
function EpubBuilder:resetTrapWidget()
|
||||
self.trap_widget = nil
|
||||
end
|
||||
|
||||
function EpubDownloadBackend:loadPage(url)
|
||||
local completed, success, content
|
||||
if self.trap_widget then -- if previously set with EpubDownloadBackend:setTrapWidget()
|
||||
local Trapper = require("ui/trapper")
|
||||
local timeout, maxtime = 30, 60
|
||||
-- We use dismissableRunInSubprocess with complex return values:
|
||||
completed, success, content = Trapper:dismissableRunInSubprocess(function()
|
||||
return getUrlContent(url, timeout, maxtime)
|
||||
end, self.trap_widget)
|
||||
if not completed then
|
||||
error(self.dismissed_error_code) -- "Interrupted by user"
|
||||
end
|
||||
else
|
||||
local timeout, maxtime = 10, 60
|
||||
success, content = getUrlContent(url, timeout, maxtime)
|
||||
end
|
||||
logger.dbg("success:", success, "type(content):", type(content), "content:", content:sub(1, 500), "...")
|
||||
if not success then
|
||||
error(content)
|
||||
else
|
||||
return content
|
||||
end
|
||||
end
|
||||
|
||||
local ext_to_mimetype = {
|
||||
png = "image/png",
|
||||
jpg = "image/jpeg",
|
||||
@@ -195,29 +177,15 @@ local ext_to_mimetype = {
|
||||
ttf = "application/truetype",
|
||||
woff = "application/font-woff",
|
||||
}
|
||||
-- Create an epub file (with possibly images)
|
||||
function EpubDownloadBackend:createEpub(epub_path, html, url, include_images, message, filter_enable, filter_element)
|
||||
logger.dbg("EpubDownloadBackend:createEpub(", epub_path, ")")
|
||||
-- Use Trapper to display progress and ask questions through the UI.
|
||||
-- We need to have been Trapper.wrap()'ed for UI to be used, otherwise
|
||||
-- Trapper:info() and Trapper:confirm() will just use logger.
|
||||
local UI = require("ui/trapper")
|
||||
-- We may need to build absolute urls for non-absolute links and images urls
|
||||
-- GetPublishableHtml
|
||||
function EpubBuilder:getImagesAndHtml(html, url, include_images, filter_enable, filter_element)
|
||||
local base_url = socket_url.parse(url)
|
||||
|
||||
local cancelled = false
|
||||
local page_htmltitle = html:match([[<title>(.*)</title>]])
|
||||
logger.dbg("page_htmltitle is ", page_htmltitle)
|
||||
-- local sections = html.sections -- Wikipedia provided TOC
|
||||
local bookid = "bookid_placeholder" --string.format("wikipedia_%s_%s_%s", lang, phtml.pageid, phtml.revid)
|
||||
-- Not sure if this bookid may ever be used by indexing software/calibre, but if it is,
|
||||
-- should it changes if content is updated (as now, including the wikipedia revisionId),
|
||||
-- or should it stays the same even if revid changes (content of the same book updated).
|
||||
if filter_enable then html = filter(html, filter_element) end
|
||||
local images = {}
|
||||
local seen_images = {}
|
||||
local imagenum = 1
|
||||
local cover_imgid = nil -- best candidate for cover among our images
|
||||
html = filter_enable and filter(html, filter_element) or html
|
||||
|
||||
local processImg = function(img_tag)
|
||||
local src = img_tag:match([[src="([^"]*)"]])
|
||||
if src == nil or src == "" then
|
||||
@@ -272,13 +240,20 @@ function EpubDownloadBackend:createEpub(epub_path, html, url, include_images, me
|
||||
width = width,
|
||||
height = height,
|
||||
}
|
||||
table.insert(images, cur_image)
|
||||
|
||||
seen_images[src] = cur_image
|
||||
-- Use first image of reasonable size (not an icon) and portrait-like as cover-image
|
||||
if not cover_imgid and width and width > 50 and height and height > 50 and height > width then
|
||||
logger.dbg("Found a suitable cover image")
|
||||
cover_imgid = imgid
|
||||
cur_image["cover_image"] = true
|
||||
end
|
||||
|
||||
table.insert(
|
||||
images,
|
||||
cur_image
|
||||
)
|
||||
|
||||
imagenum = imagenum + 1
|
||||
end
|
||||
-- crengine will NOT use width and height attributes, but it will use
|
||||
@@ -296,130 +271,53 @@ function EpubDownloadBackend:createEpub(epub_path, html, url, include_images, me
|
||||
local style = table.concat(style_props, "; ")
|
||||
return string.format([[<img src="%s" style="%s" alt=""/>]], cur_image.imgpath, style)
|
||||
end
|
||||
html = html:gsub("(<%s*img [^>]*>)", processImg)
|
||||
logger.dbg("Images found in html:", images)
|
||||
|
||||
-- See what to do with images
|
||||
local use_img_2x = false
|
||||
if not include_images then
|
||||
if include_images then
|
||||
html = html:gsub("(<%s*img [^>]*>)", processImg)
|
||||
else
|
||||
-- Remove img tags to avoid little blank squares of missing images
|
||||
html = html:gsub("<%s*img [^>]*>", "")
|
||||
-- We could remove the whole image container <div class="thumb"...> ,
|
||||
-- but it's a lot of nested <div> and not easy to do.
|
||||
-- So the user will see the image legends and know a bit about
|
||||
-- the images he chose to not get.
|
||||
-- the images they chose to not get.
|
||||
end
|
||||
|
||||
UI:info(T(_("%1\n\nBuilding EPUB…"), message))
|
||||
-- Open the zip file (with .tmp for now, as crengine may still
|
||||
-- have a handle to the final epub_path, and we don't want to
|
||||
-- delete a good one if we fail/cancel later)
|
||||
local epub_path_tmp = epub_path .. ".tmp"
|
||||
local ZipWriter = require("ffi/zipwriter")
|
||||
local epub = ZipWriter:new{}
|
||||
if not epub:open(epub_path_tmp) then
|
||||
logger.dbg("Failed to open epub_path_tmp")
|
||||
return false
|
||||
end
|
||||
-- Force a GC to free the memory we used (the second call may help
|
||||
-- reclaim more memory).
|
||||
collectgarbage()
|
||||
collectgarbage()
|
||||
return images, html
|
||||
end
|
||||
|
||||
-- We now create and add all the required epub files
|
||||
function EpubBuilder:setTitle(title)
|
||||
self.title = title
|
||||
end
|
||||
|
||||
-- ----------------------------------------------------------------
|
||||
-- /mimetype : always "application/epub+zip"
|
||||
epub:add("mimetype", "application/epub+zip")
|
||||
|
||||
-- ----------------------------------------------------------------
|
||||
-- /META-INF/container.xml : always the same content
|
||||
epub:add("META-INF/container.xml", [[
|
||||
<?xml version="1.0"?>
|
||||
<container version="1.0" xmlns="urn:oasis:names:tc:opendocument:xmlns:container">
|
||||
<rootfiles>
|
||||
<rootfile full-path="OEBPS/content.opf" media-type="application/oebps-package+xml"/>
|
||||
</rootfiles>
|
||||
</container>]])
|
||||
logger.dbg("Added META-INF/container.xml")
|
||||
|
||||
-- ----------------------------------------------------------------
|
||||
-- OEBPS/content.opf : metadata + list of other files (paths relative to OEBPS/ directory)
|
||||
-- Other possible items in this file that are of no interest to crengine :
|
||||
-- In <manifest> :
|
||||
-- <item id="cover" href="title.html" media-type="application/xhtml+xml"/>
|
||||
-- <item id="cover-image" href="images/cover.png" media-type="image/png"/>
|
||||
-- (crengine only uses <meta name="cover" content="cover-image" /> to get the cover image)
|
||||
-- In <spine toc="ncx"> :
|
||||
-- <itemref idref="cover" linear="no"/>
|
||||
-- And a <guide> section :
|
||||
-- <guide>
|
||||
-- <reference href="title.html" type="cover" title="Cover"/>
|
||||
-- <reference href="toc.html" type="toc" title="Table of Contents" href="toc.html" />
|
||||
-- </guide>
|
||||
local content_opf_parts = {}
|
||||
-- head
|
||||
local meta_cover = "<!-- no cover image -->"
|
||||
if include_images and cover_imgid then
|
||||
meta_cover = string.format([[<meta name="cover" content="%s"/>]], cover_imgid)
|
||||
end
|
||||
logger.dbg("meta_cover:", meta_cover)
|
||||
table.insert(content_opf_parts, string.format([[
|
||||
<?xml version='1.0' encoding='utf-8'?>
|
||||
<package xmlns="http://www.idpf.org/2007/opf"
|
||||
xmlns:dc="http://purl.org/dc/elements/1.1/"
|
||||
unique-identifier="bookid" version="2.0">
|
||||
<metadata>
|
||||
<dc:title>%s</dc:title>
|
||||
<dc:publisher>KOReader %s</dc:publisher>
|
||||
%s
|
||||
</metadata>
|
||||
<manifest>
|
||||
<item id="ncx" href="toc.ncx" media-type="application/x-dtbncx+xml"/>
|
||||
<item id="content" href="content.html" media-type="application/xhtml+xml"/>
|
||||
<item id="css" href="stylesheet.css" media-type="text/css"/>
|
||||
]], page_htmltitle, Version:getCurrentRevision(), meta_cover))
|
||||
-- images files
|
||||
if include_images then
|
||||
for inum, img in ipairs(images) do
|
||||
table.insert(content_opf_parts, string.format([[ <item id="%s" href="%s" media-type="%s"/>%s]], img.imgid, img.imgpath, img.mimetype, "\n"))
|
||||
end
|
||||
end
|
||||
-- tail
|
||||
table.insert(content_opf_parts, [[
|
||||
</manifest>
|
||||
<spine toc="ncx">
|
||||
<itemref idref="content"/>
|
||||
</spine>
|
||||
</package>
|
||||
]])
|
||||
epub:add("OEBPS/content.opf", table.concat(content_opf_parts))
|
||||
logger.dbg("Added OEBPS/content.opf")
|
||||
|
||||
-- ----------------------------------------------------------------
|
||||
-- OEBPS/stylesheet.css
|
||||
--- @todo We told it we'd include a stylesheet.css, so it's probably best
|
||||
-- that we do. In theory, we could try to fetch any *.css files linked in
|
||||
-- the main html.
|
||||
epub:add("OEBPS/stylesheet.css", [[
|
||||
/* Empty */
|
||||
]])
|
||||
logger.dbg("Added OEBPS/stylesheet.css")
|
||||
|
||||
-- ----------------------------------------------------------------
|
||||
-- OEBPS/toc.ncx : table of content
|
||||
function EpubBuilder:addToc(chapters)
|
||||
local toc_ncx_parts = {}
|
||||
local depth = 0
|
||||
local cur_level = 0
|
||||
local np_end = [[</navPoint>]]
|
||||
local num = 1
|
||||
-- Add our own first section for first page, with page name as title
|
||||
table.insert(toc_ncx_parts, string.format([[<navPoint id="navpoint-%s" playOrder="%s"><navLabel><text>%s</text></navLabel><content src="content.html"/>]], num, num, page_htmltitle))
|
||||
table.insert(toc_ncx_parts, np_end)
|
||||
--- @todo Not essential for most articles, but longer articles might benefit
|
||||
-- from parsing <h*> tags and constructing a proper TOC
|
||||
while cur_level > 0 do
|
||||
table.insert(toc_ncx_parts, np_end)
|
||||
cur_level = cur_level - 1
|
||||
local num = 0
|
||||
|
||||
for index, chapter in ipairs(chapters) do
|
||||
-- Add nav part for each chapter.
|
||||
table.insert(
|
||||
toc_ncx_parts,
|
||||
string.format([[<navPoint id="navpoint-%s" playOrder="%s"><navLabel><text>%s</text></navLabel><content src="%s.html"/></navPoint>]],
|
||||
num,
|
||||
num,
|
||||
chapter.title,
|
||||
chapter.md5
|
||||
)
|
||||
)
|
||||
num = num + 1
|
||||
end
|
||||
-- Prepend NCX head
|
||||
table.insert(toc_ncx_parts, 1, string.format([[
|
||||
-- Prepend NCX head.
|
||||
table.insert(
|
||||
toc_ncx_parts,
|
||||
1,
|
||||
string.format([[
|
||||
<?xml version='1.0' encoding='utf-8'?>
|
||||
<!DOCTYPE ncx PUBLIC "-//NISO//DTD ncx 2005-1//EN" "http://www.daisy.org/z3986/2005/ncx-2005-1.dtd">
|
||||
<ncx xmlns="http://www.daisy.org/z3986/2005/ncx/" version="2005-1">
|
||||
@@ -433,99 +331,172 @@ function EpubDownloadBackend:createEpub(epub_path, html, url, include_images, me
|
||||
<text>%s</text>
|
||||
</docTitle>
|
||||
<navMap>
|
||||
]], bookid, depth, page_htmltitle))
|
||||
-- Append NCX tail
|
||||
table.insert(toc_ncx_parts, [[
|
||||
]],
|
||||
"placeholder_bookid",
|
||||
depth,
|
||||
self.title
|
||||
)
|
||||
)
|
||||
-- Append NCX tail.
|
||||
table.insert(
|
||||
toc_ncx_parts,
|
||||
[[
|
||||
</navMap>
|
||||
</ncx>
|
||||
]])
|
||||
epub:add("OEBPS/toc.ncx", table.concat(toc_ncx_parts))
|
||||
logger.dbg("Added OEBPS/toc.ncx")
|
||||
|
||||
-- ----------------------------------------------------------------
|
||||
-- OEBPS/content.html
|
||||
epub:add("OEBPS/content.html", html)
|
||||
logger.dbg("Added OEBPS/content.html")
|
||||
|
||||
-- Force a GC to free the memory we used till now (the second call may
|
||||
-- help reclaim more memory).
|
||||
collectgarbage()
|
||||
collectgarbage()
|
||||
|
||||
-- ----------------------------------------------------------------
|
||||
-- OEBPS/images/*
|
||||
if include_images then
|
||||
local nb_images = #images
|
||||
for inum, img in ipairs(images) do
|
||||
-- Process can be interrupted at this point between each image download
|
||||
-- by tapping while the InfoMessage is displayed
|
||||
-- We use the fast_refresh option from image #2 for a quicker download
|
||||
local go_on = UI:info(T(_("%1\n\nRetrieving image %2 / %3 …"), message, inum, nb_images), inum >= 2)
|
||||
if not go_on then
|
||||
logger.dbg("cancelled")
|
||||
cancelled = true
|
||||
break
|
||||
end
|
||||
local src = img.src
|
||||
if use_img_2x and img.src2x then
|
||||
src = img.src2x
|
||||
end
|
||||
logger.dbg("Getting img ", src)
|
||||
local success, content = getUrlContent(src)
|
||||
-- success, content = getUrlContent(src..".unexistant") -- to simulate failure
|
||||
if success then
|
||||
logger.dbg("success, size:", #content)
|
||||
else
|
||||
logger.dbg("failed fetching:", src)
|
||||
end
|
||||
if success then
|
||||
-- Images do not need to be compressed, so spare some cpu cycles
|
||||
local no_compression = true
|
||||
if img.mimetype == "image/svg+xml" then -- except for SVG images (which are XML text)
|
||||
no_compression = false
|
||||
end
|
||||
epub:add("OEBPS/"..img.imgpath, content, no_compression)
|
||||
logger.dbg("Adding OEBPS/"..img.imgpath)
|
||||
else
|
||||
go_on = UI:confirm(T(_("Downloading image %1 failed. Continue anyway?"), inum), _("Stop"), _("Continue"))
|
||||
if not go_on then
|
||||
cancelled = true
|
||||
break
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
-- Done with adding files
|
||||
if cancelled then
|
||||
if UI:confirm(_("Download did not complete.\nDo you want to create an EPUB with the already downloaded images?"), _("Don't create"), _("Create")) then
|
||||
cancelled = false
|
||||
end
|
||||
end
|
||||
if cancelled then
|
||||
UI:info(_("Canceled. Cleaning up…"))
|
||||
else
|
||||
UI:info(T(_("%1\n\nPacking EPUB…"), message))
|
||||
end
|
||||
epub:close()
|
||||
|
||||
if cancelled then
|
||||
-- Build was cancelled, remove half created .epub
|
||||
if lfs.attributes(epub_path_tmp, "mode") == "file" then
|
||||
os.remove(epub_path_tmp)
|
||||
end
|
||||
return false
|
||||
end
|
||||
|
||||
-- Finally move the .tmp to the final file
|
||||
os.rename(epub_path_tmp, epub_path)
|
||||
logger.dbg("successfully created:", epub_path)
|
||||
|
||||
-- Force a GC to free the memory we used (the second call may help
|
||||
-- reclaim more memory).
|
||||
collectgarbage()
|
||||
collectgarbage()
|
||||
return true
|
||||
]]
|
||||
)
|
||||
self.ncx_toc = toc_ncx_parts
|
||||
end
|
||||
|
||||
return EpubDownloadBackend
|
||||
function EpubBuilder:addManifest(chapters, images)
|
||||
local content_opf_parts = {}
|
||||
local spine_parts = {}
|
||||
local meta_cover = "<!-- no cover image -->"
|
||||
|
||||
if #images > 0 then
|
||||
for inum, image in ipairs(images) do
|
||||
table.insert(
|
||||
content_opf_parts,
|
||||
string.format([[<item id="%s" href="%s" media-type="%s"/>%s]],
|
||||
image.imgid,
|
||||
image.imgpath,
|
||||
image.mimetype,
|
||||
"\n"
|
||||
)
|
||||
)
|
||||
-- See if the image has the tag we previously set indicating
|
||||
-- it can be used as a cover image.
|
||||
if image.cover_image then
|
||||
meta_cover = string.format([[<meta name="cover" content="%s"/>]], image.imgid)
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
if #chapters > 0 then
|
||||
for index, chapter in ipairs(chapters) do
|
||||
table.insert(
|
||||
content_opf_parts,
|
||||
string.format([[<item id="%s" href="%s.html" media-type="application/xhtml+xml"/>%s]],
|
||||
chapter.md5,
|
||||
chapter.md5,
|
||||
"\n"
|
||||
)
|
||||
)
|
||||
table.insert(
|
||||
spine_parts,
|
||||
string.format([[<itemref idref="%s"/>%s]],
|
||||
chapter.md5,
|
||||
"\n"
|
||||
)
|
||||
)
|
||||
end
|
||||
end
|
||||
|
||||
logger.dbg("meta_cover:", meta_cover)
|
||||
|
||||
table.insert(
|
||||
content_opf_parts,
|
||||
1,
|
||||
string.format([[<?xml version='1.0' encoding='utf-8'?>
|
||||
<package xmlns="http://www.idpf.org/2007/opf"
|
||||
xmlns:dc="http://purl.org/dc/elements/1.1/"
|
||||
unique-identifier="bookid" version="2.0">
|
||||
<metadata>
|
||||
<dc:title>%s</dc:title>
|
||||
<dc:publisher>KOReader %s</dc:publisher>
|
||||
%s
|
||||
</metadata>
|
||||
<manifest>
|
||||
<item id="ncx" href="toc.ncx" media-type="application/x-dtbncx+xml"/>
|
||||
]], self.title, Version:getCurrentRevision(), meta_cover)
|
||||
)
|
||||
-- tail
|
||||
table.insert(
|
||||
content_opf_parts,
|
||||
string.format([[
|
||||
</manifest>
|
||||
<spine toc="ncx">
|
||||
%s
|
||||
</spine>
|
||||
</package>
|
||||
]], table.concat(spine_parts)
|
||||
)
|
||||
)
|
||||
|
||||
self.ncx_manifest = content_opf_parts
|
||||
end
|
||||
|
||||
function EpubBuilder:addContents(chapters)
|
||||
local contents = {}
|
||||
|
||||
for index, chapter in ipairs(chapters) do
|
||||
table.insert(
|
||||
contents,
|
||||
{
|
||||
filename = chapter.md5 .. ".html",
|
||||
html = chapter.html,
|
||||
}
|
||||
)
|
||||
end
|
||||
|
||||
self.ncx_contents = contents
|
||||
end
|
||||
|
||||
function EpubBuilder:addImages(images)
|
||||
local images_table = {}
|
||||
|
||||
for index, image in ipairs(images) do
|
||||
if not image.src then
|
||||
return
|
||||
end
|
||||
|
||||
local src = image.src
|
||||
local success, content = NewsHelpers:getUrlContent(src)
|
||||
-- success, content = NewsHelpers:getUrlContent(src..".unexistant") -- to simulate failure
|
||||
if success then
|
||||
logger.dbg("EpubBuilder:addImages = success, size:", #content)
|
||||
else
|
||||
logger.dbg("EpubBuilder:addImages = failure fetching:", src)
|
||||
end
|
||||
|
||||
if success then
|
||||
-- Images do not need to be compressed, so spare some cpu cycles
|
||||
local no_compression = true
|
||||
if image.mimetype == "image/svg+xml" then -- except for SVG images (which are XML text)
|
||||
no_compression = false
|
||||
end
|
||||
table.insert(
|
||||
images_table,
|
||||
{
|
||||
path = image.imgpath,
|
||||
content = content,
|
||||
compression = no_compression
|
||||
}
|
||||
)
|
||||
end
|
||||
end
|
||||
|
||||
self.ncx_images = images_table
|
||||
|
||||
end
|
||||
|
||||
-- There can be multiple links.
|
||||
-- For now we just assume the first link is probably the right one.
|
||||
--- @todo Write unit tests.
|
||||
-- Some feeds that can be used for unit test.
|
||||
-- http://fransdejonge.com/feed/ for multiple links.
|
||||
-- https://github.com/koreader/koreader/commits/master.atom for single link with attributes.
|
||||
function EpubBuilder:getFeedLink(possible_link)
|
||||
local E = {}
|
||||
logger.dbg("Possible link", possible_link)
|
||||
if type(possible_link) == "string" then
|
||||
return possible_link
|
||||
elseif (possible_link._attr or E).href then
|
||||
return possible_link._attr.href
|
||||
elseif ((possible_link[1] or E)._attr or E).href then
|
||||
return possible_link[1]._attr.href
|
||||
end
|
||||
end
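For reference, these are the three link shapes handled above, with illustrative values matching what the LuaXML deserializer produces for a plain <link>, a single attributed <link>, and repeated <link> elements:

local builder = EpubBuilder:new{}
-- Plain string link.
builder:getFeedLink("https://example.com/post/1")
-- Single link with attributes.
builder:getFeedLink({ _attr = { href = "https://example.com/post/2" } })
-- Multiple links; the first one wins.
builder:getFeedLink({ { _attr = { href = "https://example.com/post/3" } } })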
|
||||
|
||||
|
||||
return EpubBuilder
|
||||
|
||||
plugins/newsdownloader.koplugin/feed_source.lua (new file, 398 lines)
@@ -0,0 +1,398 @@
|
||||
local BD = require("ui/bidi")
|
||||
local DownloadBackend = require("epubdownloadbackend")
|
||||
local NewsHelpers = require("http_utilities")
|
||||
local dateparser = require("lib.dateparser")
|
||||
local lfs = require("libs/libkoreader-lfs")
local logger = require("logger")
|
||||
local md5 = require("ffi/sha2").md5
|
||||
local util = require("util")
|
||||
local _ = require("gettext")
|
||||
local N_ = _.ngettext
|
||||
local FFIUtil = require("ffi/util")
|
||||
local T = FFIUtil.template
|
||||
|
||||
local FeedSource = {
|
||||
file_extension = ".epub"
|
||||
}
|
||||
|
||||
function FeedSource:new(o)
|
||||
o = o or {}
|
||||
self.__index = self
|
||||
setmetatable(o, self)
|
||||
return o
|
||||
end
|
||||
|
||||
function FeedSource:getInitializedFeeds(feed_list, progress_callback, error_callback)
|
||||
local initialized_feeds = {}
|
||||
local unsupported_feeds_urls = {}
|
||||
|
||||
for idx, feed in ipairs(feed_list) do
|
||||
local url = feed[1]
|
||||
-- Show a UI update
|
||||
progress_callback(T(
|
||||
_("Setting up feed %1 of %2."),
|
||||
idx,
|
||||
#feed_list
|
||||
))
|
||||
-- Initialize the feed
|
||||
local ok, response = pcall(function()
|
||||
return self:initializeDocument(
|
||||
self:fetchDocumentByUrl(url)
|
||||
)
|
||||
end)
|
||||
-- If the initialization worked, add the feed
|
||||
-- to a list of initialized feeds
|
||||
if ok and response then
|
||||
table.insert(initialized_feeds, {
|
||||
config = feed,
|
||||
document = response,
|
||||
})
|
||||
else
|
||||
table.insert(unsupported_feeds_urls, {
|
||||
url .. ": " .. response
|
||||
})
|
||||
end
|
||||
end
|
||||
|
||||
if #unsupported_feeds_urls > 0 then
|
||||
-- When errors are present, we build an error message that includes
-- information about the source of each error.
|
||||
local unsupported_urls = ""
|
||||
for key, value in pairs(unsupported_feeds_urls) do
|
||||
-- Create the error message.
|
||||
unsupported_urls = unsupported_urls .. value[1] .. "\n\n"
-- Add a separator between entries, except after the last one.
if key ~= #unsupported_feeds_urls then
unsupported_urls = BD.url(unsupported_urls) .. ", "
|
||||
end
|
||||
end
|
||||
error_callback(
|
||||
T(N_("Could not initialize a feed:\n\n%2\n\nPlease review your feed configuration.", "Could not initialize %1 feeds:\n\n%2\n\nPlease review your feed configurations.", #unsupported_feeds_urls),
|
||||
#unsupported_feeds_urls, unsupported_urls)
|
||||
)
|
||||
end
|
||||
|
||||
return initialized_feeds
|
||||
end
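A rough sketch of how this is meant to be called. The feed URL and config fields are illustrative, and the require path assumes the plugin directory is on the package path, as it is for the plugin's own modules:

local FeedSource = require("feed_source")
local logger = require("logger")

local feed_list = {
    {
        "https://github.com/koreader/koreader/commits/master.atom",
        limit = 5,
        download_full_article = false,
        include_images = true,
        enable_filter = false,
        volumize = true,
    },
}

local source = FeedSource:new{}
local feeds = source:getInitializedFeeds(
    feed_list,
    function(msg) logger.info(msg) end, -- progress_callback
    function(msg) logger.warn(msg) end  -- error_callback
)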
|
||||
|
||||
-- This function contacts the feed website and attempts to get
|
||||
-- the RSS/Atom document with a list of the latest items.
|
||||
function FeedSource:fetchDocumentByUrl(url)
|
||||
local document
|
||||
-- Get the XML document representing the feed
|
||||
local ok, response = pcall(function()
|
||||
local success, content = NewsHelpers:getUrlContent(url)
|
||||
if (success) then
|
||||
return content
|
||||
else
|
||||
error("Failed to download content for url: " .. url, 0)
|
||||
end
|
||||
end)
|
||||
-- Check to see if a response is available to deserialize.
|
||||
if ok then
|
||||
-- Deserialize the XML document into something Lua can use
|
||||
document = NewsHelpers:deserializeXMLString(response)
|
||||
end
|
||||
-- Return the document or any errors that may have occurred
|
||||
if ok or document then
|
||||
return document
|
||||
else
|
||||
if not ok then
|
||||
error("(Reason: Failed to download feed document)", 0)
|
||||
else
|
||||
error("(Reason: Error during feed document deserialization)", 0)
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
-- Supply this method with the XML document returned by the feed,
|
||||
-- and it will initialize the document by extracting the feed title,
|
||||
-- feed items, and items count.
|
||||
function FeedSource:initializeDocument(document)
|
||||
local feed_title
|
||||
local feed_items
|
||||
local total_items
|
||||
|
||||
local ok = pcall(function()
|
||||
return self:getFeedType(
|
||||
document,
|
||||
function()
|
||||
-- RSS callback
|
||||
feed_title = util.htmlEntitiesToUtf8(document.rss.channel.title)
|
||||
feed_items = document.rss.channel.item
|
||||
total_items = #document.rss.channel.item
|
||||
end,
|
||||
function()
|
||||
-- Atom callback
|
||||
feed_title = FeedSource:getFeedTitle(document.feed.title)
|
||||
feed_items = document.feed.entry
|
||||
total_items = #document.feed.entry
|
||||
end
|
||||
)
|
||||
end)
|
||||
|
||||
if ok then
|
||||
document.title = feed_title
|
||||
document.items = feed_items
|
||||
document.total_items = total_items
|
||||
return document
|
||||
else
|
||||
error(_("Could not initialize feed document"), 0)
|
||||
end
|
||||
end
|
||||
|
||||
function FeedSource:getItemsContent(feed, progress_callback, error_callback)
|
||||
local limit = tonumber(feed.config.limit)
|
||||
local total_items = (limit == 0) and
|
||||
feed.document.total_items or
|
||||
limit
|
||||
local initialized_feed_items = {}
|
||||
-- Download each item in the feed.
|
||||
for index, item in pairs(feed.document.items) do
|
||||
-- If limit has been met, stop downloading feed.
|
||||
if limit ~= 0 and index - 1 == limit then
|
||||
break
|
||||
end
|
||||
-- Display feedback to user.
|
||||
progress_callback(T(
|
||||
_("%3\n Downloading item %1 of %2"),
|
||||
index,
|
||||
total_items,
|
||||
feed.document.title
|
||||
))
|
||||
-- Download the article's HTML.
|
||||
local ok, response = pcall(function()
|
||||
return self:initializeItemHtml(
|
||||
feed,
|
||||
self:getItemHtml(
|
||||
item,
|
||||
feed.config.download_full_article
|
||||
)
|
||||
)
|
||||
end)
|
||||
|
||||
-- Add the result to our table, or send a
|
||||
-- result to the error callback.
|
||||
if ok then
|
||||
table.insert(initialized_feed_items, {
|
||||
html = response.html,
|
||||
images = response.images,
|
||||
item_slug = FeedSource:getItemTitleWithDate(item),
|
||||
item_title = item.title,
|
||||
md5 = md5(item.title),
|
||||
feed_title = feed.document.title,
|
||||
})
|
||||
else
|
||||
error_callback(
|
||||
T(_("Could not get content for: %1"), feed.document.title)
|
||||
)
|
||||
end
|
||||
|
||||
end
|
||||
|
||||
if #initialized_feed_items > 0 then
|
||||
return initialized_feed_items
|
||||
else
|
||||
return nil
|
||||
end
|
||||
end
|
||||
|
||||
function FeedSource:initializeItemHtml(feed, html)
|
||||
local url = feed.config[1]
|
||||
-- local download_full_article = feed.config.download_full_article ~= false
|
||||
local include_images = feed.config.include_images ~= false
|
||||
local filter_element = feed.config.filter_element or
|
||||
feed.config.filter_element == nil
|
||||
local enable_filter = feed.config.enable_filter ~= false
|
||||
local item_images, item_html = DownloadBackend:getImagesAndHtml(
|
||||
html,
|
||||
url,
|
||||
include_images,
|
||||
enable_filter,
|
||||
filter_element
|
||||
)
|
||||
return {
|
||||
html = item_html,
|
||||
images = item_images
|
||||
}
|
||||
end
|
||||
|
||||
function FeedSource:getFeedType(document, rss_cb, atom_cb)
|
||||
-- Check to see if the feed uses RSS.
|
||||
local is_rss = document.rss and
|
||||
document.rss.channel and
|
||||
document.rss.channel.title and
|
||||
document.rss.channel.item and
|
||||
document.rss.channel.item[1] and
|
||||
document.rss.channel.item[1].title and
|
||||
document.rss.channel.item[1].link
|
||||
-- Check to see if the feed uses Atom.
|
||||
local is_atom = document.feed and
|
||||
document.feed.title and
|
||||
document.feed.entry[1] and
|
||||
document.feed.entry[1].title and
|
||||
document.feed.entry[1].link
|
||||
-- Setup the feed values based on feed type
|
||||
if is_atom then
|
||||
return atom_cb()
|
||||
elseif is_rss then
|
||||
return rss_cb()
|
||||
end
|
||||
-- If we get here, the document was neither valid RSS nor Atom,
-- so raise an error describing what could not be processed.
|
||||
if not is_rss or not is_atom then
|
||||
local error_message
|
||||
if not is_rss then
|
||||
error_message = _("(Reason: Couldn't process RSS)")
|
||||
elseif not is_atom then
|
||||
error_message = _("(Reason: Couldn't process Atom)")
|
||||
end
|
||||
error(error_message)
|
||||
end
|
||||
end
|
||||
|
||||
function FeedSource:getItemHtml(item, download_full_article)
|
||||
if download_full_article then
|
||||
return NewsHelpers:loadPage(
|
||||
FeedSource:getFeedLink(item.link)
|
||||
)
|
||||
else
|
||||
local feed_description = item.description or item.summary
|
||||
local footer = _("This is just a description of the feed. To download the full article instead, go to the News Downloader settings and change 'download_full_article' to 'true'.")
|
||||
return string.format([[<!DOCTYPE html>
|
||||
<html>
|
||||
<head><meta charset='UTF-8'><title>%s</title></head>
|
||||
<body><header><h2>%s</h2></header><article>%s</article>
|
||||
<br><footer><small>%s</small></footer>
|
||||
</body>
|
||||
</html>]], item.title, item.title, feed_description, footer)
|
||||
end
|
||||
end
|
||||
|
||||
-- @todo: move this elsewhere
|
||||
function FeedSource:getEpubOutputDir(download_dir, sub_dir, epub_title)
|
||||
|
||||
local feed_output_dir = ("%s%s/"):format(
|
||||
download_dir,
|
||||
util.getSafeFilename(util.htmlEntitiesToUtf8(sub_dir)))
|
||||
|
||||
-- Create the output directory if it doesn't exist.
|
||||
if not lfs.attributes(feed_output_dir, "mode") then
|
||||
lfs.mkdir(feed_output_dir)
|
||||
end
|
||||
|
||||
local file_name = FeedSource:getFeedTitle(epub_title)
|
||||
|
||||
return ("%s%s%s"):format(
|
||||
feed_output_dir,
|
||||
file_name,
|
||||
self.file_extension
|
||||
)
|
||||
end
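For example (the download directory and titles are illustrative; the return value is the absolute EPUB path, with the per-feed sub-directory created if needed):

local epub_path = FeedSource:getEpubOutputDir(
    "/mnt/onboard/news/",             -- download_dir (hypothetical)
    "KOReader commits",               -- sub_dir, usually the feed title
    "21-08-10_14-00_KOReader commits" -- epub_title, usually the title prefixed with a date
)
-- roughly "/mnt/onboard/news/KOReader commits/21-08-10_14-00_KOReader commits.epub"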
|
||||
|
||||
function FeedSource:createEpub(title, chapters, abs_output_path, progress_callback, error_callback)
|
||||
|
||||
local file_exists = lfs.attributes(abs_output_path, "mode")
|
||||
|
||||
if file_exists then
|
||||
logger.dbg("NewsDownloader: Skipping. EPUB file already exists", abs_output_path)
|
||||
return true
|
||||
end
|
||||
|
||||
if #chapters == 0 then
|
||||
error(_("Error: chapters contains 0 items"), 0)
|
||||
end
|
||||
|
||||
local images = {}
|
||||
|
||||
for index, chapter in ipairs(chapters) do
|
||||
for jndex, image in ipairs(chapter.images) do
|
||||
table.insert(
|
||||
images,
|
||||
image
|
||||
)
|
||||
end
|
||||
end
|
||||
|
||||
local epub = DownloadBackend:new{}
|
||||
|
||||
progress_callback(T(_("Building EPUB %1"), title))
|
||||
epub:setTitle(title)
|
||||
epub:addToc(chapters)
|
||||
epub:addManifest(chapters, images)
|
||||
|
||||
progress_callback(T(_("Building EPUB %1: %2"), title, _("Adding contents")))
|
||||
epub:addContents(chapters)
|
||||
|
||||
progress_callback(T(_("Building EPUB %1: %2"), title, _("Adding images")))
|
||||
epub:addImages(images)
|
||||
|
||||
progress_callback(T(_("Building EPUB %1: %2"), title, _("Writing EPUB to disk")))
|
||||
local ok = pcall(function()
|
||||
return epub:build(abs_output_path)
|
||||
end)
|
||||
|
||||
if ok then
|
||||
if lfs.attributes(abs_output_path, "mode") then
|
||||
return true
|
||||
end
|
||||
end
|
||||
|
||||
return false
|
||||
end
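Continuing the sketch from getInitializedFeeds above (reusing its source, feeds and logger locals), a single volumized feed would flow roughly like this; the callbacks and download directory are placeholders:

for _, feed in ipairs(feeds) do
    local chapters = source:getItemsContent(
        feed,
        function(msg) logger.info(msg) end,
        function(msg) logger.warn(msg) end
    )
    if chapters then
        local epub_path = source:getEpubOutputDir(
            "/mnt/onboard/news/",      -- hypothetical download_dir
            feed.document.title,
            source:getFeedTitleWithDate(feed)
        )
        source:createEpub(
            feed.document.title,
            chapters,
            epub_path,
            function(msg) logger.info(msg) end,
            function(msg) logger.warn(msg) end
        )
    end
end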
|
||||
|
||||
local function parseDate(dateTime)
|
||||
-- Uses lua-feedparser https://github.com/slact/lua-feedparser
|
||||
-- feedparser is available under the (new) BSD license.
|
||||
-- see: koreader/plugins/newsdownloader.koplugin/lib/LICENCE_lua-feedparser
|
||||
local date = dateparser.parse(dateTime)
|
||||
return os.date("%y-%m-%d_%H-%M_", date)
|
||||
end
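For reference, the resulting filename prefix looks like this (illustrative input; the hour depends on the local timezone):

local prefix = parseDate("Tue, 10 Aug 2021 14:00:00 GMT")
-- prefix is e.g. "21-08-10_16-00_"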
|
||||
|
||||
function FeedSource:getFeedTitleWithDate(feed)
|
||||
local title = util.getSafeFilename(FeedSource:getFeedTitle(feed.document.title))
|
||||
return os.date("%y-%m-%d_%H-%M_") .. title
|
||||
end
|
||||
|
||||
-- Creates a title with date from a feed item.
|
||||
function FeedSource:getItemTitleWithDate(item)
|
||||
local title = util.getSafeFilename(FeedSource:getFeedTitle(item.title))
|
||||
if item.updated then
|
||||
title = parseDate(item.updated) .. title
|
||||
elseif item.pubDate then
|
||||
title = parseDate(item.pubDate) .. title
|
||||
elseif item.published then
|
||||
title = parseDate(item.published) .. title
|
||||
end
|
||||
return title
|
||||
end
|
||||
|
||||
-- If a title looks like <title>blabla</title> it'll just be feed.title.
|
||||
-- If a title looks like <title attr="alb">blabla</title> then we get a table
|
||||
-- where [1] is the title string and the attributes are also available.
|
||||
function FeedSource:getFeedTitle(possible_title)
|
||||
if type(possible_title) == "string" then
|
||||
return util.htmlEntitiesToUtf8(possible_title)
|
||||
elseif possible_title[1] and type(possible_title[1]) == "string" then
|
||||
return util.htmlEntitiesToUtf8(possible_title[1])
|
||||
end
|
||||
end
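The two title shapes this covers, with illustrative values:

FeedSource:getFeedTitle("Plain &amp; simple title")                         -- "Plain & simple title"
FeedSource:getFeedTitle({ "Attributed title", _attr = { type = "text" } }) -- "Attributed title"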
|
||||
-- There can be multiple links.
|
||||
-- For now we just assume the first link is probably the right one.
|
||||
--- @todo Write unit tests.
|
||||
-- Some feeds that can be used for unit test.
|
||||
-- http://fransdejonge.com/feed/ for multiple links.
|
||||
-- https://github.com/koreader/koreader/commits/master.atom for single link with attributes.
|
||||
function FeedSource:getFeedLink(possible_link)
|
||||
local E = {}
|
||||
if type(possible_link) == "string" then
|
||||
return possible_link
|
||||
elseif (possible_link._attr or E).href then
|
||||
return possible_link._attr.href
|
||||
elseif ((possible_link[1] or E)._attr or E).href then
|
||||
return possible_link[1]._attr.href
|
||||
end
|
||||
end
|
||||
|
||||
|
||||
return FeedSource
|
||||
@@ -7,7 +7,10 @@ local FeedView = {
|
||||
DOWNLOAD_FULL_ARTICLE = "download_full_article",
|
||||
INCLUDE_IMAGES = "include_images",
|
||||
ENABLE_FILTER = "enable_filter",
|
||||
FILTER_ELEMENT = "filter_element"
|
||||
FILTER_ELEMENT = "filter_element",
|
||||
VOLUMIZE = "volumize",
|
||||
ACTION_RESET_HISTORY = "reset_history",
|
||||
ACTION_DELETE_FEED = "delete_feed",
|
||||
}
|
||||
|
||||
function FeedView:getList(feed_config, callback, edit_feed_attribute_callback, delete_feed_callback)
|
||||
@@ -49,7 +52,7 @@ function FeedView:getList(feed_config, callback, edit_feed_attribute_callback, d
|
||||
return view_content
|
||||
end
|
||||
|
||||
function FeedView:getItem(id, feed, edit_feed_callback, delete_feed_callback)
|
||||
function FeedView:getItem(id, feed, edit_feed_callback, feed_action_callback)
|
||||
|
||||
logger.dbg("NewsDownloader:", feed)
|
||||
|
||||
@@ -67,6 +70,7 @@ function FeedView:getItem(id, feed, edit_feed_callback, delete_feed_callback)
|
||||
local include_images = feed.include_images ~= false
|
||||
local enable_filter = feed.enable_filter ~= false
|
||||
local filter_element = feed.filter_element
|
||||
local volumize = feed.volumize ~= false
|
||||
|
||||
local vc = {
|
||||
{
|
||||
@@ -136,11 +140,22 @@ function FeedView:getItem(id, feed, edit_feed_callback, delete_feed_callback)
|
||||
)
|
||||
end
|
||||
},
|
||||
{
|
||||
_("Volumize feed"),
|
||||
volumize,
|
||||
callback = function()
|
||||
edit_feed_callback(
|
||||
id,
|
||||
FeedView.VOLUMIZE,
|
||||
volumize
|
||||
)
|
||||
end
|
||||
},
|
||||
}
|
||||
|
||||
-- We don't always display this. For instance: if a feed
|
||||
-- is being created, this button is not necessary.
|
||||
if delete_feed_callback then
|
||||
-- These actions only pertain to initiated feeds, so we don't always
|
||||
-- display them.
|
||||
if feed_action_callback then
|
||||
table.insert(
|
||||
vc,
|
||||
"---"
|
||||
@@ -151,8 +166,22 @@ function FeedView:getItem(id, feed, edit_feed_callback, delete_feed_callback)
|
||||
_("Delete feed"),
|
||||
"",
|
||||
callback = function()
|
||||
delete_feed_callback(
|
||||
id
|
||||
feed_action_callback(
|
||||
id,
|
||||
FeedView.ACTION_DELETE_FEED
|
||||
)
|
||||
end
|
||||
}
|
||||
)
|
||||
table.insert(
|
||||
vc,
|
||||
{
|
||||
_("Reset feed history"),
|
||||
"",
|
||||
callback = function()
|
||||
feed_action_callback(
|
||||
url,
|
||||
FeedView.ACTION_RESET_HISTORY
|
||||
)
|
||||
end
|
||||
}
|
||||
|
||||
plugins/newsdownloader.koplugin/http_utilities.lua (new file, 126 lines)
@@ -0,0 +1,126 @@
|
||||
local logger = require("logger")
|
||||
local http = require("socket.http")
|
||||
local socketutil = require("socketutil")
|
||||
local socket_url = require("socket.url")
|
||||
local socket = require("socket")
|
||||
local ltn12 = require("ltn12")
|
||||
|
||||
local NewsHelpers = {
|
||||
}
|
||||
|
||||
local max_redirects = 5; --prevent infinite redirects
|
||||
|
||||
-- Get URL content
|
||||
function NewsHelpers:getUrlContent(url, timeout, maxtime, redirectCount)
|
||||
logger.dbg("getUrlContent(", url, ",", timeout, ",", maxtime, ",", redirectCount, ")")
|
||||
if not redirectCount then
|
||||
redirectCount = 0
|
||||
elseif redirectCount == max_redirects then
|
||||
error("EpubDownloadBackend: reached max redirects: ", redirectCount)
|
||||
end
|
||||
|
||||
if not timeout then timeout = 10 end
|
||||
logger.dbg("timeout:", timeout)
|
||||
|
||||
local sink = {}
|
||||
local parsed = socket_url.parse(url)
|
||||
socketutil:set_timeout(timeout, maxtime or 30)
|
||||
local request = {
|
||||
url = url,
|
||||
method = "GET",
|
||||
sink = maxtime and socketutil.table_sink(sink) or ltn12.sink.table(sink),
|
||||
}
|
||||
logger.dbg("request:", request)
|
||||
local code, headers, status = socket.skip(1, http.request(request))
|
||||
socketutil:reset_timeout()
|
||||
logger.dbg("After http.request")
|
||||
local content = table.concat(sink) -- empty or content accumulated till now
|
||||
logger.dbg("type(code):", type(code))
|
||||
logger.dbg("code:", code)
|
||||
logger.dbg("headers:", headers)
|
||||
logger.dbg("status:", status)
|
||||
logger.dbg("#content:", #content)
|
||||
|
||||
if code == socketutil.TIMEOUT_CODE or
|
||||
code == socketutil.SSL_HANDSHAKE_CODE or
|
||||
code == socketutil.SINK_TIMEOUT_CODE
|
||||
then
|
||||
logger.warn("request interrupted:", code)
|
||||
return false, code
|
||||
end
|
||||
if headers == nil then
|
||||
logger.warn("No HTTP headers:", code, status)
|
||||
return false, "Network or remote server unavailable"
|
||||
end
|
||||
if not code or string.sub(code, 1, 1) ~= "2" then -- all 200..299 HTTP codes are OK
|
||||
if code and code > 299 and code < 400 and headers and headers.location then -- handle 301, 302...
|
||||
local redirected_url = headers.location
|
||||
local parsed_redirect_location = socket_url.parse(redirected_url)
|
||||
if not parsed_redirect_location.host then
|
||||
parsed_redirect_location.host = parsed.host
|
||||
parsed_redirect_location.scheme = parsed.scheme
|
||||
redirected_url = socket_url.build(parsed_redirect_location)
|
||||
end
|
||||
logger.dbg("getUrlContent: Redirecting to url: ", redirected_url)
|
||||
return self:getUrlContent(redirected_url, timeout, maxtime, redirectCount + 1)
|
||||
else
|
||||
-- error("EpubDownloadBackend: Don't know how to handle HTTP response status: " .. status)
|
||||
-- error("EpubDownloadBackend: Don't know how to handle HTTP response status.")
|
||||
logger.warn("HTTP status not okay:", code, status)
|
||||
return false, status
|
||||
end
|
||||
end
|
||||
if headers and headers["content-length"] then
|
||||
-- Check we really got the announced content size
|
||||
local content_length = tonumber(headers["content-length"])
|
||||
if #content ~= content_length then
|
||||
return false, "Incomplete content received"
|
||||
end
|
||||
end
|
||||
logger.dbg("Returning content ok")
|
||||
return true, content
|
||||
end
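A typical call looks like this (URL illustrative); the second return value is the content on success, or an error string/code on failure:

local ok, content = NewsHelpers:getUrlContent("https://example.com/feed.xml", 10, 30)
if ok then
    logger.dbg("fetched", #content, "bytes")
else
    logger.warn("fetch failed:", content)
end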
|
||||
|
||||
function NewsHelpers:loadPage(url)
|
||||
logger.dbg("Load page: ", url)
|
||||
local success, content
|
||||
--[[ if self.trap_widget then -- if previously set with EpubDownloadBackend:setTrapWidget()
|
||||
local Trapper = require("ui/trapper")
|
||||
local timeout, maxtime = 30, 60
|
||||
-- We use dismissableRunInSubprocess with complex return values:
|
||||
completed, success, content = Trapper:dismissableRunInSubprocess(function()
|
||||
return NewsHelpers:getUrlContent(url, timeout, maxtime)
|
||||
end, self.trap_widget)
|
||||
if not completed then
|
||||
error(self.dismissed_error_code) -- "Interrupted by user"
|
||||
end
|
||||
else]]--
|
||||
local timeout, maxtime = 10, 60
|
||||
success, content = NewsHelpers:getUrlContent(url, timeout, maxtime)
|
||||
-- end
|
||||
logger.dbg("success:", success, "type(content):", type(content), "content:", content:sub(1, 500), "...")
|
||||
if not success then
|
||||
error(content)
|
||||
else
|
||||
return content
|
||||
end
|
||||
end
|
||||
|
||||
function NewsHelpers:deserializeXMLString(xml_str)
|
||||
-- uses LuaXML https://github.com/manoelcampos/LuaXML
|
||||
-- The MIT License (MIT)
|
||||
-- Copyright (c) 2016 Manoel Campos da Silva Filho
|
||||
-- see: koreader/plugins/newsdownloader.koplugin/lib/LICENSE_LuaXML
|
||||
local treehdl = require("lib/handler")
|
||||
local libxml = require("lib/xml")
|
||||
-- Instantiate the object that parses the XML file as a Lua table.
|
||||
local xmlhandler = treehdl.simpleTreeHandler()
|
||||
-- Instantiate the object that parses the XML to a Lua table.
|
||||
local ok = pcall(function()
|
||||
libxml.xmlParser(xmlhandler):parse(xml_str)
|
||||
end)
|
||||
if not ok then return end
|
||||
return xmlhandler.root
|
||||
end
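A small illustrative round trip; with a single <item>, simpleTreeHandler exposes it as a table rather than an array:

local xml = [[<rss><channel><title>Example</title>
<item><title>Hello</title><link>https://example.com/1</link></item>
</channel></rss>]]
local doc = NewsHelpers:deserializeXMLString(xml)
if doc then
    logger.dbg(doc.rss.channel.title)      -- "Example"
    logger.dbg(doc.rss.channel.item.title) -- "Hello"
end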
|
||||
|
||||
return NewsHelpers
|
||||
File diff suppressed because it is too large.