mirror of
https://github.com/koreader/koreader.git
synced 2025-08-10 00:52:38 +00:00
NewsDownloader: use feed.description as news context instead of downloading the full web page (#3426)
Fixes #3425.
This commit is contained in:
@@ -1,11 +1,27 @@
|
||||
return {
|
||||
-- list your feeds here:
|
||||
|
||||
{ "http://feeds.reuters.com/Reuters/worldNews?format=xml", limit = 2},
|
||||
-- set 'limit' to change number of 'news' to be downloaded from source
|
||||
-- 'limit' equal "0" means no limit.
|
||||
{ "http://www.pcworld.com/index.rss", limit = 1 },
|
||||
{ "http://feeds.reuters.com/Reuters/worldNews?format=xml", limit = 2, download_full_article=false},
|
||||
|
||||
{ "https://www.pcworld.com/index.rss", limit = 7 , download_full_article=true},
|
||||
|
||||
-- comment out line ("--" at line start) to stop downloading source
|
||||
--{ "http://www.football.co.uk/international/rss.xml", limit = 0 , download_full_article=true},
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
--HELP:
|
||||
-- use syntax: {"your_url", limit= max_number_of_items_to_be_created, download_full_article=true/false}
|
||||
|
||||
-- set 'limit' to change number of 'news' to be created
|
||||
-- 'limit' equal "0" means no limit.
|
||||
|
||||
-- 'download_full_article=true' - means download full article using feed link (may not always work correctly)
|
||||
-- 'download_full_article=false' - means use only feed description to create feeds (usually only part of the article)
|
||||
|
||||
|
||||
-- comment out line to stop downloading source
|
||||
--{ "http://www.football.co.uk/international/rss.xml", limit = 0 },
|
||||
}
|
||||
|
||||
@@ -160,12 +160,13 @@ function NewsDownloader:loadConfigAndProcessFeeds()
|
||||
for idx, feed in ipairs(feed_config) do
|
||||
local url = feed[1]
|
||||
local limit = feed.limit
|
||||
local download_full_article = feed.download_full_article
|
||||
if url and limit then
|
||||
info = InfoMessage:new{ text = T(_("Processing: %1"), url) }
|
||||
UIManager:show(info)
|
||||
-- processFeedSource is a blocking call, so manually force a UI refresh beforehand
|
||||
UIManager:forceRePaint()
|
||||
self:processFeedSource(url, tonumber(limit), unsupported_feeds_urls)
|
||||
self:processFeedSource(url, tonumber(limit), unsupported_feeds_urls, download_full_article)
|
||||
UIManager:close(info)
|
||||
else
|
||||
logger.warn('NewsDownloader: invalid feed config entry', feed)
|
||||
@@ -191,7 +192,7 @@ function NewsDownloader:loadConfigAndProcessFeeds()
|
||||
end
|
||||
end
|
||||
|
||||
function NewsDownloader:processFeedSource(url, limit, unsupported_feeds_urls)
|
||||
function NewsDownloader:processFeedSource(url, limit, unsupported_feeds_urls, download_full_article)
|
||||
local resp_lines = {}
|
||||
local parsed = socket_url.parse(url)
|
||||
local httpRequest = parsed.scheme == 'http' and http.request or https.request
|
||||
@@ -207,9 +208,9 @@ function NewsDownloader:processFeedSource(url, limit, unsupported_feeds_urls)
|
||||
local is_atom = feeds.feed and feeds.feed.title and feeds.feed.entry[1] and feeds.feed.entry[1].title and feeds.feed.entry[1].link
|
||||
|
||||
if is_atom then
|
||||
self:processAtom(feeds, limit)
|
||||
self:processAtom(feeds, limit, download_full_article)
|
||||
elseif is_rss then
|
||||
self:processRSS(feeds, limit)
|
||||
self:processRSS(feeds, limit, download_full_article)
|
||||
else
|
||||
table.insert(unsupported_feeds_urls, url)
|
||||
return
|
||||
@@ -233,7 +234,7 @@ function NewsDownloader:deserializeXMLString(xml_str)
|
||||
return xmlhandler.root
|
||||
end
|
||||
|
||||
function NewsDownloader:processAtom(feeds, limit)
|
||||
function NewsDownloader:processAtom(feeds, limit, download_full_article)
|
||||
local feed_output_dir = string.format("%s%s/",
|
||||
news_download_dir_path,
|
||||
util.replaceInvalidChars(getFeedTitle(feeds.feed.title)))
|
||||
@@ -245,11 +246,15 @@ function NewsDownloader:processAtom(feeds, limit)
|
||||
if limit ~= 0 and index - 1 == limit then
|
||||
break
|
||||
end
|
||||
self:downloadFeed(feed, feed_output_dir)
|
||||
if download_full_article then
|
||||
self:downloadFeed(feed, feed_output_dir)
|
||||
else
|
||||
self:createFromDescription(feed, feed.context, feed_output_dir)
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
function NewsDownloader:processRSS(feeds, limit)
|
||||
function NewsDownloader:processRSS(feeds, limit, download_full_article)
|
||||
local feed_output_dir = ("%s%s/"):format(
|
||||
news_download_dir_path, util.replaceInvalidChars(feeds.rss.channel.title))
|
||||
if not lfs.attributes(feed_output_dir, "mode") then
|
||||
@@ -260,7 +265,11 @@ function NewsDownloader:processRSS(feeds, limit)
|
||||
if limit ~= 0 and index - 1 == limit then
|
||||
break
|
||||
end
|
||||
self:downloadFeed(feed, feed_output_dir)
|
||||
if download_full_article then
|
||||
self:downloadFeed(feed, feed_output_dir)
|
||||
else
|
||||
self:createFromDescription(feed, feed.description, feed_output_dir)
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
@@ -276,6 +285,25 @@ function NewsDownloader:downloadFeed(feed, feed_output_dir)
|
||||
httpRequest({ url = link, sink = ltn12.sink.file(io.open(news_dl_path, 'w')), })
|
||||
end
|
||||
|
||||
--- Write a small standalone HTML file for one feed entry, using text taken
-- from the feed itself instead of downloading the linked web page.
-- @param feed            feed entry table; its `title` field is read
-- @param context         text placed inside the <article> element; a missing
--                        (nil) value is written as an empty article
-- @param feed_output_dir directory prefix the file name is appended to as-is
--                        (callers in this file build it with a trailing "/")
function NewsDownloader:createFromDescription(feed, context, feed_output_dir)
    -- Resolve the title once through getFeedTitle so the HTML heading agrees
    -- with the file name (the raw feed.title was previously formatted directly).
    -- NOTE(review): assumes getFeedTitle returns a string - confirm.
    local title = getFeedTitle(feed.title)
    local news_file_path = ("%s%s%s"):format(feed_output_dir,
                                             util.replaceInvalidChars(title),
                                             file_extension)
    logger.dbg("NewsDownloader: News file will be created :", news_file_path)

    -- io.open returns nil plus a message on failure; skip this entry rather
    -- than crash on file:write and abort the remaining feeds.
    local file, err = io.open(news_file_path, "w")
    if not file then
        logger.warn("NewsDownloader: cannot create news file", news_file_path, err)
        return
    end

    local footer = _("This is just description of the feed. To download full article go to News Downloader settings and change 'download_full_article' to 'true'")

    -- `context or ""`: string.format("%s", nil) raises an error in Lua 5.1/LuaJIT.
    local html = string.format([[<!DOCTYPE html>
<html>
<head><meta charset='UTF-8'><title>%s</title></head>
<body><header><h2>%s</h2></header><article>%s</article>
<br><footer><small>%s</small></footer>
</body>
</html>]], title, title, context or "", footer)
    file:write(html)
    file:close()
end
|
||||
|
||||
function NewsDownloader:removeNewsButKeepFeedConfig()
|
||||
logger.dbg("NewsDownloader: Removing news from :", news_download_dir_path)
|
||||
for entry in lfs.dir(news_download_dir_path) do
|
||||
|
||||
Reference in New Issue
Block a user