Add tests for NewsDownloader (#13806)

To help prevent situations like #13799.

As also referenced in <https://github.com/koreader/koreader/pull/13407#issuecomment-2722714846>.

Also fixes single-item RSS and single-entry Atom feeds in passing.

References #3073.
This commit is contained in:
Frans de Jonge
2025-05-30 19:20:17 +02:00
committed by GitHub
parent ba2857cfc1
commit 0257caad05
2 changed files with 254 additions and 9 deletions

View File

@@ -43,7 +43,7 @@ local FEED_TYPE_ATOM = "atom"
-- If a title looks like <title>blabla</title> it'll just be feed.title.
-- If a title looks like <title attr="alb">blabla</title> then we get a table
-- where [1] is the title string and the attributes are also available.
local function getFeedTitle(possible_title)
function NewsDownloader.getFeedTitle(possible_title)
if type(possible_title) == "string" then
return util.htmlEntitiesToUtf8(possible_title)
elseif possible_title[1] and type(possible_title[1]) == "string" then
@@ -69,7 +69,7 @@ end
-- Some feeds that can be used for unit test.
-- http://fransdejonge.com/feed/ for multiple links.
-- https://github.com/koreader/koreader/commits/master.atom for single link with attributes.
local function getFeedLink(possible_link)
function NewsDownloader.getFeedLink(possible_link)
local E = {}
if type(possible_link) == "string" then
return possible_link
@@ -605,12 +605,20 @@ function NewsDownloader:processFeed(feed_type, feeds, cookies, limit, download_f
total_items = (limit == 0)
and #feeds.rss.channel.item
or limit
if feed_item[1] == nil and feed_item.title then
-- Normalize data for single-item feeds.
feed_item = {feed_item}
end
else
feed_title = getFeedTitle(feeds.feed.title)
feed_title = self.getFeedTitle(feeds.feed.title)
feed_item = feeds.feed.entry
total_items = (limit == 0)
and #feeds.feed.entry
or limit
if feed_item[1] == nil and feed_item.title then
-- Normalize data for single-item feeds.
feed_item = {feed_item}
end
end
-- Get the path to the output directory.
local feed_output_dir = ("%s%s/"):format(
@@ -686,7 +694,7 @@ local function parseDate(dateTime)
end
local function getTitleWithDate(feed)
local title = util.getSafeFilename(getFeedTitle(feed.title))
local title = util.getSafeFilename(NewsDownloader.getFeedTitle(feed.title))
if feed.updated then
title = parseDate(feed.updated) .. title
elseif feed.pubDate then
@@ -709,7 +717,7 @@ function NewsDownloader:downloadFeed(feed, cookies, feed_output_dir, include_ima
else
logger.dbg("NewsDownloader: News file will be stored to :", news_file_path)
local article_message = T(_("%1\n%2"), message, title_with_date)
local link = getFeedLink(feed.link)
local link = self.getFeedLink(feed.link)
local html = DownloadBackend:loadPage(link, cookies)
DownloadBackend:createEpub(news_file_path, html, link, include_images, article_message, enable_filter, filter_element)
end
@@ -729,7 +737,7 @@ function NewsDownloader:createFromDescription(feed, title, content, feed_output_
local byline = getByline(feed)
local footer = _("If this is only a summary, the full article can be downloaded by going to the News Downloader settings and changing 'Download full article' to 'true'.")
local base_url = getFeedLink(feed.link)
local base_url = self.getFeedLink(feed.link)
if base_url then
if not base_url:match("/$") then
base_url = base_url .. "/"
@@ -761,7 +769,7 @@ function NewsDownloader:createFromDescription(feed, title, content, feed_output_
<footer><small>%s</small></footer>
</body>
</html>]], title, title, byline, content, footer)
local link = getFeedLink(feed.link)
local link = self.getFeedLink(feed.link)
DownloadBackend:createEpub(news_file_path, html, link, include_images, article_message)
end
end
@@ -849,7 +857,7 @@ function NewsDownloader:viewFeedList()
-- Prepare the view with all the callbacks for editing the attributes
local feed_item_vc = FeedView:getItem(
#feed_config + 1,
getEmptyFeed(),
self.getEmptyFeed(),
function(id, edit_key, value)
self:editFeedAttribute(id, edit_key, value)
end
@@ -1013,7 +1021,7 @@ function NewsDownloader:updateFeedConfig(id, key, value)
if id > #feed_config then
table.insert(
feed_config,
getEmptyFeed()
self.getEmptyFeed()
)
end

View File

@@ -0,0 +1,237 @@
describe("NewsDownloader module", function()
-- Load commonrequire before the plugin module itself is required.
setup(function()
    require("commonrequire")
end)

-- Assigned by the setup hook below and shared by every spec in this file.
local NewsDownloader

setup(function()
    -- Prepend the plugin directory pattern so that require("main") can be
    -- resolved against the plugin's own folder.
    local plugin_pattern = "plugins/newsdownloader.koplugin/?.lua;"
    package.path = plugin_pattern .. package.path
    NewsDownloader = require("main")
end)
describe("RSS feed parsing", function()
-- RSS 2.0 fixture with two items. The second item's description is
-- HTML-escaped (&lt;p&gt;, &amp;) so the specs can check entity decoding.
local rss_xml = [[
<?xml version="1.0" encoding="UTF-8" ?>
<rss version="2.0">
<channel>
<title>KOReader News</title>
<link>https://github.com/koreader/koreader</link>
<description>KOReader updates and release notes</description>
<item>
<title>KOReader v2023.05 released</title>
<link>https://github.com/koreader/koreader/releases/tag/v2023.05</link>
<description>New release with improved PDF rendering and UI enhancements</description>
</item>
<item>
<title>KOReader v2023.04 released</title>
<link>https://github.com/koreader/koreader/releases/tag/v2023.04</link>
<description>&lt;p&gt;Bug fixes &amp; improved EPUB handling&lt;/p&gt;</description>
</item>
</channel>
</rss>
]]
it("should parse RSS feed titles correctly", function()
    local parsed = NewsDownloader:deserializeXMLString(rss_xml)
    assert.truthy(parsed)
    assert.truthy(parsed.rss)

    -- Channel-level title.
    local channel = parsed.rss.channel
    assert.truthy(channel)
    assert.truthy(channel.title)
    assert.equals("KOReader News", channel.title)

    -- Item-level titles, in document order.
    local items = channel.item
    assert.truthy(items)
    assert.equals("KOReader v2023.05 released", items[1].title)
    assert.equals("KOReader v2023.04 released", items[2].title)
end)
it("should parse RSS feed descriptions correctly", function()
    local parsed = NewsDownloader:deserializeXMLString(rss_xml)
    assert.truthy(parsed)

    -- Channel-level description.
    local channel = parsed.rss.channel
    assert.equals("KOReader updates and release notes", channel.description)

    -- Plain-text item description.
    local first_item, second_item = channel.item[1], channel.item[2]
    assert.equals("New release with improved PDF rendering and UI enhancements",
        first_item.description)

    -- The second description is HTML-escaped in the fixture; compare its
    -- decoded form.
    local util = require("util")
    local decoded = util.htmlEntitiesToUtf8(second_item.description)
    assert.equals("<p>Bug fixes & improved EPUB handling</p>", decoded)
end)
it("should parse RSS feed links correctly", function()
    local parsed = NewsDownloader:deserializeXMLString(rss_xml)
    assert.truthy(parsed)

    -- Channel-level link.
    local channel = parsed.rss.channel
    assert.equals("https://github.com/koreader/koreader", channel.link)

    -- Item-level links, read straight from the parsed table.
    local items = channel.item
    assert.equals("https://github.com/koreader/koreader/releases/tag/v2023.05",
        items[1].link)
    assert.equals("https://github.com/koreader/koreader/releases/tag/v2023.04",
        items[2].link)

    -- The exposed getFeedLink helper must hand back the same URL for a
    -- plain-string link.
    local resolved = NewsDownloader.getFeedLink(items[1].link)
    assert.equals("https://github.com/koreader/koreader/releases/tag/v2023.05",
        resolved)
end)
end)
describe("Atom feed parsing", function()
-- Atom fixture with two entries. Links are carried in href attributes, the
-- second entry's title has a type attribute, and entry content is
-- HTML-escaped.
local atom_xml = [[
<?xml version="1.0" encoding="utf-8"?>
<feed xmlns="http://www.w3.org/2005/Atom">
<title>KOReader GitHub Commits</title>
<link href="https://github.com/koreader/koreader/commits/master.atom"/>
<updated>2023-05-15T12:00:00Z</updated>
<entry>
<title>Fix PDF rendering issue</title>
<link href="https://github.com/koreader/koreader/commit/abc123"/>
<id>https://github.com/koreader/koreader/commit/abc123</id>
<updated>2023-05-15T12:00:00Z</updated>
<content type="html">
&lt;pre&gt;This commit fixes the PDF rendering issue on eInk screens&lt;/pre&gt;
</content>
</entry>
<entry>
<title type="html">Improve EPUB &amp; FB2 support</title>
<link href="https://github.com/koreader/koreader/commit/def456"/>
<id>https://github.com/koreader/koreader/commit/def456</id>
<updated>2023-05-14T10:30:00Z</updated>
<content type="html">
&lt;pre&gt;Add better support for EPUB and FB2 formats&lt;/pre&gt;
</content>
</entry>
</feed>
]]
it("should parse Atom feed titles correctly", function()
    local parsed = NewsDownloader:deserializeXMLString(atom_xml)
    assert.truthy(parsed)
    assert.truthy(parsed.feed)

    -- Feed-level title, run through the exposed getFeedTitle helper.
    local feed = parsed.feed
    assert.truthy(feed.title)
    assert.equals("KOReader GitHub Commits", NewsDownloader.getFeedTitle(feed.title))

    -- Entry-level titles.
    local entries = feed.entry
    assert.truthy(entries)
    assert.equals("Fix PDF rendering issue", NewsDownloader.getFeedTitle(entries[1].title))

    -- The second title contains &amp; in the fixture and must come back decoded.
    local second_title = NewsDownloader.getFeedTitle(entries[2].title)
    assert.equals("Improve EPUB & FB2 support", second_title)
end)
it("should parse Atom feed content correctly", function()
    local parsed = NewsDownloader:deserializeXMLString(atom_xml)
    assert.truthy(parsed)

    -- Each entry's content is read from index [1] of the parsed content
    -- table and compared after entity decoding.
    local first_raw = parsed.feed.entry[1].content[1]
    assert.equals("<pre>This commit fixes the PDF rendering issue on eInk screens</pre>",
        require("util").htmlEntitiesToUtf8(first_raw))

    local second_raw = parsed.feed.entry[2].content[1]
    assert.equals("<pre>Add better support for EPUB and FB2 formats</pre>",
        require("util").htmlEntitiesToUtf8(second_raw))
end)
it("should parse Atom feed links correctly", function()
    local parsed = NewsDownloader:deserializeXMLString(atom_xml)
    assert.truthy(parsed)

    -- Feed-level link; in the fixture the URL lives in an href attribute.
    local feed_link = NewsDownloader.getFeedLink(parsed.feed.link)
    assert.equals("https://github.com/koreader/koreader/commits/master.atom", feed_link)

    -- Entry-level links.
    local first_link = NewsDownloader.getFeedLink(parsed.feed.entry[1].link)
    assert.equals("https://github.com/koreader/koreader/commit/abc123", first_link)

    local second_link = NewsDownloader.getFeedLink(parsed.feed.entry[2].link)
    assert.equals("https://github.com/koreader/koreader/commit/def456", second_link)
end)
end)
describe("Special case handling", function()
it("should handle single-item RSS feeds properly", function()
    -- RSS fixture whose channel contains exactly one <item>.
    local single_item_rss = [[
<?xml version="1.0" encoding="UTF-8" ?>
<rss version="2.0">
<channel>
<title>Single Item Feed</title>
<item>
<title>The Only Item</title>
<link>https://example.com/only</link>
<description>This is the only item in the feed</description>
</item>
</channel>
</rss>
]]
    local feeds = NewsDownloader:deserializeXMLString(single_item_rss)
    assert.truthy(feeds)
    assert.equals("Single Item Feed", feeds.rss.channel.title)

    -- The plugin should normalize single items.
    local processed = false

    -- Mock createFromDescription to avoid creating files and whatnot.
    -- The restore is registered with finally() *before* the mock is
    -- installed, so the real method is put back even when an assertion
    -- fails or processFeed raises; otherwise the mock would leak into
    -- later specs through the cached module table.
    local old_createFromDescription = NewsDownloader.createFromDescription
    finally(function()
        NewsDownloader.createFromDescription = old_createFromDescription
    end)
    NewsDownloader.createFromDescription = function(self, feed, title, desc, dir, img, msg)
        assert.equals("The Only Item", title)
        assert.equals("This is the only item in the feed", desc)
        -- Only flagged once both assertions above have passed.
        processed = true
    end

    -- Fourth argument is the item limit (1).
    NewsDownloader:processFeed("rss", feeds, nil, 1, false, false, "Testing", true, nil)
    assert.is_true(processed)
end)
it("should handle single-item Atom feeds properly", function()
    -- Atom fixture whose feed contains exactly one <entry>.
    local single_item_atom = [[
<?xml version="1.0" encoding="utf-8"?>
<feed xmlns="http://www.w3.org/2005/Atom">
<title>Single Item Atom Feed</title>
<link href="https://example.com/atom-feed"/>
<updated>2023-06-15T09:00:00Z</updated>
<author>
<name>KOReader Team</name>
</author>
<id>urn:uuid:60a76c80-d399-11d9-b93C-0003939e0af6</id>
<entry>
<title>The Only Atom Entry</title>
<link href="https://example.com/only-entry"/>
<id>urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a</id>
<updated>2023-06-15T09:00:00Z</updated>
<summary>This is the only entry in this Atom feed</summary>
<content type="html">
&lt;p&gt;This is the complete content of the only entry in this Atom feed&lt;/p&gt;
</content>
</entry>
</feed>
]]
    local feeds = NewsDownloader:deserializeXMLString(single_item_atom)
    assert.truthy(feeds)
    assert.equals("Single Item Atom Feed", feeds.feed.title)

    -- The plugin should normalize single items.
    local processed = false

    -- Mock createFromDescription to avoid creating files and whatnot.
    -- The restore is registered with finally() *before* the mock is
    -- installed, so the real method is put back even when an assertion
    -- fails or processFeed raises; otherwise the mock would leak into
    -- later specs through the cached module table.
    local old_createFromDescription = NewsDownloader.createFromDescription
    finally(function()
        NewsDownloader.createFromDescription = old_createFromDescription
    end)
    NewsDownloader.createFromDescription = function(self, feed, title, desc, dir, img, msg)
        assert.equals("The Only Atom Entry", title)
        assert.equals("<p>This is the complete content of the only entry in this Atom feed</p>", desc)
        -- Only flagged once both assertions above have passed.
        processed = true
    end

    -- Fourth argument is the item limit (1).
    NewsDownloader:processFeed("atom", feeds, nil, 1, false, false, "Testing", true, nil)
    assert.is_true(processed)
end)
end)
end)