[plugin] NewsDownloader: make <title> match less greedy (#13070)

On most pages the greedy `(.*)` pattern works fine, but on pages containing multiple `</title>` tags it matches everything up to the last one, which can be a very large amount of text. Valid examples include an `<svg>` in the HTML, where [`<title>`](https://developer.mozilla.org/en-US/docs/Web/SVG/Element/title) is used similarly to `title=""` in HTML, but of course there could also simply be invalid pages.
2025-08-10 00:52:38 +00:00 · 2025-01-15 10:07:27 +01:00
parent 87d1678b02
commit 8314838add
1 changed files with 1 additions and 1 deletions
--- a/plugins/newsdownloader.koplugin/epubdownloadbackend.lua
+++ b/plugins/newsdownloader.koplugin/epubdownloadbackend.lua
@@ -321,7 +321,7 @@ function EpubDownloadBackend:createEpub(epub_path, html, url, include_images, me
    local base_url = socket_url.parse(url)

    local cancelled = false
-    local page_htmltitle = html:match([[<title>(.*)</title>]])
+    local page_htmltitle = html:match([[<title[^>]*>(.-)</title>]])
    logger.dbg("page_htmltitle is ", page_htmltitle)
 --    local sections = html.sections -- Wikipedia provided TOC
    local bookid = "bookid_placeholder" --string.format("wikipedia_%s_%s_%s", lang, phtml.pageid, phtml.revid)