From 8314838addabc09055704ec3ec51701742fd9e07 Mon Sep 17 00:00:00 2001 From: Frans de Jonge Date: Wed, 15 Jan 2025 10:07:27 +0100 Subject: [PATCH] [plugin] NewsDownloader: make <title> match less greedy (#13070) On most pages it works fine, but on pages with multiple `<title>` it can match very large amounts of text. Valid examples include `<svg>` in the HTML, where [`<title>`](https://developer.mozilla.org/en-US/docs/Web/SVG/Element/title) is used similar to `title=""` in HTML, but of course there could also simply be invalid pages. --- plugins/newsdownloader.koplugin/epubdownloadbackend.lua | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/plugins/newsdownloader.koplugin/epubdownloadbackend.lua b/plugins/newsdownloader.koplugin/epubdownloadbackend.lua index bdc39a0ec..311ca7b97 100644 --- a/plugins/newsdownloader.koplugin/epubdownloadbackend.lua +++ b/plugins/newsdownloader.koplugin/epubdownloadbackend.lua @@ -321,7 +321,7 @@ function EpubDownloadBackend:createEpub(epub_path, html, url, include_images, me local base_url = socket_url.parse(url) local cancelled = false - local page_htmltitle = html:match([[<title>(.*)</title>]]) + local page_htmltitle = html:match([[<title[^>]*>(.-)</title>]]) logger.dbg("page_htmltitle is ", page_htmltitle) -- local sections = html.sections -- Wikipedia provided TOC local bookid = "bookid_placeholder" --string.format("wikipedia_%s_%s_%s", lang, phtml.pageid, phtml.revid)