From 1e55dda4c765be3316d7ca87be3052943cb1be86 Mon Sep 17 00:00:00 2001 From: bateast Date: Sun, 3 Nov 2024 16:31:33 +0000 Subject: [PATCH] Add authentication request and cookies management for news (#12496) Add 'credentials' element in news configuration, used to request authentication cookies for further articles download --- .../epubdownloadbackend.lua | 126 ++++++++++++++++-- .../newsdownloader.koplugin/feed_config.lua | 5 + .../luahttpdownloadbackend.lua | 20 --- plugins/newsdownloader.koplugin/main.lua | 22 ++- 4 files changed, 139 insertions(+), 34 deletions(-) delete mode 100644 plugins/newsdownloader.koplugin/luahttpdownloadbackend.lua diff --git a/plugins/newsdownloader.koplugin/epubdownloadbackend.lua b/plugins/newsdownloader.koplugin/epubdownloadbackend.lua index b8f36397b..950ab4c7a 100644 --- a/plugins/newsdownloader.koplugin/epubdownloadbackend.lua +++ b/plugins/newsdownloader.koplugin/epubdownloadbackend.lua @@ -69,9 +69,82 @@ local function filter(text, element) return "" .. filtered .. "" end +-- From https://github.com/lunarmodules/luasocket/blob/1fad1626900a128be724cba9e9c19a6b2fe2bf6b/samples/cookie.lua +local token_class = '[^%c%s%(%)%<%>%@%,%;%:%\\%"%/%[%]%?%=%{%}]' + +local function unquote(t, quoted) + local n = string.match(t, "%$(%d+)$") + if n then n = tonumber(n) end + if quoted[n] then return quoted[n] + else return t end +end + +local function parse_set_cookie(c, quoted, cookie_table) + c = c .. ";$last=last;" + local _, _, n, v, i = string.find(c, "(" .. token_class .. + "+)%s*=%s*(.-)%s*;%s*()") + local cookie = { + name = n, + value = unquote(v, quoted), + attributes = {} + } + while 1 do + _, _, n, v, i = string.find(c, "(" .. token_class .. 
+ "+)%s*=?%s*(.-)%s*;%s*()", i) + if not n or n == "$last" then break end + cookie.attributes[#cookie.attributes+1] = { + name = n, + value = unquote(v, quoted) + } + end + cookie_table[#cookie_table+1] = cookie +end +local function split_set_cookie(s, cookie_table) + cookie_table = cookie_table or {} + -- remove quoted strings from cookie list + local quoted = {} + s = string.gsub(s, '"(.-)"', function(q) + quoted[#quoted+1] = q + return "$" .. #quoted + end) + -- add sentinel + s = s .. ",$last=" + -- split into individual cookies + local i = 1 + while 1 do + local _, _, cookie, next_token + _, _, cookie, i, next_token = string.find(s, "(.-)%s*%,%s*()(" .. + token_class .. "+)%s*=", i) + if not next_token then break end + parse_set_cookie(cookie, quoted, cookie_table) + if next_token == "$last" then break end + end + return cookie_table +end + +local function quote(s) + if string.find(s, "[ %,%;]") then return '"' .. s .. '"' + else return s end +end + +local _empty = {} +local function build_cookies(cookies) + local s = "" + for i,v in ipairs(cookies or _empty) do + if v.name then + s = s .. v.name + if v.value and v.value ~= "" then + s = s .. '=' .. quote(v.value) + end + end + if i < #cookies then s = s .. 
"; " end + end + return s +end + -- Get URL content -local function getUrlContent(url, timeout, maxtime, redirectCount) - logger.dbg("getUrlContent(", url, ",", timeout, ",", maxtime, ",", redirectCount, ")") +local function getUrlContent(url, cookies, timeout, maxtime, redirectCount) + logger.dbg("getUrlContent(", url, ",", cookies, ", ", timeout, ",", maxtime, ",", redirectCount, ")") if not redirectCount then redirectCount = 0 elseif redirectCount == max_redirects then @@ -88,12 +161,16 @@ local function getUrlContent(url, timeout, maxtime, redirectCount) url = url, method = "GET", sink = maxtime and socketutil.table_sink(sink) or ltn12.sink.table(sink), + headers = { + ["cookie"] = build_cookies(cookies) + } } logger.dbg("request:", request) local code, headers, status = socket.skip(1, http.request(request)) + socketutil:reset_timeout() - logger.dbg("After http.request") local content = table.concat(sink) -- empty or content accumulated till now + logger.dbg("After http.request") logger.dbg("type(code):", type(code)) logger.dbg("code:", code) logger.dbg("headers:", headers) @@ -139,9 +216,40 @@ local function getUrlContent(url, timeout, maxtime, redirectCount) return true, content end -function EpubDownloadBackend:getResponseAsString(url) +function EpubDownloadBackend:getConnectionCookies(url, credentials) + + local body = "" + for k, v in pairs(credentials) do + body = body .. (tostring(k) .. "=" .. tostring(v) .. 
"&") + end + local request = { + method = "POST", + url = url, + headers = { + ["content-type"] = "application/x-www-form-urlencoded", + ["content-length"] = tostring(#body) + }, + source = ltn12.source.string(body), + sink = nil + } + logger.dbg("request:", request, ", body: ", body) + local code, headers, status = socket.skip(1, http.request(request)) + + logger.dbg("code:", code) + logger.dbg("headers:", headers) + logger.dbg("status:", status) + + local cookies = {} + local to_parse = headers["set-cookie"] + split_set_cookie(to_parse, cookies) + logger.dbg("Cookies: ", cookies) + + return cookies +end + +function EpubDownloadBackend:getResponseAsString(url, cookies) logger.dbg("EpubDownloadBackend:getResponseAsString(", url, ")") - local success, content = getUrlContent(url) + local success, content = getUrlContent(url, cookies) if (success) then return content else @@ -157,21 +265,21 @@ function EpubDownloadBackend:resetTrapWidget() self.trap_widget = nil end -function EpubDownloadBackend:loadPage(url) +function EpubDownloadBackend:loadPage(url, cookies) local completed, success, content if self.trap_widget then -- if previously set with EpubDownloadBackend:setTrapWidget() local Trapper = require("ui/trapper") local timeout, maxtime = 30, 60 -- We use dismissableRunInSubprocess with complex return values: completed, success, content = Trapper:dismissableRunInSubprocess(function() - return getUrlContent(url, timeout, maxtime) + return getUrlContent(url, cookies, timeout, maxtime) end, self.trap_widget) if not completed then error(self.dismissed_error_code) -- "Interrupted by user" end else local timeout, maxtime = 10, 60 - success, content = getUrlContent(url, timeout, maxtime) + success, content = getUrlContent(url, cookies, timeout, maxtime) end logger.dbg("success:", success, "type(content):", type(content), "content:", content:sub(1, 500), "...") if not success then @@ -472,7 +580,7 @@ function EpubDownloadBackend:createEpub(epub_path, html, url, 
include_images, me src = img.src2x end logger.dbg("Getting img ", src) - local success, content = getUrlContent(src) + local success, content = getUrlContent(src, nil) -- success, content = getUrlContent(src..".unexistant") -- to simulate failure if success then logger.dbg("success, size:", #content) diff --git a/plugins/newsdownloader.koplugin/feed_config.lua b/plugins/newsdownloader.koplugin/feed_config.lua index 051b9ba32..50105f913 100644 --- a/plugins/newsdownloader.koplugin/feed_config.lua +++ b/plugins/newsdownloader.koplugin/feed_config.lua @@ -28,6 +28,11 @@ return {--do NOT change this line -- 'filter_element="name_of_css.element.class" - means to filter the chosen CSS selector, it can be easily picked using a modern web browser -- The default value is empty. The default list of common selectors is used as fallback if this value is set. +-- Optional 'credentials' element is used to authenticate on subscription-based articles. +-- It is composed of a 'url' string, which is the URL of the login form, +-- and an 'auth' table that contains form data used for user authentication {form_key = value, …}. +-- Example: credentials={url="https://secure.lemonde.fr/sfuser/connexion", auth={email="titi@gmouil.com", password="xxxx"}} + -- comment out line ("--" at line start) to stop downloading source diff --git a/plugins/newsdownloader.koplugin/luahttpdownloadbackend.lua b/plugins/newsdownloader.koplugin/luahttpdownloadbackend.lua deleted file mode 100644 index 801ecd493..000000000 --- a/plugins/newsdownloader.koplugin/luahttpdownloadbackend.lua +++ /dev/null @@ -1,20 +0,0 @@ -local logger = require("logger") -local http_request = require "http.request" - --- Currently unused. 
TODO @mwoz123 ADD LUA-HTTP AS LIBRARY -local LuaHttpDownloadBackend = {} - -function LuaHttpDownloadBackend:getResponseAsString(url) - local _, stream = assert(http_request.new_from_uri(url):go()) - local body = assert(stream:get_body_as_string()) - logger.dbg("Response body:", body) - return body -end - -function LuaHttpDownloadBackend:download(link, path) - local _, stream = assert(http_request.new_from_uri(link):go()) - stream:save_body_to_file(path) - stream:shutdown() -end - -return LuaHttpDownloadBackend diff --git a/plugins/newsdownloader.koplugin/main.lua b/plugins/newsdownloader.koplugin/main.lua index 2789c94e7..05289e34d 100644 --- a/plugins/newsdownloader.koplugin/main.lua +++ b/plugins/newsdownloader.koplugin/main.lua @@ -253,6 +253,7 @@ function NewsDownloader:loadConfigAndProcessFeeds(touchmenu_instance) local include_images = not never_download_images and feed.include_images local enable_filter = feed.enable_filter or feed.enable_filter == nil local filter_element = feed.filter_element or feed.filter_element == nil + local credentials = feed.credentials -- Check if the two required attributes are set. if url and limit then feed_message = T(_("Processing %1/%2:\n%3"), idx, total_feed_entries, BD.url(url)) @@ -260,6 +261,7 @@ function NewsDownloader:loadConfigAndProcessFeeds(touchmenu_instance) -- Process the feed source. 
self:processFeedSource( url, + credentials, tonumber(limit), unsupported_feeds_urls, download_full_article, @@ -338,9 +340,16 @@ function NewsDownloader:loadConfigAndProcessFeedsWithUI(touchmenu_instance) end) end -function NewsDownloader:processFeedSource(url, limit, unsupported_feeds_urls, download_full_article, include_images, message, enable_filter, filter_element) +function NewsDownloader:processFeedSource(url, credentials, limit, unsupported_feeds_urls, download_full_article, include_images, message, enable_filter, filter_element) + + local cookies = nil + if credentials ~= nil then + logger.dbg("Auth Cookies from ", cookies) + cookies = DownloadBackend:getConnectionCookies(credentials.url, credentials.auth) + end + local ok, response = pcall(function() - return DownloadBackend:getResponseAsString(url) + return DownloadBackend:getResponseAsString(url, cookies) end) local feeds -- Check to see if a response is available to deserialize. @@ -385,6 +394,7 @@ function NewsDownloader:processFeedSource(url, limit, unsupported_feeds_urls, do return self:processFeed( FEED_TYPE_ATOM, feeds, + cookies, limit, download_full_article, include_images, @@ -398,6 +408,7 @@ function NewsDownloader:processFeedSource(url, limit, unsupported_feeds_urls, do return self:processFeed( FEED_TYPE_RSS, feeds, + cookies, limit, download_full_article, include_images, @@ -450,7 +461,7 @@ function NewsDownloader:deserializeXMLString(xml_str) return xmlhandler.root end -function NewsDownloader:processFeed(feed_type, feeds, limit, download_full_article, include_images, message, enable_filter, filter_element) +function NewsDownloader:processFeed(feed_type, feeds, cookies, limit, download_full_article, include_images, message, enable_filter, filter_element) local feed_title local feed_item local total_items @@ -504,6 +515,7 @@ function NewsDownloader:processFeed(feed_type, feeds, limit, download_full_artic if download_full_article then self:downloadFeed( feed, + cookies, feed_output_dir, 
include_images, article_message, @@ -543,7 +555,7 @@ local function getTitleWithDate(feed) return title end -function NewsDownloader:downloadFeed(feed, feed_output_dir, include_images, message, enable_filter, filter_element) +function NewsDownloader:downloadFeed(feed, cookies, feed_output_dir, include_images, message, enable_filter, filter_element) local title_with_date = getTitleWithDate(feed) local news_file_path = ("%s%s%s"):format(feed_output_dir, title_with_date, @@ -556,7 +568,7 @@ function NewsDownloader:downloadFeed(feed, feed_output_dir, include_images, mess logger.dbg("NewsDownloader: News file will be stored to :", news_file_path) local article_message = T(_("%1\n%2"), message, title_with_date) local link = getFeedLink(feed.link) - local html = DownloadBackend:loadPage(link) + local html = DownloadBackend:loadPage(link, cookies) DownloadBackend:createEpub(news_file_path, html, link, include_images, article_message, enable_filter, filter_element) end end