Add authentication request and cookies management for news (#12496)

Add a 'credentials' element to the news configuration, used to request authentication cookies for subsequent article downloads
This commit is contained in:
bateast
2024-11-03 16:31:33 +00:00
committed by GitHub
parent c2d1099976
commit 1e55dda4c7
4 changed files with 139 additions and 34 deletions

View File

@@ -69,9 +69,82 @@ local function filter(text, element)
return "<!DOCTYPE html><html><head></head><body>" .. filtered .. "</body></html>"
end
-- From https://github.com/lunarmodules/luasocket/blob/1fad1626900a128be724cba9e9c19a6b2fe2bf6b/samples/cookie.lua
-- Lua pattern class matching a single HTTP "token" character: anything
-- except control characters, whitespace, and the HTTP separator set.
local token_class = '[^%c%s%(%)%<%>%@%,%;%:%\\%"%/%[%]%?%=%{%}]'
-- Resolve a "$N" placeholder back to the quoted string it replaced.
-- `quoted` is the placeholder table built by split_set_cookie; any value
-- that is not a known placeholder is returned unchanged.
local function unquote(t, quoted)
    local index = string.match(t, "%$(%d+)$")
    if index then
        local original = quoted[tonumber(index)]
        if original then
            return original
        end
    end
    return t
end
-- Parse a single Set-Cookie header value `c` into a table of the form
-- { name = ..., value = ..., attributes = { {name=..., value=...}, ... } }
-- and append it to `cookie_table`. `quoted` maps "$N" placeholders
-- (substituted earlier by split_set_cookie) back to their original
-- quoted-string contents.
local function parse_set_cookie(c, quoted, cookie_table)
-- sentinel pair so the last real attribute is ';'-terminated like the others
c = c .. ";$last=last;"
-- first name=value pair is the cookie itself; `i` is the position just past it
local _, _, n, v, i = string.find(c, "(" .. token_class ..
"+)%s*=%s*(.-)%s*;%s*()")
local cookie = {
name = n,
value = unquote(v, quoted),
attributes = {}
}
-- remaining pairs are attributes; '=?' makes the value optional (e.g. Secure)
while 1 do
_, _, n, v, i = string.find(c, "(" .. token_class ..
"+)%s*=?%s*(.-)%s*;%s*()", i)
if not n or n == "$last" then break end
cookie.attributes[#cookie.attributes+1] = {
name = n,
value = unquote(v, quoted)
}
end
cookie_table[#cookie_table+1] = cookie
end
-- Split a raw Set-Cookie header `s` (which may contain several cookies
-- joined by commas) into individual cookies and parse each one into
-- `cookie_table` (created if nil). Returns `cookie_table`.
local function split_set_cookie(s, cookie_table)
cookie_table = cookie_table or {}
-- remove quoted strings from cookie list
-- (replaced by "$N" placeholders so commas/semicolons inside quotes
-- do not confuse the splitting below; unquote() restores them later)
local quoted = {}
s = string.gsub(s, '"(.-)"', function(q)
quoted[#quoted+1] = q
return "$" .. #quoted
end)
-- add sentinel
s = s .. ",$last="
-- split into individual cookies
-- a cookie boundary is a comma followed by "token=", i.e. the start of
-- the next cookie's name=value pair; `i` resumes at that token
local i = 1
while 1 do
local _, _, cookie, next_token
_, _, cookie, i, next_token = string.find(s, "(.-)%s*%,%s*()(" ..
token_class .. "+)%s*=", i)
if not next_token then break end
parse_set_cookie(cookie, quoted, cookie_table)
if next_token == "$last" then break end
end
return cookie_table
end
-- Wrap `s` in double quotes when it contains a character that would break
-- a cookie list (space, comma, or semicolon); otherwise return it as-is.
local function quote(s)
    local needs_quoting = string.find(s, "[ %,%;]") ~= nil
    if not needs_quoting then
        return s
    end
    return '"' .. s .. '"'
end
local _empty = {}
-- Serialize a parsed cookie list back into a single "Cookie:" header
-- value of the form "name=value; name2=value2". Accepts nil for an
-- empty list; values containing separators are quoted via quote().
local function build_cookies(cookies)
    local list = cookies or _empty
    local total = #list
    local parts = {}
    for idx = 1, total do
        local c = list[idx]
        local piece = ""
        if c.name then
            piece = c.name
            -- only emit "=value" when the value is present and non-empty
            if c.value and c.value ~= "" then
                piece = piece .. "=" .. quote(c.value)
            end
        end
        -- separator after every cookie but the last
        if idx < total then
            piece = piece .. "; "
        end
        parts[#parts + 1] = piece
    end
    return table.concat(parts)
end
-- Get URL content
local function getUrlContent(url, timeout, maxtime, redirectCount)
logger.dbg("getUrlContent(", url, ",", timeout, ",", maxtime, ",", redirectCount, ")")
local function getUrlContent(url, cookies, timeout, maxtime, redirectCount)
logger.dbg("getUrlContent(", url, ",", cookies, ", ", timeout, ",", maxtime, ",", redirectCount, ")")
if not redirectCount then
redirectCount = 0
elseif redirectCount == max_redirects then
@@ -88,12 +161,16 @@ local function getUrlContent(url, timeout, maxtime, redirectCount)
url = url,
method = "GET",
sink = maxtime and socketutil.table_sink(sink) or ltn12.sink.table(sink),
headers = {
["cookie"] = build_cookies(cookies)
}
}
logger.dbg("request:", request)
local code, headers, status = socket.skip(1, http.request(request))
socketutil:reset_timeout()
logger.dbg("After http.request")
local content = table.concat(sink) -- empty or content accumulated till now
logger.dbg("After http.request")
logger.dbg("type(code):", type(code))
logger.dbg("code:", code)
logger.dbg("headers:", headers)
@@ -139,9 +216,40 @@ local function getUrlContent(url, timeout, maxtime, redirectCount)
return true, content
end
function EpubDownloadBackend:getResponseAsString(url)
-- POST the `credentials` form to `url` and return the session cookies
-- the server sets in its response, parsed into a cookie table suitable
-- for build_cookies()/getUrlContent().
-- @param url string: address of the login form endpoint
-- @param credentials table: form fields {form_key = value, ...}
-- @treturn table parsed cookies (empty when the server sets none)
function EpubDownloadBackend:getConnectionCookies(url, credentials)
    -- Build the application/x-www-form-urlencoded body.
    -- NOTE(review): keys and values are not percent-encoded; this assumes
    -- they contain no reserved characters ('&', '=', '%', ...) — confirm
    -- against the credential formats actually used.
    local fields = {}
    for k, v in pairs(credentials) do
        fields[#fields+1] = tostring(k) .. "=" .. tostring(v)
    end
    local body = table.concat(fields, "&")
    local request = {
        method = "POST",
        url = url,
        headers = {
            ["content-type"] = "application/x-www-form-urlencoded",
            ["content-length"] = tostring(#body),
        },
        source = ltn12.source.string(body),
    }
    logger.dbg("request:", request, ", body: ", body)
    local code, headers, status = socket.skip(1, http.request(request))
    logger.dbg("code:", code)
    logger.dbg("headers:", headers)
    logger.dbg("status:", status)
    local cookies = {}
    -- `headers` is nil on network failure, and a successful response may
    -- simply not set any cookie: only parse when Set-Cookie is present
    -- (the previous code passed nil to split_set_cookie and crashed).
    local set_cookie = headers and headers["set-cookie"]
    if set_cookie then
        split_set_cookie(set_cookie, cookies)
    end
    logger.dbg("Cookies: ", cookies)
    return cookies
end
function EpubDownloadBackend:getResponseAsString(url, cookies)
logger.dbg("EpubDownloadBackend:getResponseAsString(", url, ")")
local success, content = getUrlContent(url)
local success, content = getUrlContent(url, cookies)
if (success) then
return content
else
@@ -157,21 +265,21 @@ function EpubDownloadBackend:resetTrapWidget()
self.trap_widget = nil
end
function EpubDownloadBackend:loadPage(url)
function EpubDownloadBackend:loadPage(url, cookies)
local completed, success, content
if self.trap_widget then -- if previously set with EpubDownloadBackend:setTrapWidget()
local Trapper = require("ui/trapper")
local timeout, maxtime = 30, 60
-- We use dismissableRunInSubprocess with complex return values:
completed, success, content = Trapper:dismissableRunInSubprocess(function()
return getUrlContent(url, timeout, maxtime)
return getUrlContent(url, cookies, timeout, maxtime)
end, self.trap_widget)
if not completed then
error(self.dismissed_error_code) -- "Interrupted by user"
end
else
local timeout, maxtime = 10, 60
success, content = getUrlContent(url, timeout, maxtime)
success, content = getUrlContent(url, cookies, timeout, maxtime)
end
logger.dbg("success:", success, "type(content):", type(content), "content:", content:sub(1, 500), "...")
if not success then
@@ -472,7 +580,7 @@ function EpubDownloadBackend:createEpub(epub_path, html, url, include_images, me
src = img.src2x
end
logger.dbg("Getting img ", src)
local success, content = getUrlContent(src)
local success, content = getUrlContent(src, nil)
-- success, content = getUrlContent(src..".unexistant") -- to simulate failure
if success then
logger.dbg("success, size:", #content)

View File

@@ -28,6 +28,11 @@ return {--do NOT change this line
-- 'filter_element="name_of_css.element.class" - means to filter the chosen CSS selector, it can be easily picked using a modern web browser
-- The default value is empty. The default list of common selectors is used as fallback if this value is set.
-- Optional 'credentials' element is used to authenticate on subscription based articles.
-- It consists of a 'url' string, which is the URL of the login form,
-- and an 'auth' table that contains the form data used for user authentication {form_key = value, …}.
-- Example: credentials={url="https://secure.lemonde.fr/sfuser/connexion", auth={email="titi@gmouil.com", password="xxxx"}}
-- comment out line ("--" at line start) to stop downloading source

View File

@@ -1,20 +0,0 @@
local logger = require("logger")
local http_request = require "http.request"
-- Currently unused. TODO @mwoz123 ADD LUA-HTTP AS LIBRARY
local LuaHttpDownloadBackend = {}
--- Perform a GET on `url` via lua-http and return the full response body
-- as a string. Raises on connection or read failure.
function LuaHttpDownloadBackend:getResponseAsString(url)
    local request = http_request.new_from_uri(url)
    local stream = select(2, assert(request:go()))
    local body = assert(stream:get_body_as_string())
    logger.dbg("Response body:", body)
    return body
end
--- Download `link` via lua-http, streaming the response body straight
-- into the file at `path`. Raises on connection failure.
function LuaHttpDownloadBackend:download(link, path)
    local request = http_request.new_from_uri(link)
    local stream = select(2, assert(request:go()))
    stream:save_body_to_file(path)
    stream:shutdown()
end
return LuaHttpDownloadBackend

View File

@@ -253,6 +253,7 @@ function NewsDownloader:loadConfigAndProcessFeeds(touchmenu_instance)
local include_images = not never_download_images and feed.include_images
local enable_filter = feed.enable_filter or feed.enable_filter == nil
local filter_element = feed.filter_element or feed.filter_element == nil
local credentials = feed.credentials
-- Check if the two required attributes are set.
if url and limit then
feed_message = T(_("Processing %1/%2:\n%3"), idx, total_feed_entries, BD.url(url))
@@ -260,6 +261,7 @@ function NewsDownloader:loadConfigAndProcessFeeds(touchmenu_instance)
-- Process the feed source.
self:processFeedSource(
url,
credentials,
tonumber(limit),
unsupported_feeds_urls,
download_full_article,
@@ -338,9 +340,16 @@ function NewsDownloader:loadConfigAndProcessFeedsWithUI(touchmenu_instance)
end)
end
function NewsDownloader:processFeedSource(url, limit, unsupported_feeds_urls, download_full_article, include_images, message, enable_filter, filter_element)
function NewsDownloader:processFeedSource(url, credentials, limit, unsupported_feeds_urls, download_full_article, include_images, message, enable_filter, filter_element)
local cookies = nil
if credentials ~= nil then
logger.dbg("Auth Cookies from ", cookies)
cookies = DownloadBackend:getConnectionCookies(credentials.url, credentials.auth)
end
local ok, response = pcall(function()
return DownloadBackend:getResponseAsString(url)
return DownloadBackend:getResponseAsString(url, cookies)
end)
local feeds
-- Check to see if a response is available to deserialize.
@@ -385,6 +394,7 @@ function NewsDownloader:processFeedSource(url, limit, unsupported_feeds_urls, do
return self:processFeed(
FEED_TYPE_ATOM,
feeds,
cookies,
limit,
download_full_article,
include_images,
@@ -398,6 +408,7 @@ function NewsDownloader:processFeedSource(url, limit, unsupported_feeds_urls, do
return self:processFeed(
FEED_TYPE_RSS,
feeds,
cookies,
limit,
download_full_article,
include_images,
@@ -450,7 +461,7 @@ function NewsDownloader:deserializeXMLString(xml_str)
return xmlhandler.root
end
function NewsDownloader:processFeed(feed_type, feeds, limit, download_full_article, include_images, message, enable_filter, filter_element)
function NewsDownloader:processFeed(feed_type, feeds, cookies, limit, download_full_article, include_images, message, enable_filter, filter_element)
local feed_title
local feed_item
local total_items
@@ -504,6 +515,7 @@ function NewsDownloader:processFeed(feed_type, feeds, limit, download_full_artic
if download_full_article then
self:downloadFeed(
feed,
cookies,
feed_output_dir,
include_images,
article_message,
@@ -543,7 +555,7 @@ local function getTitleWithDate(feed)
return title
end
function NewsDownloader:downloadFeed(feed, feed_output_dir, include_images, message, enable_filter, filter_element)
function NewsDownloader:downloadFeed(feed, cookies, feed_output_dir, include_images, message, enable_filter, filter_element)
local title_with_date = getTitleWithDate(feed)
local news_file_path = ("%s%s%s"):format(feed_output_dir,
title_with_date,
@@ -556,7 +568,7 @@ function NewsDownloader:downloadFeed(feed, feed_output_dir, include_images, mess
logger.dbg("NewsDownloader: News file will be stored to :", news_file_path)
local article_message = T(_("%1\n%2"), message, title_with_date)
local link = getFeedLink(feed.link)
local html = DownloadBackend:loadPage(link)
local html = DownloadBackend:loadPage(link, cookies)
DownloadBackend:createEpub(news_file_path, html, link, include_images, article_message, enable_filter, filter_element)
end
end