mirror of
https://github.com/koreader/koreader.git
synced 2025-08-10 00:52:38 +00:00
Add authentication request and cookies management for news (#12496)
Add 'credentials' element in news configuration, used to request authentication cookies for further articles download
This commit is contained in:
@@ -69,9 +69,82 @@ local function filter(text, element)
|
||||
return "<!DOCTYPE html><html><head></head><body>" .. filtered .. "</body></html>"
|
||||
end
|
||||
|
||||
-- From https://github.com/lunarmodules/luasocket/blob/1fad1626900a128be724cba9e9c19a6b2fe2bf6b/samples/cookie.lua
-- Matches a single RFC 2616 "token" character (anything that is not a
-- control char, whitespace, or a separator).
local token_class = '[^%c%s%(%)%<%>%@%,%;%:%\\%"%/%[%]%?%=%{%}]'

--- Restore a previously extracted quoted string.
-- Quoted strings are replaced by "$<index>" placeholders before parsing;
-- this maps a placeholder back to the original content stored in `quoted`.
-- Returns `t` unchanged when it is not a known placeholder.
local function unquote(t, quoted)
    local idx = string.match(t, "%$(%d+)$")
    if idx then
        idx = tonumber(idx)
        if quoted[idx] then
            return quoted[idx]
        end
    end
    return t
end
--- Parse a single Set-Cookie value into a cookie record and append it
-- to cookie_table. The record has the shape:
-- { name = ..., value = ..., attributes = { {name=..., value=...}, ... } }.
-- `quoted` holds strings previously extracted by split_set_cookie();
-- relies on the file-local unquote() and token_class.
local function parse_set_cookie(c, quoted, cookie_table)
    -- Append a sentinel attribute so the scanning loop below always
    -- terminates on "$last" even for a well-formed trailing attribute.
    c = c .. ";$last=last;"
    local name_value_pat = "(" .. token_class .. "+)%s*=%s*(.-)%s*;%s*()"
    local attribute_pat = "(" .. token_class .. "+)%s*=?%s*(.-)%s*;%s*()"
    -- First name=value pair is the cookie itself.
    local _, _, name, value, pos = string.find(c, name_value_pat)
    local cookie = {
        name = name,
        value = unquote(value, quoted),
        attributes = {},
    }
    -- Remaining pairs (possibly valueless) are cookie attributes.
    while true do
        local attr_name, attr_value
        _, _, attr_name, attr_value, pos = string.find(c, attribute_pat, pos)
        if not attr_name or attr_name == "$last" then break end
        cookie.attributes[#cookie.attributes + 1] = {
            name = attr_name,
            value = unquote(attr_value, quoted),
        }
    end
    cookie_table[#cookie_table + 1] = cookie
end
--- Split a Set-Cookie header value that may contain several
-- comma-separated cookies, parsing each into cookie_table
-- (created when not supplied). Returns cookie_table.
local function split_set_cookie(s, cookie_table)
    cookie_table = cookie_table or {}
    -- Pull quoted strings out first so commas/semicolons inside them do
    -- not confuse the splitting pattern; unquote() restores them later.
    local quoted = {}
    s = string.gsub(s, '"(.-)"', function(q)
        quoted[#quoted + 1] = q
        return "$" .. #quoted
    end)
    -- Sentinel cookie so the last real cookie is always followed by a comma.
    s = s .. ",$last="
    -- Walk the string, handing each individual cookie to parse_set_cookie().
    local pos = 1
    while true do
        local _, _, cookie, next_pos, next_token = string.find(s,
            "(.-)%s*%,%s*()(" .. token_class .. "+)%s*=", pos)
        if not next_token then break end
        pos = next_pos
        parse_set_cookie(cookie, quoted, cookie_table)
        if next_token == "$last" then break end
    end
    return cookie_table
end
--- Wrap s in double quotes when it contains a character (space, comma
-- or semicolon) that would break an unquoted cookie value.
local function quote(s)
    if string.find(s, "[ %,%;]") then
        return '"' .. s .. '"'
    end
    return s
end

-- Shared empty table used as the iteration default when no cookies are given.
local _empty = {}

--- Serialize a list of cookie records ({name=..., value=...}) into the
-- value of a "Cookie:" request header. Nameless records contribute
-- nothing but still get a separator; empty values yield a bare name.
local function build_cookies(cookies)
    local header = ""
    for idx, cookie in ipairs(cookies or _empty) do
        if cookie.name then
            header = header .. cookie.name
            local val = cookie.value
            if val and val ~= "" then
                header = header .. "=" .. quote(val)
            end
        end
        if idx < #cookies then
            header = header .. "; "
        end
    end
    return header
end
|
||||
-- Get URL content
|
||||
local function getUrlContent(url, timeout, maxtime, redirectCount)
|
||||
logger.dbg("getUrlContent(", url, ",", timeout, ",", maxtime, ",", redirectCount, ")")
|
||||
local function getUrlContent(url, cookies, timeout, maxtime, redirectCount)
|
||||
logger.dbg("getUrlContent(", url, ",", cookies, ", ", timeout, ",", maxtime, ",", redirectCount, ")")
|
||||
if not redirectCount then
|
||||
redirectCount = 0
|
||||
elseif redirectCount == max_redirects then
|
||||
@@ -88,12 +161,16 @@ local function getUrlContent(url, timeout, maxtime, redirectCount)
|
||||
url = url,
|
||||
method = "GET",
|
||||
sink = maxtime and socketutil.table_sink(sink) or ltn12.sink.table(sink),
|
||||
headers = {
|
||||
["cookie"] = build_cookies(cookies)
|
||||
}
|
||||
}
|
||||
logger.dbg("request:", request)
|
||||
local code, headers, status = socket.skip(1, http.request(request))
|
||||
|
||||
socketutil:reset_timeout()
|
||||
logger.dbg("After http.request")
|
||||
local content = table.concat(sink) -- empty or content accumulated till now
|
||||
logger.dbg("After http.request")
|
||||
logger.dbg("type(code):", type(code))
|
||||
logger.dbg("code:", code)
|
||||
logger.dbg("headers:", headers)
|
||||
@@ -139,9 +216,40 @@ local function getUrlContent(url, timeout, maxtime, redirectCount)
|
||||
return true, content
|
||||
end
|
||||
|
||||
function EpubDownloadBackend:getResponseAsString(url)
|
||||
function EpubDownloadBackend:getConnectionCookies(url, credentials)
|
||||
|
||||
local body = ""
|
||||
for k, v in pairs(credentials) do
|
||||
body = body .. (tostring(k) .. "=" .. tostring(v) .. "&")
|
||||
end
|
||||
local request = {
|
||||
method = "POST",
|
||||
url = url,
|
||||
headers = {
|
||||
["content-type"] = "application/x-www-form-urlencoded",
|
||||
["content-length"] = tostring(#body)
|
||||
},
|
||||
source = ltn12.source.string(body),
|
||||
sink = nil
|
||||
}
|
||||
logger.dbg("request:", request, ", body: ", body)
|
||||
local code, headers, status = socket.skip(1, http.request(request))
|
||||
|
||||
logger.dbg("code:", code)
|
||||
logger.dbg("headers:", headers)
|
||||
logger.dbg("status:", status)
|
||||
|
||||
local cookies = {}
|
||||
local to_parse = headers["set-cookie"]
|
||||
split_set_cookie(to_parse, cookies)
|
||||
logger.dbg("Cookies: ", cookies)
|
||||
|
||||
return cookies
|
||||
end
|
||||
|
||||
function EpubDownloadBackend:getResponseAsString(url, cookies)
|
||||
logger.dbg("EpubDownloadBackend:getResponseAsString(", url, ")")
|
||||
local success, content = getUrlContent(url)
|
||||
local success, content = getUrlContent(url, cookies)
|
||||
if (success) then
|
||||
return content
|
||||
else
|
||||
@@ -157,21 +265,21 @@ function EpubDownloadBackend:resetTrapWidget()
|
||||
self.trap_widget = nil
|
||||
end
|
||||
|
||||
function EpubDownloadBackend:loadPage(url)
|
||||
function EpubDownloadBackend:loadPage(url, cookies)
|
||||
local completed, success, content
|
||||
if self.trap_widget then -- if previously set with EpubDownloadBackend:setTrapWidget()
|
||||
local Trapper = require("ui/trapper")
|
||||
local timeout, maxtime = 30, 60
|
||||
-- We use dismissableRunInSubprocess with complex return values:
|
||||
completed, success, content = Trapper:dismissableRunInSubprocess(function()
|
||||
return getUrlContent(url, timeout, maxtime)
|
||||
return getUrlContent(url, cookies, timeout, maxtime)
|
||||
end, self.trap_widget)
|
||||
if not completed then
|
||||
error(self.dismissed_error_code) -- "Interrupted by user"
|
||||
end
|
||||
else
|
||||
local timeout, maxtime = 10, 60
|
||||
success, content = getUrlContent(url, timeout, maxtime)
|
||||
success, content = getUrlContent(url, cookies, timeout, maxtime)
|
||||
end
|
||||
logger.dbg("success:", success, "type(content):", type(content), "content:", content:sub(1, 500), "...")
|
||||
if not success then
|
||||
@@ -472,7 +580,7 @@ function EpubDownloadBackend:createEpub(epub_path, html, url, include_images, me
|
||||
src = img.src2x
|
||||
end
|
||||
logger.dbg("Getting img ", src)
|
||||
local success, content = getUrlContent(src)
|
||||
local success, content = getUrlContent(src, nil)
|
||||
-- success, content = getUrlContent(src..".unexistant") -- to simulate failure
|
||||
if success then
|
||||
logger.dbg("success, size:", #content)
|
||||
|
||||
@@ -28,6 +28,11 @@ return {--do NOT change this line
|
||||
-- 'filter_element="name_of_css.element.class" - means to filter the chosen CSS selector, it can be easily picked using a modern web browser
|
||||
-- The default value is empty. The default list of common selectors is used as fallback if this value is set.
|
||||
|
||||
-- Optional 'credentials' element is used to authenticate on subscription based articles.
|
||||
-- It is itself composed of a 'url' string, which is the URL of the login form,
|
||||
-- and an 'auth' table that contains form data used for user authentication {form_key = value, …}.
|
||||
-- Example: credentials={url="https://secure.lemonde.fr/sfuser/connexion", auth={email="titi@gmouil.com", password="xxxx"}}
|
||||
|
||||
-- comment out line ("--" at line start) to stop downloading source
|
||||
|
||||
|
||||
|
||||
@@ -1,20 +0,0 @@
|
||||
local logger = require("logger")
local http_request = require "http.request"

-- Currently unused. TODO @mwoz123 ADD LUA-HTTP AS LIBRARY
local LuaHttpDownloadBackend = {}

--- Perform a GET on `url` and return the whole response body as a string.
-- Raises (via assert) when the request or the body read fails.
function LuaHttpDownloadBackend:getResponseAsString(url)
    local req = http_request.new_from_uri(url)
    local _, stream = assert(req:go())
    local body = assert(stream:get_body_as_string())
    logger.dbg("Response body:", body)
    return body
end

--- Fetch `link` and stream the response body into the file at `path`.
-- Raises (via assert) when the request fails.
function LuaHttpDownloadBackend:download(link, path)
    local req = http_request.new_from_uri(link)
    local _, stream = assert(req:go())
    stream:save_body_to_file(path)
    stream:shutdown()
end

return LuaHttpDownloadBackend
||||
@@ -253,6 +253,7 @@ function NewsDownloader:loadConfigAndProcessFeeds(touchmenu_instance)
|
||||
local include_images = not never_download_images and feed.include_images
|
||||
local enable_filter = feed.enable_filter or feed.enable_filter == nil
|
||||
local filter_element = feed.filter_element or feed.filter_element == nil
|
||||
local credentials = feed.credentials
|
||||
-- Check if the two required attributes are set.
|
||||
if url and limit then
|
||||
feed_message = T(_("Processing %1/%2:\n%3"), idx, total_feed_entries, BD.url(url))
|
||||
@@ -260,6 +261,7 @@ function NewsDownloader:loadConfigAndProcessFeeds(touchmenu_instance)
|
||||
-- Process the feed source.
|
||||
self:processFeedSource(
|
||||
url,
|
||||
credentials,
|
||||
tonumber(limit),
|
||||
unsupported_feeds_urls,
|
||||
download_full_article,
|
||||
@@ -338,9 +340,16 @@ function NewsDownloader:loadConfigAndProcessFeedsWithUI(touchmenu_instance)
|
||||
end)
|
||||
end
|
||||
|
||||
function NewsDownloader:processFeedSource(url, limit, unsupported_feeds_urls, download_full_article, include_images, message, enable_filter, filter_element)
|
||||
function NewsDownloader:processFeedSource(url, credentials, limit, unsupported_feeds_urls, download_full_article, include_images, message, enable_filter, filter_element)
|
||||
|
||||
local cookies = nil
|
||||
if credentials ~= nil then
|
||||
logger.dbg("Auth Cookies from ", cookies)
|
||||
cookies = DownloadBackend:getConnectionCookies(credentials.url, credentials.auth)
|
||||
end
|
||||
|
||||
local ok, response = pcall(function()
|
||||
return DownloadBackend:getResponseAsString(url)
|
||||
return DownloadBackend:getResponseAsString(url, cookies)
|
||||
end)
|
||||
local feeds
|
||||
-- Check to see if a response is available to deserialize.
|
||||
@@ -385,6 +394,7 @@ function NewsDownloader:processFeedSource(url, limit, unsupported_feeds_urls, do
|
||||
return self:processFeed(
|
||||
FEED_TYPE_ATOM,
|
||||
feeds,
|
||||
cookies,
|
||||
limit,
|
||||
download_full_article,
|
||||
include_images,
|
||||
@@ -398,6 +408,7 @@ function NewsDownloader:processFeedSource(url, limit, unsupported_feeds_urls, do
|
||||
return self:processFeed(
|
||||
FEED_TYPE_RSS,
|
||||
feeds,
|
||||
cookies,
|
||||
limit,
|
||||
download_full_article,
|
||||
include_images,
|
||||
@@ -450,7 +461,7 @@ function NewsDownloader:deserializeXMLString(xml_str)
|
||||
return xmlhandler.root
|
||||
end
|
||||
|
||||
function NewsDownloader:processFeed(feed_type, feeds, limit, download_full_article, include_images, message, enable_filter, filter_element)
|
||||
function NewsDownloader:processFeed(feed_type, feeds, cookies, limit, download_full_article, include_images, message, enable_filter, filter_element)
|
||||
local feed_title
|
||||
local feed_item
|
||||
local total_items
|
||||
@@ -504,6 +515,7 @@ function NewsDownloader:processFeed(feed_type, feeds, limit, download_full_artic
|
||||
if download_full_article then
|
||||
self:downloadFeed(
|
||||
feed,
|
||||
cookies,
|
||||
feed_output_dir,
|
||||
include_images,
|
||||
article_message,
|
||||
@@ -543,7 +555,7 @@ local function getTitleWithDate(feed)
|
||||
return title
|
||||
end
|
||||
|
||||
function NewsDownloader:downloadFeed(feed, feed_output_dir, include_images, message, enable_filter, filter_element)
|
||||
function NewsDownloader:downloadFeed(feed, cookies, feed_output_dir, include_images, message, enable_filter, filter_element)
|
||||
local title_with_date = getTitleWithDate(feed)
|
||||
local news_file_path = ("%s%s%s"):format(feed_output_dir,
|
||||
title_with_date,
|
||||
@@ -556,7 +568,7 @@ function NewsDownloader:downloadFeed(feed, feed_output_dir, include_images, mess
|
||||
logger.dbg("NewsDownloader: News file will be stored to :", news_file_path)
|
||||
local article_message = T(_("%1\n%2"), message, title_with_date)
|
||||
local link = getFeedLink(feed.link)
|
||||
local html = DownloadBackend:loadPage(link)
|
||||
local html = DownloadBackend:loadPage(link, cookies)
|
||||
DownloadBackend:createEpub(news_file_path, html, link, include_images, article_message, enable_filter, filter_element)
|
||||
end
|
||||
end
|
||||
|
||||
Reference in New Issue
Block a user