NewsDownloader: add a cache to reduce wasted bandwidth (#13171)

Closes #13061.
This commit is contained in:
Frans de Jonge
2025-01-31 17:59:08 +01:00
committed by GitHub
parent 03500bc17e
commit d6941bc339
2 changed files with 68 additions and 9 deletions

View File

@@ -1,3 +1,5 @@
local CacheSQLite = require("cachesqlite")
local DataStorage = require("datastorage")
local Version = require("version")
local ffiutil = require("ffi/util")
local http = require("socket.http")
@@ -22,6 +24,12 @@ local EpubDownloadBackend = {
}
local max_redirects = 5; --prevent infinite redirects
-- Module-local cache of fetched feed responses, keyed by URL; each entry stores
-- the response headers plus body (see the insert in getUrlContent). Backed by
-- CacheSQLite with the DB file under the user's data dir, 500 slots, 10MB max.
-- NOTE(review): eviction semantics of `slots` vs `size` come from cachesqlite —
-- confirm against that module.
local FeedCache = CacheSQLite:new{
slots = 500,
db_path = DataStorage:getDataDir() .. "/cache/newsdownloader.sqlite",
size = 1024 * 1024 * 10, -- 10MB
}
-- filter HTML using CSS selector
local function filter(text, element)
local htmlparser = require("htmlparser")
@@ -150,8 +158,8 @@ local function build_cookies(cookies)
end
-- Get URL content
local function getUrlContent(url, cookies, timeout, maxtime, redirectCount)
logger.dbg("getUrlContent(", url, ",", cookies, ", ", timeout, ",", maxtime, ",", redirectCount, ")")
local function getUrlContent(url, cookies, timeout, maxtime, redirectCount, add_to_cache)
logger.dbg("getUrlContent(", url, ",", cookies, ", ", timeout, ",", maxtime, ",", redirectCount, ",", add_to_cache, ")")
if not redirectCount then
redirectCount = 0
elseif redirectCount == max_redirects then
@@ -209,7 +217,7 @@ local function getUrlContent(url, cookies, timeout, maxtime, redirectCount)
redirected_url = socket_url.build(parsed_redirect_location)
end
logger.dbg("getUrlContent: Redirecting to url: ", redirected_url)
return getUrlContent(redirected_url, timeout, maxtime, redirectCount + 1)
return getUrlContent(redirected_url, timeout, maxtime, redirectCount + 1, add_to_cache)
else
error("EpubDownloadBackend: Don't know how to handle HTTP response status:", status or code)
end
@@ -223,10 +231,23 @@ local function getUrlContent(url, cookies, timeout, maxtime, redirectCount)
return false, "Incomplete content received"
end
end
if add_to_cache then
logger.dbg("Adding to cache", url)
FeedCache:insert(url, {
headers = headers,
content = content,
})
end
logger.dbg("Returning content ok")
return true, content
end
-- Accessor for the module-local feed cache, so callers outside this file
-- (e.g. NewsDownloader:processFeedSource) can look up cached responses.
function EpubDownloadBackend:getCache()
    local cache = FeedCache
    return cache
end
function EpubDownloadBackend:getConnectionCookies(url, credentials)
local body = ""
@@ -258,9 +279,9 @@ function EpubDownloadBackend:getConnectionCookies(url, credentials)
return cookies
end
function EpubDownloadBackend:getResponseAsString(url, cookies)
function EpubDownloadBackend:getResponseAsString(url, cookies, add_to_cache)
logger.dbg("EpubDownloadBackend:getResponseAsString(", url, ")")
local success, content = getUrlContent(url, cookies)
local success, content = getUrlContent(url, cookies, nil, nil, nil, add_to_cache)
if (success) then
return content
else

View File

@@ -16,8 +16,11 @@ local NetworkMgr = require("ui/network/manager")
local Persist = require("persist")
local WidgetContainer = require("ui/widget/container/widgetcontainer")
local dateparser = require("lib.dateparser")
local http = require("socket.http")
local lfs = require("libs/libkoreader-lfs")
local ltn12 = require("ltn12")
local logger = require("logger")
local socket = require("socket")
local util = require("util")
local _ = require("gettext")
local T = FFIUtil.template
@@ -341,16 +344,51 @@ function NewsDownloader:loadConfigAndProcessFeedsWithUI(touchmenu_instance)
end
function NewsDownloader:processFeedSource(url, credentials, limit, unsupported_feeds_urls, download_full_article, include_images, message, enable_filter, filter_element)
-- Check if we have a cached response first
local cache = DownloadBackend:getCache()
local cached_response = cache:check(url)
local ok, response
local cookies = nil
if credentials ~= nil then
logger.dbg("Auth Cookies from ", cookies)
logger.dbg("Auth Cookies from ", credentials.url)
cookies = DownloadBackend:getConnectionCookies(credentials.url, credentials.auth)
end
local ok, response = pcall(function()
return DownloadBackend:getResponseAsString(url, cookies)
end)
if cached_response then
logger.dbg("NewsDownloader: Using cached response for ", url)
local headers_cached = cached_response.headers
logger.dbg("NewsDownloader: Cached response headers", headers_cached)
local last_modified = headers_cached["last-modified"]
if last_modified then
logger.dbg("NewsDownloader: sending If-Modified-Since", last_modified, url)
local response_body = {}
local headers = {
["If-Modified-Since"] = last_modified
}
if cookies then
headers["Cookie"] = cookies
end
local code, response_headers = socket.skip(1, http.request{
url = url,
headers = headers,
sink = ltn12.sink.table(response_body)
})
ok = (code == 304)
logger.dbg("NewsDownloader: If-Modified-Since response", code, response_headers)
if ok then
response = cached_response.content
end
end
end
if not response then
ok, response = pcall(function()
return DownloadBackend:getResponseAsString(url, cookies, true)
end)
end
local feeds
-- Check to see if a response is available to deserialize.
if ok then