NewsDownloader: add a cache to reduce wasted bandwidth (#13171)
Closes #13061.
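In short: the download backend now keeps a SQLite-backed cache of response headers and bodies, keyed by feed URL. Before re-downloading a feed, the plugin revalidates its cached copy with a conditional GET, sending an If-Modified-Since header built from the cached Last-Modified value; a 304 Not Modified reply means the cached body is reused and no feed content is transferred. A minimal sketch of the flow, where conditionalGet and fetch are hypothetical stand-ins for the code in the diff below:

    -- Sketch only: conditionalGet and fetch are hypothetical stand-ins
    -- for getUrlContent and the If-Modified-Since request in the diff.
    local cached = FeedCache:check(url)
    if cached and cached.headers["last-modified"] then
        local code = conditionalGet(url, cached.headers["last-modified"])
        if code == 304 then
            return cached.content -- unchanged upstream: reuse the cached body
        end
    end
    local headers, content = fetch(url) -- full download
    FeedCache:insert(url, { headers = headers, content = content })
    return content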
plugins/newsdownloader.koplugin/epubdownloadbackend.lua
@@ -1,3 +1,5 @@
+local CacheSQLite = require("cachesqlite")
+local DataStorage = require("datastorage")
 local Version = require("version")
 local ffiutil = require("ffi/util")
 local http = require("socket.http")
@@ -22,6 +24,12 @@ local EpubDownloadBackend = {
 }
 local max_redirects = 5; --prevent infinite redirects

+local FeedCache = CacheSQLite:new{
+    slots = 500,
+    db_path = DataStorage:getDataDir() .. "/cache/newsdownloader.sqlite",
+    size = 1024 * 1024 * 10, -- 10MB
+}
+
 -- filter HTML using CSS selector
 local function filter(text, element)
     local htmlparser = require("htmlparser")
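For readers unfamiliar with KOReader's CacheSQLite, this diff relies on only three of its operations: the new constructor above plus insert and check. A hedged usage sketch follows; the assumption that slots caps the entry count while size caps the total byte budget is inferred from the constructor arguments, and the URL and header value are illustrative:

    -- What the backend stores per feed URL (see the insert call later in this diff).
    FeedCache:insert("https://example.com/feed.xml", {
        headers = { ["last-modified"] = "Wed, 21 Oct 2015 07:28:00 GMT" },
        content = "<rss>...</rss>",
    })

    -- check() returns the stored table on a hit, so both pieces come back together.
    local hit = FeedCache:check("https://example.com/feed.xml")
    if hit then
        print(hit.headers["last-modified"], #hit.content)
    end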
@@ -150,8 +158,8 @@ local function build_cookies(cookies)
 end

 -- Get URL content
-local function getUrlContent(url, cookies, timeout, maxtime, redirectCount)
-    logger.dbg("getUrlContent(", url, ",", cookies, ", ", timeout, ",", maxtime, ",", redirectCount, ")")
+local function getUrlContent(url, cookies, timeout, maxtime, redirectCount, add_to_cache)
+    logger.dbg("getUrlContent(", url, ",", cookies, ", ", timeout, ",", maxtime, ",", redirectCount, ",", add_to_cache, ")")
     if not redirectCount then
         redirectCount = 0
     elseif redirectCount == max_redirects then
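Note that Lua arguments are positional, so a caller that wants caching but default timeout and redirect behavior has to pad the intervening parameters with nil, exactly as getResponseAsString does in a later hunk:

    -- timeout, maxtime and redirectCount are nil here; getUrlContent falls back
    -- to its defaults (redirectCount becomes 0 via the branch above).
    local success, content = getUrlContent(url, cookies, nil, nil, nil, true)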
@@ -209,7 +217,7 @@ local function getUrlContent(url, cookies, timeout, maxtime, redirectCount)
             redirected_url = socket_url.build(parsed_redirect_location)
         end
         logger.dbg("getUrlContent: Redirecting to url: ", redirected_url)
-        return getUrlContent(redirected_url, timeout, maxtime, redirectCount + 1)
+        return getUrlContent(redirected_url, timeout, maxtime, redirectCount + 1, add_to_cache)
     else
         error("EpubDownloadBackend: Don't know how to handle HTTP response status:", status or code)
     end
@@ -223,10 +231,23 @@ local function getUrlContent(url, cookies, timeout, maxtime, redirectCount)
             return false, "Incomplete content received"
         end
     end

+    if add_to_cache then
+        logger.dbg("Adding to cache", url)
+        FeedCache:insert(url, {
+            headers = headers,
+            content = content,
+        })
+    end
+
     logger.dbg("Returning content ok")
     return true, content
 end

+function EpubDownloadBackend:getCache()
+    return FeedCache
+end
+
 function EpubDownloadBackend:getConnectionCookies(url, credentials)
     local body = ""
@@ -258,9 +279,9 @@ function EpubDownloadBackend:getConnectionCookies(url, credentials)
     return cookies
 end

-function EpubDownloadBackend:getResponseAsString(url, cookies)
+function EpubDownloadBackend:getResponseAsString(url, cookies, add_to_cache)
     logger.dbg("EpubDownloadBackend:getResponseAsString(", url, ")")
-    local success, content = getUrlContent(url, cookies)
+    local success, content = getUrlContent(url, cookies, nil, nil, nil, add_to_cache)
     if (success) then
         return content
     else
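Caching is opt-in per request: passing add_to_cache = true stores a successful response in FeedCache under its URL, while omitting it preserves the old behavior. A sketch of the call shape (the URL is illustrative; the pcall mirrors how main.lua guards this call, since the backend can raise on a failed download):

    local ok, response = pcall(function()
        -- true -> a successful response is inserted into FeedCache
        return DownloadBackend:getResponseAsString("https://example.com/feed.xml", nil, true)
    end)
    if ok then
        -- response holds the feed body, and FeedCache now has headers + content
    end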
plugins/newsdownloader.koplugin/main.lua
@@ -16,8 +16,11 @@ local NetworkMgr = require("ui/network/manager")
 local Persist = require("persist")
 local WidgetContainer = require("ui/widget/container/widgetcontainer")
 local dateparser = require("lib.dateparser")
+local http = require("socket.http")
 local lfs = require("libs/libkoreader-lfs")
+local ltn12 = require("ltn12")
 local logger = require("logger")
+local socket = require("socket")
 local util = require("util")
 local _ = require("gettext")
 local T = FFIUtil.template
@@ -341,16 +344,51 @@ function NewsDownloader:loadConfigAndProcessFeedsWithUI(touchmenu_instance)
 end

 function NewsDownloader:processFeedSource(url, credentials, limit, unsupported_feeds_urls, download_full_article, include_images, message, enable_filter, filter_element)
+    -- Check if we have a cached response first
+    local cache = DownloadBackend:getCache()
+    local cached_response = cache:check(url)
+    local ok, response
+
     local cookies = nil
     if credentials ~= nil then
-        logger.dbg("Auth Cookies from ", cookies)
+        logger.dbg("Auth Cookies from ", credentials.url)
         cookies = DownloadBackend:getConnectionCookies(credentials.url, credentials.auth)
     end

-    local ok, response = pcall(function()
-        return DownloadBackend:getResponseAsString(url, cookies)
-    end)
+    if cached_response then
+        logger.dbg("NewsDownloader: Using cached response for ", url)
+        local headers_cached = cached_response.headers
+        logger.dbg("NewsDownloader: Cached response headers", headers_cached)
+
+        local last_modified = headers_cached["last-modified"]
+        if last_modified then
+            logger.dbg("NewsDownloader: sending If-Modified-Since", last_modified, url)
+            local response_body = {}
+            local headers = {
+                ["If-Modified-Since"] = last_modified
+            }
+            if cookies then
+                headers["Cookie"] = cookies
+            end
+            local code, response_headers = socket.skip(1, http.request{
+                url = url,
+                headers = headers,
+                sink = ltn12.sink.table(response_body)
+            })
+            ok = (code == 304)
+            logger.dbg("NewsDownloader: If-Modified-Since response", code, response_headers)
+            if ok then
+                response = cached_response.content
+            end
+        end
+    end
+
+    if not response then
+        ok, response = pcall(function()
+            return DownloadBackend:getResponseAsString(url, cookies, true)
+        end)
+    end

     local feeds
     -- Check to see if a response is available to deserialize.
     if ok then
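The revalidation branch above is where the bandwidth saving happens: the feed is only re-downloaded when the server cannot confirm the cached copy is current. For readability, here is the same logic pulled out into a self-contained helper; this refactor is not part of the commit, just an equivalent sketch using the modules required above:

    -- Hypothetical helper equivalent to the revalidation branch above.
    -- Returns the cached body when the server answers 304 Not Modified, else nil.
    local function getCachedIfFresh(url, cookies)
        local cached = DownloadBackend:getCache():check(url)
        local last_modified = cached and cached.headers and cached.headers["last-modified"]
        if not last_modified then return nil end

        local headers = { ["If-Modified-Since"] = last_modified }
        if cookies then headers["Cookie"] = cookies end
        local code = socket.skip(1, http.request{
            url = url,
            headers = headers,
            sink = ltn12.sink.table({}), -- body discarded; only the status code matters
        })
        if code == 304 then
            return cached.content
        end
        return nil
    end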