Trigger an HTMLMetadata download not more than once an hour (per URL).

This commit is contained in:
Brent Simmons
2024-10-15 21:25:32 -07:00
parent 776adcb63b
commit 20b66e1a5b
4 changed files with 62 additions and 38 deletions

View File

@@ -16,6 +16,10 @@ public extension Date {
return addingTimeInterval(0.0 - TimeInterval(days: days))
}
/// Returns a date that is `hours` hours earlier than this date.
///
/// - Parameter hours: Number of whole hours to move back; a negative value
///   produces a later date instead.
/// - Returns: A new `Date`; the receiver is not modified.
func bySubtracting(hours: Int) -> Date {
	let interval = TimeInterval(hours: hours)
	// Negate via `0.0 - interval` and add, so the offset moves backwards in time.
	let offset = 0.0 - interval
	return addingTimeInterval(offset)
}
/// Returns a date that is `days` days later than this date.
///
/// - Parameter days: Number of whole days to move forward; a negative value
///   produces an earlier date instead.
/// - Returns: A new `Date`; the receiver is not modified.
func byAdding(days: Int) -> Date {
	let interval = TimeInterval(days: days)
	return addingTimeInterval(interval)
}
@@ -26,4 +30,8 @@ public extension TimeInterval {
/// Creates a time interval, in seconds, spanning the given number of days.
///
/// - Parameter days: Whole days to convert; each day counts as 24 * 60 * 60 seconds.
init(days: Int) {
	let secondsPerDay = 24 * 60 * 60
	let seconds = days * secondsPerDay
	self.init(seconds)
}
/// Creates a time interval, in seconds, spanning the given number of hours.
///
/// - Parameter hours: Whole hours to convert; each hour counts as 60 * 60 seconds.
init(hours: Int) {
	let secondsPerHour = 60 * 60
	let seconds = hours * secondsPerHour
	self.init(seconds)
}
}

View File

@@ -30,7 +30,7 @@ import UniformTypeIdentifiers
print("findFaviconURLs \(homePageURL)")
// If the favicon has an explicit type, check that for an ignored type; otherwise, check the file extension.
let htmlMetadata = HTMLMetadataDownloader.cachedMetadata(for: homePageURL)
let htmlMetadata = HTMLMetadataDownloader.shared.cachedMetadata(for: homePageURL)
let faviconURLs = htmlMetadata?.favicons?.compactMap { favicon -> String? in
shouldAllowFavicon(favicon) ? favicon.urlString : nil

View File

@@ -156,7 +156,7 @@ private extension FeedIconDownloader {
return nil
}
guard let metadata = HTMLMetadataDownloader.cachedMetadata(for: homePageURL) else {
guard let metadata = HTMLMetadataDownloader.shared.cachedMetadata(for: homePageURL) else {
return nil
}

View File

@@ -11,74 +11,90 @@ import os
import Web
import Parser
public struct HTMLMetadataDownloader {
public final class HTMLMetadataDownloader: Sendable {
static let shared = HTMLMetadataDownloader()
nonisolated(unsafe) private static let logger = Logger(subsystem: Bundle.main.bundleIdentifier!, category: "HTMLMetadataDownloader")
private static let debugLoggingEnabled = false
private static let cache = HTMLMetadataCache()
private let debugLoggingEnabled = false
private let cache = HTMLMetadataCache()
private let attemptDatesLock = OSAllocatedUnfairLock(initialState: [String: Date]())
public static func cachedMetadata(for url: String) -> HTMLMetadata? {
public func cachedMetadata(for url: String) -> HTMLMetadata? {
if debugLoggingEnabled {
logger.debug("HTMLMetadataDownloader requested cached metadata for \(url)")
Self.logger.debug("HTMLMetadataDownloader requested cached metadata for \(url)")
}
guard let htmlMetadata = cache[url] else {
downloadMetadataIfNeeded(url)
return nil
}
if debugLoggingEnabled {
logger.debug("HTMLMetadataDownloader returning cached metadata for \(url)")
Self.logger.debug("HTMLMetadataDownloader returning cached metadata for \(url)")
}
return htmlMetadata
}
}
public static func downloadMetadata(for url: String) async -> HTMLMetadata? {
private extension HTMLMetadataDownloader {
if let htmlMetadata = cachedMetadata(for: url) {
return htmlMetadata
/// Starts a metadata download for `url` unless one was already attempted
/// within the last hour.
///
/// The attempt time is recorded under `attemptDatesLock` before the download
/// is started, so the one-hour limit applies whether or not the download
/// later succeeds.
///
/// - Parameter url: The URL string whose metadata should be downloaded; also
///   the key used to track attempt dates.
private func downloadMetadataIfNeeded(_ url: String) {
	// We try a download once an hour at most.
	let isThrottled = attemptDatesLock.withLock { attemptDates -> Bool in
		let now = Date()
		let oneHourAgo = now.bySubtracting(hours: 1)

		// No attempt yet, or the last one is at least an hour old: record this attempt and proceed.
		guard let lastAttempt = attemptDates[url], lastAttempt > oneHourAgo else {
			attemptDates[url] = now
			return false
		}

		if debugLoggingEnabled {
			Self.logger.debug("HTMLMetadataDownloader skipping download for \(url) because an attempt was made less than an hour ago.")
		}
		return true
	}

	guard !isThrottled else {
		return
	}
	downloadMetadata(url)
}
private func downloadMetadata(_ url: String) {
guard let actualURL = URL(string: url) else {
return nil
if debugLoggingEnabled {
Self.logger.debug("HTMLMetadataDownloader skipping download for \(url) because it couldnt construct a URL.")
}
return
}
if debugLoggingEnabled {
logger.debug("HTMLMetadataDownloader downloading for \(url)")
Self.logger.debug("HTMLMetadataDownloader downloading for \(url)")
}
let downloadRecord = try? await DownloadWithCacheManager.shared.download(actualURL)
let data = downloadRecord?.data
let response = downloadRecord?.response
Task {
let downloadRecord = try? await DownloadWithCacheManager.shared.download(actualURL)
let data = downloadRecord?.data
let response = downloadRecord?.response
if let data, !data.isEmpty, let response, response.statusIsOK {
let urlToUse = response.url ?? actualURL
let parserData = ParserData(url: urlToUse.absoluteString, data: data)
if let htmlMetadata = await parseMetadata(with: parserData) {
if let data, !data.isEmpty, let response, response.statusIsOK {
let urlToUse = response.url ?? actualURL
let parserData = ParserData(url: urlToUse.absoluteString, data: data)
let htmlMetadata = HTMLMetadataParser.metadata(with: parserData)
if debugLoggingEnabled {
logger.debug("HTMLMetadataDownloader caching parsed metadata for \(url)")
Self.logger.debug("HTMLMetadataDownloader caching parsed metadata for \(url)")
}
cache[url] = htmlMetadata
return htmlMetadata
cache[url] = htmlMetadata
return
}
if debugLoggingEnabled {
logger.debug("HTMLMetadataDownloader parser returned nil for \(url)")
Self.logger.debug("HTMLMetadataDownloader failed download for \(url)")
}
return nil
}
if debugLoggingEnabled {
logger.debug("HTMLMetadataDownloader failed download for \(url)")
}
return nil
}
/// Parses HTML metadata from downloaded page data.
///
/// An `async` wrapper that forwards directly to `HTMLMetadataParser.metadata(with:)`.
///
/// - Parameter parserData: The URL and raw data to hand to the parser.
/// - Returns: Whatever `HTMLMetadataParser.metadata(with:)` produces for `parserData`.
private static func parseMetadata(with parserData: ParserData) async -> HTMLMetadata? {
	HTMLMetadataParser.metadata(with: parserData)
}
}