mirror of
https://github.com/Ranchero-Software/NetNewsWire
synced 2025-08-12 06:26:36 +00:00
Restore changes reverted in previous beta.
This commit is contained in:
@@ -189,13 +189,28 @@ public extension String {
|
||||
|
||||
/// Removes an HTML tag and everything between its start and end tags.
|
||||
///
|
||||
/// The regex pattern `<tag>[\\s\\S]*?</tag>` explanation:
|
||||
/// - `<` matches the literal `<` character.
|
||||
/// - `tag` matches the literal parameter provided to the function, e.g., `style`.
|
||||
/// - `>` matches the literal `>` character.
|
||||
/// - `[\\s\\S]*?`
|
||||
/// - `[\\s\\S]` matches _any_ character, including new lines.
|
||||
/// - `*` will match zero or more of the preceding character, in this case _any_
|
||||
/// character.
|
||||
/// - `?` switches the matching mode to [lazy](https://javascript.info/regexp-greedy-and-lazy)
|
||||
/// so it will match as few characters as possible before satisfying the rest of the pattern.
|
||||
/// - `<` matches the literal `<` character.
|
||||
/// - `/` matches the literal `/` character.
|
||||
/// - `tag` matches the literal parameter provided to the function, e.g., `style`.
|
||||
/// - `>` matches the literal `>` character.
|
||||
///
|
||||
/// - Parameter tag: The tag to remove.
|
||||
///
|
||||
/// - Returns: A new copy of `self` with the tag removed.
|
||||
///
|
||||
/// - Note: Doesn't work correctly with nested tags of the same name.
|
||||
private func removingTagAndContents(_ tag: String) -> String {
|
||||
return self.replacingOccurrences(of: "<\(tag).+?</\(tag)>", with: "", options: [.regularExpression, .caseInsensitive])
|
||||
return self.replacingOccurrences(of: "<\(tag)>[\\s\\S]*?</\(tag)>", with: "", options: [.regularExpression, .caseInsensitive])
|
||||
}
|
||||
|
||||
/// Strips HTML from a string.
|
||||
|
||||
@@ -7,25 +7,34 @@
|
||||
//
|
||||
|
||||
import Foundation
|
||||
import os
|
||||
import RSCore
|
||||
|
||||
public typealias DownloadCallback = (Data?, URLResponse?, Error?) -> Swift.Void
|
||||
public typealias DownloadCallback = @MainActor (Data?, URLResponse?, Error?) -> Swift.Void
|
||||
|
||||
/// Simple downloader, for a one-shot download like an image
|
||||
/// or a web page. For a download-feeds session, see DownloadSession.
|
||||
public final class Downloader {
|
||||
/// Caches response for a short time for GET requests. May return cached response.
|
||||
@MainActor public final class Downloader {
|
||||
|
||||
public static let shared = Downloader()
|
||||
private let urlSession: URLSession
|
||||
private var callbacks = [URL: [DownloadCallback]]()
|
||||
|
||||
// Cache — short-lived
|
||||
private let cache = Cache<DownloaderRecord>(timeToLive: 60 * 3, timeBetweenCleanups: 60 * 2)
|
||||
|
||||
nonisolated private static let logger = Logger(subsystem: Bundle.main.bundleIdentifier!, category: "Downloader")
|
||||
nonisolated private static let debugLoggingEnabled = false
|
||||
|
||||
private init() {
|
||||
|
||||
let sessionConfiguration = URLSessionConfiguration.ephemeral
|
||||
sessionConfiguration.requestCachePolicy = .reloadIgnoringLocalCacheData
|
||||
sessionConfiguration.httpShouldSetCookies = false
|
||||
sessionConfiguration.httpCookieAcceptPolicy = .never
|
||||
sessionConfiguration.httpMaximumConnectionsPerHost = 1
|
||||
sessionConfiguration.httpCookieStorage = nil
|
||||
|
||||
|
||||
if let userAgentHeaders = UserAgent.headers() {
|
||||
sessionConfiguration.httpAdditionalHeaders = userAgentHeaders
|
||||
}
|
||||
@@ -37,20 +46,103 @@ public final class Downloader {
|
||||
urlSession.invalidateAndCancel()
|
||||
}
|
||||
|
||||
public func download(_ url: URL, _ completion: DownloadCallback? = nil) {
|
||||
download(URLRequest(url: url), completion)
|
||||
public func download(_ url: URL, _ callback: @escaping DownloadCallback) {
|
||||
assert(Thread.isMainThread)
|
||||
download(URLRequest(url: url), callback)
|
||||
}
|
||||
|
||||
public func download(_ urlRequest: URLRequest, _ completion: DownloadCallback? = nil) {
|
||||
public func download(_ urlRequest: URLRequest, _ callback: @escaping DownloadCallback) {
|
||||
assert(Thread.isMainThread)
|
||||
|
||||
guard let url = urlRequest.url else {
|
||||
Self.logger.fault("Downloader: skipping download for URLRequest without a URL")
|
||||
return
|
||||
}
|
||||
|
||||
let isCacheableRequest = urlRequest.httpMethod == HTTPMethod.get
|
||||
|
||||
// Return cached record if available.
|
||||
if isCacheableRequest {
|
||||
if let cachedRecord = cache[url.absoluteString] {
|
||||
if Self.debugLoggingEnabled {
|
||||
Self.logger.debug("Downloader: returning cached record for \(url)")
|
||||
}
|
||||
callback(cachedRecord.data, cachedRecord.response, cachedRecord.error)
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
// Add callback. If there is already a download in progress for this URL, return early.
|
||||
if callbacks[url] == nil {
|
||||
if Self.debugLoggingEnabled {
|
||||
Self.logger.debug("Downloader: downloading \(url)")
|
||||
}
|
||||
callbacks[url] = [callback]
|
||||
} else {
|
||||
// A download is already in progress for this URL. Don’t start a separate download.
|
||||
// Add the callback to the callbacks array for this URL.
|
||||
if Self.debugLoggingEnabled {
|
||||
Self.logger.debug("Downloader: download in progress for \(url) — adding callback")
|
||||
}
|
||||
callbacks[url]?.append(callback)
|
||||
return
|
||||
}
|
||||
|
||||
var urlRequestToUse = urlRequest
|
||||
urlRequestToUse.addSpecialCaseUserAgentIfNeeded()
|
||||
|
||||
let task = urlSession.dataTask(with: urlRequestToUse) { (data, response, error) in
|
||||
DispatchQueue.main.async() {
|
||||
completion?(data, response, error)
|
||||
|
||||
if isCacheableRequest {
|
||||
if Self.debugLoggingEnabled {
|
||||
Self.logger.debug("Downloader: caching record for \(url)")
|
||||
}
|
||||
let cachedRecord = DownloaderRecord(data: data, response: response, error: error)
|
||||
self.cache[url.absoluteString] = cachedRecord
|
||||
}
|
||||
|
||||
Task { @MainActor in
|
||||
self.callAndReleaseCallbacks(url, data, response, error)
|
||||
}
|
||||
}
|
||||
task.resume()
|
||||
}
|
||||
}
|
||||
|
||||
private extension Downloader {

	/// Calls every callback registered for `url` with the given result and releases them.
	///
	/// The registered callbacks are removed from `callbacks` *before* any of them is
	/// invoked. If a callback starts another download for the same URL while it runs,
	/// that request therefore registers a fresh entry instead of being appended to a
	/// list that is about to be thrown away (which would leave its callback uncalled).
	///
	/// Must be called on the main thread; this is checked with `assert` in debug builds.
	///
	/// - Parameters:
	///   - url: The URL whose registered callbacks should be called.
	///   - data: The data to hand to each callback, if any.
	///   - response: The response to hand to each callback, if any.
	///   - error: The error to hand to each callback, if any.
	func callAndReleaseCallbacks(_ url: URL, _ data: Data? = nil, _ response: URLResponse? = nil, _ error: Error? = nil) {

		assert(Thread.isMainThread)

		// Take ownership of the pending callbacks and clear the entry in one step,
		// so the dictionary is already clean while the callbacks execute.
		guard let callbacksForURL = callbacks.removeValue(forKey: url) else {
			assertionFailure("Downloader: downloaded URL \(url) but no callbacks found")
			Self.logger.fault("Downloader: downloaded URL \(url) but no callbacks found")
			return
		}

		if Self.debugLoggingEnabled {
			let count = callbacksForURL.count
			if count == 1 {
				Self.logger.debug("Downloader: calling 1 callback for URL \(url)")
			} else {
				Self.logger.debug("Downloader: calling \(count) callbacks for URL \(url)")
			}
		}

		for callback in callbacksForURL {
			callback(data, response, error)
		}
	}
}
|
||||
|
||||
/// The outcome of one download — data, response, and error — stamped with
/// the moment the record was created.
struct DownloaderRecord: CacheRecord, Sendable {

	/// When this record was created.
	let dateCreated: Date

	/// The downloaded data, if any.
	let data: Data?

	/// The URL response, if any.
	let response: URLResponse?

	/// The error reported for the download, if any.
	let error: Error?

	/// Creates a record for the given result, stamping it with the current date.
	init(data: Data?, response: URLResponse?, error: Error?) {
		self.dateCreated = Date()
		self.data = data
		self.response = response
		self.error = error
	}
}
|
||||
|
||||
@@ -75,36 +75,38 @@ private extension HTMLMetadataDownloader {
|
||||
}
|
||||
|
||||
func downloadMetadata(_ url: String) {
|
||||
|
||||
|
||||
guard let actualURL = URL(string: url) else {
|
||||
if Self.debugLoggingEnabled {
|
||||
Self.logger.debug("HTMLMetadataDownloader skipping download for \(url) because it couldn’t construct a URL.")
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
|
||||
if Self.debugLoggingEnabled {
|
||||
Self.logger.debug("HTMLMetadataDownloader downloading for \(url)")
|
||||
}
|
||||
|
||||
Downloader.shared.download(actualURL) { data, response, error in
|
||||
if let data, !data.isEmpty, let response, response.statusIsOK {
|
||||
let urlToUse = response.url ?? actualURL
|
||||
let parserData = ParserData(url: urlToUse.absoluteString, data: data)
|
||||
let htmlMetadata = RSHTMLMetadataParser.htmlMetadata(with: parserData)
|
||||
if Self.debugLoggingEnabled {
|
||||
Self.logger.debug("HTMLMetadataDownloader caching parsed metadata for \(url)")
|
||||
|
||||
Task { @MainActor in
|
||||
Downloader.shared.download(actualURL) { data, response, error in
|
||||
if let data, !data.isEmpty, let response, response.statusIsOK {
|
||||
let urlToUse = response.url ?? actualURL
|
||||
let parserData = ParserData(url: urlToUse.absoluteString, data: data)
|
||||
let htmlMetadata = RSHTMLMetadataParser.htmlMetadata(with: parserData)
|
||||
if Self.debugLoggingEnabled {
|
||||
Self.logger.debug("HTMLMetadataDownloader caching parsed metadata for \(url)")
|
||||
}
|
||||
self.cache[url] = htmlMetadata
|
||||
return
|
||||
}
|
||||
|
||||
if let statusCode = response?.forcedStatusCode, (400...499).contains(statusCode) {
|
||||
self.noteURLDidReturn4xx(url)
|
||||
}
|
||||
|
||||
if Self.debugLoggingEnabled {
|
||||
Self.logger.debug("HTMLMetadataDownloader failed download for \(url)")
|
||||
}
|
||||
self.cache[url] = htmlMetadata
|
||||
return
|
||||
}
|
||||
|
||||
if let statusCode = response?.forcedStatusCode, (400...499).contains(statusCode) {
|
||||
self.noteURLDidReturn4xx(url)
|
||||
}
|
||||
|
||||
if Self.debugLoggingEnabled {
|
||||
Self.logger.debug("HTMLMetadataDownloader failed download for \(url)")
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user