mirror of
https://github.com/Ranchero-Software/NetNewsWire
synced 2025-08-12 06:26:36 +00:00
467 lines
12 KiB
Swift
Executable File
467 lines
12 KiB
Swift
Executable File
//
|
|
// DownloadSession.swift
|
|
// RSWeb
|
|
//
|
|
// Created by Brent Simmons on 3/12/16.
|
|
// Copyright © 2016 Ranchero Software, LLC. All rights reserved.
|
|
//
|
|
|
|
import Foundation
|
|
import os
|
|
|
|
// Create a DownloadSessionDelegate, then create a DownloadSession.
|
|
// To download things: call download with a set of URLs. DownloadSession will call the various delegate methods.
|
|
|
|
/// Callbacks a `DownloadSession` makes while it works through a batch of URLs.
public protocol DownloadSessionDelegate {

    /// Asked once per URL just before its request is built.
    /// Return conditional GET info to have its headers added to the request, or `nil` for none.
    func downloadSession(_ downloadSession: DownloadSession, conditionalGetInfoFor: URL) -> HTTPConditionalGetInfo?

    /// Reports that the task for a URL has completed, passing along the response (if one was received),
    /// the data accumulated so far, and the error (if any).
    func downloadSession(_ downloadSession: DownloadSession, downloadDidComplete: URL, response: URLResponse?, data: Data, error: NSError?)

    /// Called each time more data arrives, with all data accumulated so far for the URL.
    /// Return `false` to have the session cancel that download.
    func downloadSession(_ downloadSession: DownloadSession, shouldContinueAfterReceivingData: Data, url: URL) -> Bool

    /// Called when a batch has no pending, in-progress, or queued downloads left.
    func downloadSessionDidComplete(_ downloadSession: DownloadSession)
}
|
|
|
|
/// Downloads a set of URLs over a private, ephemeral `URLSession` and reports
/// progress and results to a `DownloadSessionDelegate`.
///
/// The session is configured without cookies or a URL cache, with a 15-second
/// request timeout and one connection per host. Its delegate callbacks are
/// delivered on the main operation queue.
@objc public final class DownloadSession: NSObject {

    /// Progress for the current batch of downloads.
    public let downloadProgress = DownloadProgress(numberOfTasks: 0)

    // Implicitly unwrapped because the session takes `self` as its delegate,
    // which is only possible after `super.init()` has run.
    private var urlSession: URLSession!

    /// Tasks that have received a response.
    private var tasksInProgress = Set<URLSessionTask>()

    /// Tasks that have been resumed but have not yet received a response.
    private var tasksPending = Set<URLSessionTask>()

    /// Per-task bookkeeping, keyed by `URLSessionTask.taskIdentifier`.
    private var taskIdentifierToInfoDictionary = [Int: DownloadInfo]()

    /// The (filtered) URLs of the batch currently being downloaded.
    private var urlsInSession = Set<URL>()

    private let delegate: DownloadSessionDelegate

    /// Redirects seen earlier, mapping the originally requested URL to the URL it redirected to.
    private var redirectCache = [URL: URL]()

    /// URLs waiting for a free slot because too many tasks were already pending.
    private var queue = [URL]()

    // 429 Too Many Requests responses, keyed by host.
    private var retryAfterMessages = [String: HTTPResponse429]()

    /// URLs with 400-499 responses (except for 429).
    /// These URLs are skipped for the rest of the session.
    private var urlsWith400s = Set<URL>()

    /// Creates a download session that reports to `delegate`.
    public init(delegate: DownloadSessionDelegate) {

        self.delegate = delegate

        super.init()

        let sessionConfiguration = URLSessionConfiguration.ephemeral
        sessionConfiguration.requestCachePolicy = .reloadIgnoringLocalCacheData
        sessionConfiguration.timeoutIntervalForRequest = 15.0
        sessionConfiguration.httpShouldSetCookies = false
        sessionConfiguration.httpCookieAcceptPolicy = .never
        sessionConfiguration.httpMaximumConnectionsPerHost = 1
        sessionConfiguration.httpCookieStorage = nil
        sessionConfiguration.urlCache = nil

        if let userAgentHeaders = UserAgent.headers() {
            sessionConfiguration.httpAdditionalHeaders = userAgentHeaders
        }

        urlSession = URLSession(configuration: sessionConfiguration, delegate: self, delegateQueue: OperationQueue.main)
    }

    deinit {
        urlSession.invalidateAndCancel()
    }

    // MARK: - API

    /// Cancels every download in the current batch, including URLs that are
    /// still waiting in the queue.
    public func cancelAll() {

        // Forget queued URLs first. Otherwise every cancelled task's completion
        // (removeTask → addDataTaskFromQueueIfNecessary) would start the next
        // queued URL, and the batch would keep downloading after being cancelled.
        // Like `download(_:)`, this touches session state directly on the calling thread.
        queue.removeAll()

        urlSession.getTasksWithCompletionHandler { dataTasks, uploadTasks, downloadTasks in
            for task in dataTasks {
                task.cancel()
            }
            for task in uploadTasks {
                task.cancel()
            }
            for task in downloadTasks {
                task.cancel()
            }
        }
    }

    /// Starts downloading `urls`, after filtering them with `filteredURLs(_:)`.
    ///
    /// - Parameter urls: The URLs to download.
    public func download(_ urls: Set<URL>) {

        let filteredURLs = Self.filteredURLs(urls)

        for url in filteredURLs {
            addDataTask(url)
        }

        urlsInSession = filteredURLs
        updateDownloadProgress()
    }
}
|
|
|
|
// MARK: - URLSessionTaskDelegate
|
|
|
|
extension DownloadSession: URLSessionTaskDelegate {

    /// Redirect status codes whose target URL gets remembered in the redirect cache.
    private static let redirectStatusCodes = Set([HTTPResponseCode.redirectPermanent, HTTPResponseCode.redirectTemporary, HTTPResponseCode.redirectVeryTemporary, HTTPResponseCode.redirectPermanentPreservingMethod])

    /// A task finished (successfully or not): tell the delegate, then drop the task's bookkeeping.
    public func urlSession(_ session: URLSession, task: URLSessionTask, didCompleteWithError error: Error?) {

        // The task is done either way, so its bookkeeping always goes away on exit.
        defer { removeTask(task) }

        // No info means the task was already removed earlier; nothing to report.
        if let downloadInfo = infoForTask(task) {
            delegate.downloadSession(self, downloadDidComplete: downloadInfo.url, response: downloadInfo.urlResponse, data: downloadInfo.data as Data, error: error as NSError?)
        }
    }

    /// A redirect is about to be followed: remember it if its status code qualifies,
    /// and swap in the openrss.org user agent when the redirect target is an openrss.org URL.
    public func urlSession(_ session: URLSession, task: URLSessionTask, willPerformHTTPRedirection response: HTTPURLResponse, newRequest request: URLRequest, completionHandler: @escaping (URLRequest?) -> Void) {

        let shouldRememberRedirect = Self.redirectStatusCodes.contains(response.statusCode)
        if shouldRememberRedirect, let oldURL = task.originalRequest?.url, let newURL = request.url {
            cacheRedirect(oldURL, newURL)
        }

        guard let redirectURL = request.url, redirectURL.isOpenRSSOrgURL else {
            // Ordinary redirect: follow it with the request as proposed.
            completionHandler(request)
            return
        }

        var modifiedRequest = request
        modifiedRequest.setValue(UserAgent.openRSSOrgUserAgent, forHTTPHeaderField: HTTPRequestHeader.userAgent)
        completionHandler(modifiedRequest)
    }
}
|
|
|
|
// MARK: - URLSessionDataDelegate
|
|
|
|
extension DownloadSession: URLSessionDataDelegate {

    /// A response arrived for a task. OK responses are allowed to continue;
    /// anything else is cancelled, with 429 and other 4xx statuses recorded.
    public func urlSession(_ session: URLSession, dataTask: URLSessionDataTask, didReceive response: URLResponse, completionHandler: @escaping (URLSession.ResponseDisposition) -> Void) {

        // Progress is refreshed on every exit path.
        defer { updateDownloadProgress() }

        // Having a response moves the task from pending to in progress.
        tasksInProgress.insert(dataTask)
        tasksPending.remove(dataTask)

        infoForTask(dataTask)?.urlResponse = response

        if response.statusIsOK {
            // A pending slot just opened up, so the next queued URL may start.
            addDataTaskFromQueueIfNecessary()
            completionHandler(.allow)
            return
        }

        completionHandler(.cancel)
        removeTask(dataTask)

        let statusCode = response.forcedStatusCode
        let isTooManyRequests = statusCode == HTTPResponseCode.tooManyRequests

        if isTooManyRequests {
            handle429Response(dataTask, response)
        } else if (400...499).contains(statusCode), let failedURL = response.url {
            // Remember the URL so it is skipped for the rest of the session.
            urlsWith400s.insert(failedURL)
        }
    }

    /// More data arrived for a task. The data is accumulated, and the delegate
    /// decides whether the download should continue.
    public func urlSession(_ session: URLSession, dataTask: URLSessionDataTask, didReceive data: Data) {

        guard let downloadInfo = infoForTask(dataTask) else { return }
        downloadInfo.addData(data)

        let shouldContinue = delegate.downloadSession(self, shouldContinueAfterReceivingData: downloadInfo.data as Data, url: downloadInfo.url)
        if shouldContinue {
            return
        }

        dataTask.cancel()
        removeTask(dataTask)
    }
}
|
|
|
|
// MARK: - Private
|
|
|
|
private extension DownloadSession {
|
|
|
|
/// Starts a data task for `url`, or queues the URL when 500 tasks are already pending.
///
/// The request goes to the cached redirect target when one exists. Nothing is
/// started when that target is blocked by an active 429 or by an earlier 4xx response.
func addDataTask(_ url: URL) {

    // Too many tasks awaiting a response: park the URL at the front of the queue
    // (URLs are taken from the back, so the queue is first in, first out).
    if tasksPending.count >= 500 {
        queue.insert(url, at: 0)
        return
    }

    // If a redirect was received earlier, use that URL.
    let urlToUse = cachedRedirect(for: url) ?? url

    guard !requestShouldBeDroppedDueToActive429(urlToUse) else {
        os_log(.debug, "Dropping request for previous 429: \(urlToUse)")
        return
    }
    guard !requestShouldBeDroppedDueToPrevious400(urlToUse) else {
        os_log(.debug, "Dropping request for previous 400-499: \(urlToUse)")
        return
    }

    // The delegate and the openrss.org check are consulted with the original URL,
    // while the request itself targets the (possibly redirected) URL.
    var urlRequest = URLRequest(url: urlToUse)
    if let conditionalGetInfo = delegate.downloadSession(self, conditionalGetInfoFor: url) {
        conditionalGetInfo.addRequestHeadersToURLRequest(&urlRequest)
    }
    if url.isOpenRSSOrgURL {
        urlRequest.setValue(UserAgent.openRSSOrgUserAgent, forHTTPHeaderField: HTTPRequestHeader.userAgent)
    }

    let task = urlSession.dataTask(with: urlRequest)
    taskIdentifierToInfoDictionary[task.taskIdentifier] = DownloadInfo(url)
    tasksPending.insert(task)
    task.resume()
}
|
|
|
|
/// Starts the next queued URL, if fewer than 500 tasks are pending.
///
/// `addDataTask(_:)` returns without starting anything when a URL is blocked by
/// an active 429 or an earlier 4xx response. Such URLs are skipped here and the
/// next one is tried; otherwise a dropped URL would use up this opportunity, and
/// with no tasks left in flight nothing would ever drain the rest of the queue.
func addDataTaskFromQueueIfNecessary() {
    while tasksPending.count < 500, let url = queue.popLast() {
        let pendingCountBefore = tasksPending.count
        addDataTask(url)

        // A task was actually started — one per call is all that's wanted.
        if tasksPending.count > pendingCountBefore {
            return
        }
    }
}
|
|
|
|
/// Looks up the download bookkeeping for `task` by its task identifier.
/// Returns `nil` when the task is not (or no longer) tracked.
func infoForTask(_ task: URLSessionTask) -> DownloadInfo? {
    let identifier = task.taskIdentifier
    return taskIdentifierToInfoDictionary[identifier]
}
|
|
|
|
/// Stops tracking `task`, then lets a queued URL start and refreshes progress.
func removeTask(_ task: URLSessionTask) {
    tasksPending.remove(task)
    tasksInProgress.remove(task)
    taskIdentifierToInfoDictionary.removeValue(forKey: task.taskIdentifier)

    // A slot may have opened up for a queued URL.
    addDataTaskFromQueueIfNecessary()

    updateDownloadProgress()
}
|
|
|
|
/// Returns `true` when `urlString` contains, case-insensitively, one of a fixed
/// list of substrings that mark a redirect target as one not to remember.
func urlStringIsBlackListedRedirect(_ urlString: String) -> Bool {

    // Hotels and similar often do permanent redirects. We can catch some of those.
    let blockedFragments = ["solutionip", "lodgenet", "monzoon", "landingpage", "btopenzone", "register", "login", "authentic"]
    let lowercasedURLString = urlString.lowercased()

    return blockedFragments.contains { fragment in
        lowercasedURLString.contains(fragment)
    }
}
|
|
|
|
/// Remembers that `oldURL` redirects to `newURL`, unless the target is blacklisted.
func cacheRedirect(_ oldURL: URL, _ newURL: URL) {
    let targetIsBlackListed = urlStringIsBlackListedRedirect(newURL.absoluteString)
    guard !targetIsBlackListed else { return }

    redirectCache[oldURL] = newURL
}
|
|
|
|
/// Resolves `url` through the redirect cache, following chains of redirects.
///
/// - Returns: The final URL of the chain, or `nil` when there is no cached
///   redirect for `url` or when the chain loops back on itself.
func cachedRedirect(for url: URL) -> URL? {

    var visitedURLs: Set<URL> = [url]
    var resolvedURL = url

    while let nextURL = redirectCache[resolvedURL] {
        // `insert` reports whether the URL is new; a repeat means a cycle. Bail.
        guard visitedURLs.insert(nextURL).inserted else {
            return nil
        }
        resolvedURL = nextURL
    }

    return resolvedURL == url ? nil : resolvedURL
}
|
|
|
|
// MARK: - Download Progress
|
|
|
|
/// Recomputes `downloadProgress` from the current bookkeeping and notifies the
/// delegate when the batch has nothing left to do.
func updateDownloadProgress() {

    downloadProgress.numberOfTasks = urlsInSession.count

    // Everything not yet finished: awaiting a response, receiving data, or queued.
    let outstandingCount = tasksPending.count + tasksInProgress.count + queue.count
    downloadProgress.numberRemaining = min(outstandingCount, downloadProgress.numberOfTasks)

    // Complete?
    let isComplete = downloadProgress.numberOfTasks > 0 && downloadProgress.numberRemaining < 1
    guard isComplete else { return }

    delegate.downloadSessionDidComplete(self)
    urlsInSession.removeAll()
}
|
|
|
|
// MARK: - 429 Too Many Requests
|
|
|
|
/// Records a 429 response for its host and cancels the other tasks for that host.
/// Does nothing when no usable retry-after information can be extracted.
func handle429Response(_ dataTask: URLSessionDataTask, _ response: URLResponse) {
    if let message = createHTTPResponse429(dataTask, response) {
        let host = message.host
        retryAfterMessages[host] = message
        cancelAndRemoveTasksWithHost(host)
    }
}
|
|
|
|
/// Builds an `HTTPResponse429` from a task and its response.
///
/// - Returns: `nil` when the task has no URL, the response is not an HTTP
///   response, or its Retry-After header is missing or is not a number greater
///   than zero (so the HTTP-date form of Retry-After yields `nil` as well).
func createHTTPResponse429(_ dataTask: URLSessionDataTask, _ response: URLResponse) -> HTTPResponse429? {

    guard
        let url = dataTask.currentRequest?.url ?? dataTask.originalRequest?.url,
        let httpResponse = response as? HTTPURLResponse,
        let retryAfterValue = httpResponse.value(forHTTPHeaderField: HTTPResponseHeader.retryAfter),
        let retryAfter = TimeInterval(retryAfterValue),
        retryAfter > 0
    else {
        return nil
    }

    return HTTPResponse429(url: url, retryAfter: retryAfter)
}
|
|
|
|
/// Applies `cancelAndRemoveTasksWithHost(_:in:)` for `host` to the in-progress
/// tasks and then to the pending tasks.
func cancelAndRemoveTasksWithHost(_ host: String) {
    // Kept as two sequential calls: the pending set is read only after the
    // first call has finished, so it reflects any changes that call made.
    cancelAndRemoveTasksWithHost(host, in: tasksInProgress)
    cancelAndRemoveTasksWithHost(host, in: tasksPending)
}
|
|
|
|
/// Cancels and stops tracking every task in `tasks` whose host matches `host`.
///
/// The predicate used to be inverted: it selected the tasks whose host did *not*
/// match, so a 429 from one host cancelled the downloads for every other host
/// and left the throttled host's tasks running.
///
/// - Parameters:
///   - host: The host to match, compared case-insensitively.
///   - tasks: The tasks to search. This is a snapshot, so removing tasks from
///     the session's own sets while iterating is safe.
func cancelAndRemoveTasksWithHost(_ host: String, in tasks: Set<URLSessionTask>) {

    let lowercaseHost = host.lowercased()

    let tasksToRemove = tasks.filter { task in
        // Tasks without a host can't belong to `host`; leave them alone.
        guard let taskHost = task.lowercaseHost else {
            return false
        }
        // Substring match, as before — presumably so subdomains match too. TODO confirm.
        return taskHost.contains(lowercaseHost)
    }

    // Cancel everything first, then update the bookkeeping.
    for task in tasksToRemove {
        task.cancel()
    }
    for task in tasksToRemove {
        removeTask(task)
    }
}
|
|
|
|
/// Whether a request to `url` should be skipped because its host returned a 429
/// whose resume date has not yet passed. A record whose resume date has passed
/// is removed as a side effect.
func requestShouldBeDroppedDueToActive429(_ url: URL) -> Bool {

    guard let host = url.host(), let retryAfterMessage = retryAfterMessages[host] else {
        return false
    }

    let waitIsOver = retryAfterMessage.resumeDate < Date()
    if waitIsOver {
        // The record has served its purpose; forget it.
        retryAfterMessages[host] = nil
    }

    return !waitIsOver
}
|
|
|
|
// MARK: - 400-499 responses
|
|
|
|
/// Whether a request to `url` should be skipped because it, or the URL it is
/// known to redirect to, got a 400-499 response earlier in this session.
func requestShouldBeDroppedDueToPrevious400(_ url: URL) -> Bool {

    if urlsWith400s.contains(url) {
        return true
    }

    guard let redirectedURL = cachedRedirect(for: url) else {
        return false
    }
    return urlsWith400s.contains(redirectedURL)
}
|
|
|
|
// MARK: - Filtering URLs
|
|
|
|
/// User defaults key under which the last openrss.org refresh date is stored.
static private let lastOpenRSSOrgFeedRefreshKey = "lastOpenRSSOrgFeedRefresh"

/// The date of the last openrss.org refresh, persisted in standard user defaults.
/// Reads as `Date.distantPast` when nothing (or a non-date value) is stored.
static private var lastOpenRSSOrgFeedRefresh: Date {
    get {
        guard let storedDate = UserDefaults.standard.value(forKey: lastOpenRSSOrgFeedRefreshKey) as? Date else {
            return Date.distantPast
        }
        return storedDate
    }
    set {
        UserDefaults.standard.setValue(newValue, forKey: lastOpenRSSOrgFeedRefreshKey)
    }
}
|
|
|
|
/// Whether enough time has passed since the last openrss.org refresh to allow another.
///
/// The waiting period is 10 * 60 * 60 seconds — ten hours. The comment here used
/// to read "10 minutes", which did not match the arithmetic (60 * 60 * 10 seconds);
/// the computed value is unchanged, only the description is corrected.
/// NOTE(review): if ten minutes really was the intent, the interval should be 10 * 60.
static private var canDownloadFromOpenRSSOrg: Bool {
    let minimumInterval = TimeInterval(10 * 60 * 60) // 10 hours, in seconds (arbitrary)
    let okayToDownloadDate = lastOpenRSSOrgFeedRefresh + minimumInterval
    return Date() > okayToDownloadDate
}
|
|
|
|
/// Returns the URLs that should actually be downloaded in this refresh.
///
/// Possibly removes some openrss.org URLs; can be extended later if necessary.
/// When openrss.org downloads are currently allowed, all but one randomly chosen
/// openrss.org URL are removed; otherwise every openrss.org URL is removed.
///
/// The last-refresh timestamp is only updated when the batch actually contains
/// an openrss.org URL. Previously it was updated on every allowed call, so
/// refreshing unrelated feeds pushed back the next openrss.org download.
///
/// - Parameter urls: The URLs requested for download.
/// - Returns: `urls` with the excess openrss.org URLs removed.
static func filteredURLs(_ urls: Set<URL>) -> Set<URL> {

    guard canDownloadFromOpenRSSOrg else {
        return urls.byRemovingOpenRSSOrgURLs()
    }

    // Allow only one feed from openrss.org per refresh session.
    if urls.contains(where: { $0.isOpenRSSOrgURL }) {
        lastOpenRSSOrgFeedRefresh = Date()
    }
    return urls.byRemovingAllButOneRandomOpenRSSOrgURL()
}
|
|
}
|
|
|
|
extension URLSessionTask {

    /// The lowercased host of the task's current request, falling back to its
    /// original request. `nil` when there is no request, no URL, or no host.
    var lowercaseHost: String? {
        let request = currentRequest ?? originalRequest
        return request?.url?.host()?.lowercased()
    }
}
|
|
|
|
// MARK: - DownloadInfo
|
|
|
|
/// Bookkeeping for a single download: the URL it was requested for, the data
/// received so far, and the response once one has arrived.
private final class DownloadInfo {

    /// The URL the download was requested for.
    let url: URL

    /// Data accumulated so far, in arrival order.
    let data: NSMutableData

    /// The response for the download; `nil` until one is set.
    var urlResponse: URLResponse?

    init(_ url: URL) {
        self.url = url
        self.data = NSMutableData()
    }

    /// Appends a newly received chunk to the accumulated data.
    func addData(_ chunk: Data) {
        data.append(chunk)
    }
}
|