Get RSCore and RSWeb building.

This commit is contained in:
Brent Simmons
2025-01-03 22:16:11 -08:00
parent 90088735b1
commit 83e3324a4a
41 changed files with 535 additions and 229 deletions

View File

@@ -0,0 +1,85 @@
//
// Cache.swift
//
//
// Created by Brent Simmons on 10/12/24.
//
import Foundation
import os
/// A value that can be stored in a `Cache`.
///
/// `Cache` compares `dateCreated` against its `timeToLive` to decide
/// whether a stored record has expired.
public protocol CacheRecord: Sendable {
/// The date the record was created; its age is measured from here.
var dateCreated: Date { get }
}
/// An in-memory cache, keyed by string, whose records expire once they
/// are older than `timeToLive` seconds.
///
/// All mutable state is kept inside an `OSAllocatedUnfairLock`. Expired
/// records are dropped when their key is looked up, and in sweeps that
/// run at most once per `timeBetweenCleanups` seconds.
public final class Cache<T: CacheRecord>: Sendable {

    /// Maximum age, in seconds, before a record is treated as expired.
    public let timeToLive: TimeInterval

    /// Minimum number of seconds between two sweeps of expired records.
    public let timeBetweenCleanups: TimeInterval

    private struct State: Sendable {
        var lastCleanupDate = Date()
        var cache = [String: T]()
    }

    private let stateLock = OSAllocatedUnfairLock(initialState: State())

    public init(timeToLive: TimeInterval, timeBetweenCleanups: TimeInterval) {
        self.timeToLive = timeToLive
        self.timeBetweenCleanups = timeBetweenCleanups
    }

    /// Reads or writes the record stored for `key`.
    ///
    /// Reading runs a sweep if one is due, then returns nil (and removes
    /// the record) when the stored record is older than `timeToLive`.
    /// Writing nil removes the record.
    public subscript(_ key: String) -> T? {
        get {
            stateLock.withLock { state -> T? in
                cleanupIfNeeded(&state)

                guard let record = state.cache[key] else {
                    return nil
                }

                let isExpired = record.dateCreated.timeIntervalSinceNow < -timeToLive
                guard !isExpired else {
                    state.cache.removeValue(forKey: key)
                    return nil
                }
                return record
            }
        }
        set {
            stateLock.withLock { $0.cache[key] = newValue }
        }
    }

    /// Sweeps out expired records, but only if a sweep is currently due.
    public func cleanup() {
        stateLock.withLock { cleanupIfNeeded(&$0) }
    }
}
extension Cache {

    /// Removes every expired record from `state`, provided more than
    /// `timeBetweenCleanups` seconds have passed since the last sweep.
    /// Otherwise leaves `state` untouched.
    ///
    /// - Parameter state: The lock-protected state, passed in by a caller
    ///   that is already inside `stateLock.withLock`.
    private func cleanupIfNeeded(_ state: inout State) {
        let now = Date()

        let isSweepDue = state.lastCleanupDate.timeIntervalSince(now) < -timeBetweenCleanups
        if !isSweepDue {
            return
        }

        // Keep only the records that have not outlived timeToLive.
        state.cache = state.cache.filter {
            !($0.value.dateCreated.timeIntervalSince(now) < -timeToLive)
        }

        state.lastCleanupDate = Date()
    }
}

View File

@@ -16,14 +16,22 @@ public extension Date {
return addingTimeInterval(0.0 - TimeInterval(days: days))
}
/// Returns a date that is `hours` hours earlier than this date.
func bySubtracting(hours: Int) -> Date {
return addingTimeInterval(0.0 - TimeInterval(hours: hours))
}
/// Returns a date that is `days` days (24 hours each) later than this date.
func byAdding(days: Int) -> Date {
return addingTimeInterval(TimeInterval(days: days))
}
}
private extension TimeInterval {
public extension TimeInterval {

    /// Creates a time interval spanning `days` days of 24 hours each.
    ///
    /// The multiplication is done in floating point, so extreme values
    /// cannot trap on `Int` overflow. For every input where the integer
    /// product fits in `Int`, the result is unchanged.
    init(days: Int) {
        self.init(TimeInterval(days) * 86_400)
    }

    /// Creates a time interval spanning `hours` hours.
    ///
    /// Multiplies in floating point for the same reason as `init(days:)`.
    init(hours: Int) {
        self.init(TimeInterval(hours) * 3_600)
    }
}

View File

@@ -1,3 +0,0 @@
/// Holds a single greeting string.
/// NOTE(review): looks like leftover package-template boilerplate — confirm.
struct RSCore {
var text = "Hello, World!"
}

View File

@@ -12,10 +12,16 @@ let package = Package(
targets: ["RSWeb"]),
],
dependencies: [
.package(path: "../Parser"),
.package(path: "../RSCore"),
],
targets: [
.target(
name: "RSWeb",
dependencies: [
"Parser",
"RSCore"
],
swiftSettings: [.unsafeFlags(["-warnings-as-errors"])]
),
.testTarget(

View File

@@ -0,0 +1,66 @@
//
// CacheControl.swift
// RSWeb
//
// Created by Brent Simmons on 11/30/24.
//
import Foundation
/// Basic Cache-Control handling — just the part we need,
/// which is to know when we got the response (`dateCreated`)
/// and when we can ask again (`canResume`).
public struct CacheControlInfo: Codable, Equatable {

    /// When this value was created.
    let dateCreated: Date

    /// The parsed max-age value, in seconds.
    let maxAge: TimeInterval

    /// `dateCreated` advanced by `maxAge` seconds.
    var resumeDate: Date {
        dateCreated.addingTimeInterval(maxAge)
    }

    /// True once the current date has reached `resumeDate`.
    public var canResume: Bool {
        resumeDate <= Date()
    }

    /// Builds the info from the response’s Cache-Control header.
    /// Returns nil if the header is missing or `init(value:)` fails.
    public init?(urlResponse: HTTPURLResponse) {
        if let headerValue = urlResponse.valueForHTTPHeaderField(HTTPResponseHeader.cacheControl) {
            self.init(value: headerValue)
        } else {
            return nil
        }
    }

    /// Returns nil if `parseMaxAge` finds no usable max-age in `value`.
    public init?(value: String) {
        guard let parsedMaxAge = Self.parseMaxAge(value) else {
            return nil
        }

        self.maxAge = parsedMaxAge
        self.dateCreated = Date()
    }
}
private extension CacheControlInfo {

    static let maxAgePrefix = "max-age="
    static let maxAgePrefixCount = maxAgePrefix.count

    /// Extracts the max-age value, in seconds, from a Cache-Control header value.
    ///
    /// The header is split on commas and each directive is trimmed of
    /// whitespace. The first `max-age=` directive whose value parses to a
    /// finite number greater than zero wins.
    ///
    /// - Parameter s: The raw Cache-Control header value, e.g. `"public, max-age=3600"`.
    /// - Returns: The max-age in seconds, or nil if no usable directive is found.
    static func parseMaxAge(_ s: String) -> TimeInterval? {
        for component in s.components(separatedBy: ",") {
            let directive = component.trimmingCharacters(in: .whitespaces)

            // Directive names are case-insensitive, so "Max-Age=60" must match too.
            guard directive.prefix(maxAgePrefixCount).lowercased() == maxAgePrefix else {
                continue
            }

            let maxAgeStringValue = directive.dropFirst(maxAgePrefixCount)

            // TimeInterval("inf") parses successfully; an infinite max-age
            // would produce a resume date that is never reached, so reject it.
            if let timeInterval = TimeInterval(maxAgeStringValue), timeInterval.isFinite, timeInterval > 0 {
                return timeInterval
            }
        }

        return nil
    }
}

View File

@@ -1,32 +0,0 @@
//
// DownloadObject.swift
// RSWeb
//
// Created by Brent Simmons on 8/3/16.
// Copyright © 2016 Ranchero Software, LLC. All rights reserved.
//
import Foundation
/// Holds a URL together with a mutable `Data` buffer that starts out empty.
///
/// Equality compares both `url` and `data`; hashing uses `url` only.
public final class DownloadObject: Hashable {

    public let url: URL
    public var data = Data()

    public init(url: URL) {
        self.url = url
    }

    // MARK: - Equatable

    public static func ==(lhs: DownloadObject, rhs: DownloadObject) -> Bool {
        guard lhs.url == rhs.url else {
            return false
        }
        return lhs.data == rhs.data
    }

    // MARK: - Hashable

    public func hash(into hasher: inout Hasher) {
        url.hash(into: &hasher)
    }
}

View File

@@ -0,0 +1,56 @@
//
// Downloader.swift
// RSWeb
//
// Created by Brent Simmons on 8/27/16.
// Copyright © 2016 Ranchero Software, LLC. All rights reserved.
//
import Foundation
/// Completion handler for `Downloader`: the data, response, and error
/// reported by the underlying URLSession data task.
public typealias DownloadCallback = (Data?, URLResponse?, Error?) -> Swift.Void

/// Simple downloader, for a one-shot download like an image
/// or a web page. For a download-feeds session, see DownloadSession.
public final class Downloader {

    public static let shared = Downloader()

    private let urlSession: URLSession

    private init() {
        urlSession = URLSession(configuration: Downloader.makeSessionConfiguration())
    }

    deinit {
        urlSession.invalidateAndCancel()
    }

    /// Downloads `url`. The completion handler, if any, is called on the main queue.
    public func download(_ url: URL, _ completion: DownloadCallback? = nil) {
        download(URLRequest(url: url), completion)
    }

    /// Downloads `urlRequest`, after giving the request a chance to pick up
    /// a special-case User-Agent. The completion handler, if any, is called
    /// on the main queue.
    public func download(_ urlRequest: URLRequest, _ completion: DownloadCallback? = nil) {
        var request = urlRequest
        request.addSpecialCaseUserAgentIfNeeded()

        let task = urlSession.dataTask(with: request) { data, response, error in
            DispatchQueue.main.async {
                completion?(data, response, error)
            }
        }
        task.resume()
    }

    /// Builds the ephemeral configuration used by the session: local cache
    /// data ignored, no cookies, one connection per host, plus the
    /// User-Agent headers when available.
    private static func makeSessionConfiguration() -> URLSessionConfiguration {
        let configuration = URLSessionConfiguration.ephemeral
        configuration.requestCachePolicy = .reloadIgnoringLocalCacheData
        configuration.httpShouldSetCookies = false
        configuration.httpCookieAcceptPolicy = .never
        configuration.httpMaximumConnectionsPerHost = 1
        configuration.httpCookieStorage = nil

        if let userAgentHeaders = UserAgent.headers() {
            configuration.httpAdditionalHeaders = userAgentHeaders
        }

        return configuration
    }
}

View File

@@ -0,0 +1,47 @@
//
// HTMLMetadataCache.swift
//
//
// Created by Brent Simmons on 10/13/24.
//
import Foundation
import Parser
import RSCore
extension Notification.Name {
/// Sent when HTMLMetadata is cached. Posted on any thread.
/// `HTMLMetadataCache` posts it with itself as the object and a userInfo
/// dictionary keyed by `HTMLMetadataCache.UserInfoKey`.
static let htmlMetadataAvailable = Notification.Name("htmlMetadataAvailable")
}
/// Caches `HTMLMetadata` by URL string and announces each newly stored
/// value with an `.htmlMetadataAvailable` notification.
final class HTMLMetadataCache: Sendable {

    static let shared = HTMLMetadataCache()

    // Sent along with .htmlMetadataAvailable notification
    struct UserInfoKey {
        static let htmlMetadata = "htmlMetadata"
        static let url = "url" // String value
    }

    private struct HTMLMetadataCacheRecord: CacheRecord {
        let metadata: HTMLMetadata
        let dateCreated = Date()
    }

    private let cache = Cache<HTMLMetadataCacheRecord>(timeToLive: TimeInterval(hours: 21), timeBetweenCleanups: TimeInterval(hours: 10))

    /// Reads or stores the metadata for `url`.
    ///
    /// Assigning nil is ignored: nothing is removed and no notification is posted.
    subscript(_ url: String) -> HTMLMetadata? {
        get {
            cache[url]?.metadata
        }
        set {
            if let metadata = newValue {
                cache[url] = HTMLMetadataCacheRecord(metadata: metadata)

                let userInfo: [AnyHashable: Any] = [UserInfoKey.htmlMetadata: metadata, UserInfoKey.url: url]
                NotificationCenter.default.post(name: .htmlMetadataAvailable, object: self, userInfo: userInfo)
            }
        }
    }
}

View File

@@ -0,0 +1,120 @@
//
// HTMLMetadataDownloader.swift
// NetNewsWire
//
// Created by Brent Simmons on 11/26/17.
// Copyright © 2017 Ranchero Software. All rights reserved.
//
import Foundation
import os
import Parser
import RSCore
/// Hands out cached `HTMLMetadata` for a URL and, on a cache miss,
/// starts a download so the metadata can be cached.
public final class HTMLMetadataDownloader: Sendable {

    public static let shared = HTMLMetadataDownloader()

    // bundleIdentifier is optional, so fall back to a fixed subsystem
    // instead of force-unwrapping and crashing on first use of the logger.
    private static let logger = Logger(subsystem: Bundle.main.bundleIdentifier ?? "RSWeb", category: "HTMLMetadataDownloader")
    private static let debugLoggingEnabled = false

    private let cache = HTMLMetadataCache()
    private let attemptDatesLock = OSAllocatedUnfairLock(initialState: [String: Date]())
    private let urlsReturning4xxsLock = OSAllocatedUnfairLock(initialState: Set<String>())

    /// Returns the cached metadata for `url`, if any.
    ///
    /// On a cache miss this returns nil and calls `downloadMetadataIfNeeded`,
    /// which may start a download.
    ///
    /// - Parameter url: The URL string the metadata is cached under.
    /// - Returns: The cached metadata, or nil when nothing is cached.
    public func cachedMetadata(for url: String) -> HTMLMetadata? {
        if Self.debugLoggingEnabled {
            Self.logger.debug("HTMLMetadataDownloader requested cached metadata for \(url)")
        }

        guard let htmlMetadata = cache[url] else {
            downloadMetadataIfNeeded(url)
            return nil
        }

        if Self.debugLoggingEnabled {
            Self.logger.debug("HTMLMetadataDownloader returning cached metadata for \(url)")
        }
        return htmlMetadata
    }
}
private extension HTMLMetadataDownloader {

    /// Starts a metadata download for `url` unless it should be skipped.
    ///
    /// A download is skipped when an earlier request for `url` returned a
    /// 4xx status, or when another attempt for `url` was recorded within
    /// the last `hoursBetweenAttempts` hours.
    func downloadMetadataIfNeeded(_ url: String) {
        if urlShouldBeSkippedDueToPrevious4xxResponse(url) {
            if Self.debugLoggingEnabled {
                Self.logger.debug("HTMLMetadataDownloader skipping download for \(url) because an earlier request returned a 4xx response.")
            }
            return
        }

        // Limit how often a download should be attempted.
        let shouldDownload = attemptDatesLock.withLock { attemptDates in
            let currentDate = Date()
            let hoursBetweenAttempts = 3 // arbitrary

            if let attemptDate = attemptDates[url], attemptDate > currentDate.bySubtracting(hours: hoursBetweenAttempts) {
                if Self.debugLoggingEnabled {
                    // Report the real throttle window rather than a hard-coded "an hour".
                    Self.logger.debug("HTMLMetadataDownloader skipping download for \(url) because an attempt was made less than \(hoursBetweenAttempts) hours ago.")
                }
                return false
            }

            // Record the attempt inside the lock, before the download starts.
            attemptDates[url] = currentDate
            return true
        }

        if shouldDownload {
            downloadMetadata(url)
        }
    }

    /// Downloads `url`, parses the response as HTML metadata, and caches it.
    ///
    /// A non-empty body with an OK status is parsed and stored under the
    /// originally requested `url` string; the parser is given the
    /// response’s URL when there is one. Otherwise, a 4xx status is
    /// remembered so the URL is skipped in future.
    func downloadMetadata(_ url: String) {
        guard let actualURL = URL(string: url) else {
            if Self.debugLoggingEnabled {
                Self.logger.debug("HTMLMetadataDownloader skipping download for \(url) because it couldn’t construct a URL.")
            }
            return
        }

        if Self.debugLoggingEnabled {
            Self.logger.debug("HTMLMetadataDownloader downloading for \(url)")
        }

        Downloader.shared.download(actualURL) { data, response, error in
            if let data, !data.isEmpty, let response, response.statusIsOK {
                let urlToUse = response.url ?? actualURL
                let parserData = ParserData(url: urlToUse.absoluteString, data: data)
                let htmlMetadata = HTMLMetadataParser.metadata(with: parserData)
                if Self.debugLoggingEnabled {
                    Self.logger.debug("HTMLMetadataDownloader caching parsed metadata for \(url)")
                }
                self.cache[url] = htmlMetadata
                return
            }

            if let statusCode = response?.forcedStatusCode, (400...499).contains(statusCode) {
                self.noteURLDidReturn4xx(url)
            }

            if Self.debugLoggingEnabled {
                Self.logger.debug("HTMLMetadataDownloader failed download for \(url)")
            }
        }
    }

    /// True when an earlier request for `url` was recorded as returning a 4xx status.
    func urlShouldBeSkippedDueToPrevious4xxResponse(_ url: String) -> Bool {
        urlsReturning4xxsLock.withLock { $0.contains(url) }
    }

    /// Remembers that a request for `url` returned a 4xx status.
    func noteURLDidReturn4xx(_ url: String) {
        _ = urlsReturning4xxsLock.withLock { $0.insert(url) }
    }
}

View File

@@ -0,0 +1,37 @@
//
// File.swift
// RSWeb
//
// Created by Brent Simmons on 11/24/24.
//
import Foundation
// 429 Too Many Requests

/// Records a URL, its lowercased host, and how many seconds to wait
/// (`retryAfter`) from the moment this value was created.
struct HTTPResponse429 {

    let url: URL
    let host: String // lowercased
    let dateCreated: Date
    let retryAfter: TimeInterval

    /// `dateCreated` advanced by `retryAfter` seconds.
    var resumeDate: Date {
        dateCreated.addingTimeInterval(retryAfter)
    }

    /// True once the current date has reached `resumeDate`.
    var canResume: Bool {
        resumeDate <= Date()
    }

    /// Returns nil if `url` has no host.
    init?(url: URL, retryAfter: TimeInterval) {
        guard let lowercasedHost = url.host()?.lowercased() else {
            return nil
        }

        self.url = url
        self.host = lowercasedHost
        self.retryAfter = retryAfter
        self.dateCreated = Date()
    }
}

View File

@@ -1,192 +0,0 @@
//
// OneShotDownload.swift
// RSWeb
//
// Created by Brent Simmons on 8/27/16.
// Copyright © 2016 Ranchero Software, LLC. All rights reserved.
//
import Foundation
// Main thread only.

/// Completion handler for one-shot downloads: the data, response, and
/// error reported by the underlying URLSession data task.
public typealias OneShotDownloadCallback = (Data?, URLResponse?, Error?) -> Swift.Void

/// Owns the URLSession used for one-shot downloads. Completion handlers
/// are called on the main queue.
private final class OneShotDownloadManager {

    private let urlSession: URLSession
    fileprivate static let shared = OneShotDownloadManager()

    public init() {
        // Ephemeral session: no cookies, no URL cache, two connections
        // per host, 30-second request timeout.
        let configuration = URLSessionConfiguration.ephemeral
        configuration.requestCachePolicy = .reloadIgnoringLocalCacheData
        configuration.httpShouldSetCookies = false
        configuration.httpCookieAcceptPolicy = .never
        configuration.httpMaximumConnectionsPerHost = 2
        configuration.httpCookieStorage = nil
        configuration.urlCache = nil
        configuration.timeoutIntervalForRequest = 30

        if let userAgentHeaders = UserAgent.headers() {
            configuration.httpAdditionalHeaders = userAgentHeaders
        }

        urlSession = URLSession(configuration: configuration)
    }

    deinit {
        urlSession.invalidateAndCancel()
    }

    /// Downloads `url` and reports the outcome on the main queue.
    public func download(_ url: URL, _ completion: @escaping OneShotDownloadCallback) {
        urlSession.dataTask(with: url) { data, response, error in
            DispatchQueue.main.async {
                completion(data, response, error)
            }
        }.resume()
    }

    /// Downloads `urlRequest` and reports the outcome on the main queue.
    public func download(_ urlRequest: URLRequest, _ completion: @escaping OneShotDownloadCallback) {
        urlSession.dataTask(with: urlRequest) { data, response, error in
            DispatchQueue.main.async {
                completion(data, response, error)
            }
        }.resume()
    }
}
// Call one of these. It’s easier than referring to OneShotDownloadManager.
// callback is called on the main queue.
/// Downloads `url` using the shared OneShotDownloadManager.
/// Must be called on the main thread; this is enforced with a precondition.
public func download(_ url: URL, _ completion: @escaping OneShotDownloadCallback) {
precondition(Thread.isMainThread)
OneShotDownloadManager.shared.download(url, completion)
}
/// Downloads `urlRequest` using the shared OneShotDownloadManager.
/// Must be called on the main thread; this is enforced with a precondition.
public func download(_ urlRequest: URLRequest, _ completion: @escaping OneShotDownloadCallback) {
precondition(Thread.isMainThread)
OneShotDownloadManager.shared.download(urlRequest, completion)
}
// MARK: - Downloading using a cache
/// One cached download: the URL, when it was downloaded, and the data
/// and response that came back.
private struct WebCacheRecord {
let url: URL
// WebCache.cleanup compares this date against its cutoff to decide whether to drop the record.
let dateDownloaded: Date
let data: Data
let response: URLResponse
}
/// A dictionary of downloaded records keyed by URL, with age-based cleanup.
private final class WebCache {

    private var cache = [URL: WebCacheRecord]()

    /// Removes every record downloaded more than `cleanupInterval` seconds ago.
    func cleanup(_ cleanupInterval: TimeInterval) {
        let cutoffDate = Date().addingTimeInterval(-cleanupInterval)
        cache = cache.filter { !shouldDelete($0.value, cutoffDate) }
    }

    /// True when the record was downloaded before `cutoffDate`.
    private func shouldDelete(_ cacheRecord: WebCacheRecord, _ cutoffDate: Date) -> Bool {
        return cacheRecord.dateDownloaded < cutoffDate
    }

    /// Reads, stores, or (when assigned nil) removes the record for `url`.
    subscript(_ url: URL) -> WebCacheRecord? {
        get {
            cache[url]
        }
        set {
            cache[url] = newValue
        }
    }
}
// URLSessionConfiguration has a cache policy.
// But we don’t know how it works, and the unimplemented parts spook us a bit.
// So we use a cache that works exactly as we want it to work.
// It also makes sure we don’t have multiple requests for the same URL at the same time.
/// A completion handler waiting for the download of `url` to finish.
private struct CallbackRecord {
let url: URL
let completion: OneShotDownloadCallback
}
/// Downloads URLs through a short-lived in-memory cache and coalesces
/// concurrent requests for the same URL into a single download.
private final class DownloadWithCacheManager {

    static let shared = DownloadWithCacheManager()
    private var cache = WebCache()
    private static let timeToLive: TimeInterval = 10 * 60 // 10 minutes
    private static let cleanupInterval: TimeInterval = 5 * 60 // clean up the cache at most every 5 minutes
    private var lastCleanupDate = Date()
    private var pendingCallbacks = [CallbackRecord]()
    private var urlsInProgress = Set<URL>()

    /// Downloads `url`, or answers from the cache.
    ///
    /// - Parameters:
    ///   - url: The URL to fetch.
    ///   - completion: Called immediately with the cached data and response
    ///     on a cache hit; otherwise called when the download for `url` finishes.
    ///   - forceRedownload: When true, the cache is not consulted. A
    ///     successful download still updates the cache.
    func download(_ url: URL, _ completion: @escaping OneShotDownloadCallback, forceRedownload: Bool = false) {

        if lastCleanupDate.timeIntervalSinceNow < -DownloadWithCacheManager.cleanupInterval {
            lastCleanupDate = Date()
            cache.cleanup(DownloadWithCacheManager.timeToLive)
        }

        if !forceRedownload, let cacheRecord = cache[url] {
            completion(cacheRecord.data, cacheRecord.response, nil)
            return
        }

        pendingCallbacks.append(CallbackRecord(url: url, completion: completion))
        if urlsInProgress.contains(url) {
            return // The completion handler will get called later.
        }
        urlsInProgress.insert(url)

        OneShotDownloadManager.shared.download(url) { (data, response, error) in

            self.urlsInProgress.remove(url)

            if let data = data, let response = response, response.statusIsOK, error == nil {
                self.cache[url] = WebCacheRecord(url: url, dateDownloaded: Date(), data: data, response: response)
            }

            // Detach this URL’s callbacks before invoking any of them. A
            // callback may request the same URL again; removing after the
            // loop would discard that new registration without calling it.
            let callbacksForURL = self.pendingCallbacks.filter { $0.url == url }
            self.pendingCallbacks.removeAll { $0.url == url }

            for callbackRecord in callbacksForURL {
                callbackRecord.completion(data, response, error)
            }
        }
    }
}
/// Downloads `url` via the shared DownloadWithCacheManager, answering from its cache when possible.
/// Must be called on the main thread; this is enforced with a precondition.
public func downloadUsingCache(_ url: URL, _ completion: @escaping OneShotDownloadCallback) {
precondition(Thread.isMainThread)
DownloadWithCacheManager.shared.download(url, completion)
}
/// Downloads `url` via the shared DownloadWithCacheManager with `forceRedownload: true`,
/// so the cache is not consulted before downloading.
/// Must be called on the main thread; this is enforced with a precondition.
public func downloadAddingToCache(_ url: URL, _ completion: @escaping OneShotDownloadCallback) {
precondition(Thread.isMainThread)
DownloadWithCacheManager.shared.download(url, completion, forceRedownload: true)
}

View File

@@ -0,0 +1,108 @@
//
// SpecialCases.swift
// RSWeb
//
// Created by Brent Simmons on 12/12/24.
//
import Foundation
import os
extension URL {

    private static let openRSSOrgURLCache = OSAllocatedUnfairLock(initialState: [URL: Bool]())

    /// True when the URL’s host is `openrss.org` or one of its subdomains.
    ///
    /// The host is compared case-insensitively and on a domain boundary,
    /// so `OpenRSS.org` matches and `openrss.org.example.com` does not.
    /// Results are memoized per URL behind a lock.
    public var isOpenRSSOrgURL: Bool {
        Self.openRSSOrgURLCache.withLock { cache in
            if let cachedResult = cache[self] {
                return cachedResult
            }

            // Host names are case-insensitive; normalize before comparing.
            let normalizedHost = host()?.lowercased() ?? ""
            let result = normalizedHost == "openrss.org" || normalizedHost.hasSuffix(".openrss.org")

            cache[self] = result
            return result
        }
    }
}
extension Set where Element == URL {

    /// The URLs in this set that are not openrss.org URLs.
    func byRemovingOpenRSSOrgURLs() -> Set<URL> {
        filter { url in !url.isOpenRSSOrgURL }
    }

    /// The URLs in this set that are openrss.org URLs.
    func openRSSOrgURLs() -> Set<URL> {
        filter { url in url.isOpenRSSOrgURL }
    }

    /// Returns this set with all but one randomly chosen openrss.org URL removed.
    ///
    /// The set is returned unchanged when it has fewer than two elements
    /// or contains fewer than two openrss.org URLs.
    func byRemovingAllButOneRandomOpenRSSOrgURL() -> Set<URL> {
        guard count > 1 else {
            return self
        }

        let candidates = openRSSOrgURLs()
        guard candidates.count > 1, let urlToKeep = candidates.randomElement() else {
            return self
        }

        var remaining = byRemovingOpenRSSOrgURLs()
        remaining.insert(urlToKeep)
        return remaining
    }
}
extension URLRequest {

    /// Sets the User-Agent header to `UserAgent.openRSSOrgUserAgent` when
    /// the request’s URL is an openrss.org URL; otherwise does nothing.
    mutating func addSpecialCaseUserAgentIfNeeded() {
        guard let url, url.isOpenRSSOrgURL else {
            return
        }
        setValue(UserAgent.openRSSOrgUserAgent, forHTTPHeaderField: HTTPRequestHeader.userAgent)
    }
}
extension UserAgent {

    /// The User-Agent string sent with openrss.org requests.
    ///
    /// Built from the `UserAgentExtended` Info.plist template by replacing
    /// the `[platform]`, `[version]`, and `[build]` placeholders. If the
    /// template is missing, debug builds hit an assertion and release
    /// builds use `fallbackUserAgentTemplate` instead of crashing.
    static let openRSSOrgUserAgent: String = {
        #if os(iOS)
        let platform = "iOS"
        #else
        let platform = "Mac"
        #endif
        let version = stringFromInfoPlist("CFBundleShortVersionString") ?? "Unknown"
        let build = stringFromInfoPlist("CFBundleVersion") ?? "Unknown"

        // Handle a missing template the same way as the other Info.plist
        // lookups, rather than force-unwrapping it.
        let template = stringFromInfoPlist("UserAgentExtended") ?? fallbackUserAgentTemplate

        return template
            .replacingOccurrences(of: "[platform]", with: platform)
            .replacingOccurrences(of: "[version]", with: version)
            .replacingOccurrences(of: "[build]", with: build)
    }()

    /// Template used only when Info.plist has no `UserAgentExtended` entry.
    private static let fallbackUserAgentTemplate = "RSWeb ([platform]; [version]; [build])"

    /// Reads a string from the main bundle’s Info.plist.
    /// Asserts in debug builds, and returns nil, when the key is missing
    /// or its value is not a string.
    private static func stringFromInfoPlist(_ key: String) -> String? {
        guard let s = Bundle.main.object(forInfoDictionaryKey: key) as? String else {
            assertionFailure("Expected to get \(key) from infoDictionary.")
            return nil
        }
        return s
    }
}

View File

@@ -14,7 +14,7 @@ let package = Package(
dependencies: [
.package(path: "../RSCore"),
.package(path: "../Articles"),
.package(path: "../RSDatabase.git"),
.package(path: "../RSDatabase"),
],
targets: [
.target(