mirror of
https://github.com/Ranchero-Software/NetNewsWire
synced 2025-08-12 06:26:36 +00:00
Move modules to Modules folder.
This commit is contained in:
@@ -0,0 +1,73 @@
|
||||
//
|
||||
// FeedParser.swift
|
||||
// Parser
|
||||
//
|
||||
// Created by Brent Simmons on 6/20/17.
|
||||
// Copyright © 2017 Ranchero Software, LLC. All rights reserved.
|
||||
//
|
||||
|
||||
import Foundation
|
||||
|
||||
/// Entry point for feed parsing. Handles RSS, Atom, JSON Feed, and RSS-in-JSON.
///
/// You don’t need to know the type of feed: it is detected from the data
/// via `FeedType.feedType(_:)` and the matching parser is called.
public struct FeedParser {

	/// Returns `true` when `data` is detected as one of the four supported feed formats.
	public static func canParse(_ data: Data) -> Bool {

		switch FeedType.feedType(data) {
		case .rss, .atom, .jsonFeed, .rssInJSON:
			return true
		case .unknown, .notAFeed:
			return false
		}
	}

	/// Detects the feed type of `data` and parses it with the matching parser.
	///
	/// - Parameters:
	///   - urlString: The URL the feed was fetched from; handed through to the parser.
	///   - data: The raw feed data.
	/// - Returns: The parsed feed, or `nil` when the type is `.unknown` or `.notAFeed`.
	/// - Throws: Whatever the JSON Feed and RSS-in-JSON parsers throw.
	public static func parse(urlString: String, data: Data) throws -> ParsedFeed? {

		switch FeedType.feedType(data) {

		case .unknown, .notAFeed:
			return nil

		case .jsonFeed:
			return try JSONFeedParser.parse(urlString: urlString, data: data)

		case .rssInJSON:
			return try RSSInJSONParser.parse(urlString: urlString, data: data)

		case .rss:
			let rssFeed = RSSParser.parsedFeed(urlString: urlString, data: data)
			return RSSFeedTransformer.parsedFeed(with: rssFeed, feedType: .rss)

		case .atom:
			let atomFeed = AtomParser.parsedFeed(urlString: urlString, data: data)
			return RSSFeedTransformer.parsedFeed(with: atomFeed, feedType: .atom)
		}
	}

	/// Callback-based variant: parses inside a `Task` and then calls `completion`
	/// from a second `Task` that is isolated to the main actor.
	///
	/// `completion` receives `(feed, nil)` when parsing did not throw (the feed may
	/// still be `nil`), or `(nil, error)` when it did.
	public static func parse(_ parserData: ParserData, _ completion: @Sendable @escaping (ParsedFeed?, Error?) -> Void) {

		Task {
			let parsedFeed: ParsedFeed?
			do {
				parsedFeed = try await parseAsync(urlString: parserData.url, data: parserData.data)
			} catch {
				Task { @MainActor in
					completion(nil, error)
				}
				return
			}

			Task { @MainActor in
				completion(parsedFeed, nil)
			}
		}
	}

	/// Async wrapper around `parse(urlString:data:)`; same result and same errors.
	public static func parseAsync(urlString: String, data: Data) async throws -> ParsedFeed? {

		return try parse(urlString: urlString, data: data)
	}
}
|
||||
@@ -0,0 +1,29 @@
|
||||
//
|
||||
// FeedParserError.swift
|
||||
// Parser
|
||||
//
|
||||
// Created by Brent Simmons on 6/24/17.
|
||||
// Copyright © 2017 Ranchero Software, LLC. All rights reserved.
|
||||
//
|
||||
|
||||
import Foundation
|
||||
|
||||
/// Error value thrown by the feed parsers. It carries a single
/// `FeedParserErrorType` that says what was missing or invalid.
public struct FeedParserError: Error, Sendable {

	/// The specific reason parsing failed.
	public enum FeedParserErrorType: Sendable {

		// RSS-in-JSON
		case rssChannelNotFound
		case rssItemsNotFound

		// JSON Feed
		case jsonFeedVersionNotFound
		case jsonFeedItemsNotFound
		case jsonFeedTitleNotFound

		// Any JSON-based format
		case invalidJSON
	}

	/// The reason this error was created with.
	public let errorType: FeedParserErrorType

	/// Creates an error for the given reason.
	public init(_ errorType: FeedParserErrorType) {
		self.errorType = errorType
	}
}
|
||||
148
Modules/Parser/Sources/Parser/FeedParser/Feeds/FeedType.swift
Normal file
148
Modules/Parser/Sources/Parser/FeedParser/Feeds/FeedType.swift
Normal file
@@ -0,0 +1,148 @@
|
||||
//
|
||||
// FeedType.swift
|
||||
// Parser
|
||||
//
|
||||
// Created by Brent Simmons on 6/20/17.
|
||||
// Copyright © 2017 Ranchero Software, LLC. All rights reserved.
|
||||
//
|
||||
|
||||
import Foundation
|
||||
|
||||
/// The feed formats the parser can recognize, plus two non-answers:
/// `.unknown` (not enough data to decide yet) and `.notAFeed`.
public enum FeedType: Sendable {

	case rss
	case atom
	case jsonFeed
	case rssInJSON
	case unknown
	case notAFeed

	/// With fewer bytes than this, `feedType(_:isPartialData:)` answers `.unknown`.
	private static let minNumberOfBytesRequired = 128

	/// Sniffs `data` and reports which kind of feed it looks like.
	///
	/// Can be called with partial data — while still downloading, for instance.
	/// If there’s not enough data, this returns `.unknown`: ask again when there’s more.
	/// If it’s definitely not a feed, this returns `.notAFeed`.
	///
	/// - Parameters:
	///   - data: The bytes received so far.
	///   - isPartialData: Pass `true` when `data` may be incomplete.
	static func feedType(_ data: Data, isPartialData: Bool = false) -> FeedType {

		let byteCount = data.count
		guard byteCount >= minNumberOfBytesRequired else {
			return .unknown
		}

		return data.withUnsafeBytes { (buffer: UnsafeRawBufferPointer) -> FeedType in

			guard let start = buffer.baseAddress else {
				return .unknown
			}
			let bytes = start.assumingMemoryBound(to: CChar.self)

			if isProbablyJSON(bytes, byteCount) {

				// Might not be able to detect a JSON Feed without all data.
				// Dr. Drang’s JSON Feed (see allthis.json and allthis-partial.json in tests)
				// has, at this writing, the JSON version element at the end of the feed,
				// which is totally legal — but it means not being able to detect
				// that it’s a JSON Feed without all the data.
				// So this returns .unknown instead of .notAFeed.
				guard !isPartialData else {
					return .unknown
				}

				if isProbablyJSONFeed(bytes, byteCount) {
					return .jsonFeed
				}
				if isProbablyRSSInJSON(bytes, byteCount) {
					return .rssInJSON
				}
				// JSON that matches neither format falls through to the XML checks below.
			}

			if isProbablyRSS(bytes, byteCount) {
				return .rss
			}
			if isProbablyAtom(bytes, byteCount) {
				return .atom
			}

			return .notAFeed
		}
	}
}
|
||||
|
||||
// MARK: - Byte sniffing helpers

private extension FeedType {

	/// RSS if there is an `<rss` or `<rdf:RDF` marker, or failing that,
	/// both a `<channel>` and a `<pubDate>` element.
	static func isProbablyRSS(_ bytes: UnsafePointer<CChar>, _ count: Int) -> Bool {

		return didFindString("<rss", bytes, count)
			|| didFindString("<rdf:RDF", bytes, count)
			|| (didFindString("<channel>", bytes, count) && didFindString("<pubDate>", bytes, count))
	}

	/// Atom if `<feed` appears anywhere in the bytes.
	static func isProbablyAtom(_ bytes: UnsafePointer<CChar>, _ count: Int) -> Bool {

		return didFindString("<feed", bytes, count)
	}

	/// JSON if the first meaningful byte is `{` (see `bytesStartWithStringIgnoringWhitespace`).
	static func isProbablyJSON(_ bytes: UnsafePointer<CChar>, _ count: Int) -> Bool {

		return bytesStartWithStringIgnoringWhitespace("{", bytes, count)
	}

	/// Looks for the JSON Feed version URL, either plain or with escaped slashes.
	static func isProbablyJSONFeed(_ bytes: UnsafePointer<CChar>, _ count: Int) -> Bool {

		// Assumes already called `isProbablyJSON` and it returned true.
		let foundPlain = didFindString("://jsonfeed.org/version/", bytes, count)
		return foundPlain || didFindString(":\\/\\/jsonfeed.org\\/version\\/", bytes, count)
	}

	/// RSS-in-JSON if `rss`, `channel`, and `item` all appear somewhere in the bytes.
	static func isProbablyRSSInJSON(_ bytes: UnsafePointer<CChar>, _ count: Int) -> Bool {

		// Assumes already called `isProbablyJSON` and it returned true.
		for marker in ["rss", "channel", "item"] {
			if !didFindString(marker, bytes, count) {
				return false
			}
		}
		return true
	}

	/// `true` when `strnstr` locates `string` within the first `numberOfBytes` of `bytes`.
	static func didFindString(_ string: UnsafePointer<CChar>, _ bytes: UnsafePointer<CChar>, _ numberOfBytes: Int) -> Bool {

		return strnstr(bytes, string, numberOfBytes) != nil
	}

	/// ASCII values of the bytes skipped as leading whitespace.
	struct Whitespace {
		static let space = Character(" ").asciiValue!
		static let `return` = Character("\r").asciiValue!
		static let newline = Character("\n").asciiValue!
		static let tab = Character("\t").asciiValue!
	}

	/// `true` when `byte` is a space, carriage return, newline, or tab.
	static func isWhitespace(_ byte: CChar) -> Bool {

		return byte == Whitespace.space
			|| byte == Whitespace.return
			|| byte == Whitespace.newline
			|| byte == Whitespace.tab
	}

	/// Reports whether `string` is the first thing in `bytes`, skipping whitespace
	/// and tolerating up to four leading non-matching bytes (room for a BOM).
	static func bytesStartWithStringIgnoringWhitespace(_ string: UnsafePointer<CChar>, _ bytes: UnsafePointer<CChar>, _ numberOfBytes: Int) -> Bool {

		for index in stride(from: 0, to: numberOfBytes, by: 1) {

			let byte = bytes[index]

			if isWhitespace(byte) {
				continue
			}

			// Candidate start: the answer is whether the first occurrence
			// of `string` in the buffer sits exactly at this position.
			if byte == string[0], let firstMatch = strnstr(bytes, string, numberOfBytes) {
				return firstMatch == bytes + index
			}

			// Allow for a BOM of up to four bytes (assuming BOM is only at the start)
			if index < 4 {
				continue
			}

			break
		}

		return false
	}
}
|
||||
@@ -0,0 +1,247 @@
|
||||
//
|
||||
// JSONFeedParser.swift
|
||||
// Parser
|
||||
//
|
||||
// Created by Brent Simmons on 6/25/17.
|
||||
// Copyright © 2017 Ranchero Software, LLC. All rights reserved.
|
||||
//
|
||||
|
||||
import Foundation
|
||||
|
||||
// See https://jsonfeed.org/version/1.1
|
||||
|
||||
/// Parser for JSON Feed documents.
public struct JSONFeedParser {

	/// JSON keys read by this parser (feed-level, item-level, author, hub, and attachment keys).
	struct Key {
		// Feed level
		static let version = "version"
		static let items = "items"
		static let title = "title"
		static let homePageURL = "home_page_url"
		static let feedURL = "feed_url"
		static let feedDescription = "description"
		static let nextURL = "next_url"
		static let icon = "icon"
		static let favicon = "favicon"
		static let expired = "expired"

		// Authors
		static let author = "author"
		static let authors = "authors"
		static let name = "name"
		static let url = "url"
		static let avatar = "avatar"

		// Hubs
		static let hubs = "hubs"
		static let type = "type"

		// Items
		static let contentHTML = "content_html"
		static let contentText = "content_text"
		static let externalURL = "external_url"
		static let summary = "summary"
		static let image = "image"
		static let bannerImage = "banner_image"
		static let datePublished = "date_published"
		static let dateModified = "date_modified"
		static let tags = "tags"
		static let uniqueID = "id"

		// Attachments
		static let attachments = "attachments"
		static let mimeType = "mime_type"
		static let sizeInBytes = "size_in_bytes"
		static let durationInSeconds = "duration_in_seconds"

		static let language = "language"
	}

	static let jsonFeedVersionMarker = "://jsonfeed.org/version/" // Allow for the mistake of not getting the scheme exactly correct.

	/// Parses JSON Feed `data` into a `ParsedFeed`.
	///
	/// - Parameters:
	///   - urlString: URL the feed came from. Used as the feed URL when the feed
	///     has no `feed_url`, and handed to item parsing.
	///   - data: The raw JSON.
	/// - Throws: `FeedParserError` with `.invalidJSON` when no dictionary can be read,
	///   `.jsonFeedVersionNotFound` when `version` is missing or lacks the marker,
	///   `.jsonFeedItemsNotFound` when `items` is missing, and
	///   `.jsonFeedTitleNotFound` when `title` is missing.
	public static func parse(urlString: String, data: Data) throws -> ParsedFeed? {

		guard let feedDictionary = JSONUtilities.dictionary(with: data) else {
			throw FeedParserError(.invalidJSON)
		}

		guard let version = feedDictionary[Key.version] as? String,
			  version.range(of: JSONFeedParser.jsonFeedVersionMarker) != nil else {
			throw FeedParserError(.jsonFeedVersionNotFound)
		}
		guard let itemsArray = feedDictionary[Key.items] as? JSONArray else {
			throw FeedParserError(.jsonFeedItemsNotFound)
		}
		guard let title = feedDictionary[Key.title] as? String else {
			throw FeedParserError(.jsonFeedTitleNotFound)
		}

		return ParsedFeed(
			type: .jsonFeed,
			title: title,
			homePageURL: feedDictionary[Key.homePageURL] as? String,
			feedURL: feedDictionary[Key.feedURL] as? String ?? urlString,
			language: feedDictionary[Key.language] as? String,
			feedDescription: feedDictionary[Key.feedDescription] as? String,
			nextURL: feedDictionary[Key.nextURL] as? String,
			iconURL: feedDictionary[Key.icon] as? String,
			faviconURL: feedDictionary[Key.favicon] as? String,
			authors: parseAuthors(feedDictionary),
			expired: feedDictionary[Key.expired] as? Bool ?? false,
			hubs: parseHubs(feedDictionary),
			items: parseItems(itemsArray, urlString)
		)
	}
}
|
||||
|
||||
// MARK: - Private

private extension JSONFeedParser {

	/// Reads authors from `authors` (an array) when present; otherwise falls back
	/// to the single `author` object. Returns `nil` when neither yields an author
	/// via the fallback path; an `authors` array always produces a set (possibly empty).
	static func parseAuthors(_ dictionary: JSONDictionary) -> Set<ParsedAuthor>? {

		if let authorsArray = dictionary[Key.authors] as? JSONArray {
			return Set(authorsArray.compactMap { parseAuthor($0) })
		}

		guard let authorDictionary = dictionary[Key.author] as? JSONDictionary,
			  let parsedAuthor = parseAuthor(authorDictionary) else {
			return nil
		}

		return Set([parsedAuthor])
	}

	/// Builds one author from its JSON object; `nil` when it has no name, url, or avatar.
	static func parseAuthor(_ dictionary: JSONDictionary) -> ParsedAuthor? {

		let name = dictionary[Key.name] as? String
		let url = dictionary[Key.url] as? String
		let avatar = dictionary[Key.avatar] as? String

		guard name != nil || url != nil || avatar != nil else {
			return nil
		}
		return ParsedAuthor(name: name, url: url, avatarURL: avatar, emailAddress: nil)
	}

	/// Reads `hubs`; entries lacking a string `url` or `type` are dropped.
	/// Returns `nil` when the key is missing or no entry survives.
	static func parseHubs(_ dictionary: JSONDictionary) -> Set<ParsedHub>? {

		guard let hubsArray = dictionary[Key.hubs] as? JSONArray else {
			return nil
		}

		let hubs: [ParsedHub] = hubsArray.compactMap { hubDictionary in
			guard let hubURL = hubDictionary[Key.url] as? String,
				  let hubType = hubDictionary[Key.type] as? String else {
				return nil
			}
			return ParsedHub(type: hubType, url: hubURL)
		}

		if hubs.isEmpty {
			return nil
		}
		return Set(hubs)
	}

	/// Parses every item dictionary, dropping those `parseItem` rejects.
	static func parseItems(_ itemsArray: JSONArray, _ feedURL: String) -> Set<ParsedItem> {

		let parsedItems = itemsArray.compactMap { parseItem($0, feedURL) }
		return Set(parsedItems)
	}

	/// Parses one item. Returns `nil` when the item has no usable `id`, or has
	/// neither `content_html` nor `content_text`.
	static func parseItem(_ itemDictionary: JSONDictionary, _ feedURL: String) -> ParsedItem? {

		guard let uniqueID = parseUniqueID(itemDictionary) else {
			return nil
		}

		let contentHTML = itemDictionary[Key.contentHTML] as? String
		let contentText = itemDictionary[Key.contentText] as? String
		guard contentHTML != nil || contentText != nil else {
			return nil
		}

		let tags = (itemDictionary[Key.tags] as? [String]).map { Set($0) }

		return ParsedItem(
			syncServiceID: nil,
			uniqueID: uniqueID,
			feedURL: feedURL,
			url: itemDictionary[Key.url] as? String,
			externalURL: itemDictionary[Key.externalURL] as? String,
			title: parseTitle(itemDictionary, feedURL),
			language: itemDictionary[Key.language] as? String,
			contentHTML: contentHTML,
			contentText: contentText,
			summary: itemDictionary[Key.summary] as? String,
			imageURL: itemDictionary[Key.image] as? String,
			bannerImageURL: itemDictionary[Key.bannerImage] as? String,
			datePublished: parseDate(itemDictionary[Key.datePublished] as? String),
			dateModified: parseDate(itemDictionary[Key.dateModified] as? String),
			authors: parseAuthors(itemDictionary),
			tags: tags,
			attachments: parseAttachments(itemDictionary)
		)
	}

	/// Returns the item title, run through `HTMLEntityDecoder` for the feeds
	/// listed in `isSpecialCaseTitleWithEntitiesFeed`.
	static func parseTitle(_ itemDictionary: JSONDictionary, _ feedURL: String) -> String? {

		guard let title = itemDictionary[Key.title] as? String else {
			return nil
		}

		guard isSpecialCaseTitleWithEntitiesFeed(feedURL) else {
			return title
		}
		return HTMLEntityDecoder.decodedString(title)
	}

	/// `true` when the lowercased feed URL contains one of the known hosts below.
	static func isSpecialCaseTitleWithEntitiesFeed(_ feedURL: String) -> Bool {

		// As of 16 Feb. 2018, Kottke’s and Heer’s feeds include HTML entities in the title elements.
		// If we find more feeds like this, we’ll add them here. If these feeds get fixed, we’ll remove them.

		let specialCaseHosts = ["kottke.org", "pxlnv.com", "macstories.net", "macobserver.com"]
		let lowerFeedURL = feedURL.lowercased()
		return specialCaseHosts.contains { lowerFeedURL.contains($0) }
	}

	/// Reads the item `id` as a string, coercing numeric ids to their string form.
	static func parseUniqueID(_ itemDictionary: JSONDictionary) -> String? {

		let rawID = itemDictionary[Key.uniqueID]

		// Spec says it must be a string
		if let stringID = rawID as? String {
			return stringID
		}
		// Version 1 spec also says that if it’s a number, even though that’s incorrect, it should be coerced to a string.
		if let intID = rawID as? Int {
			return "\(intID)"
		}
		if let doubleID = rawID as? Double {
			return "\(doubleID)"
		}
		return nil
	}

	/// Parses a date string with `DateParser`; `nil` for a missing or empty string.
	static func parseDate(_ dateString: String?) -> Date? {

		guard let dateString, !dateString.isEmpty else {
			return nil
		}
		return DateParser.date(string: dateString)
	}

	/// Reads `attachments`; `nil` when the key is missing, otherwise the set of
	/// attachments that could be built (possibly empty).
	static func parseAttachments(_ itemDictionary: JSONDictionary) -> Set<ParsedAttachment>? {

		guard let attachmentsArray = itemDictionary[Key.attachments] as? JSONArray else {
			return nil
		}
		let parsedAttachments = attachmentsArray.compactMap { parseAttachment($0) }
		return Set(parsedAttachments)
	}

	/// Builds one attachment; both `url` and `mime_type` must be strings.
	static func parseAttachment(_ attachmentObject: JSONDictionary) -> ParsedAttachment? {

		guard let url = attachmentObject[Key.url] as? String,
			  let mimeType = attachmentObject[Key.mimeType] as? String else {
			return nil
		}

		return ParsedAttachment(
			url: url,
			mimeType: mimeType,
			title: attachmentObject[Key.title] as? String,
			sizeInBytes: attachmentObject[Key.sizeInBytes] as? Int,
			durationInSeconds: attachmentObject[Key.durationInSeconds] as? Int
		)
	}
}
|
||||
@@ -0,0 +1,182 @@
|
||||
//
|
||||
// RSSInJSONParser.swift
|
||||
// Parser
|
||||
//
|
||||
// Created by Brent Simmons on 6/24/17.
|
||||
// Copyright © 2017 Ranchero Software, LLC. All rights reserved.
|
||||
//
|
||||
|
||||
import Foundation
|
||||
import RSCore
|
||||
|
||||
// See https://github.com/scripting/Scripting-News/blob/master/rss-in-json/README.md
|
||||
// Also: http://cyber.harvard.edu/rss/rss.html
|
||||
|
||||
/// Parser for RSS-in-JSON documents.
public struct RSSInJSONParser {

	/// Parses RSS-in-JSON `data` into a `ParsedFeed`.
	///
	/// - Parameters:
	///   - urlString: URL the feed came from; used as the feed URL and handed to item parsing.
	///   - data: The raw JSON.
	/// - Throws: Any error from `JSONSerialization`, or `FeedParserError` with
	///   `.invalidJSON` (top level is not a dictionary), `.rssChannelNotFound`
	///   (no `rss` or `channel` object), or `.rssItemsNotFound` (no items array).
	public static func parse(urlString: String, data: Data) throws -> ParsedFeed? {

		// Errors propagate directly to the caller; no do/catch is needed just to rethrow.
		guard let parsedObject = try JSONSerialization.jsonObject(with: data) as? JSONDictionary else {
			throw FeedParserError(.invalidJSON)
		}
		guard let rssObject = parsedObject["rss"] as? JSONDictionary else {
			throw FeedParserError(.rssChannelNotFound)
		}
		guard let channelObject = rssObject["channel"] as? JSONDictionary else {
			throw FeedParserError(.rssChannelNotFound)
		}

		// I’d bet money that in practice the items array won’t always appear correctly inside the channel object.
		// I’d also bet that sometimes it gets called "items" instead of "item".
		// Lookup order: channel "item", top-level "item", channel "items", top-level "items".
		guard let itemsObject = (channelObject["item"] as? JSONArray)
				?? (parsedObject["item"] as? JSONArray)
				?? (channelObject["items"] as? JSONArray)
				?? (parsedObject["items"] as? JSONArray) else {
			throw FeedParserError(.rssItemsNotFound)
		}

		let title = channelObject["title"] as? String
		let homePageURL = channelObject["link"] as? String
		let feedURL = urlString
		let feedDescription = channelObject["description"] as? String
		let feedLanguage = channelObject["language"] as? String

		let items = parseItems(itemsObject, urlString)

		return ParsedFeed(type: .rssInJSON, title: title, homePageURL: homePageURL, feedURL: feedURL, language: feedLanguage, feedDescription: feedDescription, nextURL: nil, iconURL: nil, faviconURL: nil, authors: nil, expired: false, hubs: nil, items: items)
	}
}
|
||||
|
||||
// MARK: - Private

private extension RSSInJSONParser {

	/// Parses every item dictionary, dropping those that produce no `ParsedItem`.
	static func parseItems(_ itemsObject: JSONArray, _ feedURL: String) -> Set<ParsedItem> {

		let parsedItems = itemsObject.compactMap { parsedItemWithDictionary($0, feedURL) }
		return Set(parsedItems)
	}

	/// Builds one `ParsedItem`. Returns `nil` when the item has no description and
	/// no title, or when no unique ID could be produced.
	static func parsedItemWithDictionary(_ itemDictionary: JSONDictionary, _ feedURL: String) -> ParsedItem? {

		let externalURL = itemDictionary["link"] as? String
		let title = itemDictionary["title"] as? String

		// A description containing "<" is kept as HTML; anything else is kept as plain text.
		let contentHTML: String?
		let contentText: String?
		if let description = itemDictionary["description"] as? String {
			let looksLikeHTML = description.contains("<")
			contentHTML = looksLikeHTML ? description : nil
			contentText = looksLikeHTML ? nil : description
		} else {
			contentHTML = nil
			contentText = nil
		}

		guard contentHTML != nil || contentText != nil || title != nil else {
			return nil
		}

		let datePublished = (itemDictionary["pubDate"] as? String).flatMap { DateParser.date(string: $0) }

		let authors = parseAuthors(itemDictionary)
		let tags = parseTags(itemDictionary)
		let attachments = parseAttachments(itemDictionary)

		var uniqueID: String? = itemDictionary["guid"] as? String
		if uniqueID == nil {

			// Calculate a uniqueID based on a combination of non-empty elements. Then hash the result.
			// Items should have guids. When they don't, re-runs are very likely
			// because there's no other 100% reliable way to determine identity.
			// This calculated uniqueID is valid only for this particular feed. (Just like ids in JSON Feed.)

			let identityParts: [String?] = [
				datePublished.map { "\($0.timeIntervalSince1970)" },
				title,
				externalURL,
				authors?.first?.emailAddress,
				attachments?.first?.url
			]
			var identity = identityParts.compactMap { $0 }.joined()

			if identity.isEmpty {
				// Sheesh. Tough case. Fall back to the content itself.
				identity = contentText ?? contentHTML ?? ""
			}
			uniqueID = identity.md5String
		}

		guard let uniqueID else {
			return nil
		}

		return ParsedItem(
			syncServiceID: nil,
			uniqueID: uniqueID,
			feedURL: feedURL,
			url: nil,
			externalURL: externalURL,
			title: title,
			language: nil,
			contentHTML: contentHTML,
			contentText: contentText,
			summary: nil,
			imageURL: nil,
			bannerImageURL: nil,
			datePublished: datePublished,
			dateModified: nil,
			authors: authors,
			tags: tags,
			attachments: attachments
		)
	}

	/// Treats the item’s `author` string as an email address; `nil` when absent.
	static func parseAuthors(_ itemDictionary: JSONDictionary) -> Set<ParsedAuthor>? {

		guard let authorEmailAddress = itemDictionary["author"] as? String else {
			return nil
		}
		return Set([ParsedAuthor(name: nil, url: nil, avatarURL: nil, emailAddress: authorEmailAddress)])
	}

	/// Reads tags from `category`, which may be one object or an array of objects,
	/// each carrying its text under `#value`.
	static func parseTags(_ itemDictionary: JSONDictionary) -> Set<String>? {

		let category = itemDictionary["category"]

		if let categoryObject = category as? JSONDictionary {
			guard let oneTag = categoryObject["#value"] as? String else {
				return nil
			}
			return Set([oneTag])
		}

		if let categoryArray = category as? JSONArray {
			return Set(categoryArray.compactMap { $0["#value"] as? String })
		}

		return nil
	}

	/// Reads the single `enclosure` object into a one-element attachment set.
	/// `length` may arrive as a number or as a numeric string.
	static func parseAttachments(_ itemDictionary: JSONDictionary) -> Set<ParsedAttachment>? {

		guard let enclosureObject = itemDictionary["enclosure"] as? JSONDictionary,
			  let attachmentURL = enclosureObject["url"] as? String else {
			return nil
		}

		let rawLength = enclosureObject["length"]
		let attachmentSize = (rawLength as? Int)
			?? (rawLength as? String).map { ($0 as NSString).integerValue }

		let type = enclosureObject["type"] as? String

		guard let attachment = ParsedAttachment(url: attachmentURL, mimeType: type, title: nil, sizeInBytes: attachmentSize, durationInSeconds: nil) else {
			return nil
		}
		return Set([attachment])
	}
}
|
||||
@@ -0,0 +1,42 @@
|
||||
//
|
||||
// ParsedAttachment.swift
|
||||
// Parser
|
||||
//
|
||||
// Created by Brent Simmons on 6/20/17.
|
||||
// Copyright © 2017 Ranchero Software, LLC. All rights reserved.
|
||||
//
|
||||
|
||||
import Foundation
|
||||
|
||||
/// An attachment (enclosure) parsed from a feed item. Immutable.
public final class ParsedAttachment: Hashable, Sendable {

	public let url: String
	public let mimeType: String?
	public let title: String?
	public let sizeInBytes: Int?
	public let durationInSeconds: Int?

	/// Creates an attachment. Fails (returns `nil`) when `url` is empty.
	public init?(url: String, mimeType: String?, title: String?, sizeInBytes: Int?, durationInSeconds: Int?) {

		guard !url.isEmpty else {
			return nil
		}

		self.url = url
		self.mimeType = mimeType
		self.title = title
		self.sizeInBytes = sizeInBytes
		self.durationInSeconds = durationInSeconds
	}

	// MARK: - Hashable

	/// Only the URL feeds the hash; equality below compares every property.
	public func hash(into hasher: inout Hasher) {
		hasher.combine(url)
	}

	// MARK: - Equatable

	public static func ==(lhs: ParsedAttachment, rhs: ParsedAttachment) -> Bool {
		return lhs.url == rhs.url
			&& lhs.mimeType == rhs.mimeType
			&& lhs.title == rhs.title
			&& lhs.sizeInBytes == rhs.sizeInBytes
			&& lhs.durationInSeconds == rhs.durationInSeconds
	}
}
|
||||
@@ -0,0 +1,63 @@
|
||||
//
|
||||
// ParsedAuthor.swift
|
||||
// Parser
|
||||
//
|
||||
// Created by Brent Simmons on 6/20/17.
|
||||
// Copyright © 2017 Ranchero Software, LLC. All rights reserved.
|
||||
//
|
||||
|
||||
import Foundation
|
||||
|
||||
/// An author parsed from a feed or feed item. Immutable; every property is optional.
public final class ParsedAuthor: Hashable, Codable, Sendable {

	public let name: String?
	public let url: String?
	public let avatarURL: String?
	public let emailAddress: String?

	public init(name: String?, url: String?, avatarURL: String?, emailAddress: String?) {
		self.name = name
		self.url = url
		self.avatarURL = avatarURL
		self.emailAddress = emailAddress
	}

	/// Use when the actual property is unknown. Guess based on contents of the string. (This is common with RSS.)
	///
	/// A string containing "@" becomes the email address; otherwise a string whose
	/// lowercased form starts with "http" becomes the URL; anything else becomes the name.
	convenience init(singleString: String) {

		let looksLikeEmail = singleString.contains("@")
		let looksLikeURL = !looksLikeEmail && singleString.lowercased().hasPrefix("http")
		let looksLikeName = !looksLikeEmail && !looksLikeURL

		self.init(
			name: looksLikeName ? singleString : nil,
			url: looksLikeURL ? singleString : nil,
			avatarURL: nil,
			emailAddress: looksLikeEmail ? singleString : nil
		)
	}

	// MARK: - Hashable

	/// Hashes the first non-nil of name, url, emailAddress, avatarURL — or "" when all are nil.
	public func hash(into hasher: inout Hasher) {
		hasher.combine(name ?? url ?? emailAddress ?? avatarURL ?? "")
	}

	// MARK: - Equatable

	public static func ==(lhs: ParsedAuthor, rhs: ParsedAuthor) -> Bool {
		return lhs.name == rhs.name
			&& lhs.url == rhs.url
			&& lhs.avatarURL == rhs.avatarURL
			&& lhs.emailAddress == rhs.emailAddress
	}
}
|
||||
@@ -0,0 +1,42 @@
|
||||
//
|
||||
// ParsedFeed.swift
|
||||
// Parser
|
||||
//
|
||||
// Created by Brent Simmons on 6/20/17.
|
||||
// Copyright © 2017 Ranchero Software, LLC. All rights reserved.
|
||||
//
|
||||
|
||||
import Foundation
|
||||
|
||||
/// The result of parsing a feed: feed-level metadata plus the set of parsed items. Immutable.
public final class ParsedFeed: Sendable {

	// Identity
	public let type: FeedType
	public let title: String?
	public let homePageURL: String?
	public let feedURL: String?

	// Description
	public let language: String?
	public let feedDescription: String?

	// Related URLs
	public let nextURL: String?
	public let iconURL: String?
	public let faviconURL: String?

	public let authors: Set<ParsedAuthor>?
	public let expired: Bool
	public let hubs: Set<ParsedHub>?
	public let items: Set<ParsedItem>

	/// Stores every argument as given, except `homePageURL`, which is passed
	/// through `nilIfEmptyOrWhitespace` first.
	public init(
		type: FeedType,
		title: String?,
		homePageURL: String?,
		feedURL: String?,
		language: String?,
		feedDescription: String?,
		nextURL: String?,
		iconURL: String?,
		faviconURL: String?,
		authors: Set<ParsedAuthor>?,
		expired: Bool,
		hubs: Set<ParsedHub>?,
		items: Set<ParsedItem>
	) {
		self.type = type
		self.title = title
		// NOTE(review): helper is defined elsewhere; presumably yields nil for an empty or whitespace-only string.
		self.homePageURL = homePageURL?.nilIfEmptyOrWhitespace
		self.feedURL = feedURL

		self.language = language
		self.feedDescription = feedDescription

		self.nextURL = nextURL
		self.iconURL = iconURL
		self.faviconURL = faviconURL

		self.authors = authors
		self.expired = expired
		self.hubs = hubs
		self.items = items
	}
}
|
||||
@@ -0,0 +1,33 @@
|
||||
//
|
||||
// ParsedHub.swift
|
||||
// Parser
|
||||
//
|
||||
// Created by Brent Simmons on 6/20/17.
|
||||
// Copyright © 2017 Ranchero Software, LLC. All rights reserved.
|
||||
//
|
||||
|
||||
import Foundation
|
||||
|
||||
/// A hub entry parsed from a feed: a type string and a URL. Immutable.
public final class ParsedHub: Hashable, Sendable {

	public let type: String
	public let url: String

	init(type: String, url: String) {
		self.type = type
		self.url = url
	}

	// MARK: - Equatable

	public static func ==(lhs: ParsedHub, rhs: ParsedHub) -> Bool {
		return lhs.type == rhs.type && lhs.url == rhs.url
	}

	// MARK: - Hashable

	/// Feeds both properties to the hasher, `type` first and then `url`.
	public func hash(into hasher: inout Hasher) {
		hasher.combine(type)
		hasher.combine(url)
	}
}
|
||||
@@ -0,0 +1,72 @@
|
||||
//
|
||||
// ParsedItem.swift
|
||||
// Parser
|
||||
//
|
||||
// Created by Brent Simmons on 6/20/17.
|
||||
// Copyright © 2017 Ranchero Software, LLC. All rights reserved.
|
||||
//
|
||||
|
||||
import Foundation
|
||||
|
||||
/// One article/entry parsed from a feed. Immutable.
public final class ParsedItem: Hashable, Sendable {

	public let syncServiceID: String? //Nil when not syncing
	public let uniqueID: String //RSS guid, for instance; may be calculated
	public let feedURL: String
	public let url: String?
	public let externalURL: String?
	public let title: String?
	public let language: String?
	public let contentHTML: String?
	public let contentText: String?
	public let summary: String?
	public let imageURL: String?
	public let bannerImageURL: String?
	public let datePublished: Date?
	public let dateModified: Date?
	public let authors: Set<ParsedAuthor>?
	public let tags: Set<String>?
	public let attachments: Set<ParsedAttachment>?

	/// Memberwise initializer; every argument is stored unchanged.
	public init(
		syncServiceID: String?,
		uniqueID: String,
		feedURL: String,
		url: String?,
		externalURL: String?,
		title: String?,
		language: String?,
		contentHTML: String?,
		contentText: String?,
		summary: String?,
		imageURL: String?,
		bannerImageURL: String?,
		datePublished: Date?,
		dateModified: Date?,
		authors: Set<ParsedAuthor>?,
		tags: Set<String>?,
		attachments: Set<ParsedAttachment>?
	) {
		self.syncServiceID = syncServiceID
		self.uniqueID = uniqueID
		self.feedURL = feedURL
		self.url = url
		self.externalURL = externalURL
		self.title = title
		self.language = language
		self.contentHTML = contentHTML
		self.contentText = contentText
		self.summary = summary
		self.imageURL = imageURL
		self.bannerImageURL = bannerImageURL
		self.datePublished = datePublished
		self.dateModified = dateModified
		self.authors = authors
		self.tags = tags
		self.attachments = attachments
	}

	// MARK: - Hashable

	/// Hashes `syncServiceID` when there is one; otherwise `uniqueID` followed by `feedURL`.
	public func hash(into hasher: inout Hasher) {

		guard let syncServiceID else {
			hasher.combine(uniqueID)
			hasher.combine(feedURL)
			return
		}
		hasher.combine(syncServiceID)
	}

	// MARK: - Equatable

	/// Two items are equal only when every stored property matches.
	public static func ==(lhs: ParsedItem, rhs: ParsedItem) -> Bool {

		return lhs.syncServiceID == rhs.syncServiceID
			&& lhs.uniqueID == rhs.uniqueID
			&& lhs.feedURL == rhs.feedURL
			&& lhs.url == rhs.url
			&& lhs.externalURL == rhs.externalURL
			&& lhs.title == rhs.title
			&& lhs.language == rhs.language
			&& lhs.contentHTML == rhs.contentHTML
			&& lhs.contentText == rhs.contentText
			&& lhs.summary == rhs.summary
			&& lhs.imageURL == rhs.imageURL
			&& lhs.bannerImageURL == rhs.bannerImageURL
			&& lhs.datePublished == rhs.datePublished
			&& lhs.dateModified == rhs.dateModified
			&& lhs.authors == rhs.authors
			&& lhs.tags == rhs.tags
			&& lhs.attachments == rhs.attachments
	}
}
|
||||
|
||||
@@ -0,0 +1,454 @@
|
||||
//
|
||||
// AtomParser.swift
|
||||
// Parser
|
||||
//
|
||||
// Created by Brent Simmons on 6/25/17.
|
||||
// Copyright © 2017 Ranchero Software, LLC. All rights reserved.
|
||||
//
|
||||
|
||||
import Foundation
|
||||
import RSCore
|
||||
|
||||
/// SAX-driven parser that fills an `RSSFeed` from Atom XML data.
///
/// Use `AtomParser.parsedFeed(urlString:data:)`; parsing state lives in the
/// private properties below and is mutated by the SAX delegate callbacks.
final class AtomParser {

	private let feedURL: String
	private let data: Data
	private let feed: RSSFeed

	/// Articles collected so far; the last one is the entry currently being filled in.
	private var articles = [RSSArticle]()
	private var currentArticle: RSSArticle? {
		return articles.last
	}

	/// One attributes dictionary per element pushed by the start-element callback.
	private var attributesStack = [StringDictionary]()
	private var currentAttributes: StringDictionary? {
		return attributesStack.last
	}

	// XHTML capture state.
	private var parsingXHTML = false
	private var xhtmlString: String?

	// Author capture state.
	private var currentAuthor: RSSAuthor?
	private var parsingAuthor = false

	// Position flags.
	private var parsingArticle = false
	private var parsingSource = false
	private var endFeedFound = false

	/// Parses `data` and returns the resulting feed.
	///
	/// - Parameters:
	///   - urlString: URL string of the feed; stored on the feed and on each article.
	///   - data: The raw feed data handed to the SAX parser.
	static func parsedFeed(urlString: String, data: Data) -> RSSFeed {

		let atomParser = AtomParser(urlString: urlString, data: data)
		atomParser.parse()
		return atomParser.feed
	}

	init(urlString: String, data: Data) {
		feedURL = urlString
		self.data = data
		feed = RSSFeed(urlString: urlString)
	}
}
|
||||
|
||||
private extension AtomParser {
|
||||
|
||||
/// Runs the SAX parser over `data`, then hands the collected articles to `feed`.
func parse() {

	let xmlParser = SAXParser(delegate: self, data: data)
	xmlParser.parse()

	feed.articles = articles
}
|
||||
|
||||
/// Element names as UTF-8 C strings, compared against SAX local names with `SAXEqualTags`.
private struct XMLName {

	// Feed structure
	static let feed = "feed".utf8CString
	static let entry = "entry".utf8CString
	static let source = "source".utf8CString

	// Entry content
	static let id = "id".utf8CString
	static let title = "title".utf8CString
	static let content = "content".utf8CString
	static let summary = "summary".utf8CString
	static let link = "link".utf8CString

	// Author
	static let author = "author".utf8CString
	static let name = "name".utf8CString
	static let email = "email".utf8CString
	static let uri = "uri".utf8CString

	// Dates
	static let published = "published".utf8CString
	static let updated = "updated".utf8CString
	static let issued = "issued".utf8CString
	static let modified = "modified".utf8CString
}
|
||||
|
||||
/// Attribute names and `rel` values used when reading element attributes.
private struct XMLString {

	// Attribute names
	static let rel = "rel"
	static let href = "href"
	static let title = "title"
	static let type = "type"
	static let length = "length"
	static let xmlLang = "xml:lang"

	// Values of the `rel` attribute
	static let alternate = "alternate"
	static let related = "related"
	static let enclosure = "enclosure"
}
|
||||
|
||||
/// Returns the parser's `currentStringWithTrimmedWhitespace`.
func currentString(_ saxParser: SAXParser) -> String? {

	let trimmed = saxParser.currentStringWithTrimmedWhitespace
	return trimmed
}
|
||||
|
||||
/// Parses the characters currently stored by the SAX parser as a date.
///
/// - Returns: The result of `DateParser.date(data:)`, or `nil` (after a
///   debug-only assertion failure) when the parser has no stored characters.
func currentDate(_ saxParser: SAXParser) -> Date? {

	if let characters = saxParser.currentCharacters {
		return DateParser.date(data: characters)
	}

	assertionFailure("Unexpected nil saxParser.currentCharacters in AtomParser.currentDate")
	return nil
}
|
||||
|
||||
/// Sets the feed title from the current string, unless a title is already set
/// or the current string is missing or empty.
func addFeedTitle(_ saxParser: SAXParser) {

	if feed.title != nil {
		return
	}

	guard let candidate = currentString(saxParser), !candidate.isEmpty else {
		return
	}
	feed.title = candidate
}
|
||||
|
||||
/// Sets the feed's home page link from the current element's `href`.
///
/// Only the first qualifying link is kept. A link qualifies when it has
/// `rel="alternate"`, or when `href` is its only attribute.
func addFeedLink() {

	guard feed.link == nil,
		let currentAttributes,
		let href = currentAttributes[XMLString.href] else {
		return
	}

	let hasAlternateRel = currentAttributes[XMLString.rel] == XMLString.alternate // rel="alternate"
	let hasOnlyHref = currentAttributes.count == 1 // Example: <link href="https://www.allenpike.com/"/> — no rel or anything

	if hasAlternateRel || hasOnlyHref {
		feed.link = href
	}
}
|
||||
|
||||
/// Sets the feed language from the current element's `xml:lang` attribute,
/// unless a language has already been recorded.
func addFeedLanguage() {

	if feed.language != nil {
		return
	}
	guard let attributes = currentAttributes else {
		return
	}

	feed.language = attributes[XMLString.xmlLang]
}
|
||||
|
||||
/// Appends a new, empty article for this feed; it becomes `currentArticle`.
func addArticle() {
	articles.append(RSSArticle(feedURL))
}
|
||||
|
||||
/// Stores the value of a just-closed entry child element on the current article.
///
/// Prefixed (namespaced) elements are ignored. Handles `id`, `title`,
/// `content`, `summary`, `link`, `published`, `updated`, and the Atom 0.3
/// date elements `issued` and `modified`.
func addArticleElement(_ saxParser: SAXParser, _ localName: XMLPointer, _ prefix: XMLPointer?) {

	if prefix != nil {
		return
	}
	guard let article = currentArticle else {
		assertionFailure("currentArticle must not be nil in AtomParser.addArticleElement")
		return
	}

	if SAXEqualTags(localName, XMLName.id) {
		article.guid = currentString(saxParser)
		return
	}
	if SAXEqualTags(localName, XMLName.title) {
		article.title = currentString(saxParser)
		return
	}
	if SAXEqualTags(localName, XMLName.content) {
		addContent(saxParser, article)
		return
	}
	if SAXEqualTags(localName, XMLName.summary) {
		addSummary(saxParser, article)
		return
	}
	if SAXEqualTags(localName, XMLName.link) {
		addLink(article)
		return
	}
	if SAXEqualTags(localName, XMLName.published) {
		article.datePublished = currentDate(saxParser)
		return
	}
	if SAXEqualTags(localName, XMLName.updated) {
		article.dateModified = currentDate(saxParser)
		return
	}

	// Atom 0.3 dates: these only fill in a date that is still missing.
	if SAXEqualTags(localName, XMLName.issued) {
		if article.datePublished == nil {
			article.datePublished = currentDate(saxParser)
		}
		return
	}
	if SAXEqualTags(localName, XMLName.modified) {
		if article.dateModified == nil {
			article.dateModified = currentDate(saxParser)
		}
	}
}
|
||||
|
||||
/// Uses the current string as the article body, replacing any existing body.
func addContent(_ saxParser: SAXParser, _ article: RSSArticle) {

	let content = currentString(saxParser)
	article.body = content
}
|
||||
|
||||
/// Uses the current string as the article body, but only when no body has been set yet.
func addSummary(_ saxParser: SAXParser, _ article: RSSArticle) {

	if article.body == nil {
		article.body = currentString(saxParser)
	}
}
|
||||
|
||||
/// Applies the current `<link>` element's attributes to `article`.
///
/// A missing or empty `rel` is treated as `alternate`.
/// - `related` sets `article.link` (first one wins).
/// - `alternate` sets `article.permalink` (first one wins).
/// - `enclosure` adds an enclosure built from the attributes.
/// Links with no `href`, an empty `href`, or any other `rel` are ignored.
func addLink(_ article: RSSArticle) {

	guard let attributes = currentAttributes,
		let href = attributes[XMLString.href],
		!href.isEmpty else {
		return
	}

	let declaredRel = attributes[XMLString.rel] ?? ""
	let rel = declaredRel.isEmpty ? XMLString.alternate : declaredRel

	switch rel {

	case XMLString.related:
		if article.link == nil {
			article.link = href
		}

	case XMLString.alternate:
		if article.permalink == nil {
			article.permalink = href
		}

	case XMLString.enclosure:
		if let parsedEnclosure = enclosure(href, attributes) {
			article.addEnclosure(parsedEnclosure)
		}

	default:
		break
	}
}
|
||||
|
||||
/// Builds an enclosure for `urlString` from a link element's attributes.
///
/// `title` and `type` are copied as-is; `length` is stored only when it
/// converts to an `Int`. This implementation always returns a value.
func enclosure(_ urlString: String, _ attributes: StringDictionary) -> RSSEnclosure? {

	let result = RSSEnclosure(url: urlString)

	result.title = attributes[XMLString.title]
	result.mimeType = attributes[XMLString.type]
	result.length = attributes[XMLString.length].flatMap { Int($0) }

	return result
}
|
||||
|
||||
/// Appends an opening tag for `localName`, with the current element's
/// attributes, to the XHTML being accumulated in `xhtmlString`.
///
/// The tag is built in a local string and stored back at the end, so the
/// stored XHTML is updated rather than a discarded copy.
///
/// - Parameter localName: Local name of the element that just started.
func addXHTMLTag(_ localName: XMLPointer) {

	guard let existingXHTML = xhtmlString else {
		assertionFailure("xhtmlString must not be nil when in addXHTMLTag.")
		return
	}

	guard let name = String(xmlPointer: localName) else {
		assertionFailure("Unexpected failure converting XMLPointer to String in addXHTMLTag.")
		return
	}

	var tag = "<" + name

	if let currentAttributes, !currentAttributes.isEmpty {
		// Sort by attribute name so the same input always yields the same markup,
		// independent of the collection's iteration order.
		for (key, value) in currentAttributes.sorted(by: { $0.0 < $1.0 }) {
			// Attribute values arrive entity-decoded, so a literal double quote
			// must be re-encoded to avoid terminating the attribute early.
			let encodedValue = value.replacingOccurrences(of: "\"", with: "&quot;")
			tag.append(" \(key)=\"\(encodedValue)\"")
		}
	}

	tag.append(">")

	// Store the result back: String is a value type, so `existingXHTML` is only a copy.
	xhtmlString = existingXHTML + tag
}
|
||||
}
|
||||
|
||||
extension AtomParser: SAXParserDelegate {
|
||||
|
||||
/// SAX start-element callback.
///
/// Ignores everything after the closing feed tag. Otherwise it pushes the
/// element's attributes onto `attributesStack` and updates parsing state:
/// - while capturing XHTML, the element is written out as an opening tag;
/// - `entry`, `author` and `source` switch the matching state flags;
/// - `content`/`summary` inside an entry record the language (content only)
///   and begin XHTML capture when `type="xhtml"`;
/// - a feed-level `link` and the `feed` element set feed properties.
/// For elements that do not return early, character storage is started.
public func saxParser(_ saxParser: SAXParser, xmlStartElement localName: XMLPointer, prefix: XMLPointer?, uri: XMLPointer?, namespaceCount: Int, namespaces: UnsafePointer<XMLPointer?>?, attributeCount: Int, attributesDefaultedCount: Int, attributes: UnsafePointer<XMLPointer?>?) {

	guard !endFeedFound else {
		return
	}

	let xmlAttributes = saxParser.attributesDictionary(attributes, attributeCount: attributeCount) ?? StringDictionary()
	attributesStack.append(xmlAttributes)

	guard !parsingXHTML else {
		addXHTMLTag(localName)
		return
	}

	if SAXEqualTags(localName, XMLName.entry) {
		parsingArticle = true
		addArticle()
		return
	}

	if SAXEqualTags(localName, XMLName.author) {
		parsingAuthor = true
		currentAuthor = RSSAuthor()
		return
	}

	if SAXEqualTags(localName, XMLName.source) {
		parsingSource = true
		return
	}

	let isContentTag = SAXEqualTags(localName, XMLName.content)
	let isSummaryTag = SAXEqualTags(localName, XMLName.summary)

	if parsingArticle && (isContentTag || isSummaryTag) {

		if isContentTag {
			currentArticle?.language = xmlAttributes[XMLString.xmlLang]
		}

		if xmlAttributes[XMLString.type] == "xhtml" {
			// Start collecting child markup instead of plain characters.
			parsingXHTML = true
			xhtmlString = ""
			return
		}
	}

	if !parsingArticle && SAXEqualTags(localName, XMLName.link) {
		addFeedLink()
		return
	}

	if SAXEqualTags(localName, XMLName.feed) {
		addFeedLanguage()
	}

	saxParser.beginStoringCharacters()
}
|
||||
|
||||
/// SAX end-element callback.
///
/// The closing feed tag ends parsing. Otherwise, depending on state, this
/// finishes XHTML capture, finishes or fills in the current author, ends the
/// current entry, stores an entry child element, ends a `source` element, or
/// records the feed title. Unless it returns early for the feed tag, it pops
/// the attributes pushed by the matching start element.
public func saxParser(_ saxParser: SAXParser, xmlEndElement localName: XMLPointer, prefix: XMLPointer?, uri: XMLPointer?) {

	if SAXEqualTags(localName, XMLName.feed) {
		endFeedFound = true
		return
	}

	if endFeedFound {
		return
	}

	if parsingXHTML {

		// NOTE(review): these checks match on name only, so an element named
		// `content` or `summary` nested inside the XHTML also ends the capture —
		// confirm whether depth tracking is needed.
		let isContentTag = SAXEqualTags(localName, XMLName.content)
		let isSummaryTag = SAXEqualTags(localName, XMLName.summary)

		if parsingArticle && (isContentTag || isSummaryTag) {

			if isContentTag {
				currentArticle?.body = xhtmlString
			}
			else if currentArticle?.body?.isEmpty ?? true {
				// A summary only becomes the body when there is no body yet.
				currentArticle?.body = xhtmlString
			}
		}

		if isContentTag || isSummaryTag {
			parsingXHTML = false
		}

		if xhtmlString == nil {
			assertionFailure("xhtmlString must not be nil when parsingXHTML in xmlEndElement.")
		}
		else if let localNameString = String(xmlPointer: localName) {
			// Append to the stored property itself: binding it with `if var`
			// would only mutate a copy and lose the closing tag.
			xhtmlString?.append("</\(localNameString)>")
		}
	}

	else if parsingAuthor {

		if SAXEqualTags(localName, XMLName.author) {
			parsingAuthor = false
			if let currentAuthor, !currentAuthor.isEmpty() {
				currentArticle?.addAuthor(currentAuthor)
			}
			currentAuthor = nil
		}
		else if SAXEqualTags(localName, XMLName.name) {
			currentAuthor?.name = saxParser.currentStringWithTrimmedWhitespace
		}
		else if SAXEqualTags(localName, XMLName.email) {
			currentAuthor?.emailAddress = saxParser.currentStringWithTrimmedWhitespace
		}
		else if SAXEqualTags(localName, XMLName.uri) {
			currentAuthor?.url = saxParser.currentStringWithTrimmedWhitespace
		}
	}

	else if SAXEqualTags(localName, XMLName.entry) {
		parsingArticle = false
	}

	else if parsingArticle && !parsingSource {
		addArticleElement(saxParser, localName, prefix)
	}

	else if SAXEqualTags(localName, XMLName.source) {
		parsingSource = false
	}

	else if !parsingArticle && !parsingSource && SAXEqualTags(localName, XMLName.title) {
		addFeedTitle(saxParser)
	}

	_ = attributesStack.popLast()
}
|
||||
|
||||
/// SAX characters callback. Only used while capturing XHTML content: the
/// text is re-escaped and appended to the XHTML accumulated so far.
///
/// - Parameters:
///   - xmlCharactersFound: Pointer to the characters reported by the parser.
///   - count: Number of bytes passed on to `String(xmlPointer:count:)`.
public func saxParser(_ saxParser: SAXParser, xmlCharactersFound: XMLPointer, count: Int) {

	guard parsingXHTML else {
		return
	}
	guard let text = String(xmlPointer: xmlCharactersFound, count: count) else {
		return
	}

	// libxml decodes all entities; we need to re-encode certain characters
	// (<, >, and &) when inside XHTML text content.
	// The ampersand must be handled first, otherwise the ampersands introduced
	// by the other two replacements would be escaped a second time.
	let escapedText = text
		.replacingOccurrences(of: "&", with: "&amp;")
		.replacingOccurrences(of: "<", with: "&lt;")
		.replacingOccurrences(of: ">", with: "&gt;")

	// Append rather than assign, so markup and text captured earlier are kept.
	xhtmlString = (xhtmlString ?? "") + escapedText
}
|
||||
}
|
||||
@@ -0,0 +1,111 @@
|
||||
//
|
||||
// RSSArticle.swift
|
||||
//
|
||||
//
|
||||
// Created by Brent Simmons on 8/27/24.
|
||||
//
|
||||
|
||||
import Foundation
|
||||
//import FoundationExtras
|
||||
|
||||
/// Mutable, parser-internal representation of one article while a feed is being parsed.
final class RSSArticle {

	var feedURL: String

	/// An RSS guid, if present, or calculated from other attributes.
	/// Should be unique to the feed, but not necessarily unique
	/// across different feeds. (Not suitable for a database ID.)
	///
	/// Lazy: the value is computed on first access and then kept.
	lazy var articleID: String = guid ?? calculatedArticleID()

	var guid: String?
	var title: String?
	var body: String?
	var link: String?
	var permalink: String?
	var authors: [RSSAuthor]?
	var enclosures: [RSSEnclosure]?
	var datePublished: Date?
	var dateModified: Date?
	/// Set to the current date when the article object is created.
	var dateParsed: Date
	var language: String?

	init(_ feedURL: String) {
		self.feedURL = feedURL
		dateParsed = Date()
	}

	/// Appends `enclosure`, creating the `enclosures` array on first use.
	func addEnclosure(_ enclosure: RSSEnclosure) {
		enclosures = (enclosures ?? []) + [enclosure]
	}

	/// Appends `author`, creating the `authors` array on first use.
	func addAuthor(_ author: RSSAuthor) {
		authors = (authors ?? []) + [author]
	}
}
|
||||
|
||||
private extension RSSArticle {
|
||||
|
||||
/// Builds an article ID for an article that has no guid.
///
/// Concatenate a combination of properties, then hash the result.
/// In general, feeds should have guids. When they don't, re-runs are very likely,
/// because there's no other 100% reliable way to determine identity.
/// This is intended to create an ID unique inside a feed, but not globally unique.
/// Not suitable for a database ID, in other words.
///
/// - Returns: `md5String` of the chosen source string.
func calculatedArticleID() -> String {

	/// Returns `value` only when it is non-nil and non-empty.
	func nonEmpty(_ value: String?) -> String? {
		guard let value, !value.isEmpty else {
			return nil
		}
		return value
	}

	// Publication date as whole seconds since 1970, when there is one.
	let timestamp = datePublished.map { String(format: "%.0f", $0.timeIntervalSince1970) }

	// Preference order: permalink, then link, then title.
	let locator = nonEmpty(permalink) ?? nonEmpty(link) ?? nonEmpty(title)

	let source: String
	if let timestamp {
		// Ideally we have a permalink and a pubDate.
		// Either one would probably be a good guid, but together they should be rock-solid.
		// (In theory. Feeds are buggy, though.)
		// With no locator at all, the timestamp alone is used.
		source = (locator ?? "") + timestamp
	}
	else {
		// No date: use the locator, then the body, and finally the empty string.
		source = locator ?? nonEmpty(body) ?? ""
	}

	return source.md5String
}
|
||||
}
|
||||
@@ -0,0 +1,40 @@
|
||||
//
|
||||
// RSSAuthor.swift
|
||||
//
|
||||
//
|
||||
// Created by Brent Simmons on 8/27/24.
|
||||
//
|
||||
|
||||
import Foundation
|
||||
|
||||
/// Mutable, parser-internal representation of an article author.
final class RSSAuthor {

	var name: String?
	var url: String?
	var avatarURL: String?
	var emailAddress: String?

	init(name: String? = nil, url: String? = nil, avatarURL: String? = nil, emailAddress: String? = nil) {
		self.name = name
		self.url = url
		self.avatarURL = avatarURL
		self.emailAddress = emailAddress
	}

	/// Use when the actual property is unknown. Guess based on contents of the string. (This is common with RSS.)
	///
	/// A string containing `@` becomes the email address; otherwise a string
	/// starting with `http` (case-insensitive) becomes the URL; anything else
	/// becomes the name.
	convenience init(singleString: String) {

		let looksLikeEmail = singleString.contains("@")
		let looksLikeURL = !looksLikeEmail && singleString.lowercased().hasPrefix("http")
		let isPlainName = !looksLikeEmail && !looksLikeURL

		self.init(
			name: isPlainName ? singleString : nil,
			url: looksLikeURL ? singleString : nil,
			emailAddress: looksLikeEmail ? singleString : nil
		)
	}

	/// True when every property is nil. (Empty strings count as values.)
	func isEmpty() -> Bool {

		let values = [name, url, avatarURL, emailAddress]
		return values.allSatisfy { $0 == nil }
	}
}
|
||||
@@ -0,0 +1,20 @@
|
||||
//
|
||||
// RSSEnclosure.swift
|
||||
//
|
||||
//
|
||||
// Created by Brent Simmons on 8/27/24.
|
||||
//
|
||||
|
||||
import Foundation
|
||||
|
||||
/// Mutable, parser-internal representation of a file attached to an article.
final class RSSEnclosure {

	/// URL string of the attached file.
	var url: String

	/// Value of the enclosure's length attribute, when the parser could read it as an integer.
	var length: Int?

	/// Value of the enclosure's type attribute, when present.
	var mimeType: String?

	/// Title of the enclosure, when the parser found one.
	var title: String?

	/// Creates an enclosure with only its URL set.
	init(url: String) {
		self.url = url
	}
}
|
||||
@@ -0,0 +1,22 @@
|
||||
//
|
||||
// RSSFeed.swift
|
||||
//
|
||||
//
|
||||
// Created by Brent Simmons on 8/27/24.
|
||||
//
|
||||
|
||||
import Foundation
|
||||
|
||||
/// Mutable, parser-internal representation of a feed while it is being parsed.
final class RSSFeed {

	/// URL string the feed was created with.
	var urlString: String

	/// Feed-level title, link and language, filled in by the parsers.
	var title: String?
	var link: String?
	var language: String?

	/// Articles found in the feed; nil until a parser assigns them.
	var articles: [RSSArticle]?

	/// Creates a feed with only its URL string set.
	init(urlString: String) {
		self.urlString = urlString
	}
}
|
||||
@@ -0,0 +1,75 @@
|
||||
//
|
||||
// RSSFeedTransformer.swift
|
||||
// Parser
|
||||
//
|
||||
// Created by Brent Simmons on 6/25/17.
|
||||
// Copyright © 2017 Ranchero Software, LLC. All rights reserved.
|
||||
//
|
||||
|
||||
import Foundation
|
||||
|
||||
/// Converts the parser-internal feed model into the public `ParsedFeed` model.
struct RSSFeedTransformer {

	/// Turn an internal RSSFeed into a public ParsedFeed.
	///
	/// Title, link, URL and language come from `feed`. Description, next URL,
	/// icons, feed-level authors and hubs are passed as nil, and `expired` as false.
	static func parsedFeed(with feed: RSSFeed, feedType: FeedType) -> ParsedFeed {

		ParsedFeed(
			type: feedType,
			title: feed.title,
			homePageURL: feed.link,
			feedURL: feed.urlString,
			language: feed.language,
			feedDescription: nil,
			nextURL: nil,
			iconURL: nil,
			faviconURL: nil,
			authors: nil,
			expired: false,
			hubs: nil,
			items: parsedItems(feed.articles)
		)
	}
}
|
||||
|
||||
private extension RSSFeedTransformer {
|
||||
|
||||
/// Converts every article to a `ParsedItem`; a nil array yields an empty set.
static func parsedItems(_ articles: [RSSArticle]?) -> Set<ParsedItem> {

	let sourceArticles = articles ?? []
	return Set(sourceArticles.map(parsedItem))
}
|
||||
|
||||
/// Converts one internal article into a public `ParsedItem`.
///
/// The article's permalink becomes `url`, its link becomes `externalURL`,
/// and its body becomes `contentHTML`. Sync service ID, text content,
/// summary, image URLs and tags are passed as nil.
static func parsedItem(_ article: RSSArticle) -> ParsedItem {

	ParsedItem(
		syncServiceID: nil,
		uniqueID: article.articleID,
		feedURL: article.feedURL,
		url: article.permalink,
		externalURL: article.link,
		title: article.title,
		language: article.language,
		contentHTML: article.body,
		contentText: nil,
		summary: nil,
		imageURL: nil,
		bannerImageURL: nil,
		datePublished: article.datePublished,
		dateModified: article.dateModified,
		authors: parsedAuthors(article.authors),
		tags: nil,
		attachments: parsedAttachments(article.enclosures)
	)
}
|
||||
|
||||
/// Converts internal authors to a set of `ParsedAuthor`.
///
/// - Returns: nil when `authors` is nil or empty, or when no author could be converted.
static func parsedAuthors(_ authors: [RSSAuthor]?) -> Set<ParsedAuthor>? {

	guard let authors, !authors.isEmpty else {
		return nil
	}

	// The avatar URL is always passed as nil here.
	let converted = authors.compactMap { (author) -> ParsedAuthor? in
		ParsedAuthor(name: author.name, url: author.url, avatarURL: nil, emailAddress: author.emailAddress)
	}

	if converted.isEmpty {
		return nil
	}
	return Set(converted)
}
|
||||
|
||||
/// Converts internal enclosures to a set of `ParsedAttachment`.
///
/// A length that is missing, zero or negative is passed as a nil size.
///
/// - Returns: nil when `enclosures` is nil or empty, or when no enclosure could be converted.
static func parsedAttachments(_ enclosures: [RSSEnclosure]?) -> Set<ParsedAttachment>? {

	guard let enclosures, !enclosures.isEmpty else {
		return nil
	}

	// NOTE(review): the enclosure's title is not forwarded (title is always nil) — confirm this is intentional.
	let converted = enclosures.compactMap { (enclosure) -> ParsedAttachment? in

		var sizeInBytes: Int? = nil
		if let length = enclosure.length, length > 0 {
			sizeInBytes = length
		}
		return ParsedAttachment(url: enclosure.url, mimeType: enclosure.mimeType, title: nil, sizeInBytes: sizeInBytes, durationInSeconds: nil)
	}

	if converted.isEmpty {
		return nil
	}
	return Set(converted)
}
|
||||
}
|
||||
@@ -0,0 +1,366 @@
|
||||
//
|
||||
// RSSParser.swift
|
||||
// Parser
|
||||
//
|
||||
// Created by Brent Simmons on 6/25/17.
|
||||
// Copyright © 2017 Ranchero Software, LLC. All rights reserved.
|
||||
//
|
||||
|
||||
import Foundation
|
||||
import RSCore
|
||||
|
||||
/// SAX-driven parser that fills an `RSSFeed` from RSS (including RDF-style) XML data.
///
/// Use `RSSParser.parsedFeed(urlString:data:)`; parsing state lives in the
/// private properties below and is mutated by the SAX delegate callbacks.
public final class RSSParser {

	private let feedURL: String
	private let data: Data
	private let feed: RSSFeed

	/// Articles collected so far; the last one is the item currently being filled in.
	private var articles = [RSSArticle]()
	private var currentArticle: RSSArticle? {
		return articles.last
	}

	// Parsing state flags.
	private var endRSSFound = false
	private var isRDF = false
	private var parsingArticle = false
	private var parsingChannelImage = false
	private var parsingAuthor = false

	/// Attributes captured by the most recent start-element callback, when it stored any.
	private var currentAttributes: StringDictionary?

	/// Parses `data` and returns the resulting feed.
	///
	/// - Parameters:
	///   - urlString: URL string of the feed; stored on the feed and on each article.
	///   - data: The raw feed data handed to the SAX parser.
	static func parsedFeed(urlString: String, data: Data) -> RSSFeed {

		let rssParser = RSSParser(urlString: urlString, data: data)
		rssParser.parse()
		return rssParser.feed
	}

	init(urlString: String, data: Data) {
		feedURL = urlString
		self.data = data
		feed = RSSFeed(urlString: urlString)
	}
}
|
||||
|
||||
private extension RSSParser {
|
||||
|
||||
/// Runs the SAX parser over `data`, then hands the collected articles to `feed`.
func parse() {

	let xmlParser = SAXParser(delegate: self, data: data)
	xmlParser.parse()

	feed.articles = articles
}
|
||||
|
||||
/// Element names and namespace prefixes as UTF-8 C strings, compared with `SAXEqualTags`.
private struct XMLName {

	// Document and channel structure
	static let rss = "rss".utf8CString
	static let uppercaseRDF = "RDF".utf8CString
	static let item = "item".utf8CString
	static let image = "image".utf8CString

	// Feed and item elements
	static let title = "title".utf8CString
	static let link = "link".utf8CString
	static let language = "language".utf8CString
	static let description = "description".utf8CString
	static let guid = "guid".utf8CString
	static let author = "author".utf8CString
	static let pubDate = "pubDate".utf8CString
	static let enclosure = "enclosure".utf8CString

	// `dc` prefix and the local names read under it
	static let dc = "dc".utf8CString
	static let creator = "creator".utf8CString
	static let date = "date".utf8CString

	// `content:encoded`
	static let content = "content".utf8CString
	static let encoded = "encoded".utf8CString
}
|
||||
|
||||
/// Stores a feed-level element value on `feed`.
///
/// Prefixed elements are ignored. `link` is stored only when no link is set
/// yet; `title` and `language` are stored each time they are seen.
func addFeedElement(_ saxParser: SAXParser, _ localName: XMLPointer, _ prefix: XMLPointer?) {

	if prefix != nil {
		return
	}

	if SAXEqualTags(localName, XMLName.link) {
		if feed.link == nil {
			feed.link = saxParser.currentString
		}
		return
	}

	if SAXEqualTags(localName, XMLName.title) {
		feed.title = saxParser.currentString
		return
	}

	if SAXEqualTags(localName, XMLName.language) {
		feed.language = saxParser.currentString
	}
}
|
||||
|
||||
/// Appends a new, empty article for this feed; it becomes `currentArticle`.
func addArticle() {
	articles.append(RSSArticle(feedURL))
}
|
||||
|
||||
/// Stores the value of a just-closed item child element on the current article.
///
/// - `dc:`-prefixed elements are forwarded to `addDCElement`.
/// - `content:encoded` becomes the body when it has non-empty text.
/// - Any other prefixed element is ignored.
/// - `enclosure` is built from the stored attributes, whether or not the
///   element has text (previously it was skipped whenever text was stored).
/// - `guid`, `author`, `link`, `description`, `title` and `pubDate` need text.
///
/// - Parameters:
///   - saxParser: Parser whose stored characters hold the element text.
///   - localName: Local name of the element that just ended.
///   - prefix: Namespace prefix of the element, or nil.
func addArticleElement(_ saxParser: SAXParser, _ localName: XMLPointer, _ prefix: XMLPointer?) {

	guard let currentArticle else {
		return
	}

	if let prefix, SAXEqualTags(prefix, XMLName.dc) {
		addDCElement(saxParser, localName, currentArticle)
		return
	}

	if let prefix, SAXEqualTags(prefix, XMLName.content) && SAXEqualTags(localName, XMLName.encoded) {
		if let currentString = saxParser.currentString, !currentString.isEmpty {
			currentArticle.body = currentString
		}
		return
	}

	guard prefix == nil else {
		return
	}

	// An enclosure carries its data in attributes, so it must not depend on
	// whether any character data was stored for the element.
	if SAXEqualTags(localName, XMLName.enclosure) {
		if let currentAttributes {
			addEnclosure(currentAttributes, currentArticle)
		}
		return
	}

	// Everything below reads the element's text.
	guard let currentString = saxParser.currentString else {
		return
	}

	if SAXEqualTags(localName, XMLName.guid) {
		addGuid(currentString, currentArticle)
	}
	else if SAXEqualTags(localName, XMLName.author) {
		addAuthorWithString(currentString, currentArticle)
	}
	else if SAXEqualTags(localName, XMLName.link) {
		currentArticle.link = urlString(currentString)
	}
	else if SAXEqualTags(localName, XMLName.description) {
		// The description is only used when no body has been set yet.
		if currentArticle.body == nil {
			currentArticle.body = currentString
		}
	}
	else if !parsingAuthor && SAXEqualTags(localName, XMLName.title) {
		currentArticle.title = currentString
	}
	else if SAXEqualTags(localName, XMLName.pubDate) {
		currentArticle.datePublished = currentDate(saxParser)
	}
}
|
||||
|
||||
/// Handles `dc:`-prefixed item elements: `creator` adds an author and `date`
/// sets the publication date. Other local names are ignored.
func addDCElement(_ saxParser: SAXParser, _ localName: XMLPointer, _ currentArticle: RSSArticle) {

	if SAXEqualTags(localName, XMLName.creator) {
		guard let creator = saxParser.currentString else {
			return
		}
		addAuthorWithString(creator, currentArticle)
		return
	}

	if SAXEqualTags(localName, XMLName.date) {
		currentArticle.datePublished = currentDate(saxParser)
	}
}
|
||||
|
||||
/// Name of the guid attribute, spelled as in the RSS spec.
static let isPermalinkKey = "isPermaLink"

/// Lowercased attribute name, for matching other capitalizations.
static let isPermalinkLowercaseKey = "ispermalink"

/// Attribute value meaning the guid is not a permalink.
static let falseValue = "false"
|
||||
|
||||
/// Stores `guid` on the article and, when appropriate, also uses it as the permalink.
///
/// The guid becomes the permalink unless the element's `isPermaLink`
/// attribute is "false" (in any letter case) or the guid does not look like
/// a URL or relative path. With no stored attributes, only the guid is set.
///
/// - Parameters:
///   - guid: Text of the guid element.
///   - currentArticle: Article to update.
func addGuid(_ guid: String, _ currentArticle: RSSArticle) {

	currentArticle.guid = guid

	guard let currentAttributes else {
		return
	}

	let isPermaLinkValue: String? = {

		if let value = currentAttributes[Self.isPermalinkKey] {
			return value
		}
		// Allow for `ispermalink`, `isPermalink`, etc.
		for (key, value) in currentAttributes {
			if key.lowercased() == Self.isPermalinkLowercaseKey {
				return value
			}
		}

		return nil
	}()

	// Spec: `isPermaLink is optional, its default value is true.`
	// https://cyber.harvard.edu/rss/rss.html#ltguidgtSubelementOfLtitemgt
	// Return only if non-nil and equal to false — otherwise it’s a permalink.
	// The value is compared case-insensitively, matching the leniency already
	// applied to the attribute name (some feeds write `False`).
	if let isPermaLinkValue, isPermaLinkValue.lowercased() == Self.falseValue {
		return
	}

	// Feed bug found in the wild: using a guid that’s not really a permalink
	// and not realizing that `isPermaLink` is true by default.
	if stringIsProbablyAURLOrRelativePath(guid) {
		currentArticle.permalink = urlString(guid)
	}
}
|
||||
|
||||
/// Heuristic: does `s` look like a URL or a relative path?
///
/// The RSS guid is defined as a permalink, except when it appears like this:
/// `<guid isPermaLink="false">some—identifier</guid>`
/// However, people often seem to think it’s *not* a permalink by default, even
/// though it is. So we try to detect the situation where the value is not a URL string,
/// and not even a relative path. This may need to evolve over time.
///
/// - Returns: true when `s` contains a slash and does not start with `tag:`
///   (case-insensitive).
func stringIsProbablyAURLOrRelativePath(_ s: String) -> Bool {

	// This seems to be just about the best possible check.
	// Bad guids are often just integers, for instance.
	let containsSlash = s.contains("/")

	// A common non-URL guid form starts with `tag:`.
	let looksLikeTagURI = s.lowercased().hasPrefix("tag:")

	return containsSlash && !looksLikeTagURI
}
|
||||
|
||||
/// Make a best attempt at turning a string into a URL string.
///
/// A string that starts with `http` (case-insensitive) is returned unchanged.
/// Anything else is treated as a relative URL and resolved against the
/// feed's home page link when there is one, otherwise against the feed URL.
/// When the base or the resolved URL can't be created, the input is returned unchanged.
///
/// The returned value is not guaranteed to be a valid URL string.
/// It’s a best attempt without going to heroic lengths.
func urlString(_ s: String) -> String {

	guard !s.lowercased().hasPrefix("http") else {
		return s
	}

	guard let baseURL = URL(string: feed.link ?? feedURL),
		let resolvedURL = URL(string: s, relativeTo: baseURL) else {
		return s
	}

	return resolvedURL.absoluteString
}
|
||||
|
||||
/// Adds an author built from a single string to the article; empty strings are ignored.
func addAuthorWithString(_ authorString: String, _ currentArticle: RSSArticle) {

	guard !authorString.isEmpty else {
		return
	}

	currentArticle.addAuthor(RSSAuthor(singleString: authorString))
}
|
||||
|
||||
/// Attribute names read from an `<enclosure>` element.
private struct EnclosureKey {

	static let url = "url"
	static let type = "type"
	static let length = "length"
}
|
||||
|
||||
/// Builds an enclosure from element attributes and adds it to the article.
///
/// Nothing is added when the `url` attribute is missing or empty. `length`
/// is stored only when it converts to an `Int`; `type` is copied as the MIME type.
func addEnclosure(_ attributes: StringDictionary, _ currentArticle: RSSArticle) {

	guard let url = attributes[EnclosureKey.url], !url.isEmpty else {
		return
	}

	let enclosure = RSSEnclosure(url: url)
	enclosure.mimeType = attributes[EnclosureKey.type]

	if let length = attributes[EnclosureKey.length].flatMap({ Int($0) }) {
		enclosure.length = length
	}

	currentArticle.addEnclosure(enclosure)
}
|
||||
|
||||
/// Parses the characters currently stored by the SAX parser as a date.
///
/// - Returns: The result of `DateParser.date(data:)`, or nil when the parser
///   has no stored characters.
func currentDate(_ saxParser: SAXParser) -> Date? {

	if let characters = saxParser.currentCharacters {
		return DateParser.date(data: characters)
	}
	return nil
}
|
||||
}
|
||||
|
||||
extension RSSParser: SAXParserDelegate {
|
||||
|
||||
/// Attribute on an RDF-style `<item>` whose value is used as the article's guid and permalink.
static let rdfAbout = "rdf:about"
|
||||
|
||||
public func saxParser(_ saxParser: SAXParser, xmlStartElement localName: XMLPointer, prefix: XMLPointer?, uri: XMLPointer?, namespaceCount: Int, namespaces: UnsafePointer<XMLPointer?>?, attributeCount: Int, attributesDefaultedCount: Int, attributes: UnsafePointer<XMLPointer?>?) {
|
||||
|
||||
if endRSSFound {
|
||||
return
|
||||
}
|
||||
|
||||
if SAXEqualTags(localName, XMLName.uppercaseRDF) {
|
||||
isRDF = true
|
||||
return
|
||||
}
|
||||
|
||||
var xmlAttributes: StringDictionary? = nil
|
||||
if (isRDF && SAXEqualTags(localName, XMLName.item)) || SAXEqualTags(localName, XMLName.guid) || SAXEqualTags(localName, XMLName.enclosure) {
|
||||
xmlAttributes = saxParser.attributesDictionary(attributes, attributeCount: attributeCount)
|
||||
}
|
||||
if currentAttributes != xmlAttributes {
|
||||
currentAttributes = xmlAttributes
|
||||
}
|
||||
|
||||
if prefix == nil && SAXEqualTags(localName, XMLName.item) {
|
||||
addArticle()
|
||||
parsingArticle = true
|
||||
|
||||
if isRDF, let rdfGuid = xmlAttributes?[Self.rdfAbout], let currentArticle { // RSS 1.0 guid
|
||||
currentArticle.guid = rdfGuid
|
||||
currentArticle.permalink = rdfGuid
|
||||
}
|
||||
}
|
||||
else if prefix == nil && SAXEqualTags(localName, XMLName.image) {
|
||||
parsingChannelImage = true
|
||||
}
|
||||
else if prefix == nil && SAXEqualTags(localName, XMLName.author) {
|
||||
if parsingArticle {
|
||||
parsingAuthor = true
|
||||
}
|
||||
}
|
||||
|
||||
if !parsingChannelImage {
|
||||
saxParser.beginStoringCharacters()
|
||||
}
|
||||
}
|
||||
|
||||
public func saxParser(_ saxParser: SAXParser, xmlEndElement localName: XMLPointer, prefix: XMLPointer?, uri: XMLPointer?) {
|
||||
|
||||
if endRSSFound {
|
||||
return
|
||||
}
|
||||
|
||||
if isRDF && SAXEqualTags(localName, XMLName.uppercaseRDF) {
|
||||
endRSSFound = true
|
||||
}
|
||||
else if SAXEqualTags(localName, XMLName.rss) {
|
||||
endRSSFound = true
|
||||
}
|
||||
else if SAXEqualTags(localName, XMLName.image) {
|
||||
parsingChannelImage = false
|
||||
}
|
||||
else if SAXEqualTags(localName, XMLName.item) {
|
||||
parsingArticle = false
|
||||
}
|
||||
else if parsingArticle {
|
||||
addArticleElement(saxParser, localName, prefix)
|
||||
if SAXEqualTags(localName, XMLName.author) {
|
||||
parsingAuthor = false
|
||||
}
|
||||
}
|
||||
else if !parsingChannelImage {
|
||||
addFeedElement(saxParser, localName, prefix)
|
||||
}
|
||||
}
|
||||
|
||||
public func saxParser(_ saxParser: SAXParser, xmlCharactersFound: XMLPointer, count: Int) {
|
||||
|
||||
// Required method.
|
||||
}
|
||||
}
|
||||
|
||||
@@ -0,0 +1,12 @@
|
||||
//
|
||||
// JSONDictionary.swift
|
||||
// Parser
|
||||
//
|
||||
// Created by Brent Simmons on 6/24/17.
|
||||
// Copyright © 2017 Ranchero Software, LLC. All rights reserved.
|
||||
//
|
||||
|
||||
import Foundation
|
||||
|
||||
/// A dictionary with `String` keys and values of any type.
public typealias JSONDictionary = [String: Any]
|
||||
/// An array of `JSONDictionary` values.
public typealias JSONArray = [JSONDictionary]
|
||||
@@ -0,0 +1,27 @@
|
||||
//
|
||||
// JSONUtilities.swift
|
||||
// Parser
|
||||
//
|
||||
// Created by Brent Simmons on 12/10/17.
|
||||
// Copyright © 2017 Ranchero Software, LLC. All rights reserved.
|
||||
//
|
||||
|
||||
import Foundation
|
||||
|
||||
/// Convenience wrappers around `JSONSerialization` that return nil instead of throwing.
public struct JSONUtilities {

	/// Deserializes `data` with `JSONSerialization`.
	///
	/// - Returns: The deserialized object, or nil if deserialization throws.
	public static func object(with data: Data) -> Any? {

		do {
			return try JSONSerialization.jsonObject(with: data)
		} catch {
			// Best effort: callers only need to know that there is no usable object.
			return nil
		}
	}

	/// Deserializes `data` and returns it as a `JSONDictionary`.
	///
	/// - Returns: nil if deserialization fails or the result is not a `JSONDictionary`.
	public static func dictionary(with data: Data) -> JSONDictionary? {

		guard let dictionary = object(with: data) as? JSONDictionary else {
			return nil
		}
		return dictionary
	}

	/// Deserializes `data` and returns it as a `JSONArray`.
	///
	/// - Returns: nil if deserialization fails or the result is not a `JSONArray`.
	public static func array(with data: Data) -> JSONArray? {

		guard let array = object(with: data) as? JSONArray else {
			return nil
		}
		return array
	}
}
|
||||
Reference in New Issue
Block a user