From ccd87040c3a12492b1fdcbfae1e6492fa93bd744 Mon Sep 17 00:00:00 2001 From: Brent Simmons Date: Tue, 19 Dec 2017 13:03:05 -0800 Subject: [PATCH] Support multiple authors in RSS and Atom feeds. --- .../RSParser/Feeds/XML/RSParsedArticle.h | 4 ++- .../RSParser/Feeds/XML/RSParsedArticle.m | 17 ++++++++-- .../RSParser/Feeds/XML/RSParsedAuthor.h | 19 +++++++++++ .../RSParser/Feeds/XML/RSParsedAuthor.m | 33 +++++++++++++++++++ .../Feeds/XML/RSParsedFeedTransformer.swift | 14 +++++--- Frameworks/RSParser/Feeds/XML/RSRSSParser.m | 16 ++++++--- Frameworks/RSParser/RSParser.h | 1 + .../RSParser.xcodeproj/project.pbxproj | 8 +++++ .../RSParser/Utilities/NSString+RSParser.h | 2 ++ .../RSParser/Utilities/NSString+RSParser.m | 5 +++ 10 files changed, 107 insertions(+), 12 deletions(-) create mode 100644 Frameworks/RSParser/Feeds/XML/RSParsedAuthor.h create mode 100644 Frameworks/RSParser/Feeds/XML/RSParsedAuthor.m diff --git a/Frameworks/RSParser/Feeds/XML/RSParsedArticle.h b/Frameworks/RSParser/Feeds/XML/RSParsedArticle.h index a83fce3f8..d5ae83b7b 100755 --- a/Frameworks/RSParser/Feeds/XML/RSParsedArticle.h +++ b/Frameworks/RSParser/Feeds/XML/RSParsedArticle.h @@ -9,6 +9,7 @@ @import Foundation; @class RSParsedEnclosure; +@class RSParsedAuthor; @interface RSParsedArticle : NSObject @@ -22,13 +23,14 @@ @property (nonatomic, nullable) NSString *body; @property (nonatomic, nullable) NSString *link; @property (nonatomic, nullable) NSString *permalink; -@property (nonatomic, nullable) NSString *author; +@property (nonatomic, nullable) NSSet *authors; @property (nonatomic, nullable) NSSet *enclosures; @property (nonatomic, nullable) NSDate *datePublished; @property (nonatomic, nullable) NSDate *dateModified; @property (nonatomic, nonnull) NSDate *dateParsed; - (void)addEnclosure:(RSParsedEnclosure *_Nonnull)enclosure; +- (void)addAuthor:(RSParsedAuthor *_Nonnull)author; @end diff --git a/Frameworks/RSParser/Feeds/XML/RSParsedArticle.m 
b/Frameworks/RSParser/Feeds/XML/RSParsedArticle.m index 6b4439479..436f5afc3 100755 --- a/Frameworks/RSParser/Feeds/XML/RSParsedArticle.m +++ b/Frameworks/RSParser/Feeds/XML/RSParsedArticle.m @@ -9,7 +9,8 @@ #import #import #import - +#import +#import @implementation RSParsedArticle @@ -44,7 +45,19 @@ } } -#pragma mark - Accessors +#pragma mark - Authors + +- (void)addAuthor:(RSParsedAuthor *)author { // Adds author to self.authors: the first call creates the set, later calls assign a new set that also contains author. + + if (self.authors) { + self.authors = [self.authors setByAddingObject:author]; + } + else { + self.authors = [NSSet setWithObject:author]; + } +} + +#pragma mark - articleID - (NSString *)articleID { diff --git a/Frameworks/RSParser/Feeds/XML/RSParsedAuthor.h b/Frameworks/RSParser/Feeds/XML/RSParsedAuthor.h new file mode 100644 index 000000000..2c28236a2 --- /dev/null +++ b/Frameworks/RSParser/Feeds/XML/RSParsedAuthor.h @@ -0,0 +1,19 @@ +// +// RSParsedAuthor.h +// RSParserTests +// +// Created by Brent Simmons on 12/19/17. +// Copyright © 2017 Ranchero Software, LLC. All rights reserved. +// + +@import Foundation; + +@interface RSParsedAuthor : NSObject + +@property (nonatomic, nullable) NSString *name; // authorWithSingleString: stores the string here when it is neither an email address nor a URL. +@property (nonatomic, nullable) NSString *emailAddress; // authorWithSingleString: stores the string here when it contains "@". +@property (nonatomic, nullable) NSString *url; // authorWithSingleString: stores the string here when its lowercased form starts with "http". + ++ (instancetype _Nonnull )authorWithSingleString:(NSString *_Nonnull)s; // Sets exactly one property. Don’t know which property it is. Guess based on contents of the string. Common with RSS. + +@end diff --git a/Frameworks/RSParser/Feeds/XML/RSParsedAuthor.m b/Frameworks/RSParser/Feeds/XML/RSParsedAuthor.m new file mode 100644 index 000000000..01faf6306 --- /dev/null +++ b/Frameworks/RSParser/Feeds/XML/RSParsedAuthor.m @@ -0,0 +1,33 @@ +// +// RSParsedAuthor.m +// RSParserTests +// +// Created by Brent Simmons on 12/19/17. +// Copyright © 2017 Ranchero Software, LLC. All rights reserved.
+// + +#import +#import "RSParsedAuthor.h" + +@implementation RSParsedAuthor + ++ (instancetype)authorWithSingleString:(NSString *)s { + + // The author element in RSS is supposed to be an email address — but often it’s a name, and sometimes a URL. + + RSParsedAuthor *author = [[self alloc] init]; + + if ([s rsparser_contains:@"@"]) { // Checked first, so a string with both "@" and an http prefix counts as an email address. + author.emailAddress = s; + } + else if ([s.lowercaseString hasPrefix:@"http"]) { + author.url = s; + } + else { + author.name = s; + } + + return author; +} + +@end diff --git a/Frameworks/RSParser/Feeds/XML/RSParsedFeedTransformer.swift b/Frameworks/RSParser/Feeds/XML/RSParsedFeedTransformer.swift index 77d6d4098..05c9c0eee 100644 --- a/Frameworks/RSParser/Feeds/XML/RSParsedFeedTransformer.swift +++ b/Frameworks/RSParser/Feeds/XML/RSParsedFeedTransformer.swift @@ -40,19 +40,23 @@ private extension RSParsedFeedTransformer { let contentHTML = parsedArticle.body let datePublished = parsedArticle.datePublished let dateModified = parsedArticle.dateModified - let authors = parsedAuthors(parsedArticle.author) + let authors = parsedAuthors(parsedArticle.authors) let attachments = parsedAttachments(parsedArticle.enclosures) return ParsedItem(syncServiceID: nil, uniqueID: uniqueID, feedURL: parsedArticle.feedURL, url: url, externalURL: externalURL, title: title, contentHTML: contentHTML, contentText: nil, summary: nil, imageURL: nil, bannerImageURL: nil, datePublished: datePublished, dateModified: dateModified, authors: authors, tags: nil, attachments: attachments) } - static func parsedAuthors(_ authorEmailAddress: String?) -> Set? { + static func parsedAuthors(_ authors: Set?) -> Set? { - guard let authorEmailAddress = authorEmailAddress else { + guard let authors = authors, !authors.isEmpty else { return nil } - let author = ParsedAuthor(name: nil, url: nil, avatarURL: nil, emailAddress: authorEmailAddress) - return Set([author]) + + let transformedAuthors = authors.map { (author) -> ParsedAuthor
in + return ParsedAuthor(name: author.name, url: author.url, avatarURL: nil, emailAddress: author.emailAddress) // One ParsedAuthor per parsed author; nothing is filtered out, so map rather than flatMap. + } + + return transformedAuthors.isEmpty ? nil : Set(transformedAuthors) } static func parsedAttachments(_ enclosures: Set?) -> Set? { diff --git a/Frameworks/RSParser/Feeds/XML/RSRSSParser.m b/Frameworks/RSParser/Feeds/XML/RSRSSParser.m index 47b5552f9..0ff49471c 100755 --- a/Frameworks/RSParser/Feeds/XML/RSRSSParser.m +++ b/Frameworks/RSParser/Feeds/XML/RSRSSParser.m @@ -16,6 +16,7 @@ #import #import #import +#import @interface RSRSSParser () @@ -207,15 +208,22 @@ static const NSInteger kEnclosureLength = 10; } } +- (void)addAuthorWithString:(NSString *)authorString { // Adds an author built from authorString to the current article; does nothing when RSParserStringIsEmpty reports the string empty. + + if (RSParserStringIsEmpty(authorString)) { + return; + } + + RSParsedAuthor *author = [RSParsedAuthor authorWithSingleString:authorString]; // Use the argument that was just validated instead of reading the parser buffer a second time. + [self.currentArticle addAuthor:author]; +} - (void)addDCElement:(const xmlChar *)localName { if (RSSAXEqualTags(localName, kCreator, kCreatorLength)) { - - self.currentArticle.author = self.parser.currentStringWithTrimmedWhitespace; + [self addAuthorWithString:self.parser.currentStringWithTrimmedWhitespace]; } else if (RSSAXEqualTags(localName, kDate, kDateLength)) { - self.currentArticle.datePublished = self.currentDate; } } @@ -304,7 +312,7 @@ static const NSInteger kEnclosureLength = 10; self.currentArticle.datePublished = self.currentDate; } else if (RSSAXEqualTags(localName, kAuthor, kAuthorLength)) { - self.currentArticle.author = self.parser.currentStringWithTrimmedWhitespace; + [self addAuthorWithString:self.parser.currentStringWithTrimmedWhitespace]; } else if (RSSAXEqualTags(localName, kLink, kLinkLength)) { self.currentArticle.link = [self urlString:self.parser.currentStringWithTrimmedWhitespace]; diff --git a/Frameworks/RSParser/RSParser.h b/Frameworks/RSParser/RSParser.h index 9059aa5e2..9bb1b10b0 100644 --- a/Frameworks/RSParser/RSParser.h +++ b/Frameworks/RSParser/RSParser.h @@ -38,6 +38,7 @@
#import #import #import +#import // HTML diff --git a/Frameworks/RSParser/RSParser.xcodeproj/project.pbxproj b/Frameworks/RSParser/RSParser.xcodeproj/project.pbxproj index dd15a9231..31b0beb99 100644 --- a/Frameworks/RSParser/RSParser.xcodeproj/project.pbxproj +++ b/Frameworks/RSParser/RSParser.xcodeproj/project.pbxproj @@ -66,6 +66,8 @@ 84469D441F002CEF004A6B28 /* JSONFeedParser.swift in Sources */ = {isa = PBXBuildFile; fileRef = 84469D431F002CEF004A6B28 /* JSONFeedParser.swift */; }; 844B5B3E1FE9A13C00C7C76A /* 4fsodonline.atom in Resources */ = {isa = PBXBuildFile; fileRef = 844B5B3D1FE9A13B00C7C76A /* 4fsodonline.atom */; }; 844B5B401FE9A45200C7C76A /* expertopinionent.atom in Resources */ = {isa = PBXBuildFile; fileRef = 844B5B3F1FE9A45200C7C76A /* expertopinionent.atom */; }; + 844B5B441FE9AFDB00C7C76A /* RSParsedAuthor.h in Headers */ = {isa = PBXBuildFile; fileRef = 844B5B411FE9AF2700C7C76A /* RSParsedAuthor.h */; settings = {ATTRIBUTES = (Public, ); }; }; + 844B5B451FE9AFE000C7C76A /* RSParsedAuthor.m in Sources */ = {isa = PBXBuildFile; fileRef = 844B5B421FE9AF2700C7C76A /* RSParsedAuthor.m */; }; 845213251FCB3C76003B6E93 /* coco.html in Resources */ = {isa = PBXBuildFile; fileRef = 845213241FCB3C75003B6E93 /* coco.html */; }; 845213281FCB4042003B6E93 /* RSHTMLTag.h in Headers */ = {isa = PBXBuildFile; fileRef = 845213261FCB4042003B6E93 /* RSHTMLTag.h */; settings = {ATTRIBUTES = (Public, ); }; }; 845213291FCB4042003B6E93 /* RSHTMLTag.m in Sources */ = {isa = PBXBuildFile; fileRef = 845213271FCB4042003B6E93 /* RSHTMLTag.m */; }; @@ -176,6 +178,8 @@ 84469D431F002CEF004A6B28 /* JSONFeedParser.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; name = JSONFeedParser.swift; path = Feeds/JSON/JSONFeedParser.swift; sourceTree = ""; }; 844B5B3D1FE9A13B00C7C76A /* 4fsodonline.atom */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text.xml; path = 4fsodonline.atom; sourceTree = ""; }; 
844B5B3F1FE9A45200C7C76A /* expertopinionent.atom */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text.xml; path = expertopinionent.atom; sourceTree = ""; }; + 844B5B411FE9AF2700C7C76A /* RSParsedAuthor.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = RSParsedAuthor.h; sourceTree = ""; }; + 844B5B421FE9AF2700C7C76A /* RSParsedAuthor.m */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.objc; path = RSParsedAuthor.m; sourceTree = ""; }; 845213241FCB3C75003B6E93 /* coco.html */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text.html; path = coco.html; sourceTree = ""; }; 845213261FCB4042003B6E93 /* RSHTMLTag.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = RSHTMLTag.h; sourceTree = ""; }; 845213271FCB4042003B6E93 /* RSHTMLTag.m */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.objc; path = RSHTMLTag.m; sourceTree = ""; }; @@ -355,6 +359,8 @@ 84469D241EFA3134004A6B28 /* RSParsedFeed.m */, 8401FF7F1FE862E70080F13F /* RSParsedEnclosure.h */, 8401FF801FE862E70080F13F /* RSParsedEnclosure.m */, + 844B5B411FE9AF2700C7C76A /* RSParsedAuthor.h */, + 844B5B421FE9AF2700C7C76A /* RSParsedAuthor.m */, ); name = XML; path = Feeds/XML; @@ -469,6 +475,7 @@ 84469CFC1EFA3069004A6B28 /* RSSAXParser.h in Headers */, 845213281FCB4042003B6E93 /* RSHTMLTag.h in Headers */, 84E7E69F1F85780D0046719D /* ParserData.h in Headers */, + 844B5B441FE9AFDB00C7C76A /* RSParsedAuthor.h in Headers */, 8401FF811FE862E70080F13F /* RSParsedEnclosure.h in Headers */, 84469D071EFA307E004A6B28 /* RSHTMLLinkParser.h in Headers */, 84469D0D1EFA307E004A6B28 /* RSSAXHTMLParser.h in Headers */, @@ -624,6 +631,7 @@ 84285AAC1F006754002E8708 /* AtomParser.swift in Sources */, 84285AA81F005D53002E8708 /* RSSParser.swift in Sources */, 84469D421EFF2B2D004A6B28 /* JSONTypes.swift in Sources */, + 844B5B451FE9AFE000C7C76A /* RSParsedAuthor.m in Sources */, 84469D0C1EFA307E004A6B28 /* RSHTMLMetadataParser.m 
in Sources */, 84469D0A1EFA307E004A6B28 /* RSHTMLMetadata.m in Sources */, 84469D171EFA30A2004A6B28 /* NSString+RSParser.m in Sources */, diff --git a/Frameworks/RSParser/Utilities/NSString+RSParser.h b/Frameworks/RSParser/Utilities/NSString+RSParser.h index 6b9b65d9c..ddfed6e6b 100755 --- a/Frameworks/RSParser/Utilities/NSString+RSParser.h +++ b/Frameworks/RSParser/Utilities/NSString+RSParser.h @@ -16,6 +16,8 @@ NS_ASSUME_NONNULL_BEGIN - (NSString *)rsparser_md5Hash; +- (BOOL)rsparser_contains:(NSString *)s; // YES if s occurs anywhere in the receiver, as found by rangeOfString: with default options. + @end NS_ASSUME_NONNULL_END diff --git a/Frameworks/RSParser/Utilities/NSString+RSParser.m b/Frameworks/RSParser/Utilities/NSString+RSParser.m index 40768975e..0e8a05112 100755 --- a/Frameworks/RSParser/Utilities/NSString+RSParser.m +++ b/Frameworks/RSParser/Utilities/NSString+RSParser.m @@ -19,6 +19,11 @@ @implementation NSString (RSParser) +- (BOOL)rsparser_contains:(NSString *)s { + + return [self rangeOfString:s].location != NSNotFound; // A location of NSNotFound means rangeOfString: found no match. +} + - (NSString *)rsparser_stringByDecodingHTMLEntities { @autoreleasepool {