From b00b2f39d7dcb2215d6e369f890e17e1d1b56a75 Mon Sep 17 00:00:00 2001 From: Brent Simmons Date: Tue, 20 Jun 2017 21:18:46 -0700 Subject: [PATCH] Start work on turning RSXML.framework into RSParser.framework. --- Frameworks/RSParser/Feeds/FeedParser.h | 24 + Frameworks/RSParser/Feeds/RSFeedParser.h | 28 + Frameworks/RSParser/Feeds/RSFeedParser.m | 216 +++++++ Frameworks/RSParser/Feeds/RSParsedArticle.h | 32 + Frameworks/RSParser/Feeds/RSParsedArticle.m | 101 +++ Frameworks/RSParser/Feeds/RSParsedFeed.h | 22 + Frameworks/RSParser/Feeds/RSParsedFeed.m | 29 + Frameworks/RSParser/HTML/RSHTMLLinkParser.h | 31 + Frameworks/RSParser/HTML/RSHTMLLinkParser.m | 151 +++++ Frameworks/RSParser/HTML/RSHTMLMetadata.h | 45 ++ Frameworks/RSParser/HTML/RSHTMLMetadata.m | 245 +++++++ .../RSParser/HTML/RSHTMLMetadataParser.h | 28 + .../RSParser/HTML/RSHTMLMetadataParser.m | 129 ++++ Frameworks/RSParser/HTML/RSSAXHTMLParser.h | 49 ++ Frameworks/RSParser/HTML/RSSAXHTMLParser.m | 315 +++++++++ Frameworks/RSParser/Info.plist | 26 + Frameworks/RSParser/RSParser.h | 40 ++ .../RSParser.xcodeproj/project.pbxproj | 420 ++++++++++++ .../contents.xcworkspacedata | 7 + Frameworks/RSParser/RSParserData.h | 22 + Frameworks/RSParser/RSParserData.m | 28 + Frameworks/RSParser/RSParserError.h | 19 + Frameworks/RSParser/RSParserError.m | 22 + Frameworks/RSParser/RSParserTests/Info.plist | 22 + .../RSParserTests/RSParserTests.swift | 36 ++ .../RSParser/Utilities/NSString+RSXML.h | 16 + .../RSParser/Utilities/NSString+RSXML.m | 408 ++++++++++++ Frameworks/RSParser/Utilities/RSDateParser.h | 25 + Frameworks/RSParser/Utilities/RSDateParser.m | 435 +++++++++++++ Frameworks/RSParser/Utilities/RSXMLInternal.h | 31 + Frameworks/RSParser/Utilities/RSXMLInternal.m | 83 +++ Frameworks/RSParser/XML/Feeds/RSAtomParser.h | 13 + Frameworks/RSParser/XML/Feeds/RSAtomParser.m | 604 ++++++++++++++++++ Frameworks/RSParser/XML/Feeds/RSRSSParser.h | 13 + Frameworks/RSParser/XML/Feeds/RSRSSParser.m | 469 ++++++++++++++ 
.../RSParser/XML/OPML/RSOPMLAttributes.h | 36 ++ .../RSParser/XML/OPML/RSOPMLAttributes.m | 66 ++ Frameworks/RSParser/XML/OPML/RSOPMLDocument.h | 17 + Frameworks/RSParser/XML/OPML/RSOPMLDocument.m | 13 + .../RSParser/XML/OPML/RSOPMLFeedSpecifier.h | 23 + .../RSParser/XML/OPML/RSOPMLFeedSpecifier.m | 50 ++ Frameworks/RSParser/XML/OPML/RSOPMLItem.h | 26 + Frameworks/RSParser/XML/OPML/RSOPMLItem.m | 86 +++ Frameworks/RSParser/XML/OPML/RSOPMLParser.h | 29 + Frameworks/RSParser/XML/OPML/RSOPMLParser.m | 297 +++++++++ Frameworks/RSParser/XML/SAX/RSSAXParser.h | 68 ++ Frameworks/RSParser/XML/SAX/RSSAXParser.m | 346 ++++++++++ 47 files changed, 5241 insertions(+) create mode 100755 Frameworks/RSParser/Feeds/FeedParser.h create mode 100755 Frameworks/RSParser/Feeds/RSFeedParser.h create mode 100755 Frameworks/RSParser/Feeds/RSFeedParser.m create mode 100755 Frameworks/RSParser/Feeds/RSParsedArticle.h create mode 100755 Frameworks/RSParser/Feeds/RSParsedArticle.m create mode 100755 Frameworks/RSParser/Feeds/RSParsedFeed.h create mode 100755 Frameworks/RSParser/Feeds/RSParsedFeed.m create mode 100755 Frameworks/RSParser/HTML/RSHTMLLinkParser.h create mode 100755 Frameworks/RSParser/HTML/RSHTMLLinkParser.m create mode 100755 Frameworks/RSParser/HTML/RSHTMLMetadata.h create mode 100755 Frameworks/RSParser/HTML/RSHTMLMetadata.m create mode 100755 Frameworks/RSParser/HTML/RSHTMLMetadataParser.h create mode 100755 Frameworks/RSParser/HTML/RSHTMLMetadataParser.m create mode 100755 Frameworks/RSParser/HTML/RSSAXHTMLParser.h create mode 100755 Frameworks/RSParser/HTML/RSSAXHTMLParser.m create mode 100644 Frameworks/RSParser/Info.plist create mode 100644 Frameworks/RSParser/RSParser.h create mode 100644 Frameworks/RSParser/RSParser.xcodeproj/project.pbxproj create mode 100644 Frameworks/RSParser/RSParser.xcodeproj/project.xcworkspace/contents.xcworkspacedata create mode 100755 Frameworks/RSParser/RSParserData.h create mode 100755 Frameworks/RSParser/RSParserData.m create mode 100755 
Frameworks/RSParser/RSParserError.h create mode 100755 Frameworks/RSParser/RSParserError.m create mode 100644 Frameworks/RSParser/RSParserTests/Info.plist create mode 100644 Frameworks/RSParser/RSParserTests/RSParserTests.swift create mode 100755 Frameworks/RSParser/Utilities/NSString+RSXML.h create mode 100755 Frameworks/RSParser/Utilities/NSString+RSXML.m create mode 100755 Frameworks/RSParser/Utilities/RSDateParser.h create mode 100755 Frameworks/RSParser/Utilities/RSDateParser.m create mode 100755 Frameworks/RSParser/Utilities/RSXMLInternal.h create mode 100755 Frameworks/RSParser/Utilities/RSXMLInternal.m create mode 100755 Frameworks/RSParser/XML/Feeds/RSAtomParser.h create mode 100755 Frameworks/RSParser/XML/Feeds/RSAtomParser.m create mode 100755 Frameworks/RSParser/XML/Feeds/RSRSSParser.h create mode 100755 Frameworks/RSParser/XML/Feeds/RSRSSParser.m create mode 100755 Frameworks/RSParser/XML/OPML/RSOPMLAttributes.h create mode 100755 Frameworks/RSParser/XML/OPML/RSOPMLAttributes.m create mode 100755 Frameworks/RSParser/XML/OPML/RSOPMLDocument.h create mode 100755 Frameworks/RSParser/XML/OPML/RSOPMLDocument.m create mode 100755 Frameworks/RSParser/XML/OPML/RSOPMLFeedSpecifier.h create mode 100755 Frameworks/RSParser/XML/OPML/RSOPMLFeedSpecifier.m create mode 100755 Frameworks/RSParser/XML/OPML/RSOPMLItem.h create mode 100755 Frameworks/RSParser/XML/OPML/RSOPMLItem.m create mode 100755 Frameworks/RSParser/XML/OPML/RSOPMLParser.h create mode 100755 Frameworks/RSParser/XML/OPML/RSOPMLParser.m create mode 100755 Frameworks/RSParser/XML/SAX/RSSAXParser.h create mode 100755 Frameworks/RSParser/XML/SAX/RSSAXParser.m diff --git a/Frameworks/RSParser/Feeds/FeedParser.h b/Frameworks/RSParser/Feeds/FeedParser.h new file mode 100755 index 000000000..0f8df6b07 --- /dev/null +++ b/Frameworks/RSParser/Feeds/FeedParser.h @@ -0,0 +1,24 @@ +// +// FeedParser.h +// RSXML +// +// Created by Brent Simmons on 7/12/15. +// Copyright © 2015 Ranchero Software, LLC. 
All rights reserved.
+//
+
+@import Foundation;
+
+@class RSParsedFeed;
+@class RSXMLData;
+
+
+// Interface shared by the concrete feed parsers. RSFeedParser.m keeps a list of
+// parser classes (RSRSSParser, RSAtomParser) and talks to them through these methods.
+@protocol FeedParser
+
+// Asked of each parser class in turn by RSFeedParser.m; a YES answer selects
+// that class for xmlData.
++ (BOOL)canParseFeed:(RSXMLData * _Nonnull)xmlData;
+
+// Creates a parser instance bound to xmlData.
+- (nonnull instancetype)initWithXMLData:(RSXMLData * _Nonnull)xmlData;
+
+// Returns the parsed feed (nullable). `error` is an optional out-parameter.
+// NOTE(review): presumably nil is returned and *error is set on failure —
+// confirm against the RSS/Atom implementations.
+- (nullable RSParsedFeed *)parseFeed:(NSError * _Nullable * _Nullable)error;
+
+
+@end
diff --git a/Frameworks/RSParser/Feeds/RSFeedParser.h b/Frameworks/RSParser/Feeds/RSFeedParser.h
new file mode 100755
index 000000000..40b2fef4f
--- /dev/null
+++ b/Frameworks/RSParser/Feeds/RSFeedParser.h
@@ -0,0 +1,28 @@
+//
+// RSFeedParser.h
+// RSXML
+//
+// Created by Brent Simmons on 1/4/15.
+// Copyright (c) 2015 Ranchero Software LLC. All rights reserved.
+//
+
+#import "FeedParser.h"
+
+// If you have a feed and don’t know or care what it is (RSS or Atom),
+// then call RSParseFeed or RSParseFeedSync.
+
+@class RSXMLData;
+@class RSParsedFeed;
+
+NS_ASSUME_NONNULL_BEGIN
+
+// NOTE(review): presumably YES when one of the known parsers accepts xmlData;
+// the implementation is not visible in this copy of the patch.
+BOOL RSCanParseFeed(RSXMLData *xmlData);
+
+
+// Completion block for RSParseFeed. Both arguments are nullable.
+typedef void (^RSParsedFeedBlock)(RSParsedFeed * _Nullable parsedFeed, NSError * _Nullable error);
+
+// callback is called on main queue.
+void RSParseFeed(RSXMLData *xmlData, RSParsedFeedBlock callback);
+// Synchronous variant: returns the feed (nullable) and takes an optional error out-parameter.
+RSParsedFeed * _Nullable RSParseFeedSync(RSXMLData *xmlData, NSError * _Nullable * _Nullable error);
+
+NS_ASSUME_NONNULL_END
diff --git a/Frameworks/RSParser/Feeds/RSFeedParser.m b/Frameworks/RSParser/Feeds/RSFeedParser.m
new file mode 100755
index 000000000..d4fee6ca1
--- /dev/null
+++ b/Frameworks/RSParser/Feeds/RSFeedParser.m
@@ -0,0 +1,216 @@
+//
+// FeedParser.m
+// RSXML
+//
+// Created by Brent Simmons on 1/4/15.
+// Copyright (c) 2015 Ranchero Software LLC. All rights reserved.
+//
+
+#import "RSFeedParser.h"
+#import "FeedParser.h"
+#import "RSXMLData.h"
+#import "RSRSSParser.h"
+#import "RSAtomParser.h"
+
+// Returns the parser classes tried by the fallback path of parserClassForXMLData,
+// in order (RSS first, then Atom). The array is built once and reused.
+static NSArray *parserClasses(void) {
+
+    static NSArray *gParserClasses = nil;
+
+    static dispatch_once_t onceToken;
+    dispatch_once(&onceToken, ^{
+
+        gParserClasses = @[[RSRSSParser class], [RSAtomParser class]];
+    });
+
+    return gParserClasses;
+}
+
+// Cheap pre-check before any sniffing. Currently only rejects a nil data object.
+static BOOL feedMayBeParseable(RSXMLData *xmlData) {
+
+    /*Sanity checks.*/
+
+    if (!xmlData.data) {
+        return NO;
+    }
+
+    /*TODO: check size, type, etc.*/
+
+    return YES;
+}
+
+static BOOL optimisticCanParseRSSData(const char *bytes, NSUInteger numberOfBytes);
+static BOOL optimisticCanParseAtomData(const char *bytes, NSUInteger numberOfBytes);
+static BOOL optimisticCanParseRDF(const char *bytes, NSUInteger numberOfBytes);
+static BOOL dataIsProbablyHTML(const char *bytes, NSUInteger numberOfBytes);
+static BOOL dataIsSomeWeirdException(const char *bytes, NSUInteger numberOfBytes);
+static BOOL dataHasLeftCaret(const char *bytes, NSUInteger numberOfBytes);
+
+// The sniffing heuristics look at no more than this many leading bytes...
+static const NSUInteger maxNumberOfBytesToSearch = 4096;
+// ...and are skipped entirely unless the data is longer than this.
+static const NSUInteger minNumberOfBytesToSearch = 20;
+
+// Picks the parser class for xmlData, or nil when the data should not be parsed.
+//
+// Fast path: sniff the leading bytes for RSS/Atom markers and reject data that
+// has no "<", looks like RDF (not supported yet), looks like HTML, or matches a
+// known exception. Fallback: ask each class from parserClasses() via +canParseFeed:.
+//
+// Always returns a Class (never an instance), so callers can alloc/init it
+// themselves, as parserForXMLData does.
+static Class parserClassForXMLData(RSXMLData *xmlData) {
+
+    if (!feedMayBeParseable(xmlData)) {
+        return nil;
+    }
+
+    // TODO: check for things like images and movies and return nil.
+
+    const char *bytes = xmlData.data.bytes;
+    NSUInteger numberOfBytes = xmlData.data.length;
+
+    if (numberOfBytes > minNumberOfBytesToSearch) {
+
+        if (numberOfBytes > maxNumberOfBytesToSearch) {
+            numberOfBytes = maxNumberOfBytesToSearch;
+        }
+
+        if (!dataHasLeftCaret(bytes, numberOfBytes)) {
+            return nil;
+        }
+
+        if (optimisticCanParseRSSData(bytes, numberOfBytes)) {
+            return [RSRSSParser class];
+        }
+        if (optimisticCanParseAtomData(bytes, numberOfBytes)) {
+            return [RSAtomParser class];
+        }
+
+        if (optimisticCanParseRDF(bytes, numberOfBytes)) {
+            return nil; //TODO: parse RDF feeds
+        }
+
+        if (dataIsProbablyHTML(bytes, numberOfBytes)) {
+            return nil;
+        }
+        if (dataIsSomeWeirdException(bytes, numberOfBytes)) {
+            return nil;
+        }
+    }
+
+    for (Class parserClass in parserClasses()) {
+        if ([parserClass canParseFeed:xmlData]) {
+            // Return the class itself. Returning an allocated parser here would make
+            // parserForXMLData send +alloc to an instance.
+            return parserClass;
+        }
+    }
+
+    return nil;
+}
+
+// Returns a new parser instance for xmlData, or nil when no parser class applies.
+static id parserForXMLData(RSXMLData *xmlData) {
+
+    Class parserClass = parserClassForXMLData(xmlData);
+    if (!parserClass) {
+        return nil;
+    }
+    return [[parserClass alloc] initWithXMLData:xmlData];
+}
+
+// YES when parserClassForXMLData finds a parser class for xmlData.
+static BOOL canParseXMLData(RSXMLData *xmlData) {
+
+    return parserClassForXMLData(xmlData) != nil;
+}
+
+// YES when the C string `string` occurs within the first numberOfBytes of `bytes`.
+// Uses strnstr, so the search also ends at the first NUL byte in `bytes`.
+static BOOL didFindString(const char *string, const char *bytes, NSUInteger numberOfBytes) {
+
+    char *foundString = strnstr(bytes, string, numberOfBytes);
+    return foundString != NULL;
+}
+
+// YES when the searched bytes contain at least one "<".
+static BOOL dataHasLeftCaret(const char *bytes, NSUInteger numberOfBytes) {
+
+    return didFindString("<", bytes, numberOfBytes);
+}
+
+static BOOL dataIsProbablyHTML(const char *bytes, NSUInteger numberOfBytes) {
+
+    // Won’t catch every single case, which is fine.
+
+    // NOTE(review): the next line is damaged in this copy of the patch. Everything
+    // between `didFindString("` and `* _Nonnull)articles;` is missing (angle-bracketed
+    // text appears to have been stripped during extraction). Restore the span from
+    // the original commit before relying on this region.
+    if (didFindString("* _Nonnull)articles;
+
+// Values handed to the initializer, exposed read-only.
+@property (nonatomic, readonly, nonnull) NSString *urlString;
+@property (nonatomic, readonly, nullable) NSString *title;
+@property (nonatomic, readonly, nullable) NSString *link;
+@property (nonatomic, readonly, nonnull) NSSet *articles;
+
+@end
diff --git a/Frameworks/RSParser/Feeds/RSParsedFeed.m b/Frameworks/RSParser/Feeds/RSParsedFeed.m
new file mode 100755
index 000000000..330b7da3c
--- /dev/null
+++ b/Frameworks/RSParser/Feeds/RSParsedFeed.m
@@ -0,0 +1,29 @@
+//
+// RSParsedFeed.m
+// RSXML
+//
+// Created by Brent Simmons on 7/12/15.
+// Copyright © 2015 Ranchero Software, LLC. All rights reserved.
+//
+
+#import "RSParsedFeed.h"
+
+@implementation RSParsedFeed
+
+// Stores the four arguments as given (no copies are made).
+- (instancetype)initWithURLString:(NSString *)urlString title:(NSString *)title link:(NSString *)link articles:(NSSet *)articles {
+
+    self = [super init];
+    if (!self) {
+        return nil;
+    }
+
+    _urlString = urlString;
+    _title = title;
+    _link = link;
+    _articles = articles;
+
+    return self;
+}
+
+
+@end
diff --git a/Frameworks/RSParser/HTML/RSHTMLLinkParser.h b/Frameworks/RSParser/HTML/RSHTMLLinkParser.h
new file mode 100755
index 000000000..b8ec138ce
--- /dev/null
+++ b/Frameworks/RSParser/HTML/RSHTMLLinkParser.h
@@ -0,0 +1,31 @@
+//
+// RSHTMLLinkParser.h
+// RSXML
+//
+// Created by Brent Simmons on 8/7/16.
+// Copyright © 2016 Ranchero Software, LLC. All rights reserved.
+//
+
+@import Foundation;
+
+/*Returns every anchor element (<a href="some_url">some_text</a>) as an RSHTMLLink object array.*/
+
+@class RSXMLData;
+@class RSHTMLLink;
+
+@interface RSHTMLLinkParser : NSObject
+
+// Parses xmlData as HTML and returns one RSHTMLLink per anchor element found.
++ (NSArray *)htmlLinksWithData:(RSXMLData *)xmlData;
+
+@end
+
+
+@interface RSHTMLLink : NSObject
+
+// Any of these, even urlString, may be nil, because HTML can be bad.
+
+@property (nonatomic, readonly) NSString *urlString; //absolute
+@property (nonatomic, readonly) NSString *text;
+@property (nonatomic, readonly) NSString *title; //title attribute inside anchor tag
+
+@end
diff --git a/Frameworks/RSParser/HTML/RSHTMLLinkParser.m b/Frameworks/RSParser/HTML/RSHTMLLinkParser.m
new file mode 100755
index 000000000..23ab6bbf9
--- /dev/null
+++ b/Frameworks/RSParser/HTML/RSHTMLLinkParser.m
@@ -0,0 +1,151 @@
+//
+// RSHTMLLinkParser.m
+// RSXML
+//
+// Created by Brent Simmons on 8/7/16.
+// Copyright © 2016 Ranchero Software, LLC. All rights reserved.
+//
+
+// NOTE(review): the target of the next #import is missing in this copy of the patch
+// (angle-bracketed text was stripped). xmlChar is used below, so it is presumably a
+// libxml2 header — restore it from the original commit.
+#import 
+#import "RSHTMLLinkParser.h"
+#import "RSSAXHTMLParser.h"
+#import "RSSAXParser.h"
+#import "RSXMLData.h"
+#import "RSXMLInternal.h"
+
+
+@interface RSHTMLLinkParser()
+
+@property (nonatomic, readonly) NSMutableArray *links; // One RSHTMLLink per anchor start tag, in document order.
+@property (nonatomic, readonly) RSXMLData *xmlData;
+@property (nonatomic, readonly) NSMutableArray *dictionaries;
+@property (nonatomic, readonly) NSURL *baseURL; // Built from xmlData.urlString; hrefs are resolved against it.
+
+@end
+
+
+@interface RSHTMLLink()
+
+@property (nonatomic, readwrite) NSString *urlString; //absolute
+@property (nonatomic, readwrite) NSString *text;
+@property (nonatomic, readwrite) NSString *title; //title attribute inside anchor tag
+
+@end
+
+
+@implementation RSHTMLLinkParser
+
+
+#pragma mark - Class Methods
+
+// Parses xmlData and returns the links that were found.
+// The result is an immutable snapshot, so callers never share the parser's
+// internal mutable array.
++ (NSArray *)htmlLinksWithData:(RSXMLData *)xmlData {
+
+    RSHTMLLinkParser *parser = [[self alloc] initWithXMLData:xmlData];
+    return [parser.links copy];
+}
+
+
+#pragma mark - Init
+
+// Parsing runs synchronously inside init; `links` is fully populated on return.
+// Both xmlData.data and xmlData.urlString are asserted to be non-nil.
+- (instancetype)initWithXMLData:(RSXMLData *)xmlData {
+
+    NSParameterAssert(xmlData.data);
+    NSParameterAssert(xmlData.urlString);
+
+    self = [super init];
+    if (!self) {
+        return nil;
+    }
+
+    _links = [NSMutableArray new];
+    _xmlData = xmlData;
+    _dictionaries = [NSMutableArray new];
+    _baseURL = [NSURL URLWithString:xmlData.urlString];
+
+    [self parse];
+
+    return self;
+}
+
+
+#pragma mark - Parse
+
+// Feeds the whole data object to an RSSAXHTMLParser with self as delegate.
+- (void)parse {
+
+    RSSAXHTMLParser *parser = [[RSSAXHTMLParser alloc] initWithDelegate:self];
+    [parser parseData:self.xmlData.data];
+    [parser finishParsing];
+}
+
+
+// The link being filled in: the most recently added one.
+- (RSHTMLLink *)currentLink {
+
+    return self.links.lastObject;
+}
+
+
+static NSString *kHrefKey = @"href";
+
+// Resolves the href attribute against baseURL. Returns nil when there is no href.
+- (NSString *)urlStringFromDictionary:(NSDictionary *)d {
+
+    NSString *href = [d rsxml_objectForCaseInsensitiveKey:kHrefKey];
+    if (!href) {
+        return nil;
+    }
+
+    NSURL *absoluteURL = [NSURL URLWithString:href relativeToURL:self.baseURL];
+    return absoluteURL.absoluteString;
+}
+
+
+static NSString *kTitleKey = @"title";
+
+// Returns the anchor's title attribute, looked up case-insensitively.
+- (NSString *)titleFromDictionary:(NSDictionary *)d {
+
+    return [d rsxml_objectForCaseInsensitiveKey:kTitleKey];
+}
+
+
+// Copies href (made absolute) and title from the attributes onto the current link.
+- (void)handleLinkAttributes:(NSDictionary *)d {
+
+    RSHTMLLink *link = self.currentLink;
+    link.urlString = [self urlStringFromDictionary:d];
+    link.title = [self titleFromDictionary:d];
+}
+
+
+static const char *kAnchor = "a";
+static const NSInteger kAnchorLength = 2; // strlen("a") + 1 for the terminating NUL.
+
+// On every anchor start tag: add a new RSHTMLLink, fill in its attributes, and
+// start collecting the element's character data for the link text.
+- (void)saxParser:(RSSAXHTMLParser *)SAXParser XMLStartElement:(const xmlChar *)localName attributes:(const xmlChar **)attributes {
+
+    if (!RSSAXEqualTags(localName, kAnchor, kAnchorLength)) {
+        return;
+    }
+
+    RSHTMLLink *link = [RSHTMLLink new];
+    [self.links addObject:link];
+
+    NSDictionary *d = [SAXParser attributesDictionary:attributes];
+    if (!RSXMLIsEmpty(d)) {
+        [self handleLinkAttributes:d];
+    }
+
+    [SAXParser beginStoringCharacters];
+}
+
+
+// On every anchor end tag: store the collected, whitespace-trimmed text on the current link.
+// The parser argument is typed RSSAXHTMLParser to match the delegate protocol and
+// the start-element method above; this class is only ever driven by that parser.
+- (void)saxParser:(RSSAXHTMLParser *)SAXParser XMLEndElement:(const xmlChar *)localName {
+
+    if (!RSSAXEqualTags(localName, kAnchor, kAnchorLength)) {
+        return;
+    }
+
+    self.currentLink.text = SAXParser.currentStringWithTrimmedWhitespace;
+}
+
+@end
+
+@implementation RSHTMLLink
+
+@end
diff --git a/Frameworks/RSParser/HTML/RSHTMLMetadata.h b/Frameworks/RSParser/HTML/RSHTMLMetadata.h
new file mode 100755
index 000000000..42a370563
--- /dev/null
+++ b/Frameworks/RSParser/HTML/RSHTMLMetadata.h
@@ -0,0 +1,45 @@
+//
+// RSHTMLMetadata.h
+// RSXML
+//
+// Created by Brent Simmons on 3/6/16.
+// Copyright © 2016 Ranchero Software, LLC. All rights reserved.
+//
+
+@import Foundation;
+
+@class RSHTMLMetadataFeedLink;
+@class RSHTMLMetadataAppleTouchIcon;
+
+
+// Metadata derived from the attribute dictionaries of an HTML page's link elements.
+@interface RSHTMLMetadata : NSObject
+
+// urlString is the page URL used to resolve relative links; dictionaries holds
+// one attribute dictionary per link element. All derived properties below are
+// computed during init.
+- (instancetype)initWithURLString:(NSString *)urlString dictionaries:(NSArray *)dictionaries;
+
+@property (nonatomic, readonly) NSString *baseURLString;
+@property (nonatomic, readonly) NSArray *dictionaries;
+
+// Absolute URL of the first link whose rel is "shortcut icon" (case-insensitive), or nil.
+@property (nonatomic, readonly) NSString *faviconLink;
+// RSHTMLMetadataAppleTouchIcon objects for rel "apple-touch-icon" / "apple-touch-icon-precomposed".
+@property (nonatomic, readonly) NSArray *appleTouchIcons;
+// RSHTMLMetadataFeedLink objects for rel "alternate" links whose type ends in /rss+xml or /atom+xml.
+@property (nonatomic, readonly) NSArray *feedLinks;
+
+@end
+
+
+@interface RSHTMLMetadataAppleTouchIcon : NSObject
+
+@property (nonatomic, readonly) NSString *rel;
+@property (nonatomic, readonly) NSString *sizes;
+@property (nonatomic, readonly) NSString *urlString; // Absolute.
+
+@end
+
+
+@interface RSHTMLMetadataFeedLink : NSObject
+
+@property (nonatomic, readonly) NSString *title;
+@property (nonatomic, readonly) NSString *type;
+@property (nonatomic, readonly) NSString *urlString; // Absolute.
+
+@end
+
diff --git a/Frameworks/RSParser/HTML/RSHTMLMetadata.m b/Frameworks/RSParser/HTML/RSHTMLMetadata.m
new file mode 100755
index 000000000..a5c4b0d71
--- /dev/null
+++ b/Frameworks/RSParser/HTML/RSHTMLMetadata.m
@@ -0,0 +1,245 @@
+//
+// RSHTMLMetadata.m
+// RSXML
+//
+// Created by Brent Simmons on 3/6/16.
+// Copyright © 2016 Ranchero Software, LLC. All rights reserved.
+//
+
+#import "RSHTMLMetadata.h"
+#import "RSXMLInternal.h"
+
+static NSString *urlStringFromDictionary(NSDictionary *d);
+static NSString *absoluteURLStringWithRelativeURLString(NSString *relativeURLString, NSString *baseURLString);
+static NSString *absoluteURLStringWithDictionary(NSDictionary *d, NSString *baseURLString);
+static NSArray *objectsOfClassWithDictionaries(Class class, NSArray *dictionaries, NSString *baseURLString);
+static NSString *relValue(NSDictionary *d);
+static BOOL typeIsFeedType(NSString *type);
+
+// Attribute names and the attribute values this file matches against.
+static NSString *kShortcutIconRelValue = @"shortcut icon";
+static NSString *kHrefKey = @"href";
+static NSString *kSrcKey = @"src";
+static NSString *kAppleTouchIconValue = @"apple-touch-icon";
+static NSString *kAppleTouchIconPrecomposedValue = @"apple-touch-icon-precomposed";
+static NSString *kSizesKey = @"sizes";
+static NSString *kTitleKey = @"title";
+static NSString *kRelKey = @"rel";
+static NSString *kAlternateKey = @"alternate";
+static NSString *kRSSSuffix = @"/rss+xml";
+static NSString *kAtomSuffix = @"/atom+xml";
+static NSString *kTypeKey = @"type";
+
+@interface RSHTMLMetadataAppleTouchIcon ()
+
+- (instancetype)initWithDictionary:(NSDictionary *)d baseURLString:(NSString *)baseURLString;
+
+@end
+
+
+@interface RSHTMLMetadataFeedLink ()
+
+- (instancetype)initWithDictionary:(NSDictionary *)d baseURLString:(NSString *)baseURLString;
+
+@end
+
+
+@implementation RSHTMLMetadata
+
+
+#pragma mark - Init
+
+// Stores the inputs, then derives faviconLink, appleTouchIcons and feedLinks from
+// the dictionaries. The helpers called here read self.dictionaries and
+// self.baseURLString, so those two ivars are assigned first.
+- (instancetype)initWithURLString:(NSString *)urlString dictionaries:(NSArray *)dictionaries {
+
+    self = [super init];
+    if (!self) {
+        return nil;
+    }
+
+    _baseURLString = urlString;
+    _dictionaries = dictionaries;
+    _faviconLink = [self resolvedLinkFromFirstDictionaryWithMatchingRel:kShortcutIconRelValue];
+
+    NSArray *appleTouchIconDictionaries = [self appleTouchIconDictionaries];
+    _appleTouchIcons = objectsOfClassWithDictionaries([RSHTMLMetadataAppleTouchIcon class], appleTouchIconDictionaries, urlString);
+
+    NSArray *feedLinkDictionaries = [self feedLinkDictionaries];
+    _feedLinks = objectsOfClassWithDictionaries([RSHTMLMetadataFeedLink class], feedLinkDictionaries, urlString);
+
+    return self;
+}
+
+
+#pragma mark - Private
+
+// Returns the first dictionary whose rel attribute equals valueToMatch, or nil.
+- (NSDictionary *)firstDictionaryWithMatchingRel:(NSString *)valueToMatch {
+
+    // Case-insensitive.
+
+    for (NSDictionary *oneDictionary in self.dictionaries) {
+
+        NSString *oneRelValue = relValue(oneDictionary);
+        if (oneRelValue && [oneRelValue compare:valueToMatch options:NSCaseInsensitiveSearch] == NSOrderedSame) {
+            return oneDictionary;
+        }
+    }
+
+    return nil;
+}
+
+
+// Returns the dictionaries whose lowercased rel is one of the two apple-touch-icon values.
+- (NSArray *)appleTouchIconDictionaries {
+
+    NSMutableArray *dictionaries = [NSMutableArray new];
+
+    for (NSDictionary *oneDictionary in self.dictionaries) {
+
+        NSString *oneRelValue = relValue(oneDictionary).lowercaseString;
+        if ([oneRelValue isEqualToString:kAppleTouchIconValue] || [oneRelValue isEqualToString:kAppleTouchIconPrecomposedValue]) {
+            [dictionaries addObject:oneDictionary];
+        }
+    }
+
+    return dictionaries;
+}
+
+
+// Returns the dictionaries that describe a feed: rel is "alternate", type is a
+// feed type (see typeIsFeedType), and an href or src value is present.
+- (NSArray *)feedLinkDictionaries {
+
+    NSMutableArray *dictionaries = [NSMutableArray new];
+
+    for (NSDictionary *oneDictionary in self.dictionaries) {
+
+        NSString *oneRelValue = relValue(oneDictionary).lowercaseString;
+        if (![oneRelValue isEqualToString:kAlternateKey]) {
+            continue;
+        }
+
+        NSString *oneType = [oneDictionary rsxml_objectForCaseInsensitiveKey:kTypeKey];
+        if (!typeIsFeedType(oneType)) {
+            continue;
+        }
+
+        if (RSXMLStringIsEmpty(urlStringFromDictionary(oneDictionary))) {
+            continue;
+        }
+
+        [dictionaries addObject:oneDictionary];
+    }
+
+    return dictionaries;
+}
+
+
+// Absolute URL string for the first dictionary with the given rel, or nil when
+// there is no match or the match has no URL.
+- (NSString *)resolvedLinkFromFirstDictionaryWithMatchingRel:(NSString *)relValue {
+
+    NSDictionary *d = [self firstDictionaryWithMatchingRel:relValue];
+    return absoluteURLStringWithDictionary(d, self.baseURLString);
+}
+
+
+@end
+
+
+// Returns the rel attribute of d (key lookup is case-insensitive).
+static NSString *relValue(NSDictionary *d) {
+
+    return [d rsxml_objectForCaseInsensitiveKey:kRelKey];
+}
+
+
+// Returns the href attribute when present, otherwise the src attribute.
+static NSString *urlStringFromDictionary(NSDictionary *d) {
+
+    NSString *urlString = [d rsxml_objectForCaseInsensitiveKey:kHrefKey];
+    if (urlString) {
+        return urlString;
+    }
+
+    return [d rsxml_objectForCaseInsensitiveKey:kSrcKey];
+}
+
+
+// Resolves relativeURLString against baseURLString. Returns nil when the base
+// string cannot be turned into an NSURL.
+static NSString *absoluteURLStringWithRelativeURLString(NSString *relativeURLString, NSString *baseURLString) {
+
+    NSURL *url = [NSURL URLWithString:baseURLString];
+    if (!url) {
+        return nil;
+    }
+
+    NSURL *absoluteURL = [NSURL URLWithString:relativeURLString relativeToURL:url];
+    return absoluteURL.absoluteString;
+}
+
+
+// Absolute form of the dictionary's href/src value, or nil when that value is empty.
+static NSString *absoluteURLStringWithDictionary(NSDictionary *d, NSString *baseURLString) {
+
+    NSString *urlString = urlStringFromDictionary(d);
+    if (RSXMLStringIsEmpty(urlString)) {
+        return nil;
+    }
+    return absoluteURLStringWithRelativeURLString(urlString, baseURLString);
+}
+
+
+// Builds one `class` instance per dictionary via -initWithDictionary:baseURLString:
+// and returns them as an immutable array. Nil results are skipped.
+static NSArray *objectsOfClassWithDictionaries(Class class, NSArray *dictionaries, NSString *baseURLString) {
+
+    NSMutableArray *objects = [NSMutableArray new];
+
+    for (NSDictionary *oneDictionary in dictionaries) {
+
+        id oneObject = [[class alloc] initWithDictionary:oneDictionary baseURLString:baseURLString];
+        if (oneObject) {
+            [objects addObject:oneObject];
+        }
+    }
+
+    return [objects copy];
+}
+
+
+// YES when the lowercased type ends in "/rss+xml" or "/atom+xml".
+static BOOL typeIsFeedType(NSString *type) {
+
+    type = type.lowercaseString;
+    return [type hasSuffix:kRSSSuffix] || [type hasSuffix:kAtomSuffix];
+}
+
+
+@implementation RSHTMLMetadataAppleTouchIcon
+
+
+// Reads the URL (made absolute), sizes and rel from the attribute dictionary.
+- (instancetype)initWithDictionary:(NSDictionary *)d baseURLString:(NSString *)baseURLString {
+
+    self = [super init];
+    if (!self) {
+        return nil;
+    }
+
+    _urlString = absoluteURLStringWithDictionary(d, baseURLString);
+    _sizes = [d rsxml_objectForCaseInsensitiveKey:kSizesKey];
+    _rel = [d rsxml_objectForCaseInsensitiveKey:kRelKey];
+
+    return self;
+}
+
+
+@end
+
+
+@implementation RSHTMLMetadataFeedLink
+
+
+// Reads the URL (made absolute), title and type from the attribute dictionary.
+- (instancetype)initWithDictionary:(NSDictionary *)d baseURLString:(NSString *)baseURLString {
+
+    self = [super init];
+    if (!self) {
+        return nil;
+    }
+
+    _urlString = absoluteURLStringWithDictionary(d, baseURLString);
+    _title = [d rsxml_objectForCaseInsensitiveKey:kTitleKey];
+    _type = [d rsxml_objectForCaseInsensitiveKey:kTypeKey];
+
+    return self;
+}
+
+
+@end
+
diff --git a/Frameworks/RSParser/HTML/RSHTMLMetadataParser.h b/Frameworks/RSParser/HTML/RSHTMLMetadataParser.h
new file mode 100755
index 000000000..310de9f44
--- /dev/null
+++ b/Frameworks/RSParser/HTML/RSHTMLMetadataParser.h
@@ -0,0 +1,28 @@
+//
+// RSHTMLMetadataParser.h
+// RSXML
+//
+// Created by Brent Simmons on 3/6/16.
+// Copyright © 2016 Ranchero Software, LLC. All rights reserved.
+//
+
+@import Foundation;
+
+
+@class RSHTMLMetadata;
+@class RSXMLData;
+
+NS_ASSUME_NONNULL_BEGIN
+
+// Collects the attributes of an HTML page's link elements and wraps them in an RSHTMLMetadata.
+@interface RSHTMLMetadataParser : NSObject
+
+// Convenience: creates a parser for xmlData and returns its metadata.
++ (RSHTMLMetadata *)HTMLMetadataWithXMLData:(RSXMLData *)xmlData;
+
+// Parses synchronously during init; `metadata` is set when this returns.
+- (instancetype)initWithXMLData:(RSXMLData *)xmlData;
+
+@property (nonatomic, readonly) RSHTMLMetadata *metadata;
+
+
+@end
+
+NS_ASSUME_NONNULL_END
diff --git a/Frameworks/RSParser/HTML/RSHTMLMetadataParser.m b/Frameworks/RSParser/HTML/RSHTMLMetadataParser.m
new file mode 100755
index 000000000..ad8f957ee
--- /dev/null
+++ b/Frameworks/RSParser/HTML/RSHTMLMetadataParser.m
@@ -0,0 +1,129 @@
+//
+// RSHTMLMetadataParser.m
+// RSXML
+//
+// Created by Brent Simmons on 3/6/16.
+// Copyright © 2016 Ranchero Software, LLC. All rights reserved.
+//
+
+// NOTE(review): the target of the next #import is missing in this copy of the patch
+// (angle-bracketed text was stripped). xmlChar is used below, so it is presumably a
+// libxml2 header — restore it from the original commit.
+#import 
+#import "RSHTMLMetadataParser.h"
+#import "RSXMLData.h"
+#import "RSHTMLMetadata.h"
+#import "RSSAXHTMLParser.h"
+#import "RSSAXParser.h"
+#import "RSXMLInternal.h"
+
+
+@interface RSHTMLMetadataParser ()
+
+@property (nonatomic, readonly) RSXMLData *xmlData;
+@property (nonatomic, readwrite) RSHTMLMetadata *metadata;
+@property (nonatomic) NSMutableArray *dictionaries; // Attribute dictionaries of the accepted link elements.
+@property (nonatomic) BOOL didFinishParsing; // Set once a body start tag has been seen.
+
+@end
+
+
+@implementation RSHTMLMetadataParser
+
+
+#pragma mark - Class Methods
+
+// Creates a parser for xmlData (which parses during init) and returns its metadata.
++ (RSHTMLMetadata *)HTMLMetadataWithXMLData:(RSXMLData *)xmlData {
+
+    RSHTMLMetadataParser *parser = [[self alloc] initWithXMLData:xmlData];
+    return parser.metadata;
+}
+
+
+#pragma mark - Init
+
+// Asserts that xmlData has data and a URL string, then parses synchronously.
+- (instancetype)initWithXMLData:(RSXMLData *)xmlData {
+
+    NSParameterAssert(xmlData.data);
+    NSParameterAssert(xmlData.urlString);
+
+    self = [super init];
+    if (!self) {
+        return nil;
+    }
+
+    _xmlData = xmlData;
+    _dictionaries = [NSMutableArray new];
+
+    [self parse];
+
+    return self;
+}
+
+
+#pragma mark - Parse
+
+// Runs the SAX HTML parser over the data with self as delegate, then builds the
+// metadata object from an immutable copy of the collected dictionaries.
+- (void)parse {
+
+    RSSAXHTMLParser *parser = [[RSSAXHTMLParser alloc] initWithDelegate:self];
+    [parser parseData:self.xmlData.data];
+    [parser finishParsing];
+
+    self.metadata = [[RSHTMLMetadata alloc] initWithURLString:self.xmlData.urlString dictionaries:[self.dictionaries copy]];
+}
+
+
+static NSString *kHrefKey = @"href";
+static NSString *kSrcKey = @"src";
+static NSString *kRelKey = @"rel";
+
+// Returns the href attribute when present, otherwise the src attribute.
+- (NSString *)linkForDictionary:(NSDictionary *)d {
+
+    NSString *link = [d rsxml_objectForCaseInsensitiveKey:kHrefKey];
+    if (link) {
+        return link;
+    }
+
+    return [d rsxml_objectForCaseInsensitiveKey:kSrcKey];
+}
+
+
+// Keeps d only when it has both a non-empty rel and a non-empty href/src.
+- (void)handleLinkAttributes:(NSDictionary *)d {
+
+    if (RSXMLStringIsEmpty([d rsxml_objectForCaseInsensitiveKey:kRelKey])) {
+        return;
+    }
+    if (RSXMLStringIsEmpty([self linkForDictionary:d])) {
+        return;
+    }
+
+    [self.dictionaries addObject:d];
+}
+
+
+#pragma mark - RSSAXHTMLParserDelegate
+
+// Tag names and the lengths handed to RSSAXEqualTags; each length is the
+// 4-character name plus one for the terminating NUL.
+static const char *kBody = "body";
+static const NSInteger kBodyLength = 5;
+static const char *kLink = "link";
+static const NSInteger kLinkLength = 5;
+
+// Collects attributes of link elements until the first body start tag. After that
+// every callback returns immediately; the SAX parser itself is not cancelled and
+// still runs to the end of the data.
+- (void)saxParser:(RSSAXHTMLParser *)SAXParser XMLStartElement:(const xmlChar *)localName attributes:(const xmlChar **)attributes {
+
+    if (self.didFinishParsing) {
+        return;
+    }
+
+    if (RSSAXEqualTags(localName, kBody, kBodyLength)) {
+        self.didFinishParsing = YES;
+        return;
+    }
+
+    if (!RSSAXEqualTags(localName, kLink, kLinkLength)) {
+        return;
+    }
+
+    NSDictionary *d = [SAXParser attributesDictionary:attributes];
+    if (!RSXMLIsEmpty(d)) {
+        [self handleLinkAttributes:d];
+    }
+}
+
+@end
diff --git a/Frameworks/RSParser/HTML/RSSAXHTMLParser.h b/Frameworks/RSParser/HTML/RSSAXHTMLParser.h
new file mode 100755
index 000000000..46305c12a
--- /dev/null
+++ b/Frameworks/RSParser/HTML/RSSAXHTMLParser.h
@@ -0,0 +1,49 @@
+//
+// RSSAXHTMLParser.h
+// RSXML
+//
+// Created by Brent Simmons on 3/6/16.
+// Copyright © 2016 Ranchero Software, LLC. All rights reserved.
+//
+
+@import Foundation;
+
+@class RSSAXHTMLParser;
+
+// Callbacks delivered by RSSAXHTMLParser while it parses. All are optional; the
+// parser checks which ones the delegate implements when it is created.
+@protocol RSSAXHTMLParserDelegate
+
+@optional
+
+// attributes is the raw libxml2 attribute array; pass it to -attributesDictionary:
+// to get an NSDictionary.
+- (void)saxParser:(RSSAXHTMLParser *)SAXParser XMLStartElement:(const unsigned char *)localName attributes:(const unsigned char **)attributes;
+
+- (void)saxParser:(RSSAXHTMLParser *)SAXParser XMLEndElement:(const unsigned char *)localName;
+
+- (void)saxParser:(RSSAXHTMLParser *)SAXParser XMLCharactersFound:(const unsigned char *)characters length:(NSUInteger)length;
+
+- (void)saxParserDidReachEndOfDocument:(RSSAXHTMLParser *)SAXParser; // If canceled, may not get called (but might).
+
+@end
+
+
+// Push-style SAX wrapper around the libxml2 HTML parser.
+@interface RSSAXHTMLParser : NSObject
+
+
+- (instancetype)initWithDelegate:(id)delegate;
+
+// Feed data with parseData:/parseBytes:numberOfBytes: (the parser context is
+// created on the first call), then call finishParsing once.
+- (void)parseData:(NSData *)data;
+- (void)parseBytes:(const void *)bytes numberOfBytes:(NSUInteger)numberOfBytes;
+- (void)finishParsing;
+- (void)cancel;
+
+@property (nonatomic, strong, readonly) NSData *currentCharacters; // nil if not storing characters. UTF-8 encoded.
+@property (nonatomic, strong, readonly) NSString *currentString; // Convenience to get string version of currentCharacters.
+@property (nonatomic, strong, readonly) NSString *currentStringWithTrimmedWhitespace;
+
+- (void)beginStoringCharacters; // Delegate can call from XMLStartElement. Characters will be available in XMLEndElement as currentCharacters property. Storing characters is stopped after each XMLEndElement.
+
+// Delegate can call from within XMLStartElement.
+
+- (NSDictionary *)attributesDictionary:(const unsigned char **)attributes;
+
+
+@end
diff --git a/Frameworks/RSParser/HTML/RSSAXHTMLParser.m b/Frameworks/RSParser/HTML/RSSAXHTMLParser.m
new file mode 100755
index 000000000..17f0b6221
--- /dev/null
+++ b/Frameworks/RSParser/HTML/RSSAXHTMLParser.m
@@ -0,0 +1,315 @@
+//
+// RSSAXHTMLParser.m
+// RSXML
+//
+// Created by Brent Simmons on 3/6/16.
+// Copyright © 2016 Ranchero Software, LLC. All rights reserved.
+//
+
+#import "RSSAXHTMLParser.h"
+#import "RSSAXParser.h"
+// NOTE(review): the targets of the next three #import lines are missing in this copy
+// of the patch (angle-bracketed text was stripped). The code below uses libxml2's
+// HTML push parser, so they are presumably libxml2 headers — restore them from the
+// original commit.
+#import 
+#import 
+#import 
+#import "RSXMLInternal.h"
+
+
+@interface RSSAXHTMLParser ()
+
+@property (nonatomic) id delegate;
+@property (nonatomic, assign) htmlParserCtxtPtr context; // Created lazily by the first parseBytes: call; freed in finishParsing or dealloc.
+@property (nonatomic, assign) BOOL storingCharacters;
+@property (nonatomic) NSMutableData *characters; // Character data collected since beginStoringCharacters.
+// Cached respondsToSelector: answers, computed once in init.
+@property (nonatomic) BOOL delegateRespondsToStartElementMethod;
+@property (nonatomic) BOOL delegateRespondsToEndElementMethod;
+@property (nonatomic) BOOL delegateRespondsToCharactersFoundMethod;
+@property (nonatomic) BOOL delegateRespondsToEndOfDocumentMethod;
+
+@end
+
+
+@implementation RSSAXHTMLParser
+
+
++ (void)initialize {
+
+    RSSAXInitLibXMLParser();
+}
+
+
+#pragma mark - Init
+
+// Stores the delegate and records which optional delegate methods it implements.
+- (instancetype)initWithDelegate:(id)delegate {
+
+    self = [super init];
+    if (self == nil)
+        return nil;
+
+    _delegate = delegate;
+
+    if ([_delegate respondsToSelector:@selector(saxParser:XMLStartElement:attributes:)]) {
+        _delegateRespondsToStartElementMethod = YES;
+    }
+    if ([_delegate respondsToSelector:@selector(saxParser:XMLEndElement:)]) {
+        _delegateRespondsToEndElementMethod = YES;
+    }
+    if ([_delegate respondsToSelector:@selector(saxParser:XMLCharactersFound:length:)]) {
+        _delegateRespondsToCharactersFoundMethod = YES;
+    }
+    if ([_delegate respondsToSelector:@selector(saxParserDidReachEndOfDocument:)]) {
+        _delegateRespondsToEndOfDocumentMethod = YES;
+    }
+
+    return self;
+}
+
+
+#pragma mark - Dealloc
+
+// Frees the libxml2 context if finishParsing was never called.
+- (void)dealloc {
+
+    if (_context != nil) {
+        htmlFreeParserCtxt(_context);
+        _context = nil;
+    }
+    _delegate = nil;
+}
+
+
+#pragma mark - API
+
+static xmlSAXHandler saxHandlerStruct;
+
+- (void)parseData:(NSData *)data {
+
+    [self parseBytes:data.bytes numberOfBytes:data.length];
+}
+
+
+// Pushes one chunk into the parser. The first call creates the push-parser context,
+// using the character encoding detected from that first chunk.
+- (void)parseBytes:(const void *)bytes numberOfBytes:(NSUInteger)numberOfBytes {
+
+    if (self.context == nil) {
+
+        xmlCharEncoding characterEncoding = xmlDetectCharEncoding(bytes, (int)numberOfBytes);
+        self.context = htmlCreatePushParserCtxt(&saxHandlerStruct, (__bridge void *)self, nil, 0, nil, characterEncoding);
+        htmlCtxtUseOptions(self.context, XML_PARSE_RECOVER | XML_PARSE_NONET | HTML_PARSE_COMPACT);
+    }
+
+    @autoreleasepool {
+        htmlParseChunk(self.context, (const char *)bytes, (int)numberOfBytes, 0);
+    }
+}
+
+
+// Signals end of input, then frees the context and any stored characters.
+// Asserts (and returns early in release builds) when nothing was parsed.
+- (void)finishParsing {
+
+    NSAssert(self.context != nil, nil);
+    if (self.context == nil)
+        return;
+
+    @autoreleasepool {
+        htmlParseChunk(self.context, nil, 0, 1);
+        htmlFreeParserCtxt(self.context);
+        self.context = nil;
+        self.characters = nil;
+    }
+}
+
+
+- (void)cancel {
+
+    @autoreleasepool {
+        xmlStopParser(self.context);
+    }
+}
+
+
+
+- (void)beginStoringCharacters {
+    self.storingCharacters = YES;
+    self.characters = [NSMutableData new];
+}
+
+
+- (void)endStoringCharacters {
+    self.storingCharacters = NO;
+    self.characters = nil;
+}
+
+
+- (NSData *)currentCharacters {
+
+    if (!self.storingCharacters) {
+        return nil;
+    }
+
+    return self.characters;
+}
+
+
+// Stored characters decoded as UTF-8, or nil when nothing is stored.
+- (NSString *)currentString {
+
+    NSData *d = self.currentCharacters;
+    if (RSXMLIsEmpty(d)) {
+        return nil;
+    }
+
+    return [[NSString alloc] initWithData:d encoding:NSUTF8StringEncoding];
+}
+
+
+- (NSString *)currentStringWithTrimmedWhitespace {
+
+    return [self.currentString stringByTrimmingCharactersInSet:[NSCharacterSet whitespaceAndNewlineCharacterSet]];
+}
+
+
+#pragma mark - Attributes Dictionary
+
+// Converts the SAX attribute array into a dictionary of name -> value strings.
+//
+// `attributes` is the array passed to the start-element callback: name/value
+// pointers in alternating order, ended by a NULL name. A NULL value (attribute
+// without a value) or a value that is not valid UTF-8 is stored as @"".
+//
+// The array is walked strictly two slots at a time. A name that cannot be decoded
+// as UTF-8 is skipped together with its value, so later pairs stay aligned and the
+// walk always stops at the terminating NULL name.
+//
+// Returns nil when `attributes` is NULL, otherwise an immutable dictionary
+// (possibly empty).
+- (NSDictionary *)attributesDictionary:(const xmlChar **)attributes {
+
+    if (!attributes) {
+        return nil;
+    }
+
+    NSMutableDictionary *d = [NSMutableDictionary new];
+
+    for (NSInteger ix = 0; attributes[ix] != NULL; ix += 2) {
+
+        const xmlChar *rawName = attributes[ix];
+        const xmlChar *rawValue = attributes[ix + 1];
+
+        NSString *name = [NSString stringWithUTF8String:(const char *)rawName];
+        if (!name) {
+            continue; // Undecodable name: drop this pair only.
+        }
+
+        NSString *value = nil;
+        if (rawValue) {
+            value = [NSString stringWithUTF8String:(const char *)rawValue];
+        }
+
+        d[name] = value ? value : @"";
+    }
+
+    return [d copy];
+}
+
+
+#pragma mark - Callbacks
+
+// The four methods below are invoked from the C SAX callbacks at the bottom of this
+// file and forward to the delegate when it implements the matching method.
+
+- (void)xmlEndDocument {
+
+    @autoreleasepool {
+        if (self.delegateRespondsToEndOfDocumentMethod) {
+            [self.delegate saxParserDidReachEndOfDocument:self];
+        }
+
+        [self endStoringCharacters];
+    }
+}
+
+
+- (void)xmlCharactersFound:(const xmlChar *)ch length:(NSUInteger)length {
+
+    @autoreleasepool {
+        if (self.storingCharacters) {
+            [self.characters appendBytes:(const void *)ch length:length];
+        }
+
+        if (self.delegateRespondsToCharactersFoundMethod) {
+            [self.delegate saxParser:self XMLCharactersFound:ch length:length];
+        }
+    }
+}
+
+
+- (void)xmlStartElement:(const xmlChar *)localName attributes:(const xmlChar **)attributes {
+
+    @autoreleasepool {
+        if (self.delegateRespondsToStartElementMethod) {
+
+            [self.delegate saxParser:self XMLStartElement:localName attributes:attributes];
+        }
+    }
+}
+
+
+// Storing characters always stops after an end tag, whether or not the delegate
+// was notified.
+- (void)xmlEndElement:(const xmlChar *)localName {
+
+    @autoreleasepool {
+        if (self.delegateRespondsToEndElementMethod) {
+            [self.delegate saxParser:self XMLEndElement:localName];
+        }
+
+        [self endStoringCharacters];
+    }
+}
+
+
+@end
+
+
+// C trampolines registered in saxHandlerStruct. `context` is the RSSAXHTMLParser
+// that was passed as user data to htmlCreatePushParserCtxt.
+
+static void startElementSAX(void *context, const xmlChar *localname, const xmlChar **attributes) {
+
+    [(__bridge RSSAXHTMLParser *)context xmlStartElement:localname attributes:attributes];
+}
+
+
+static void endElementSAX(void *context, const xmlChar *localname) {
+    [(__bridge RSSAXHTMLParser *)context xmlEndElement:localname];
+}
+
+
+static void charactersFoundSAX(void *context, const xmlChar *ch, int len) {
+    [(__bridge RSSAXHTMLParser *)context xmlCharactersFound:ch length:(NSUInteger)len];
+}
+
+
+static void endDocumentSAX(void *context) {
+    [(__bridge RSSAXHTMLParser *)context xmlEndDocument];
+}
+
+
+// Only endDocument, startElement, endElement and characters are handled.
+static htmlSAXHandler saxHandlerStruct = {
+    nil, /* internalSubset */
+    nil, /* isStandalone */
+    nil, /* hasInternalSubset */
+    nil, /* hasExternalSubset */
+    nil, /* resolveEntity */
+    nil, /* getEntity */
+    nil, /* entityDecl */
+    nil, /* notationDecl */
+    nil, /* attributeDecl */
+    nil, /* elementDecl */
+    nil, /* unparsedEntityDecl */
+    nil, /* setDocumentLocator */
+    nil, /* startDocument */
+    endDocumentSAX, /* endDocument */
+    startElementSAX, /* startElement*/
+    endElementSAX, /* endElement */
+    nil, /* reference */
+    charactersFoundSAX, /* characters */
+    nil, /* ignorableWhitespace */
+    nil, /* processingInstruction */
+    nil, /* comment */
+    nil, /* warning */
+    nil, /* error */
+    nil, /* fatalError //: unused error() get all the errors */
+    nil, /* getParameterEntity */
+    nil, /* cdataBlock */
+    nil, /* externalSubset */
+    XML_SAX2_MAGIC,
+    nil,
+    nil, /* startElementNs */
+    nil, /* endElementNs */
+    nil /* serror */
+};
+
diff --git a/Frameworks/RSParser/Info.plist b/Frameworks/RSParser/Info.plist new file mode 100644 index 000000000..b3516740c --- /dev/null +++ b/Frameworks/RSParser/Info.plist @@ -0,0 +1,26 @@ + + + + + CFBundleDevelopmentRegion + $(DEVELOPMENT_LANGUAGE) +
CFBundleExecutable + $(EXECUTABLE_NAME) + CFBundleIdentifier + $(PRODUCT_BUNDLE_IDENTIFIER) + CFBundleInfoDictionaryVersion + 6.0 + CFBundleName + $(PRODUCT_NAME) + CFBundlePackageType + FMWK + CFBundleShortVersionString + 1.0 + CFBundleVersion + $(CURRENT_PROJECT_VERSION) + NSHumanReadableCopyright + Copyright © 2017 Ranchero Software, LLC. All rights reserved. + NSPrincipalClass + + + diff --git a/Frameworks/RSParser/RSParser.h b/Frameworks/RSParser/RSParser.h new file mode 100644 index 000000000..7ffc0324e --- /dev/null +++ b/Frameworks/RSParser/RSParser.h @@ -0,0 +1,40 @@ +// +// RSParser.h +// RSParser +// +// Created by Brent Simmons on 6/20/17. +// Copyright © 2017 Ranchero Software, LLC. All rights reserved. +// + +@import Foundation; + + +#import +#import + +#import +#import +#import +#import +#import +#import + +#import +#import +#import +#import +#import + +#import + +#import +#import + +// HTML + +#import + +#import +#import +#import + diff --git a/Frameworks/RSParser/RSParser.xcodeproj/project.pbxproj b/Frameworks/RSParser/RSParser.xcodeproj/project.pbxproj new file mode 100644 index 000000000..676f908ae --- /dev/null +++ b/Frameworks/RSParser/RSParser.xcodeproj/project.pbxproj @@ -0,0 +1,420 @@ +// !$*UTF8*$! 
+{ + archiveVersion = 1; + classes = { + }; + objectVersion = 48; + objects = { + +/* Begin PBXBuildFile section */ + 84D81BDC1EFA28E700652332 /* RSParser.h in Headers */ = {isa = PBXBuildFile; fileRef = 84D81BDA1EFA28E700652332 /* RSParser.h */; settings = {ATTRIBUTES = (Public, ); }; }; + 84FF5F8E1EFA285800C15A01 /* RSParser.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = 84FF5F841EFA285800C15A01 /* RSParser.framework */; }; + 84FF5F931EFA285800C15A01 /* RSParserTests.swift in Sources */ = {isa = PBXBuildFile; fileRef = 84FF5F921EFA285800C15A01 /* RSParserTests.swift */; }; +/* End PBXBuildFile section */ + +/* Begin PBXContainerItemProxy section */ + 84FF5F8F1EFA285800C15A01 /* PBXContainerItemProxy */ = { + isa = PBXContainerItemProxy; + containerPortal = 84FF5F7B1EFA285800C15A01 /* Project object */; + proxyType = 1; + remoteGlobalIDString = 84FF5F831EFA285800C15A01; + remoteInfo = RSParser; + }; +/* End PBXContainerItemProxy section */ + +/* Begin PBXFileReference section */ + 84D81BD91EFA28E700652332 /* Info.plist */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text.plist.xml; path = Info.plist; sourceTree = ""; }; + 84D81BDA1EFA28E700652332 /* RSParser.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = RSParser.h; sourceTree = ""; }; + 84FF5F841EFA285800C15A01 /* RSParser.framework */ = {isa = PBXFileReference; explicitFileType = wrapper.framework; includeInIndex = 0; path = RSParser.framework; sourceTree = BUILT_PRODUCTS_DIR; }; + 84FF5F8D1EFA285800C15A01 /* RSParserTests.xctest */ = {isa = PBXFileReference; explicitFileType = wrapper.cfbundle; includeInIndex = 0; path = RSParserTests.xctest; sourceTree = BUILT_PRODUCTS_DIR; }; + 84FF5F921EFA285800C15A01 /* RSParserTests.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = RSParserTests.swift; sourceTree = ""; }; + 84FF5F941EFA285800C15A01 /* Info.plist */ = {isa = PBXFileReference; lastKnownFileType = 
text.plist.xml; path = Info.plist; sourceTree = ""; }; +/* End PBXFileReference section */ + +/* Begin PBXFrameworksBuildPhase section */ + 84FF5F801EFA285800C15A01 /* Frameworks */ = { + isa = PBXFrameworksBuildPhase; + buildActionMask = 2147483647; + files = ( + ); + runOnlyForDeploymentPostprocessing = 0; + }; + 84FF5F8A1EFA285800C15A01 /* Frameworks */ = { + isa = PBXFrameworksBuildPhase; + buildActionMask = 2147483647; + files = ( + 84FF5F8E1EFA285800C15A01 /* RSParser.framework in Frameworks */, + ); + runOnlyForDeploymentPostprocessing = 0; + }; +/* End PBXFrameworksBuildPhase section */ + +/* Begin PBXGroup section */ + 84FF5F7A1EFA285800C15A01 = { + isa = PBXGroup; + children = ( + 84D81BDA1EFA28E700652332 /* RSParser.h */, + 84D81BD91EFA28E700652332 /* Info.plist */, + 84FF5F911EFA285800C15A01 /* RSParserTests */, + 84FF5F851EFA285800C15A01 /* Products */, + ); + sourceTree = ""; + }; + 84FF5F851EFA285800C15A01 /* Products */ = { + isa = PBXGroup; + children = ( + 84FF5F841EFA285800C15A01 /* RSParser.framework */, + 84FF5F8D1EFA285800C15A01 /* RSParserTests.xctest */, + ); + name = Products; + sourceTree = ""; + }; + 84FF5F911EFA285800C15A01 /* RSParserTests */ = { + isa = PBXGroup; + children = ( + 84FF5F921EFA285800C15A01 /* RSParserTests.swift */, + 84FF5F941EFA285800C15A01 /* Info.plist */, + ); + path = RSParserTests; + sourceTree = ""; + }; +/* End PBXGroup section */ + +/* Begin PBXHeadersBuildPhase section */ + 84FF5F811EFA285800C15A01 /* Headers */ = { + isa = PBXHeadersBuildPhase; + buildActionMask = 2147483647; + files = ( + 84D81BDC1EFA28E700652332 /* RSParser.h in Headers */, + ); + runOnlyForDeploymentPostprocessing = 0; + }; +/* End PBXHeadersBuildPhase section */ + +/* Begin PBXNativeTarget section */ + 84FF5F831EFA285800C15A01 /* RSParser */ = { + isa = PBXNativeTarget; + buildConfigurationList = 84FF5F981EFA285800C15A01 /* Build configuration list for PBXNativeTarget "RSParser" */; + buildPhases = ( + 84FF5F7F1EFA285800C15A01 /* Sources 
*/, + 84FF5F801EFA285800C15A01 /* Frameworks */, + 84FF5F811EFA285800C15A01 /* Headers */, + 84FF5F821EFA285800C15A01 /* Resources */, + ); + buildRules = ( + ); + dependencies = ( + ); + name = RSParser; + productName = RSParser; + productReference = 84FF5F841EFA285800C15A01 /* RSParser.framework */; + productType = "com.apple.product-type.framework"; + }; + 84FF5F8C1EFA285800C15A01 /* RSParserTests */ = { + isa = PBXNativeTarget; + buildConfigurationList = 84FF5F9B1EFA285800C15A01 /* Build configuration list for PBXNativeTarget "RSParserTests" */; + buildPhases = ( + 84FF5F891EFA285800C15A01 /* Sources */, + 84FF5F8A1EFA285800C15A01 /* Frameworks */, + 84FF5F8B1EFA285800C15A01 /* Resources */, + ); + buildRules = ( + ); + dependencies = ( + 84FF5F901EFA285800C15A01 /* PBXTargetDependency */, + ); + name = RSParserTests; + productName = RSParserTests; + productReference = 84FF5F8D1EFA285800C15A01 /* RSParserTests.xctest */; + productType = "com.apple.product-type.bundle.unit-test"; + }; +/* End PBXNativeTarget section */ + +/* Begin PBXProject section */ + 84FF5F7B1EFA285800C15A01 /* Project object */ = { + isa = PBXProject; + attributes = { + LastSwiftUpdateCheck = 0900; + LastUpgradeCheck = 0900; + ORGANIZATIONNAME = "Ranchero Software, LLC"; + TargetAttributes = { + 84FF5F831EFA285800C15A01 = { + CreatedOnToolsVersion = 9.0; + }; + 84FF5F8C1EFA285800C15A01 = { + CreatedOnToolsVersion = 9.0; + }; + }; + }; + buildConfigurationList = 84FF5F7E1EFA285800C15A01 /* Build configuration list for PBXProject "RSParser" */; + compatibilityVersion = "Xcode 8.0"; + developmentRegion = en; + hasScannedForEncodings = 0; + knownRegions = ( + en, + ); + mainGroup = 84FF5F7A1EFA285800C15A01; + productRefGroup = 84FF5F851EFA285800C15A01 /* Products */; + projectDirPath = ""; + projectRoot = ""; + targets = ( + 84FF5F831EFA285800C15A01 /* RSParser */, + 84FF5F8C1EFA285800C15A01 /* RSParserTests */, + ); + }; +/* End PBXProject section */ + +/* Begin PBXResourcesBuildPhase section 
*/ + 84FF5F821EFA285800C15A01 /* Resources */ = { + isa = PBXResourcesBuildPhase; + buildActionMask = 2147483647; + files = ( + ); + runOnlyForDeploymentPostprocessing = 0; + }; + 84FF5F8B1EFA285800C15A01 /* Resources */ = { + isa = PBXResourcesBuildPhase; + buildActionMask = 2147483647; + files = ( + ); + runOnlyForDeploymentPostprocessing = 0; + }; +/* End PBXResourcesBuildPhase section */ + +/* Begin PBXSourcesBuildPhase section */ + 84FF5F7F1EFA285800C15A01 /* Sources */ = { + isa = PBXSourcesBuildPhase; + buildActionMask = 2147483647; + files = ( + ); + runOnlyForDeploymentPostprocessing = 0; + }; + 84FF5F891EFA285800C15A01 /* Sources */ = { + isa = PBXSourcesBuildPhase; + buildActionMask = 2147483647; + files = ( + 84FF5F931EFA285800C15A01 /* RSParserTests.swift in Sources */, + ); + runOnlyForDeploymentPostprocessing = 0; + }; +/* End PBXSourcesBuildPhase section */ + +/* Begin PBXTargetDependency section */ + 84FF5F901EFA285800C15A01 /* PBXTargetDependency */ = { + isa = PBXTargetDependency; + target = 84FF5F831EFA285800C15A01 /* RSParser */; + targetProxy = 84FF5F8F1EFA285800C15A01 /* PBXContainerItemProxy */; + }; +/* End PBXTargetDependency section */ + +/* Begin XCBuildConfiguration section */ + 84FF5F961EFA285800C15A01 /* Debug */ = { + isa = XCBuildConfiguration; + buildSettings = { + ALWAYS_SEARCH_USER_PATHS = NO; + CLANG_ANALYZER_NONNULL = YES; + CLANG_ANALYZER_NUMBER_OBJECT_CONVERSION = YES_AGGRESSIVE; + CLANG_CXX_LANGUAGE_STANDARD = "gnu++14"; + CLANG_CXX_LIBRARY = "libc++"; + CLANG_ENABLE_MODULES = YES; + CLANG_ENABLE_OBJC_ARC = YES; + CLANG_WARN_BLOCK_CAPTURE_AUTORELEASING = YES; + CLANG_WARN_BOOL_CONVERSION = YES; + CLANG_WARN_COMMA = YES; + CLANG_WARN_CONSTANT_CONVERSION = YES; + CLANG_WARN_DIRECT_OBJC_ISA_USAGE = YES_ERROR; + CLANG_WARN_DOCUMENTATION_COMMENTS = YES; + CLANG_WARN_EMPTY_BODY = YES; + CLANG_WARN_ENUM_CONVERSION = YES; + CLANG_WARN_INFINITE_RECURSION = YES; + CLANG_WARN_INT_CONVERSION = YES; + CLANG_WARN_OBJC_ROOT_CLASS = 
YES_ERROR; + CLANG_WARN_RANGE_LOOP_ANALYSIS = YES; + CLANG_WARN_STRICT_PROTOTYPES = YES; + CLANG_WARN_SUSPICIOUS_MOVE = YES; + CLANG_WARN_UNGUARDED_AVAILABILITY = YES_AGGRESSIVE; + CLANG_WARN_UNREACHABLE_CODE = YES; + CLANG_WARN__DUPLICATE_METHOD_MATCH = YES; + CODE_SIGN_IDENTITY = "Mac Developer"; + COPY_PHASE_STRIP = NO; + CURRENT_PROJECT_VERSION = 1; + DEBUG_INFORMATION_FORMAT = dwarf; + ENABLE_STRICT_OBJC_MSGSEND = YES; + ENABLE_TESTABILITY = YES; + GCC_C_LANGUAGE_STANDARD = gnu11; + GCC_DYNAMIC_NO_PIC = NO; + GCC_NO_COMMON_BLOCKS = YES; + GCC_OPTIMIZATION_LEVEL = 0; + GCC_PREPROCESSOR_DEFINITIONS = ( + "DEBUG=1", + "$(inherited)", + ); + GCC_WARN_64_TO_32_BIT_CONVERSION = YES; + GCC_WARN_ABOUT_RETURN_TYPE = YES_ERROR; + GCC_WARN_UNDECLARED_SELECTOR = YES; + GCC_WARN_UNINITIALIZED_AUTOS = YES_AGGRESSIVE; + GCC_WARN_UNUSED_FUNCTION = YES; + GCC_WARN_UNUSED_VARIABLE = YES; + MACOSX_DEPLOYMENT_TARGET = 10.12; + MTL_ENABLE_DEBUG_INFO = YES; + ONLY_ACTIVE_ARCH = YES; + SDKROOT = macosx; + SWIFT_ACTIVE_COMPILATION_CONDITIONS = DEBUG; + SWIFT_OPTIMIZATION_LEVEL = "-Onone"; + VERSIONING_SYSTEM = "apple-generic"; + VERSION_INFO_PREFIX = ""; + }; + name = Debug; + }; + 84FF5F971EFA285800C15A01 /* Release */ = { + isa = XCBuildConfiguration; + buildSettings = { + ALWAYS_SEARCH_USER_PATHS = NO; + CLANG_ANALYZER_NONNULL = YES; + CLANG_ANALYZER_NUMBER_OBJECT_CONVERSION = YES_AGGRESSIVE; + CLANG_CXX_LANGUAGE_STANDARD = "gnu++14"; + CLANG_CXX_LIBRARY = "libc++"; + CLANG_ENABLE_MODULES = YES; + CLANG_ENABLE_OBJC_ARC = YES; + CLANG_WARN_BLOCK_CAPTURE_AUTORELEASING = YES; + CLANG_WARN_BOOL_CONVERSION = YES; + CLANG_WARN_COMMA = YES; + CLANG_WARN_CONSTANT_CONVERSION = YES; + CLANG_WARN_DIRECT_OBJC_ISA_USAGE = YES_ERROR; + CLANG_WARN_DOCUMENTATION_COMMENTS = YES; + CLANG_WARN_EMPTY_BODY = YES; + CLANG_WARN_ENUM_CONVERSION = YES; + CLANG_WARN_INFINITE_RECURSION = YES; + CLANG_WARN_INT_CONVERSION = YES; + CLANG_WARN_OBJC_ROOT_CLASS = YES_ERROR; + CLANG_WARN_RANGE_LOOP_ANALYSIS = YES; 
+ CLANG_WARN_STRICT_PROTOTYPES = YES; + CLANG_WARN_SUSPICIOUS_MOVE = YES; + CLANG_WARN_UNGUARDED_AVAILABILITY = YES_AGGRESSIVE; + CLANG_WARN_UNREACHABLE_CODE = YES; + CLANG_WARN__DUPLICATE_METHOD_MATCH = YES; + CODE_SIGN_IDENTITY = "Mac Developer"; + COPY_PHASE_STRIP = NO; + CURRENT_PROJECT_VERSION = 1; + DEBUG_INFORMATION_FORMAT = "dwarf-with-dsym"; + ENABLE_NS_ASSERTIONS = NO; + ENABLE_STRICT_OBJC_MSGSEND = YES; + GCC_C_LANGUAGE_STANDARD = gnu11; + GCC_NO_COMMON_BLOCKS = YES; + GCC_WARN_64_TO_32_BIT_CONVERSION = YES; + GCC_WARN_ABOUT_RETURN_TYPE = YES_ERROR; + GCC_WARN_UNDECLARED_SELECTOR = YES; + GCC_WARN_UNINITIALIZED_AUTOS = YES_AGGRESSIVE; + GCC_WARN_UNUSED_FUNCTION = YES; + GCC_WARN_UNUSED_VARIABLE = YES; + MACOSX_DEPLOYMENT_TARGET = 10.12; + MTL_ENABLE_DEBUG_INFO = NO; + SDKROOT = macosx; + SWIFT_OPTIMIZATION_LEVEL = "-Owholemodule"; + VERSIONING_SYSTEM = "apple-generic"; + VERSION_INFO_PREFIX = ""; + }; + name = Release; + }; + 84FF5F991EFA285800C15A01 /* Debug */ = { + isa = XCBuildConfiguration; + buildSettings = { + CODE_SIGN_IDENTITY = ""; + COMBINE_HIDPI_IMAGES = YES; + DEFINES_MODULE = YES; + DEVELOPMENT_TEAM = M8L2WTLA8W; + DYLIB_COMPATIBILITY_VERSION = 1; + DYLIB_CURRENT_VERSION = 1; + DYLIB_INSTALL_NAME_BASE = "@rpath"; + FRAMEWORK_VERSION = A; + INFOPLIST_FILE = RSParser/Info.plist; + INSTALL_PATH = "$(LOCAL_LIBRARY_DIR)/Frameworks"; + LD_RUNPATH_SEARCH_PATHS = "$(inherited) @executable_path/../Frameworks @loader_path/Frameworks"; + PRODUCT_BUNDLE_IDENTIFIER = com.ranchero.RSParser; + PRODUCT_NAME = "$(TARGET_NAME:c99extidentifier)"; + SKIP_INSTALL = YES; + SWIFT_VERSION = 4.0; + }; + name = Debug; + }; + 84FF5F9A1EFA285800C15A01 /* Release */ = { + isa = XCBuildConfiguration; + buildSettings = { + CODE_SIGN_IDENTITY = ""; + COMBINE_HIDPI_IMAGES = YES; + DEFINES_MODULE = YES; + DEVELOPMENT_TEAM = M8L2WTLA8W; + DYLIB_COMPATIBILITY_VERSION = 1; + DYLIB_CURRENT_VERSION = 1; + DYLIB_INSTALL_NAME_BASE = "@rpath"; + FRAMEWORK_VERSION = A; + 
INFOPLIST_FILE = RSParser/Info.plist; + INSTALL_PATH = "$(LOCAL_LIBRARY_DIR)/Frameworks"; + LD_RUNPATH_SEARCH_PATHS = "$(inherited) @executable_path/../Frameworks @loader_path/Frameworks"; + PRODUCT_BUNDLE_IDENTIFIER = com.ranchero.RSParser; + PRODUCT_NAME = "$(TARGET_NAME:c99extidentifier)"; + SKIP_INSTALL = YES; + SWIFT_VERSION = 4.0; + }; + name = Release; + }; + 84FF5F9C1EFA285800C15A01 /* Debug */ = { + isa = XCBuildConfiguration; + buildSettings = { + COMBINE_HIDPI_IMAGES = YES; + DEVELOPMENT_TEAM = M8L2WTLA8W; + INFOPLIST_FILE = RSParserTests/Info.plist; + LD_RUNPATH_SEARCH_PATHS = "$(inherited) @executable_path/../Frameworks @loader_path/../Frameworks"; + PRODUCT_BUNDLE_IDENTIFIER = com.ranchero.RSParserTests; + PRODUCT_NAME = "$(TARGET_NAME)"; + SWIFT_VERSION = 4.0; + }; + name = Debug; + }; + 84FF5F9D1EFA285800C15A01 /* Release */ = { + isa = XCBuildConfiguration; + buildSettings = { + COMBINE_HIDPI_IMAGES = YES; + DEVELOPMENT_TEAM = M8L2WTLA8W; + INFOPLIST_FILE = RSParserTests/Info.plist; + LD_RUNPATH_SEARCH_PATHS = "$(inherited) @executable_path/../Frameworks @loader_path/../Frameworks"; + PRODUCT_BUNDLE_IDENTIFIER = com.ranchero.RSParserTests; + PRODUCT_NAME = "$(TARGET_NAME)"; + SWIFT_VERSION = 4.0; + }; + name = Release; + }; +/* End XCBuildConfiguration section */ + +/* Begin XCConfigurationList section */ + 84FF5F7E1EFA285800C15A01 /* Build configuration list for PBXProject "RSParser" */ = { + isa = XCConfigurationList; + buildConfigurations = ( + 84FF5F961EFA285800C15A01 /* Debug */, + 84FF5F971EFA285800C15A01 /* Release */, + ); + defaultConfigurationIsVisible = 0; + defaultConfigurationName = Release; + }; + 84FF5F981EFA285800C15A01 /* Build configuration list for PBXNativeTarget "RSParser" */ = { + isa = XCConfigurationList; + buildConfigurations = ( + 84FF5F991EFA285800C15A01 /* Debug */, + 84FF5F9A1EFA285800C15A01 /* Release */, + ); + defaultConfigurationIsVisible = 0; + defaultConfigurationName = Release; + }; + 84FF5F9B1EFA285800C15A01 /* 
Build configuration list for PBXNativeTarget "RSParserTests" */ = { + isa = XCConfigurationList; + buildConfigurations = ( + 84FF5F9C1EFA285800C15A01 /* Debug */, + 84FF5F9D1EFA285800C15A01 /* Release */, + ); + defaultConfigurationIsVisible = 0; + defaultConfigurationName = Release; + }; +/* End XCConfigurationList section */ + }; + rootObject = 84FF5F7B1EFA285800C15A01 /* Project object */; +} diff --git a/Frameworks/RSParser/RSParser.xcodeproj/project.xcworkspace/contents.xcworkspacedata b/Frameworks/RSParser/RSParser.xcodeproj/project.xcworkspace/contents.xcworkspacedata new file mode 100644 index 000000000..9a349e055 --- /dev/null +++ b/Frameworks/RSParser/RSParser.xcodeproj/project.xcworkspace/contents.xcworkspacedata @@ -0,0 +1,7 @@ + + + + + diff --git a/Frameworks/RSParser/RSParserData.h b/Frameworks/RSParser/RSParserData.h new file mode 100755 index 000000000..5004884e6 --- /dev/null +++ b/Frameworks/RSParser/RSParserData.h @@ -0,0 +1,22 @@ +// +// RSXMLData.h +// RSXML +// +// Created by Brent Simmons on 8/24/15. +// Copyright © 2015 Ranchero Software, LLC. All rights reserved. +// + +@import Foundation; + +NS_ASSUME_NONNULL_BEGIN + +@interface RSXMLData : NSObject + +- (instancetype)initWithData:(NSData *)data urlString:(NSString *)urlString; + +@property (nonatomic, readonly) NSData *data; +@property (nonatomic, readonly) NSString *urlString; + +@end + +NS_ASSUME_NONNULL_END diff --git a/Frameworks/RSParser/RSParserData.m b/Frameworks/RSParser/RSParserData.m new file mode 100755 index 000000000..d6a36f132 --- /dev/null +++ b/Frameworks/RSParser/RSParserData.m @@ -0,0 +1,28 @@ +// +// RSXMLData.m +// RSXML +// +// Created by Brent Simmons on 8/24/15. +// Copyright © 2015 Ranchero Software, LLC. All rights reserved. 
+// + +#import "RSXMLData.h" + +@implementation RSXMLData + + +- (instancetype)initWithData:(NSData *)data urlString:(NSString *)urlString { + + self = [super init]; + if (!self) { + return nil; + } + + _data = data; + _urlString = urlString; + + return self; +} + + +@end diff --git a/Frameworks/RSParser/RSParserError.h b/Frameworks/RSParser/RSParserError.h new file mode 100755 index 000000000..95b0f782a --- /dev/null +++ b/Frameworks/RSParser/RSParserError.h @@ -0,0 +1,19 @@ +// +// RSXMLError.h +// RSXML +// +// Created by Brent Simmons on 2/28/16. +// Copyright © 2016 Ranchero Software, LLC. All rights reserved. +// + +@import Foundation; + +extern NSString *RSXMLErrorDomain; + + +typedef NS_ENUM(NSInteger, RSXMLErrorCode) { + RSXMLErrorCodeDataIsWrongFormat = 1024 +}; + + +NSError *RSOPMLWrongFormatError(NSString *fileName); diff --git a/Frameworks/RSParser/RSParserError.m b/Frameworks/RSParser/RSParserError.m new file mode 100755 index 000000000..c713906b9 --- /dev/null +++ b/Frameworks/RSParser/RSParserError.m @@ -0,0 +1,22 @@ +// +// RSXMLError.m +// RSXML +// +// Created by Brent Simmons on 2/28/16. +// Copyright © 2016 Ranchero Software, LLC. All rights reserved. 
+// + +#import "RSXMLError.h" + +NSString *RSXMLErrorDomain = @"com.ranchero.RSXML"; + +NSError *RSOPMLWrongFormatError(NSString *fileName) { + + NSString *localizedDescriptionFormatString = NSLocalizedString(@"The file ‘%@’ can’t be parsed because it’s not an OPML file.", @"OPML wrong format"); + NSString *localizedDescription = [NSString stringWithFormat:localizedDescriptionFormatString, fileName]; + + NSString *localizedFailureString = NSLocalizedString(@"The file is not an OPML file.", @"OPML wrong format"); + NSDictionary *userInfo = @{NSLocalizedDescriptionKey: localizedDescription, NSLocalizedFailureReasonErrorKey: localizedFailureString}; + + return [[NSError alloc] initWithDomain:RSXMLErrorDomain code:RSXMLErrorCodeDataIsWrongFormat userInfo:userInfo]; +} diff --git a/Frameworks/RSParser/RSParserTests/Info.plist b/Frameworks/RSParser/RSParserTests/Info.plist new file mode 100644 index 000000000..6c40a6cd0 --- /dev/null +++ b/Frameworks/RSParser/RSParserTests/Info.plist @@ -0,0 +1,22 @@ + + + + + CFBundleDevelopmentRegion + $(DEVELOPMENT_LANGUAGE) + CFBundleExecutable + $(EXECUTABLE_NAME) + CFBundleIdentifier + $(PRODUCT_BUNDLE_IDENTIFIER) + CFBundleInfoDictionaryVersion + 6.0 + CFBundleName + $(PRODUCT_NAME) + CFBundlePackageType + BNDL + CFBundleShortVersionString + 1.0 + CFBundleVersion + 1 + + diff --git a/Frameworks/RSParser/RSParserTests/RSParserTests.swift b/Frameworks/RSParser/RSParserTests/RSParserTests.swift new file mode 100644 index 000000000..be080c86b --- /dev/null +++ b/Frameworks/RSParser/RSParserTests/RSParserTests.swift @@ -0,0 +1,36 @@ +// +// RSParserTests.swift +// RSParserTests +// +// Created by Brent Simmons on 6/20/17. +// Copyright © 2017 Ranchero Software, LLC. All rights reserved. +// + +import XCTest +@testable import RSParser + +class RSParserTests: XCTestCase { + + override func setUp() { + super.setUp() + // Put setup code here. This method is called before the invocation of each test method in the class. 
+ } + + override func tearDown() { + // Put teardown code here. This method is called after the invocation of each test method in the class. + super.tearDown() + } + + func testExample() { + // This is an example of a functional test case. + // Use XCTAssert and related functions to verify your tests produce the correct results. + } + + func testPerformanceExample() { + // This is an example of a performance test case. + self.measure { + // Put the code you want to measure the time of here. + } + } + +} diff --git a/Frameworks/RSParser/Utilities/NSString+RSXML.h b/Frameworks/RSParser/Utilities/NSString+RSXML.h new file mode 100755 index 000000000..352caf60c --- /dev/null +++ b/Frameworks/RSParser/Utilities/NSString+RSXML.h @@ -0,0 +1,16 @@ +// +// NSString+RSXML.h +// RSXML +// +// Created by Brent Simmons on 9/25/15. +// Copyright © 2015 Ranchero Software, LLC. All rights reserved. +// + +@import Foundation; + +@interface NSString (RSXML) + +- (NSString *)rs_stringByDecodingHTMLEntities; + +@end + diff --git a/Frameworks/RSParser/Utilities/NSString+RSXML.m b/Frameworks/RSParser/Utilities/NSString+RSXML.m new file mode 100755 index 000000000..3856d2acd --- /dev/null +++ b/Frameworks/RSParser/Utilities/NSString+RSXML.m @@ -0,0 +1,408 @@ +// +// NSString+RSXML.m +// RSXML +// +// Created by Brent Simmons on 9/25/15. +// Copyright © 2015 Ranchero Software, LLC. All rights reserved. 
+// + +#import "NSString+RSXML.h" + + +@interface NSScanner (RSXML) + +- (BOOL)rs_scanEntityValue:(NSString * _Nullable * _Nullable)decodedEntity; + +@end + + +@implementation NSString (RSXML) + +- (NSString *)rs_stringByDecodingHTMLEntities { + + @autoreleasepool { + + NSScanner *scanner = [[NSScanner alloc] initWithString:self]; + scanner.charactersToBeSkipped = nil; + NSMutableString *result = [[NSMutableString alloc] init]; + + while (true) { + + NSString *scannedString = nil; + if ([scanner scanUpToString:@"&" intoString:&scannedString]) { + [result appendString:scannedString]; + } + if (scanner.isAtEnd) { + break; + } + NSUInteger savedScanLocation = scanner.scanLocation; + + NSString *decodedEntity = nil; + if ([scanner rs_scanEntityValue:&decodedEntity]) { + [result appendString:decodedEntity]; + } + else { + [result appendString:@"&"]; + scanner.scanLocation = savedScanLocation + 1; + } + + if (scanner.isAtEnd) { + break; + } + } + + if ([self isEqualToString:result]) { + return self; + } + return [result copy]; + } +} + + +static NSDictionary *RSEntitiesDictionary(void); +static NSString *RSXMLStringWithValue(unichar value); + +- (NSString * _Nullable)rs_stringByDecodingEntity { + + // self may or may not have outer & and ; characters. 
+ + NSMutableString *s = [self mutableCopy]; + + if ([s hasPrefix:@"&"]) { + [s deleteCharactersInRange:NSMakeRange(0, 1)]; + } + if ([s hasSuffix:@";"]) { + [s deleteCharactersInRange:NSMakeRange(s.length - 1, 1)]; + } + + NSDictionary *entitiesDictionary = RSEntitiesDictionary(); + + NSString *decodedEntity = entitiesDictionary[s]; // Look up the stripped name: the table keys carry no & or ; delimiters. + if (decodedEntity) { + return decodedEntity; + } + + if ([s hasPrefix:@"#x"]) { // Hex + NSScanner *scanner = [[NSScanner alloc] initWithString:s]; + scanner.charactersToBeSkipped = [NSCharacterSet characterSetWithCharactersInString:@"#x"]; + unsigned int hexValue = 0; + if ([scanner scanHexInt:&hexValue] && hexValue > 0 && hexValue <= 0xFFFF) { // unichar is 16 bits; refuse values the cast would truncate. + return RSXMLStringWithValue((unichar)hexValue); + } + return nil; + } + + else if ([s hasPrefix:@"#"]) { + [s deleteCharactersInRange:NSMakeRange(0, 1)]; + NSInteger value = s.integerValue; + if (value < 1 || value > 0xFFFF) { // Same range as the hex branch. + return nil; + } + return RSXMLStringWithValue((unichar)value); + } + + return nil; +} + +@end + +@implementation NSScanner (RSXML) + +- (BOOL)rs_scanEntityValue:(NSString * _Nullable * _Nullable)decodedEntity { + + NSString *s = self.string; + NSUInteger initialScanLocation = self.scanLocation; + static NSUInteger maxEntityLength = 20; // It’s probably smaller, but this is just for sanity. 
+ + while (true) { + + unichar ch = [s characterAtIndex:self.scanLocation]; + if ([NSCharacterSet.whitespaceAndNewlineCharacterSet characterIsMember:ch]) { + break; + } + if (ch == ';') { + if (!decodedEntity) { + return YES; + } + NSString *rawEntity = [s substringWithRange:NSMakeRange(initialScanLocation + 1, (self.scanLocation - initialScanLocation) - 1)]; + *decodedEntity = [rawEntity rs_stringByDecodingEntity]; + self.scanLocation = self.scanLocation + 1; + return *decodedEntity != nil; + } + + self.scanLocation = self.scanLocation + 1; + if (self.scanLocation - initialScanLocation > maxEntityLength) { + break; + } + if (self.isAtEnd) { + break; + } + } + + return NO; +} + +@end + +static NSString *RSXMLStringWithValue(unichar value) { + + return [[NSString alloc] initWithFormat:@"%C", value]; +} + +static NSDictionary *RSEntitiesDictionary(void) { + + static NSDictionary *entitiesDictionary = nil; + + static dispatch_once_t onceToken; + dispatch_once(&onceToken, ^{ + + entitiesDictionary = + @{@"#034": @"\"", + @"#038": @"&", + @"#38": @"&", + @"#039": @"'", + @"#145": @"‘", + @"#146": @"’", + @"#147": @"“", + @"#148": @"”", + @"#149": @"•", + @"#150": @"-", + @"#151": @"—", + @"#153": @"™", + @"#160": RSXMLStringWithValue(160), + @"#161": @"¡", + @"#162": @"¢", + @"#163": @"£", + @"#164": @"?", + @"#165": @"¥", + @"#166": @"?", + @"#167": @"§", + @"#168": @"¨", + @"#169": @"©", + @"#170": @"ª", // Same character as ordf. + @"#171": @"«", + @"#172": @"¬", + @"#173": @"¬", + @"#174": @"®", + @"#175": @"¯", + @"#176": @"°", + @"#177": @"±", + @"#178": @" ", + @"#179": @" ", + @"#180": @"´", + @"#181": @"µ", + @"#182": @"¶", // Same character as para. + @"#183": @"·", + @"#184": @"¸", + @"#185": @" ", + @"#186": @"º", + @"#187": @"»", + @"#188": @"1/4", + @"#189": @"1/2", + @"#190": @"3/4", // Same replacement as frac34. + @"#191": @"¿", + @"#192": @"À", + @"#193": @"Á", + @"#194": @"Â", + @"#195": @"Ã", + @"#196": @"Ä", + @"#197": @"Å", + @"#198": @"Æ", + @"#199": @"Ç", + @"#200": @"È", + @"#201": @"É", + @"#202": @"Ê", + @"#203": @"Ë", + 
@"#204": @"Ì", + @"#205": @"Í", + @"#206": @"Î", + @"#207": @"Ï", + @"#208": @"?", + @"#209": @"Ñ", + @"#210": @"Ò", + @"#211": @"Ó", + @"#212": @"Ô", + @"#213": @"Õ", + @"#214": @"Ö", + @"#215": @"x", + @"#216": @"Ø", + @"#217": @"Ù", + @"#218": @"Ú", + @"#219": @"Û", + @"#220": @"Ü", + @"#221": @"Y", + @"#222": @"?", + @"#223": @"ß", + @"#224": @"à", + @"#225": @"á", + @"#226": @"â", + @"#227": @"ã", + @"#228": @"ä", + @"#229": @"å", + @"#230": @"æ", + @"#231": @"ç", + @"#232": @"è", + @"#233": @"é", + @"#234": @"ê", + @"#235": @"ë", + @"#236": @"ì", + @"#237": @"í", + @"#238": @"î", + @"#239": @"ï", + @"#240": @"?", + @"#241": @"ñ", + @"#242": @"ò", + @"#243": @"ó", + @"#244": @"ô", + @"#245": @"õ", + @"#246": @"ö", + @"#247": @"÷", + @"#248": @"ø", + @"#249": @"ù", + @"#250": @"ú", + @"#251": @"û", + @"#252": @"ü", + @"#253": @"y", + @"#254": @"?", + @"#255": @"ÿ", + @"#32": @" ", + @"#34": @"\"", + @"#39": @"'", // Same character as #039 and apos. + @"#8194": @" ", + @"#8195": @" ", + @"#8211": @"-", + @"#8212": @"—", + @"#8216": @"‘", + @"#8217": @"’", + @"#8220": @"“", + @"#8221": @"”", + @"#8230": @"…", + @"#8617": RSXMLStringWithValue(8617), + @"AElig": @"Æ", + @"Aacute": @"Á", + @"Acirc": @"Â", + @"Agrave": @"À", + @"Aring": @"Å", + @"Atilde": @"Ã", + @"Auml": @"Ä", + @"Ccedil": @"Ç", + @"Dstrok": @"?", + @"ETH": @"?", + @"Eacute": @"É", + @"Ecirc": @"Ê", + @"Egrave": @"È", + @"Euml": @"Ë", + @"Iacute": @"Í", + @"Icirc": @"Î", + @"Igrave": @"Ì", + @"Iuml": @"Ï", + @"Ntilde": @"Ñ", + @"Oacute": @"Ó", + @"Ocirc": @"Ô", + @"Ograve": @"Ò", + @"Oslash": @"Ø", + @"Otilde": @"Õ", + @"Ouml": @"Ö", + @"Pi": @"Π", + @"THORN": @"?", + @"Uacute": @"Ú", + @"Ucirc": @"Û", + @"Ugrave": @"Ù", + @"Uuml": @"Ü", + @"Yacute": @"Y", + @"aacute": @"á", + @"acirc": @"â", + @"acute": @"´", + @"aelig": @"æ", + @"agrave": @"à", + @"amp": @"&", + @"apos": @"'", + @"aring": @"å", + @"atilde": @"ã", + @"auml": @"ä", + @"brkbar": @"?", + @"brvbar": @"?", + @"ccedil": @"ç", + @"cedil": @"¸", + @"cent": @"¢", + @"copy": 
@"©", + @"curren": @"?", + @"deg": @"°", + @"die": @"?", + @"divide": @"÷", + @"eacute": @"é", + @"ecirc": @"ê", + @"egrave": @"è", + @"eth": @"?", + @"euml": @"ë", + @"euro": @"€", + @"frac12": @"1/2", + @"frac14": @"1/4", + @"frac34": @"3/4", + @"gt": @">", + @"hearts": @"♥", + @"hellip": @"…", + @"iacute": @"í", + @"icirc": @"î", + @"iexcl": @"¡", + @"igrave": @"ì", + @"iquest": @"¿", + @"iuml": @"ï", + @"laquo": @"«", + @"ldquo": @"“", + @"lsquo": @"‘", + @"lt": @"<", + @"macr": @"¯", + @"mdash": @"—", + @"micro": @"µ", + @"middot": @"·", + @"ndash": @"-", + @"not": @"¬", + @"ntilde": @"ñ", + @"oacute": @"ó", + @"ocirc": @"ô", + @"ograve": @"ò", + @"ordf": @"ª", + @"ordm": @"º", + @"oslash": @"ø", + @"otilde": @"õ", + @"ouml": @"ö", + @"para": @"¶", + @"pi": @"π", + @"plusmn": @"±", + @"pound": @"£", + @"quot": @"\"", + @"raquo": @"»", + @"rdquo": @"”", + @"reg": @"®", + @"rsquo": @"’", + @"sect": @"§", + @"shy": @" ", + @"sup1": @" ", + @"sup2": @" ", + @"sup3": @" ", + @"szlig": @"ß", + @"thorn": @"?", + @"times": @"x", + @"trade": @"™", + @"uacute": @"ú", + @"ucirc": @"û", + @"ugrave": @"ù", + @"uml": @"¨", + @"uuml": @"ü", + @"yacute": @"y", + @"yen": @"¥", + @"yuml": @"ÿ", + @"infin": @"∞", + @"nbsp": RSXMLStringWithValue(160), + @"#x21A9": RSXMLStringWithValue(8617), + @"#xFE0E": RSXMLStringWithValue(65038), + @"#x2019": RSXMLStringWithValue(8217), + @"#x2026": RSXMLStringWithValue(8230), + @"#x201C": RSXMLStringWithValue(8220), + @"#x201D": RSXMLStringWithValue(8221), + @"#x2014": RSXMLStringWithValue(8212)}; + }); + + return entitiesDictionary; +} diff --git a/Frameworks/RSParser/Utilities/RSDateParser.h b/Frameworks/RSParser/Utilities/RSDateParser.h new file mode 100755 index 000000000..c593a0527 --- /dev/null +++ b/Frameworks/RSParser/Utilities/RSDateParser.h @@ -0,0 +1,25 @@ +// +// RSDateParser.h +// RSXML +// +// Created by Brent Simmons on 3/25/15. +// Copyright (c) 2015 Ranchero Software, LLC. All rights reserved. 
+//
+
+@import Foundation;
+
+
+/*Common web dates -- RFC 822 and ISO 8601 -- are handled here:
+ the formats you find in JSON and XML feeds.
+
+ Any of these may return nil. They may also return garbage, given bad input.*/
+
+
+/*Parses dateString (converted to UTF-8) with RSDateWithBytes.
+ Returns nil when dateString is nil or cannot be parsed.*/
+
+NSDate *RSDateWithString(NSString *dateString);
+
+/*If you're using a SAX parser, you have the bytes and don't need to convert to a string first.
+ It's faster and uses less memory.
+ (Assumes bytes are UTF-8 or ASCII. If you're using the libxml SAX parser, this will work.)
+
+ bytes need not be NUL-terminated; only numberOfBytes bytes are read.
+ Input shorter than 6 or longer than 50 bytes is rejected (nil).*/
+
+NSDate *RSDateWithBytes(const char *bytes, NSUInteger numberOfBytes);
+
diff --git a/Frameworks/RSParser/Utilities/RSDateParser.m b/Frameworks/RSParser/Utilities/RSDateParser.m
new file mode 100755
index 000000000..e08b3bf8f
--- /dev/null
+++ b/Frameworks/RSParser/Utilities/RSDateParser.m
@@ -0,0 +1,435 @@
+//
+//  RSDateParser.m
+//  RSXML
+//
+//  Created by Brent Simmons on 3/25/15.
+//  Copyright (c) 2015 Ranchero Software, LLC. All rights reserved.
+//
+
+#import <time.h>
+#import "RSDateParser.h"
+
+/*One row of the time zone lookup table: an abbreviation and its offset from GMT.
+ For negative zones only offsetHours carries the sign; offsetMinutes is a magnitude.*/
+typedef struct {
+	const char *abbreviation;
+	const NSInteger offsetHours;
+	const NSInteger offsetMinutes;
+} RSTimeZoneAbbreviationAndOffset;
+
+
+#define kNumberOfTimeZones 96
+
+static const RSTimeZoneAbbreviationAndOffset timeZoneTable[kNumberOfTimeZones] = {
+	{"GMT", 0, 0}, //Most common at top, for performance
+	{"PDT", -7, 0}, {"PST", -8, 0}, {"EST", -5, 0}, {"EDT", -4, 0},
+	{"MDT", -6, 0}, {"MST", -7, 0}, {"CST", -6, 0}, {"CDT", -5, 0},
+	{"ACT", -8, 0}, {"AFT", 4, 30}, {"AMT", 4, 0}, {"ART", -3, 0},
+	{"AST", 3, 0}, {"AZT", 4, 0}, {"BIT", -12, 0}, {"BDT", 8, 0},
+	{"ACST", 9, 30}, {"AEST", 10, 0}, {"AKST", -9, 0}, {"AMST", 5, 0},
+	{"AWST", 8, 0}, {"AZOST", -1, 0}, {"BIOT", 6, 0}, {"BRT", -3, 0},
+	{"BST", 6, 0}, {"BTT", 6, 0}, {"CAT", 2, 0}, {"CCT", 6, 30},
+	{"CET", 1, 0}, {"CEST", 2, 0}, {"CHAST", 12, 45}, {"ChST", 10, 0},
+	{"CIST", -8, 0}, {"CKT", -10, 0}, {"CLT", -4, 0}, {"CLST", -3, 0},
+	{"COT", -5, 0}, {"COST", -4, 0}, {"CVT", -1, 0}, {"CXT", 7, 0},
+	{"EAST", -6, 0}, {"EAT", 3, 0}, {"ECT", -4, 0}, {"EEST", 3, 0},
+	{"EET", 2, 0}, {"FJT", 12, 0}, {"FKST", -4, 0}, {"GALT", -6, 0},
+	{"GET", 4, 0}, {"GFT", -3, 0}, {"GILT", 7, 0}, {"GIT", -9, 0},
+	{"GST", -2, 0}, {"GYT", -4, 0}, {"HAST", -10, 0}, {"HKT", 8, 0},
+	{"HMT", 5, 0}, {"IRKT", 8, 0}, {"IRST", 3, 30}, {"IST", 2, 0},
+	{"JST", 9, 0}, {"KRAT", 7, 0}, {"KST", 9, 0}, {"LHST", 10, 30},
+	{"LINT", 14, 0}, {"MAGT", 11, 0}, {"MIT", -9, 30}, {"MSK", 3, 0},
+	{"MUT", 4, 0}, {"NDT", -2, 30}, {"NFT", 11, 30}, {"NPT", 5, 45},
+	{"NT", -3, 30}, {"OMST", 6, 0}, {"PETT", 12, 0}, {"PHOT", 13, 0},
+	{"PKT", 5, 0}, {"RET", 4, 0}, {"SAMT", 4, 0}, {"SAST", 2, 0},
+	{"SBT", 11, 0}, {"SCT", 4, 0}, {"SLT", 5, 30}, {"SST", 8, 0},
+	{"TAHT", -10, 0}, {"THA", 7, 0}, {"UYT", -3, 0}, {"UYST", -2, 0},
+	{"VET", -4, 30}, {"VLAT", 10, 0}, {"WAT", 1, 0}, {"WET", 0, 0},
+	{"WEST", 1, 0}, {"YAKT", 9, 0}, {"YEKT", 5, 0}
+}; /*See http://en.wikipedia.org/wiki/List_of_time_zone_abbreviations for list*/
+
+
+
+#pragma mark - Parser
+
+enum {
+	RSJanuary = 1,
+	RSFebruary,
+	RSMarch,
+	RSApril,
+	RSMay,
+	RSJune,
+	RSJuly,
+	RSAugust,
+	RSSeptember,
+	RSOctober,
+	RSNovember,
+	RSDecember
+};
+
+/*Locale-independent character tests. The <ctype.h> functions depend on the
+ current locale and are undefined for negative char values (any UTF-8 byte
+ >= 0x80 when char is signed); dates only ever need plain ASCII.*/
+
+static BOOL isASCIIDigit(char ch) {
+
+	return ch >= '0' && ch <= '9';
+}
+
+
+static BOOL isASCIIAlpha(char ch) {
+
+	return (ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z');
+}
+
+
+/*A time field that wasn't present in the input counts as zero.*/
+
+static NSInteger valueOrZero(NSInteger value) {
+
+	return (value == NSNotFound) ? 0 : value;
+}
+
+
+static NSInteger nextMonthValue(const char *bytes, NSUInteger numberOfBytes, NSUInteger startingIndex, NSUInteger *finalIndex) {
+
+	/*Months are 1-based -- January is 1, Dec is 12.
+	 Lots of short-circuits here. Not strict. GIGO.
+
+	 Skips non-letters, then looks at up to three letters of the month name.
+	 *finalIndex is set to the index of the last byte looked at (it is left
+	 untouched when startingIndex >= numberOfBytes).
+	 Returns NSNotFound when fewer than two letters were found.*/
+
+	NSUInteger i;
+	NSUInteger numberOfAlphaCharactersFound = 0;
+	char monthCharacters[3] = {0, 0, 0};
+
+	for (i = startingIndex; i < numberOfBytes; i++) {
+
+		*finalIndex = i;
+		char character = bytes[i];
+
+		BOOL isAlphaCharacter = isASCIIAlpha(character);
+		if (!isAlphaCharacter && numberOfAlphaCharactersFound < 1)
+			continue;
+		if (!isAlphaCharacter && numberOfAlphaCharactersFound > 0)
+			break;
+
+		numberOfAlphaCharactersFound++;
+		if (numberOfAlphaCharactersFound == 1) {
+			/*These months are identified by their first letter alone.*/
+			if (character == 'F' || character == 'f')
+				return RSFebruary;
+			if (character == 'S' || character == 's')
+				return RSSeptember;
+			if (character == 'O' || character == 'o')
+				return RSOctober;
+			if (character == 'N' || character == 'n')
+				return RSNovember;
+			if (character == 'D' || character == 'd')
+				return RSDecember;
+		}
+
+		monthCharacters[numberOfAlphaCharactersFound - 1] = character;
+		if (numberOfAlphaCharactersFound >= 3)
+			break;
+	}
+
+	if (numberOfAlphaCharactersFound < 2)
+		return NSNotFound;
+
+	if (monthCharacters[0] == 'J' || monthCharacters[0] == 'j') { //Jan, Jun, Jul
+		/*Index 1, not the loop index: i may be anywhere up to numberOfBytes here.*/
+		if (monthCharacters[1] == 'a' || monthCharacters[1] == 'A')
+			return RSJanuary;
+		if (monthCharacters[1] == 'u' || monthCharacters[1] == 'U') {
+			if (monthCharacters[2] == 'n' || monthCharacters[2] == 'N')
+				return RSJune;
+			return RSJuly;
+		}
+		return RSJanuary;
+	}
+
+	if (monthCharacters[0] == 'M' || monthCharacters[0] == 'm') { //March, May
+		if (monthCharacters[2] == 'y' || monthCharacters[2] == 'Y')
+			return RSMay;
+		return RSMarch;
+	}
+
+	if (monthCharacters[0] == 'A' || monthCharacters[0] == 'a') { //April, August
+		if (monthCharacters[1] == 'u' || monthCharacters[1] == 'U')
+			return RSAugust;
+		return RSApril;
+	}
+
+	return RSJanuary; //should never get here
+}
+
+
+static NSInteger nextNumericValue(const char *bytes, NSUInteger numberOfBytes, NSUInteger startingIndex, NSUInteger maximumNumberOfDigits, NSUInteger *finalIndex) {
+
+	/*maximumNumberOfDigits has a maximum limit of 4 (for time zone offsets and years).
+	 *finalIndex will be the index of the last character looked at.
+
+	 Skips non-digits, then reads a run of up to maximumNumberOfDigits digits.
+	 Returns NSNotFound when no digit was found.*/
+
+	if (maximumNumberOfDigits > 4)
+		maximumNumberOfDigits = 4;
+
+	NSUInteger i = 0;
+	NSUInteger numberOfDigitsFound = 0;
+	NSInteger digits[4] = {0, 0, 0, 0};
+
+	for (i = startingIndex; i < numberOfBytes; i++) {
+		*finalIndex = i;
+		BOOL isDigit = isASCIIDigit(bytes[i]);
+		if (!isDigit && numberOfDigitsFound < 1)
+			continue;
+		if (!isDigit && numberOfDigitsFound > 0)
+			break;
+		digits[numberOfDigitsFound] = bytes[i] - '0';
+		numberOfDigitsFound++;
+		if (numberOfDigitsFound >= maximumNumberOfDigits)
+			break;
+	}
+
+	if (numberOfDigitsFound < 1)
+		return NSNotFound;
+	if (numberOfDigitsFound == 1)
+		return digits[0];
+	if (numberOfDigitsFound == 2)
+		return (digits[0] * 10) + digits[1];
+	if (numberOfDigitsFound == 3)
+		return (digits[0] * 100) + (digits[1] * 10) + digits[2];
+	return (digits[0] * 1000) + (digits[1] * 100) + (digits[2] * 10) + digits[3];
+}
+
+
+static NSInteger millisecondsFromFraction(const char *bytes, NSUInteger numberOfBytes, NSUInteger indexOfDecimalPoint, NSUInteger *finalIndex) {
+
+	/*Reads the fractional-seconds digits that follow the '.' at indexOfDecimalPoint
+	 and returns them as milliseconds: ".5" is 500, ".25" is 250, ".123456" is 123.
+	 All fraction digits are consumed, so extra precision doesn't leak into the
+	 time zone that follows.
+	 *finalIndex will be the index of the last character consumed.*/
+
+	NSInteger milliseconds = 0;
+	NSUInteger numberOfDigitsFound = 0;
+	NSUInteger i = indexOfDecimalPoint + 1;
+
+	for (; i < numberOfBytes && isASCIIDigit(bytes[i]); i++) {
+		if (numberOfDigitsFound < 3)
+			milliseconds = (milliseconds * 10) + (bytes[i] - '0');
+		numberOfDigitsFound++;
+	}
+
+	for (; numberOfDigitsFound < 3; numberOfDigitsFound++)
+		milliseconds *= 10;
+
+	*finalIndex = i - 1;
+	return milliseconds;
+}
+
+
+static BOOL hasAtLeastOneAlphaCharacter(const char *s) {
+
+	/*s must be NUL-terminated.*/
+
+	NSUInteger length = strlen(s);
+	NSUInteger i = 0;
+
+	for (i = 0; i < length; i++) {
+		if (isASCIIAlpha(s[i]))
+			return YES;
+	}
+
+	return NO;
+}
+
+
+#pragma mark - Time Zones and offsets
+
+static NSInteger offsetInSecondsForTimeZoneAbbreviation(const char *abbreviation) {
+
+	/*Linear search should be fine. It's a C array, and short (under 100 items).
+	 Most common time zones are at the beginning of the array. (We can tweak this as needed.)
+
+	 The comparison is case-sensitive. Unknown abbreviations are treated as GMT (0).*/
+
+	NSUInteger i;
+
+	for (i = 0; i < kNumberOfTimeZones; i++) {
+
+		const RSTimeZoneAbbreviationAndOffset *zone = &timeZoneTable[i];
+		if (strcmp(abbreviation, zone->abbreviation) == 0) {
+			/*Minutes extend the offset in the same direction as the hours.*/
+			if (zone->offsetHours < 0)
+				return (zone->offsetHours * 60 * 60) - (zone->offsetMinutes * 60);
+			return (zone->offsetHours * 60 * 60) + (zone->offsetMinutes * 60);
+		}
+	}
+
+	return 0;
+}
+
+
+static NSInteger offsetInSecondsForOffsetCharacters(const char *timeZoneCharacters) {
+
+	/*timeZoneCharacters is a NUL-terminated numeric offset with any colon
+	 already removed: +0700, -0530. Anything not starting with '+' is negative.*/
+
+	BOOL isPlus = timeZoneCharacters[0] == '+';
+	NSUInteger length = strlen(timeZoneCharacters);
+	NSUInteger finalIndex = 0;
+	NSInteger hours = valueOrZero(nextNumericValue(timeZoneCharacters, length, 0, 2, &finalIndex));
+	NSInteger minutes = valueOrZero(nextNumericValue(timeZoneCharacters, length, finalIndex + 1, 2, &finalIndex));
+
+	if (hours == 0 && minutes == 0)
+		return 0;
+
+	NSInteger seconds = (hours * 60 * 60) + (minutes * 60);
+	if (!isPlus)
+		seconds = 0 - seconds;
+	return seconds;
+}
+
+
+static const char *rs_GMT = "GMT";
+static const char *rs_UTC = "UTC";
+
+static NSInteger parsedTimeZoneOffset(const char *bytes, NSUInteger numberOfBytes, NSUInteger startingIndex) {
+
+	/*Examples: GMT Z +0000 -0000 +07:00 -0700 PDT EST
+	 Parse into char[5] -- drop any colon characters. If numeric, calculate seconds from GMT.
+	 If alpha, special-case GMT and Z, otherwise look up in time zone list to get offset.
+
+	 Returns the offset in seconds east of GMT; 0 when nothing usable is found.*/
+
+	char timeZoneCharacters[6] = {0, 0, 0, 0, 0, 0}; //nil-terminated last character
+	NSUInteger i = 0;
+	NSUInteger numberOfCharactersFound = 0;
+
+	for (i = startingIndex; i < numberOfBytes; i++) {
+		char ch = bytes[i];
+		if (ch == ':' || ch == ' ')
+			continue;
+		if (isASCIIDigit(ch) || isASCIIAlpha(ch) || ch == '+' || ch == '-') {
+			numberOfCharactersFound++;
+			timeZoneCharacters[numberOfCharactersFound - 1] = ch;
+		}
+		if (numberOfCharactersFound >= 5)
+			break;
+	}
+
+	if (numberOfCharactersFound < 1 || timeZoneCharacters[0] == 'Z' || timeZoneCharacters[0] == 'z')
+		return 0;
+	if (strcasestr(timeZoneCharacters, rs_GMT) != NULL || strcasestr(timeZoneCharacters, rs_UTC) != NULL)
+		return 0;
+
+	if (hasAtLeastOneAlphaCharacter(timeZoneCharacters))
+		return offsetInSecondsForTimeZoneAbbreviation(timeZoneCharacters);
+	return offsetInSecondsForOffsetCharacters(timeZoneCharacters);
+}
+
+
+#pragma mark - Date Creation
+
+static NSDate *dateWithYearMonthDayHourMinuteSecondAndTimeZoneOffset(NSInteger year, NSInteger month, NSInteger day, NSInteger hour, NSInteger minute, NSInteger second, NSInteger milliseconds, NSInteger timeZoneOffset) {
+
+	/*The components are wall-clock values in a zone timeZoneOffset seconds east
+	 of GMT; month is 1-based. Returns the corresponding absolute date.*/
+
+	struct tm timeInfo;
+	timeInfo.tm_sec = (int)second;
+	timeInfo.tm_min = (int)minute;
+	timeInfo.tm_hour = (int)hour;
+	timeInfo.tm_mday = (int)day;
+	timeInfo.tm_mon = (int)(month - 1); //It's 1-based coming in
+	timeInfo.tm_year = (int)(year - 1900); //see time.h -- it's years since 1900
+	timeInfo.tm_wday = -1;
+	timeInfo.tm_yday = -1;
+	timeInfo.tm_isdst = -1;
+	timeInfo.tm_gmtoff = 0;//[timeZone secondsFromGMT];
+	timeInfo.tm_zone = NULL;
+
+	/*timegm instead of mktime (which uses local time zone).
+	 Check for failure on timegm's own result, before the zone offset is applied.*/
+	time_t secondsSince1970 = timegm(&timeInfo);
+	if (secondsSince1970 == (time_t)-1) {
+
+		/*NSCalendar is super-amazingly-slow (which is partly why RSDateParser exists), so this is used only when the date is far enough in the future (19 January 2038 03:14:08Z on 32-bit systems) that timegm fails. If profiling says that this is a performance issue, then you've got a weird app that needs to work with dates far in the future.
+
+		 (-1 is also the legitimate result for 1969-12-31T23:59:59Z; the calendar computes the same instant.)
+		 Feed dates are Gregorian, so don't use the user's current calendar.*/
+
+		NSDateComponents *dateComponents = [NSDateComponents new];
+
+		dateComponents.timeZone = [NSTimeZone timeZoneForSecondsFromGMT:timeZoneOffset];
+		dateComponents.year = year;
+		dateComponents.month = month;
+		dateComponents.day = day;
+		dateComponents.hour = hour;
+		dateComponents.minute = minute;
+		dateComponents.second = second;
+		dateComponents.nanosecond = milliseconds * 1000000;
+
+		NSCalendar *gregorianCalendar = [[NSCalendar alloc] initWithCalendarIdentifier:NSCalendarIdentifierGregorian];
+		return [gregorianCalendar dateFromComponents:dateComponents];
+	}
+
+	NSTimeInterval rawTime = (NSTimeInterval)secondsSince1970 - (NSTimeInterval)timeZoneOffset;
+	if (milliseconds > 0) {
+		rawTime += (NSTimeInterval)milliseconds / 1000.0;
+	}
+
+	return [NSDate dateWithTimeIntervalSince1970:rawTime];
+}
+
+
+#pragma mark - Standard Formats
+
+static NSDate *RSParsePubDateWithBytes(const char *bytes, NSUInteger numberOfBytes) {
+
+	/*@"EEE',' dd MMM yyyy HH':'mm':'ss ZZZ"
+	 @"EEE, dd MMM yyyy HH:mm:ss zzz"
+	 @"dd MMM yyyy HH:mm zzz"
+	 @"dd MMM yyyy HH:mm ZZZ"
+	 @"EEE, dd MMM yyyy"
+	 @"EEE, dd MMM yyyy HH:mm zzz"
+	 etc.
+
+	 Returns nil when no month name or no year can be found.*/
+
+	NSUInteger finalIndex = 0;
+	NSInteger day = 1;
+	NSInteger month = RSJanuary;
+	NSInteger year = 1970;
+	NSInteger hour = 0;
+	NSInteger minute = 0;
+	NSInteger second = 0;
+	NSInteger timeZoneOffset = 0;
+
+	day = nextNumericValue(bytes, numberOfBytes, 0, 2, &finalIndex);
+	if (day < 1 || day == NSNotFound)
+		day = 1;
+
+	month = nextMonthValue(bytes, numberOfBytes, finalIndex + 1, &finalIndex);
+	year = nextNumericValue(bytes, numberOfBytes, finalIndex + 1, 4, &finalIndex);
+	if (month == NSNotFound || year == NSNotFound)
+		return nil;
+
+	/*The time of day is optional; missing fields count as zero.*/
+	hour = valueOrZero(nextNumericValue(bytes, numberOfBytes, finalIndex + 1, 2, &finalIndex));
+	minute = valueOrZero(nextNumericValue(bytes, numberOfBytes, finalIndex + 1, 2, &finalIndex));
+
+	NSUInteger currentIndex = finalIndex + 1;
+
+	BOOL hasSeconds = (currentIndex < numberOfBytes) && (bytes[currentIndex] == ':');
+	if (hasSeconds)
+		second = valueOrZero(nextNumericValue(bytes, numberOfBytes, currentIndex, 2, &finalIndex));
+
+	currentIndex = finalIndex + 1;
+	BOOL hasTimeZone = (currentIndex < numberOfBytes) && (bytes[currentIndex] == ' ');
+	if (hasTimeZone)
+		timeZoneOffset = parsedTimeZoneOffset(bytes, numberOfBytes, currentIndex);
+
+	return dateWithYearMonthDayHourMinuteSecondAndTimeZoneOffset(year, month, day, hour, minute, second, 0, timeZoneOffset);
+}
+
+
+static NSDate *RSParseW3CWithBytes(const char *bytes, NSUInteger numberOfBytes) {
+
+	/*@"yyyy'-'MM'-'dd'T'HH':'mm':'ss"
+	 @"yyyy-MM-dd'T'HH:mm:sszzz"
+	 @"yyyy-MM-dd'T'HH:mm:ss'.'SSSzzz"
+	 etc.
+
+	 Date-only input ("yyyy-MM-dd") is midnight GMT.
+	 Returns nil when year, month or day is missing.*/
+
+	NSUInteger finalIndex = 0;
+	NSInteger day = 1;
+	NSInteger month = RSJanuary;
+	NSInteger year = 1970;
+	NSInteger hour = 0;
+	NSInteger minute = 0;
+	NSInteger second = 0;
+	NSInteger milliseconds = 0;
+	NSInteger timeZoneOffset = 0;
+
+	year = nextNumericValue(bytes, numberOfBytes, 0, 4, &finalIndex);
+	month = nextNumericValue(bytes, numberOfBytes, finalIndex + 1, 2, &finalIndex);
+	day = nextNumericValue(bytes, numberOfBytes, finalIndex + 1, 2, &finalIndex);
+	if (year == NSNotFound || month == NSNotFound || day == NSNotFound)
+		return nil;
+
+	/*NSNotFound must not reach struct tm: truncated to int it becomes -1,
+	 which would move a date-only value back into the previous day.*/
+	hour = valueOrZero(nextNumericValue(bytes, numberOfBytes, finalIndex + 1, 2, &finalIndex));
+	minute = valueOrZero(nextNumericValue(bytes, numberOfBytes, finalIndex + 1, 2, &finalIndex));
+	second = valueOrZero(nextNumericValue(bytes, numberOfBytes, finalIndex + 1, 2, &finalIndex));
+
+	NSUInteger currentIndex = finalIndex + 1;
+	BOOL hasMilliseconds = (currentIndex < numberOfBytes) && (bytes[currentIndex] == '.');
+	if (hasMilliseconds) {
+		milliseconds = millisecondsFromFraction(bytes, numberOfBytes, currentIndex, &finalIndex);
+		currentIndex = finalIndex + 1;
+	}
+
+	timeZoneOffset = parsedTimeZoneOffset(bytes, numberOfBytes, currentIndex);
+
+	return dateWithYearMonthDayHourMinuteSecondAndTimeZoneOffset(year, month, day, hour, minute, second, milliseconds, timeZoneOffset);
+}
+
+
+static BOOL dateIsPubDate(const char *bytes, NSUInteger numberOfBytes) {
+
+	/*RFC 822-style dates contain a space or a comma; W3C dates contain neither.*/
+
+	NSUInteger i = 0;
+
+	for (i = 0; i < numberOfBytes; i++) {
+		if (bytes[i] == ' ' || bytes[i] == ',')
+			return YES;
+	}
+
+	return NO;
+}
+
+
+static BOOL numberOfBytesIsOutsideReasonableRange(NSUInteger numberOfBytes) {
+	return numberOfBytes < 6 || numberOfBytes > 50;
+}
+
+
+#pragma mark - API
+
+NSDate *RSDateWithBytes(const char *bytes, NSUInteger numberOfBytes) {
+
+	if (!bytes || numberOfBytesIsOutsideReasonableRange(numberOfBytes))
+		return nil;
+
+	if (dateIsPubDate(bytes, numberOfBytes))
+		return RSParsePubDateWithBytes(bytes, numberOfBytes);
+
+	return RSParseW3CWithBytes(bytes, numberOfBytes);
+}
+
+
+NSDate *RSDateWithString(NSString *dateString) {
+
+	/*UTF8String is NULL for a nil string; strlen(NULL) would crash.*/
+	const char *utf8String = [dateString UTF8String];
+	if (!utf8String)
+		return nil;
+
+	return RSDateWithBytes(utf8String, strlen(utf8String));
+}
+
diff --git a/Frameworks/RSParser/Utilities/RSXMLInternal.h b/Frameworks/RSParser/Utilities/RSXMLInternal.h
new file mode 100755
index 000000000..cb719be88
--- /dev/null
+++ b/Frameworks/RSParser/Utilities/RSXMLInternal.h
@@ -0,0 +1,31 @@
+//
+//  RSXMLInternal.h
+//  RSXML
+//
+//  Created by Brent Simmons on 12/26/16.
+//  Copyright © 2016 Ranchero Software, LLC. All rights reserved.
+//
+
+@import Foundation;
+
+NS_ASSUME_NONNULL_BEGIN
+
+BOOL RSXMLIsEmpty(id _Nullable obj);
+BOOL RSXMLStringIsEmpty(NSString * _Nullable s);
+
+
+@interface NSString (RSXMLInternal)
+
+- (NSString *)rsxml_md5HashString;
+
+@end
+
+
+@interface NSDictionary (RSXMLInternal)
+
+- (nullable id)rsxml_objectForCaseInsensitiveKey:(NSString *)key;
+
+@end
+
+NS_ASSUME_NONNULL_END
+
diff --git a/Frameworks/RSParser/Utilities/RSXMLInternal.m b/Frameworks/RSParser/Utilities/RSXMLInternal.m
new file mode 100755
index 000000000..130b01d48
--- /dev/null
+++ b/Frameworks/RSParser/Utilities/RSXMLInternal.m
@@ -0,0 +1,83 @@
+//
+//  RSXMLInternal.m
+//  RSXML
+//
+//  Created by Brent Simmons on 12/26/16.
+//  Copyright © 2016 Ranchero Software, LLC. All rights reserved.
+//
+
+#import <CommonCrypto/CommonDigest.h>
+#import "RSXMLInternal.h"
+
+
+/*YES for nil and for the NSNull singleton.*/
+static BOOL RSXMLIsNil(id obj) {
+
+	return obj == nil || obj == [NSNull null];
+}
+
+/*YES when obj is nil/NSNull, or when it responds to -count or -length and
+ that value is zero. Any other object counts as non-empty.*/
+BOOL RSXMLIsEmpty(id obj) {
+
+	if (RSXMLIsNil(obj)) {
+		return YES;
+	}
+
+	if ([obj respondsToSelector:@selector(count)]) {
+		return [obj count] < 1;
+	}
+
+	if ([obj respondsToSelector:@selector(length)]) {
+		return [obj length] < 1;
+	}
+
+	return NO; /*Shouldn't get here very often.*/
+}
+
+/*YES when s is nil, NSNull, or has no characters.*/
+BOOL RSXMLStringIsEmpty(NSString *s) {
+
+	return RSXMLIsNil(s) || s.length < 1;
+}
+
+
+@implementation NSString (RSXMLInternal)
+
+/*MD5 digest (CC_MD5_DIGEST_LENGTH bytes) of the receiver's UTF-8 bytes.*/
+- (NSData *)rsxml_md5Hash {
+
+	NSData *data = [self dataUsingEncoding:NSUTF8StringEncoding];
+	unsigned char hash[CC_MD5_DIGEST_LENGTH];
+	CC_MD5(data.bytes, (CC_LONG)data.length, hash);
+
+	return [NSData dataWithBytes:(const void *)hash length:CC_MD5_DIGEST_LENGTH];
+}
+
+/*The MD5 digest of the receiver's UTF-8 bytes as lowercase hex, two characters per byte.*/
+- (NSString *)rsxml_md5HashString {
+
+	NSData *md5Data = [self rsxml_md5Hash];
+	const Byte *bytes = md5Data.bytes;
+	NSUInteger numberOfBytes = md5Data.length;
+
+	NSMutableString *hexString = [NSMutableString stringWithCapacity:numberOfBytes * 2];
+	for (NSUInteger i = 0; i < numberOfBytes; i++) {
+		[hexString appendFormat:@"%02x", bytes[i]];
+	}
+
+	return [hexString copy];
+}
+
+@end
+
+
+@implementation NSDictionary (RSXMLInternal)
+
+
+/*Looks up key exactly first; failing that, returns the value of the first
+ NSString key (in enumeration order) that matches key case-insensitively.
+ Returns nil when nothing matches.*/
+- (nullable id)rsxml_objectForCaseInsensitiveKey:(NSString *)key {
+
+	id obj = self[key];
+	if (obj) {
+		return obj;
+	}
+
+	/*Fast enumeration of a dictionary walks its keys; no need to copy them into an array.*/
+	for (id oneKey in self) {
+
+		if ([oneKey isKindOfClass:[NSString class]] && [key caseInsensitiveCompare:oneKey] == NSOrderedSame) {
+			return self[oneKey];
+		}
+	}
+
+	return nil;
+}
+
+
+@end
diff --git a/Frameworks/RSParser/XML/Feeds/RSAtomParser.h b/Frameworks/RSParser/XML/Feeds/RSAtomParser.h
new file mode 100755
index 000000000..5092ddd19
--- /dev/null
+++ b/Frameworks/RSParser/XML/Feeds/RSAtomParser.h
@@ -0,0 +1,13 @@
+//
+//  RSAtomParser.h
+//  RSXML
+//
+//  Created by Brent Simmons on 1/15/15.
+//  Copyright (c) 2015 Ranchero Software LLC.
+//  All rights reserved.
+//
+
+#import "FeedParser.h"
+
+/*Parses Atom feeds into an RSParsedFeed.
+ NOTE(review): the protocol list was lost in extraction; <FeedParser> is restored
+ from the FeedParser.h import and the methods implemented in the .m -- confirm.*/
+@interface RSAtomParser : NSObject <FeedParser>
+
+@end
diff --git a/Frameworks/RSParser/XML/Feeds/RSAtomParser.m b/Frameworks/RSParser/XML/Feeds/RSAtomParser.m
new file mode 100755
index 000000000..4def3a4ab
--- /dev/null
+++ b/Frameworks/RSParser/XML/Feeds/RSAtomParser.m
@@ -0,0 +1,604 @@
+//
+//  RSAtomParser.m
+//  RSXML
+//
+//  Created by Brent Simmons on 1/15/15.
+//  Copyright (c) 2015 Ranchero Software LLC. All rights reserved.
+//
+
+// NOTE(review): header name lost in extraction; restored for xmlChar -- confirm.
+#import <libxml/xmlstring.h>
+#import "RSAtomParser.h"
+#import "RSSAXParser.h"
+#import "FeedParser.h"
+#import "RSParsedFeed.h"
+#import "RSParsedArticle.h"
+#import "RSXMLData.h"
+#import "NSString+RSXML.h"
+#import "RSDateParser.h"
+
+
+// NOTE(review): protocol list lost in extraction; restored because self is passed
+// to -[RSSAXParser initWithDelegate:] -- confirm.
+@interface RSAtomParser () <RSSAXParserDelegate>
+
+@property (nonatomic) NSData *feedData;
+@property (nonatomic) NSString *urlString;
+@property (nonatomic) BOOL endFeedFound;
+@property (nonatomic) BOOL parsingXHTML;
+@property (nonatomic) BOOL parsingSource;
+@property (nonatomic) BOOL parsingArticle;
+@property (nonatomic) BOOL parsingAuthor;
+@property (nonatomic) NSMutableArray *attributesStack;
+@property (nonatomic, readonly) NSDictionary *currentAttributes;
+@property (nonatomic) NSMutableString *xhtmlString;
+@property (nonatomic) NSString *link;
+@property (nonatomic) NSString *title;
+@property (nonatomic) NSMutableArray *articles;
+@property (nonatomic) NSDate *dateParsed;
+@property (nonatomic) RSSAXParser *parser;
+@property (nonatomic, readonly) RSParsedArticle *currentArticle;
+@property (nonatomic, readonly) NSDate *currentDate;
+
+@end
+
+
+@implementation RSAtomParser
+
+#pragma mark - Class Methods
+
++ (BOOL)canParseFeed:(RSXMLData *)xmlData {
+
+	// Checking for '<feed' and '<entry' within first n characters should do it.
+
+	// NOTE(review): everything between the comment above and the final order
+	// check was lost in extraction and has been reconstructed. The search
+	// window size and the string-decoding fallbacks are assumptions -- confirm
+	// against the upstream file.
+
+	@autoreleasepool {
+
+		NSData *feedData = xmlData.data;
+
+		NSString *s = [[NSString alloc] initWithBytesNoCopy:(void *)feedData.bytes length:feedData.length encoding:NSUTF8StringEncoding freeWhenDone:NO];
+		if (!s) {
+			s = [[NSString alloc] initWithData:feedData encoding:NSUTF8StringEncoding];
+		}
+		if (!s) {
+			s = [[NSString alloc] initWithData:feedData encoding:NSUnicodeStringEncoding];
+		}
+		if (!s) {
+			return NO;
+		}
+
+		static const NSUInteger numberOfCharactersToSearch = 4096;
+		NSRange rangeToSearch = NSMakeRange(0, MIN(s.length, numberOfCharactersToSearch));
+
+		NSRange feedRange = [s rangeOfString:@"<feed" options:NSLiteralSearch range:rangeToSearch];
+		NSRange entryRange = [s rangeOfString:@"<entry" options:NSLiteralSearch range:rangeToSearch];
+		if (feedRange.length < 1 || entryRange.length < 1) {
+			return NO;
+		}
+
+		if (feedRange.location > entryRange.location) {
+			return NO; // Wrong order.
+		}
+	}
+
+	return YES;
+}
+
+
+#pragma mark - Init
+
+- (instancetype)initWithXMLData:(RSXMLData *)xmlData {
+
+	self = [super init];
+	if (!self) {
+		return nil;
+	}
+
+	_feedData = xmlData.data;
+	_urlString = xmlData.urlString;
+	_parser = [[RSSAXParser alloc] initWithDelegate:self];
+	_attributesStack = [NSMutableArray new];
+	_articles = [NSMutableArray new];
+
+	return self;
+}
+
+
+#pragma mark - API
+
+/*Runs the parse and returns the feed built from whatever was collected.
+ error is never written to by this method.*/
+- (RSParsedFeed *)parseFeed:(NSError **)error {
+
+	[self parse];
+
+	RSParsedFeed *parsedFeed = [[RSParsedFeed alloc] initWithURLString:self.urlString title:self.title link:self.link articles:self.articles];
+
+	return parsedFeed;
+}
+
+
+#pragma mark - Constants
+
+static NSString *kTypeKey = @"type";
+static NSString *kXHTMLType = @"xhtml";
+static NSString *kRelKey = @"rel";
+static NSString *kAlternateValue = @"alternate";
+static NSString *kHrefKey = @"href";
+static NSString *kXMLKey = @"xml";
+static NSString *kBaseKey = @"base";
+static NSString *kLangKey = @"lang";
+static NSString *kXMLBaseKey = @"xml:base";
+static NSString *kXMLLangKey = @"xml:lang";
+static NSString *kTextHTMLValue = @"text/html";
+static NSString *kRelatedValue = @"related";
+static NSString *kShortURLValue = @"shorturl";
+static NSString *kHTMLValue = @"html";
+static NSString *kEnValue = @"en";
+static NSString *kTextValue = @"text";
+static NSString *kSelfValue = @"self";
+
+/*Each k…Length below is strlen + 1: it counts the terminating NUL.
+ (internedStringForValue: subtracts 1 before comparing raw bytes.)*/
+
+static const char *kID = "id";
+static const NSInteger kIDLength = 3;
+
+static const char *kTitle = "title";
+static const NSInteger kTitleLength = 6;
+
+static const char *kContent = "content";
+static const NSInteger kContentLength = 8;
+
+static const char *kSummary = "summary";
+static const NSInteger kSummaryLength = 8;
+
+static const char *kLink = "link";
+static const NSInteger kLinkLength = 5;
+
+static const char *kPublished = "published";
+static const NSInteger kPublishedLength = 10;
+
+static const char *kUpdated = "updated";
+static const NSInteger kUpdatedLength = 8;
+
+static const char *kAuthor = "author";
+static const NSInteger kAuthorLength = 7;
+
+static const char *kEntry = "entry";
+static const NSInteger kEntryLength = 6;
+
+static const char *kSource = "source";
+static const NSInteger kSourceLength = 7;
+
+static const char *kFeed = "feed";
+static const NSInteger kFeedLength = 5;
+
+static const char *kType = "type";
+static const NSInteger kTypeLength = 5;
+
+static const char *kRel = "rel";
+static const NSInteger kRelLength = 4;
+
+static const char *kAlternate = "alternate";
+static const NSInteger kAlternateLength = 10;
+
+static const char *kHref = "href";
+static const NSInteger kHrefLength = 5;
+
+static const char *kXML = "xml";
+static const NSInteger kXMLLength = 4;
+
+static const char *kBase = "base";
+static const NSInteger kBaseLength = 5;
+
+static const char *kLang = "lang";
+static const NSInteger kLangLength = 5;
+
+static const char *kTextHTML = "text/html";
+static const NSInteger kTextHTMLLength = 10;
+
+static const char *kRelated = "related";
+static const NSInteger kRelatedLength = 8;
+
+static const char *kShortURL = "shorturl";
+static const NSInteger kShortURLLength = 9;
+
+static const char *kHTML = "html";
+static const NSInteger kHTMLLength = 5;
+
+static const char *kEn = "en";
+static const NSInteger kEnLength = 3;
+
+static const char *kText = "text";
+static const NSInteger kTextLength = 5;
+
+static const char *kSelf = "self";
+static const NSInteger kSelfLength = 5;
+
+
+#pragma mark - Parsing
+
+- (void)parse {
+
+	self.dateParsed = [NSDate date];
+
+	@autoreleasepool {
+		[self.parser parseData:self.feedData];
+		[self.parser finishParsing];
+	}
+
+	// Optimization: make articles do calculations on this background thread.
+	[self.articles makeObjectsPerformSelector:@selector(calculateArticleID)];
+}
+
+
+/*Appends a new, empty article; it becomes currentArticle.*/
+- (void)addArticle {
+
+	RSParsedArticle *article = [[RSParsedArticle alloc] initWithFeedURL:self.urlString];
+	article.dateParsed = self.dateParsed;
+
+	[self.articles addObject:article];
+}
+
+
+- (RSParsedArticle *)currentArticle {
+
+	return self.articles.lastObject;
+}
+
+
+- (NSDictionary *)currentAttributes {
+
+	return self.attributesStack.lastObject;
+}
+
+
+- (NSDate *)currentDate {
+
+	return RSDateWithBytes(self.parser.currentCharacters.bytes, self.parser.currentCharacters.length);
+}
+
+
+/*Uses the first feed-level alternate link as the feed's home page link.*/
+- (void)addFeedLink {
+
+	if (self.link.length > 0) {
+		return;
+	}
+
+	// A link with no rel attribute is an alternate link (RFC 4287, 4.2.7.2).
+	NSString *rel = self.currentAttributes[kRelKey];
+	if (rel.length < 1) {
+		rel = kAlternateValue;
+	}
+
+	// Compare by value: pointer equality only holds when the attribute value was interned.
+	if ([rel isEqualToString:kAlternateValue]) {
+		self.link = self.currentAttributes[kHrefKey];
+	}
+}
+
+
+- (void)addFeedTitle {
+
+	if (self.title.length < 1) {
+		self.title = self.parser.currentStringWithTrimmedWhitespace;
+	}
+}
+
+/*Stores the current link element's href on the current article:
+ rel="alternate" (or no rel) sets link, rel="related" sets permalink.
+ The first link of each kind wins.*/
+- (void)addLink {
+
+	NSString *urlString = self.currentAttributes[kHrefKey];
+	if (urlString.length < 1) {
+		return;
+	}
+
+	NSString *rel = self.currentAttributes[kRelKey];
+	if (rel.length < 1) {
+		rel = kAlternateValue;
+	}
+
+	// Compare by value: pointer equality only holds when the attribute value was interned.
+	if ([rel isEqualToString:kAlternateValue]) {
+		if (!self.currentArticle.link) {
+			self.currentArticle.link = urlString;
+		}
+	}
+	else if ([rel isEqualToString:kRelatedValue]) {
+		if (!self.currentArticle.permalink) {
+			self.currentArticle.permalink = urlString;
+		}
+	}
+}
+
+
+- (void)addContent {
+
+	self.currentArticle.body = [self currentStringWithHTMLEntitiesDecoded];
+}
+
+
+/*A summary is used as the body only when no body has been set yet.*/
+- (void)addSummary {
+
+	if (!self.currentArticle.body) {
+		self.currentArticle.body = [self currentStringWithHTMLEntitiesDecoded];
+	}
+}
+
+
+- (NSString *)currentStringWithHTMLEntitiesDecoded {
+
+	return [self.parser.currentStringWithTrimmedWhitespace rs_stringByDecodingHTMLEntities];
+}
+
+
+/*Called at the end of an element inside an entry. Elements with a namespace prefix are ignored.*/
+- (void)addArticleElement:(const xmlChar *)localName prefix:(const xmlChar *)prefix {
+
+	if (prefix) {
+		return;
+	}
+
+	if (RSSAXEqualTags(localName, kID, kIDLength)) {
+		self.currentArticle.guid = self.parser.currentStringWithTrimmedWhitespace;
+	}
+
+	else if (RSSAXEqualTags(localName, kTitle, kTitleLength)) {
+		self.currentArticle.title = [self currentStringWithHTMLEntitiesDecoded];
+	}
+
+	else if (RSSAXEqualTags(localName, kContent, kContentLength)) {
+		[self addContent];
+	}
+
+	else if (RSSAXEqualTags(localName, kSummary, kSummaryLength)) {
+		[self addSummary];
+	}
+
+	else if (RSSAXEqualTags(localName, kLink, kLinkLength)) {
+		[self addLink];
+	}
+
+	else if (RSSAXEqualTags(localName, kPublished, kPublishedLength)) {
+		self.currentArticle.datePublished = self.currentDate;
+	}
+
+	else if (RSSAXEqualTags(localName, kUpdated, kUpdatedLength)) {
+		self.currentArticle.dateModified = self.currentDate;
+	}
+}
+
+
+/*Appends an opening tag, with the current attributes, to the XHTML being collected.*/
+- (void)addXHTMLTag:(const xmlChar *)localName {
+
+	if (!localName) {
+		return;
+	}
+
+	// stringWithUTF8String: returns nil for invalid UTF-8; appendString:nil throws.
+	NSString *tagName = [NSString stringWithUTF8String:(const char *)localName];
+	if (!tagName) {
+		return;
+	}
+
+	[self.xhtmlString appendString:@"<"];
+	[self.xhtmlString appendString:tagName];
+
+	if (self.currentAttributes.count < 1) {
+		[self.xhtmlString appendString:@">"];
+		return;
+	}
+
+	for (NSString *oneKey in self.currentAttributes) {
+
+		[self.xhtmlString appendString:@" "];
+
+		NSString *oneValue = self.currentAttributes[oneKey];
+		[self.xhtmlString appendString:oneKey];
+
+		[self.xhtmlString appendString:@"=\""];
+
+		// The value is written inside double quotes, so any quote in it has to be escaped.
+		oneValue = [oneValue stringByReplacingOccurrencesOfString:@"\"" withString:@"&quot;"];
+		[self.xhtmlString appendString:oneValue];
+
+		[self.xhtmlString appendString:@"\""];
+	}
+
+	[self.xhtmlString appendString:@">"];
+}
+
+
+#pragma mark - RSSAXParserDelegate
+
+- (void)saxParser:(RSSAXParser *)SAXParser XMLStartElement:(const xmlChar *)localName prefix:(const xmlChar *)prefix uri:(const xmlChar *)uri numberOfNamespaces:(NSInteger)numberOfNamespaces namespaces:(const xmlChar **)namespaces numberOfAttributes:(NSInteger)numberOfAttributes numberDefaulted:(int)numberDefaulted attributes:(const xmlChar **)attributes {
+
+	if (self.endFeedFound) {
+		return;
+	}
+
+	// Every start element pushes its attributes (possibly empty); the end element pops them.
+	NSDictionary *xmlAttributes = [self.parser attributesDictionary:attributes numberOfAttributes:numberOfAttributes];
+	if (!xmlAttributes) {
+		xmlAttributes = [NSDictionary dictionary];
+	}
+	[self.attributesStack addObject:xmlAttributes];
+
+	if (self.parsingXHTML) {
+		[self addXHTMLTag:localName];
+		return;
+	}
+
+	if (RSSAXEqualTags(localName, kEntry, kEntryLength)) {
+		self.parsingArticle = YES;
+		[self addArticle];
+		return;
+	}
+
+	if (RSSAXEqualTags(localName, kAuthor, kAuthorLength)) {
+		self.parsingAuthor = YES;
+		return;
+	}
+
+	if (RSSAXEqualTags(localName, kSource, kSourceLength)) {
+		self.parsingSource = YES;
+		return;
+	}
+
+	BOOL isContentTag = RSSAXEqualTags(localName, kContent, kContentLength);
+	BOOL isSummaryTag = RSSAXEqualTags(localName, kSummary, kSummaryLength);
+	if (self.parsingArticle && (isContentTag || isSummaryTag)) {
+
+		// type="xhtml" content is markup, not text: it is rebuilt tag by tag into xhtmlString.
+		NSString *contentType = xmlAttributes[kTypeKey];
+		if ([contentType isEqualToString:kXHTMLType]) {
+			self.parsingXHTML = YES;
+			self.xhtmlString = [NSMutableString stringWithString:@""];
+			return;
+		}
+	}
+
+	if (!self.parsingArticle && RSSAXEqualTags(localName, kLink, kLinkLength)) {
+		[self addFeedLink];
+		return;
+	}
+
+	[self.parser beginStoringCharacters];
+}
+
+
+- (void)saxParser:(RSSAXParser *)SAXParser XMLEndElement:(const xmlChar *)localName prefix:(const xmlChar *)prefix uri:(const xmlChar *)uri {
+
+	if (RSSAXEqualTags(localName, kFeed, kFeedLength)) {
+		self.endFeedFound = YES;
+		return;
+	}
+
+	if (self.endFeedFound) {
+		return;
+	}
+
+	if (self.parsingXHTML) {
+
+		BOOL isContentTag = RSSAXEqualTags(localName, kContent, kContentLength);
+		BOOL isSummaryTag = RSSAXEqualTags(localName, kSummary, kSummaryLength);
+
+		if (self.parsingArticle && (isContentTag || isSummaryTag)) {
+
+			if (isContentTag) {
+				self.currentArticle.body = [self.xhtmlString copy];
+			}
+
+			else if (isSummaryTag) {
+				if (self.currentArticle.body.length < 1) {
+					self.currentArticle.body = [self.xhtmlString copy];
+				}
+			}
+		}
+
+		if (isContentTag || isSummaryTag) {
+			self.parsingXHTML = NO;
+		}
+
+		// NOTE(review): the closing-tag append was lost in extraction (only an
+		// empty-string append survived); reconstructed to mirror addXHTMLTag: -- confirm.
+		NSString *tagName = localName ? [NSString stringWithUTF8String:(const char *)localName] : nil;
+		if (tagName) {
+			[self.xhtmlString appendString:@"</"];
+			[self.xhtmlString appendString:tagName];
+			[self.xhtmlString appendString:@">"];
+		}
+	}
+
+	else if (RSSAXEqualTags(localName, kAuthor, kAuthorLength)) {
+		self.parsingAuthor = NO;
+	}
+
+	else if (RSSAXEqualTags(localName, kEntry, kEntryLength)) {
+		self.parsingArticle = NO;
+	}
+
+	else if (self.parsingArticle && !self.parsingSource) {
+		[self addArticleElement:localName prefix:prefix];
+	}
+
+	else if (RSSAXEqualTags(localName, kSource, kSourceLength)) {
+		self.parsingSource = NO;
+	}
+
+	else if (!self.parsingArticle && !self.parsingSource && RSSAXEqualTags(localName, kTitle, kTitleLength)) {
+		[self addFeedTitle];
+	}
+	[self.attributesStack removeLastObject];
+}
+
+
+/*Returns a shared constant for well-known attribute names, or nil for any other name.*/
+- (NSString *)saxParser:(RSSAXParser *)SAXParser internedStringForName:(const xmlChar *)name prefix:(const xmlChar *)prefix {
+
+	if (prefix && RSSAXEqualTags(prefix, kXML, kXMLLength)) {
+
+		if (RSSAXEqualTags(name, kBase, kBaseLength)) {
+			return kXMLBaseKey;
+		}
+		if (RSSAXEqualTags(name, kLang, kLangLength)) {
+			return kXMLLangKey;
+		}
+	}
+
+	if (prefix) {
+		return nil;
+	}
+
+	if (RSSAXEqualTags(name, kRel, kRelLength)) {
+		return kRelKey;
+	}
+
+	if (RSSAXEqualTags(name, kType, kTypeLength)) {
+		return kTypeKey;
+	}
+
+	if (RSSAXEqualTags(name, kHref, kHrefLength)) {
+		return kHrefKey;
+	}
+
+	if (RSSAXEqualTags(name, kAlternate, kAlternateLength)) {
+		return kAlternateValue;
+	}
+
+	return nil;
+}
+
+
+static BOOL equalBytes(const void *bytes1, const void *bytes2, NSUInteger length) {
+
+	return memcmp(bytes1, bytes2, length) == 0;
+}
+
+
+/*Returns a shared constant for well-known attribute values, or nil for any other value.
+ bytes is not NUL-terminated, hence the lengths without the trailing NUL.*/
+- (NSString *)saxParser:(RSSAXParser *)SAXParser internedStringForValue:(const void *)bytes length:(NSUInteger)length {
+
+	static const NSUInteger alternateLength = kAlternateLength - 1;
+	static const NSUInteger textHTMLLength = kTextHTMLLength - 1;
+	static const NSUInteger relatedLength = kRelatedLength - 1;
+	static const NSUInteger shortURLLength = kShortURLLength - 1;
+	static const NSUInteger htmlLength = kHTMLLength - 1;
+	static const NSUInteger enLength = kEnLength - 1;
+	static const NSUInteger textLength = kTextLength - 1;
+	static const NSUInteger selfLength = kSelfLength - 1;
+
+	if (length == alternateLength && equalBytes(bytes, kAlternate, alternateLength)) {
+		return kAlternateValue;
+	}
+
+	if (length == textHTMLLength && equalBytes(bytes, kTextHTML, textHTMLLength)) {
+		return kTextHTMLValue;
+	}
+
+	if (length == relatedLength && equalBytes(bytes, kRelated, relatedLength)) {
+		return kRelatedValue;
+	}
+
+	if (length == shortURLLength && equalBytes(bytes, kShortURL, shortURLLength)) {
+		return kShortURLValue;
+	}
+
+	if (length == htmlLength && equalBytes(bytes, kHTML, htmlLength)) {
+		return kHTMLValue;
+	}
+
+	if (length == enLength && equalBytes(bytes, kEn, enLength)) {
+		return kEnValue;
+	}
+
+	if (length == textLength && equalBytes(bytes, kText, textLength)) {
+		return kTextValue;
+	}
+
+	if (length == selfLength && equalBytes(bytes, kSelf, selfLength)) {
+		return kSelfValue;
+	}
+
+	return nil;
+}
+
+
+- (void)saxParser:(RSSAXParser *)SAXParser XMLCharactersFound:(const unsigned char *)characters length:(NSUInteger)length {
+
+	if (self.parsingXHTML) {
+		// The string is nil when the bytes aren't valid UTF-8; appendString:nil throws.
+		NSString *text = [[NSString alloc] initWithBytesNoCopy:(void *)characters length:length encoding:NSUTF8StringEncoding freeWhenDone:NO];
+		if (text) {
+			[self.xhtmlString appendString:text];
+		}
+	}
+}
+
+@end
diff --git a/Frameworks/RSParser/XML/Feeds/RSRSSParser.h b/Frameworks/RSParser/XML/Feeds/RSRSSParser.h
new file mode 100755
index 000000000..65b20fd04
--- /dev/null
+++ b/Frameworks/RSParser/XML/Feeds/RSRSSParser.h
@@ -0,0 +1,13 @@
+//
+//  RSRSSParser.h
+//  RSXML
+//
+//  Created by Brent Simmons on 1/6/15.
+//  Copyright (c) 2015 Ranchero Software LLC. All rights reserved.
+// + +#import "FeedParser.h" + +@interface RSRSSParser : NSObject + +@end diff --git a/Frameworks/RSParser/XML/Feeds/RSRSSParser.m b/Frameworks/RSParser/XML/Feeds/RSRSSParser.m new file mode 100755 index 000000000..374f42f3d --- /dev/null +++ b/Frameworks/RSParser/XML/Feeds/RSRSSParser.m @@ -0,0 +1,469 @@ +// +// RSRSSParser.m +// RSXML +// +// Created by Brent Simmons on 1/6/15. +// Copyright (c) 2015 Ranchero Software LLC. All rights reserved. +// + +#import +#import "RSRSSParser.h" +#import "RSSAXParser.h" +#import "RSParsedFeed.h" +#import "RSParsedArticle.h" +#import "RSXMLData.h" +#import "RSXMLInternal.h" +#import "NSString+RSXML.h" +#import "RSDateParser.h" + + +@interface RSRSSParser () + +@property (nonatomic) NSData *feedData; +@property (nonatomic) NSString *urlString; +@property (nonatomic) NSDictionary *currentAttributes; +@property (nonatomic) RSSAXParser *parser; +@property (nonatomic) NSMutableArray *articles; +@property (nonatomic) BOOL parsingArticle; +@property (nonatomic, readonly) RSParsedArticle *currentArticle; +@property (nonatomic) BOOL parsingChannelImage; +@property (nonatomic, readonly) NSDate *currentDate; +@property (nonatomic) BOOL endRSSFound; +@property (nonatomic) NSString *link; +@property (nonatomic) NSString *title; +@property (nonatomic) NSDate *dateParsed; + +@end + + +@implementation RSRSSParser + +#pragma mark - Class Methods + ++ (BOOL)canParseFeed:(RSXMLData *)xmlData { + + // Checking for '' within first n characters should do it. 
+ // TODO: handle RSS 1.0 + + @autoreleasepool { + + NSData *feedData = xmlData.data; + NSString *s = [[NSString alloc] initWithBytesNoCopy:(void *)feedData.bytes length:feedData.length encoding:NSUTF8StringEncoding freeWhenDone:NO]; + if (!s) { + s = [[NSString alloc] initWithData:feedData encoding:NSUTF8StringEncoding]; + } + if (!s) { + s = [[NSString alloc] initWithData:feedData encoding:NSUnicodeStringEncoding]; + } + if (!s) { + return NO; + } + + static const NSInteger numberOfCharactersToSearch = 4096; + NSRange rangeToSearch = NSMakeRange(0, numberOfCharactersToSearch); + if (s.length < numberOfCharactersToSearch) { + rangeToSearch.length = s.length; + } + + NSRange rssRange = [s rangeOfString:@"" options:NSLiteralSearch range:rangeToSearch]; + if (rssRange.length < 1 || channelRange.length < 1) { + return NO; + } + + if (rssRange.location > channelRange.location) { + return NO; // Wrong order. + } + } + + return YES; +} + + +#pragma mark - Init + +- (instancetype)initWithXMLData:(RSXMLData *)xmlData { + + self = [super init]; + if (!self) { + return nil; + } + + _feedData = xmlData.data; + _urlString = xmlData.urlString; + _parser = [[RSSAXParser alloc] initWithDelegate:self]; + _articles = [NSMutableArray new]; + + return self; +} + + +#pragma mark - API + +- (RSParsedFeed *)parseFeed:(NSError **)error { + + [self parse]; + + RSParsedFeed *parsedFeed = [[RSParsedFeed alloc] initWithURLString:self.urlString title:self.title link:self.link articles:self.articles]; + + return parsedFeed; +} + + +#pragma mark - Constants + +static NSString *kIsPermaLinkKey = @"isPermaLink"; +static NSString *kURLKey = @"url"; +static NSString *kLengthKey = @"length"; +static NSString *kTypeKey = @"type"; +static NSString *kFalseValue = @"false"; +static NSString *kTrueValue = @"true"; +static NSString *kContentEncodedKey = @"content:encoded"; +static NSString *kDCDateKey = @"dc:date"; +static NSString *kDCCreatorKey = @"dc:creator"; +static NSString *kRDFAboutKey = 
@"rdf:about"; + +static const char *kItem = "item"; +static const NSInteger kItemLength = 5; + +static const char *kImage = "image"; +static const NSInteger kImageLength = 6; + +static const char *kLink = "link"; +static const NSInteger kLinkLength = 5; + +static const char *kTitle = "title"; +static const NSInteger kTitleLength = 6; + +static const char *kDC = "dc"; +static const NSInteger kDCLength = 3; + +static const char *kCreator = "creator"; +static const NSInteger kCreatorLength = 8; + +static const char *kDate = "date"; +static const NSInteger kDateLength = 5; + +static const char *kContent = "content"; +static const NSInteger kContentLength = 8; + +static const char *kEncoded = "encoded"; +static const NSInteger kEncodedLength = 8; + +static const char *kGuid = "guid"; +static const NSInteger kGuidLength = 5; + +static const char *kPubDate = "pubDate"; +static const NSInteger kPubDateLength = 8; + +static const char *kAuthor = "author"; +static const NSInteger kAuthorLength = 7; + +static const char *kDescription = "description"; +static const NSInteger kDescriptionLength = 12; + +static const char *kRSS = "rss"; +static const NSInteger kRSSLength = 4; + +static const char *kURL = "url"; +static const NSInteger kURLLength = 4; + +static const char *kLength = "length"; +static const NSInteger kLengthLength = 7; + +static const char *kType = "type"; +static const NSInteger kTypeLength = 5; + +static const char *kIsPermaLink = "isPermaLink"; +static const NSInteger kIsPermaLinkLength = 12; + +static const char *kRDF = "rdf"; +static const NSInteger kRDFlength = 4; + +static const char *kAbout = "about"; +static const NSInteger kAboutLength = 6; + +static const char *kFalse = "false"; +static const NSInteger kFalseLength = 6; + +static const char *kTrue = "true"; +static const NSInteger kTrueLength = 5; + + +#pragma mark - Parsing + +- (void)parse { + + self.dateParsed = [NSDate date]; + + @autoreleasepool { + [self.parser parseData:self.feedData]; + 
[self.parser finishParsing]; + } + + // Optimization: make articles do calculations on this background thread. + [self.articles makeObjectsPerformSelector:@selector(calculateArticleID)]; +} + + +- (void)addArticle { + + RSParsedArticle *article = [[RSParsedArticle alloc] initWithFeedURL:self.urlString]; + article.dateParsed = self.dateParsed; + + [self.articles addObject:article]; +} + + +- (RSParsedArticle *)currentArticle { + + return self.articles.lastObject; +} + + +- (void)addFeedElement:(const xmlChar *)localName prefix:(const xmlChar *)prefix { + + if (prefix != NULL) { + return; + } + + if (RSSAXEqualTags(localName, kLink, kLinkLength)) { + if (!self.link) { + self.link = self.parser.currentStringWithTrimmedWhitespace; + } + } + + else if (RSSAXEqualTags(localName, kTitle, kTitleLength)) { + self.title = self.parser.currentStringWithTrimmedWhitespace; + } +} + + +- (void)addDCElement:(const xmlChar *)localName { + + if (RSSAXEqualTags(localName, kCreator, kCreatorLength)) { + + self.currentArticle.author = self.parser.currentStringWithTrimmedWhitespace; + } + else if (RSSAXEqualTags(localName, kDate, kDateLength)) { + + self.currentArticle.datePublished = self.currentDate; + } +} + + +- (void)addGuid { + + self.currentArticle.guid = self.parser.currentStringWithTrimmedWhitespace; + + NSString *isPermaLinkValue = [self.currentAttributes rsxml_objectForCaseInsensitiveKey:@"ispermalink"]; + if (!isPermaLinkValue || ![isPermaLinkValue isEqualToString:@"false"]) { + self.currentArticle.permalink = [self urlString:self.currentArticle.guid]; + } +} + + +- (NSString *)urlString:(NSString *)s { + + /*Resolve against home page URL (if available) or feed URL.*/ + + if ([[s lowercaseString] hasPrefix:@"http"]) { + return s; + } + + if (!self.link) { + //TODO: get feed URL and use that to resolve URL.*/ + return s; + } + + NSURL *baseURL = [NSURL URLWithString:self.link]; + if (!baseURL) { + return s; + } + + NSURL *resolvedURL = [NSURL URLWithString:s 
relativeToURL:baseURL]; + if (resolvedURL.absoluteString) { + return resolvedURL.absoluteString; + } + + return s; +} + + +- (NSString *)currentStringWithHTMLEntitiesDecoded { + + return [self.parser.currentStringWithTrimmedWhitespace rs_stringByDecodingHTMLEntities]; +} + +- (void)addArticleElement:(const xmlChar *)localName prefix:(const xmlChar *)prefix { + + if (RSSAXEqualTags(prefix, kDC, kDCLength)) { + + [self addDCElement:localName]; + return; + } + + if (RSSAXEqualTags(prefix, kContent, kContentLength) && RSSAXEqualTags(localName, kEncoded, kEncodedLength)) { + + self.currentArticle.body = [self currentStringWithHTMLEntitiesDecoded]; + return; + } + + if (prefix != NULL) { + return; + } + + if (RSSAXEqualTags(localName, kGuid, kGuidLength)) { + [self addGuid]; + } + else if (RSSAXEqualTags(localName, kPubDate, kPubDateLength)) { + self.currentArticle.datePublished = self.currentDate; + } + else if (RSSAXEqualTags(localName, kAuthor, kAuthorLength)) { + self.currentArticle.author = self.parser.currentStringWithTrimmedWhitespace; + } + else if (RSSAXEqualTags(localName, kLink, kLinkLength)) { + self.currentArticle.link = [self urlString:self.parser.currentStringWithTrimmedWhitespace]; + } + else if (RSSAXEqualTags(localName, kDescription, kDescriptionLength)) { + + if (!self.currentArticle.body) { + self.currentArticle.body = [self currentStringWithHTMLEntitiesDecoded]; + } + } + else if (RSSAXEqualTags(localName, kTitle, kTitleLength)) { + self.currentArticle.title = [self currentStringWithHTMLEntitiesDecoded]; + } +} + + +- (NSDate *)currentDate { + + return RSDateWithBytes(self.parser.currentCharacters.bytes, self.parser.currentCharacters.length); +} + + +#pragma mark - RSSAXParserDelegate + +- (void)saxParser:(RSSAXParser *)SAXParser XMLStartElement:(const xmlChar *)localName prefix:(const xmlChar *)prefix uri:(const xmlChar *)uri numberOfNamespaces:(NSInteger)numberOfNamespaces namespaces:(const xmlChar **)namespaces 
numberOfAttributes:(NSInteger)numberOfAttributes numberDefaulted:(int)numberDefaulted attributes:(const xmlChar **)attributes { + + if (self.endRSSFound) { + return; + } + + NSDictionary *xmlAttributes = nil; + if (RSSAXEqualTags(localName, kItem, kItemLength) || RSSAXEqualTags(localName, kGuid, kGuidLength)) { + xmlAttributes = [self.parser attributesDictionary:attributes numberOfAttributes:numberOfAttributes]; + } + if (self.currentAttributes != xmlAttributes) { + self.currentAttributes = xmlAttributes; + } + + if (!prefix && RSSAXEqualTags(localName, kItem, kItemLength)) { + + [self addArticle]; + self.parsingArticle = YES; + + if (xmlAttributes && xmlAttributes[kRDFAboutKey]) { /*RSS 1.0 guid*/ + self.currentArticle.guid = xmlAttributes[kRDFAboutKey]; + self.currentArticle.permalink = self.currentArticle.guid; + } + } + + else if (!prefix && RSSAXEqualTags(localName, kImage, kImageLength)) { + self.parsingChannelImage = YES; + } + + if (!self.parsingChannelImage) { + [self.parser beginStoringCharacters]; + } +} + + +- (void)saxParser:(RSSAXParser *)SAXParser XMLEndElement:(const xmlChar *)localName prefix:(const xmlChar *)prefix uri:(const xmlChar *)uri { + + if (self.endRSSFound) { + return; + } + + if (RSSAXEqualTags(localName, kRSS, kRSSLength)) { + self.endRSSFound = YES; + } + + else if (RSSAXEqualTags(localName, kImage, kImageLength)) { + self.parsingChannelImage = NO; + } + + else if (RSSAXEqualTags(localName, kItem, kItemLength)) { + self.parsingArticle = NO; + } + + else if (self.parsingArticle) { + [self addArticleElement:localName prefix:prefix]; + } + + else if (!self.parsingChannelImage) { + [self addFeedElement:localName prefix:prefix]; + } +} + + +- (NSString *)saxParser:(RSSAXParser *)SAXParser internedStringForName:(const xmlChar *)name prefix:(const xmlChar *)prefix { + + if (RSSAXEqualTags(prefix, kRDF, kRDFlength)) { + + if (RSSAXEqualTags(name, kAbout, kAboutLength)) { + return kRDFAboutKey; + } + + return nil; + } + + if (prefix) { + 
return nil; + } + + if (RSSAXEqualTags(name, kIsPermaLink, kIsPermaLinkLength)) { + return kIsPermaLinkKey; + } + + if (RSSAXEqualTags(name, kURL, kURLLength)) { + return kURLKey; + } + + if (RSSAXEqualTags(name, kLength, kLengthLength)) { + return kLengthKey; + } + + if (RSSAXEqualTags(name, kType, kTypeLength)) { + return kTypeKey; + } + + return nil; +} + + +static BOOL equalBytes(const void *bytes1, const void *bytes2, NSUInteger length) { + + return memcmp(bytes1, bytes2, length) == 0; +} + + +- (NSString *)saxParser:(RSSAXParser *)SAXParser internedStringForValue:(const void *)bytes length:(NSUInteger)length { + + static const NSUInteger falseLength = kFalseLength - 1; + static const NSUInteger trueLength = kTrueLength - 1; + + if (length == falseLength && equalBytes(bytes, kFalse, falseLength)) { + return kFalseValue; + } + + if (length == trueLength && equalBytes(bytes, kTrue, trueLength)) { + return kTrueValue; + } + + return nil; +} + + +@end diff --git a/Frameworks/RSParser/XML/OPML/RSOPMLAttributes.h b/Frameworks/RSParser/XML/OPML/RSOPMLAttributes.h new file mode 100755 index 000000000..0ffd20242 --- /dev/null +++ b/Frameworks/RSParser/XML/OPML/RSOPMLAttributes.h @@ -0,0 +1,36 @@ +// +// RSOPMLAttributes.h +// RSXML +// +// Created by Brent Simmons on 2/28/16. +// Copyright © 2016 Ranchero Software, LLC. All rights reserved. +// + +@import Foundation; + +// OPML allows for arbitrary attributes. +// These are the common attributes in OPML files used as RSS subscription lists. + +extern NSString *OPMLTextKey; //text +extern NSString *OPMLTitleKey; //title +extern NSString *OPMLDescriptionKey; //description +extern NSString *OPMLTypeKey; //type +extern NSString *OPMLVersionKey; //version +extern NSString *OPMLHMTLURLKey; //htmlUrl +extern NSString *OPMLXMLURLKey; //xmlUrl + + +@interface NSDictionary (RSOPMLAttributes) + +// A frequent error in OPML files is to mess up the capitalization, +// so these do a case-insensitive lookup. 
+ +@property (nonatomic, readonly) NSString *opml_text; +@property (nonatomic, readonly) NSString *opml_title; +@property (nonatomic, readonly) NSString *opml_description; +@property (nonatomic, readonly) NSString *opml_type; +@property (nonatomic, readonly) NSString *opml_version; +@property (nonatomic, readonly) NSString *opml_htmlUrl; +@property (nonatomic, readonly) NSString *opml_xmlUrl; + +@end diff --git a/Frameworks/RSParser/XML/OPML/RSOPMLAttributes.m b/Frameworks/RSParser/XML/OPML/RSOPMLAttributes.m new file mode 100755 index 000000000..4fb5e9bd4 --- /dev/null +++ b/Frameworks/RSParser/XML/OPML/RSOPMLAttributes.m @@ -0,0 +1,66 @@ +// +// RSOPMLAttributes.m +// RSXML +// +// Created by Brent Simmons on 2/28/16. +// Copyright © 2016 Ranchero Software, LLC. All rights reserved. +// + +#import "RSOPMLAttributes.h" +#import "RSXMLInternal.h" + + +NSString *OPMLTextKey = @"text"; +NSString *OPMLTitleKey = @"title"; +NSString *OPMLDescriptionKey = @"description"; +NSString *OPMLTypeKey = @"type"; +NSString *OPMLVersionKey = @"version"; +NSString *OPMLHMTLURLKey = @"htmlUrl"; +NSString *OPMLXMLURLKey = @"xmlUrl"; + + +@implementation NSDictionary (RSOPMLAttributes) + +- (NSString *)opml_text { + + return [self rsxml_objectForCaseInsensitiveKey:OPMLTextKey]; +} + + +- (NSString *)opml_title { + + return [self rsxml_objectForCaseInsensitiveKey:OPMLTitleKey]; +} + + +- (NSString *)opml_description { + + return [self rsxml_objectForCaseInsensitiveKey:OPMLDescriptionKey]; +} + + +- (NSString *)opml_type { + + return [self rsxml_objectForCaseInsensitiveKey:OPMLTypeKey]; +} + + +- (NSString *)opml_version { + + return [self rsxml_objectForCaseInsensitiveKey:OPMLVersionKey]; +} + + +- (NSString *)opml_htmlUrl { + + return [self rsxml_objectForCaseInsensitiveKey:OPMLHMTLURLKey]; +} + + +- (NSString *)opml_xmlUrl { + + return [self rsxml_objectForCaseInsensitiveKey:OPMLXMLURLKey]; +} + + +@end diff --git a/Frameworks/RSParser/XML/OPML/RSOPMLDocument.h 
b/Frameworks/RSParser/XML/OPML/RSOPMLDocument.h new file mode 100755 index 000000000..bd33f1666 --- /dev/null +++ b/Frameworks/RSParser/XML/OPML/RSOPMLDocument.h @@ -0,0 +1,17 @@ +// +// RSOPMLDocument.h +// RSXML +// +// Created by Brent Simmons on 2/28/16. +// Copyright © 2016 Ranchero Software, LLC. All rights reserved. +// + +@import Foundation; +#import "RSOPMLItem.h" + + +@interface RSOPMLDocument : RSOPMLItem + +@property (nonatomic) NSString *title; + +@end diff --git a/Frameworks/RSParser/XML/OPML/RSOPMLDocument.m b/Frameworks/RSParser/XML/OPML/RSOPMLDocument.m new file mode 100755 index 000000000..ec22cd74d --- /dev/null +++ b/Frameworks/RSParser/XML/OPML/RSOPMLDocument.m @@ -0,0 +1,13 @@ +// +// RSOPMLDocument.m +// RSXML +// +// Created by Brent Simmons on 2/28/16. +// Copyright © 2016 Ranchero Software, LLC. All rights reserved. +// + +#import "RSOPMLDocument.h" + +@implementation RSOPMLDocument + +@end diff --git a/Frameworks/RSParser/XML/OPML/RSOPMLFeedSpecifier.h b/Frameworks/RSParser/XML/OPML/RSOPMLFeedSpecifier.h new file mode 100755 index 000000000..785c4cab3 --- /dev/null +++ b/Frameworks/RSParser/XML/OPML/RSOPMLFeedSpecifier.h @@ -0,0 +1,23 @@ +// +// RSOPMLFeedSpecifier.h +// RSXML +// +// Created by Brent Simmons on 2/28/16. +// Copyright © 2016 Ranchero Software, LLC. All rights reserved. 
+// + +@import Foundation; + + +@interface RSOPMLFeedSpecifier : NSObject + + +- (instancetype)initWithTitle:(NSString *)title feedDescription:(NSString *)feedDescription homePageURL:(NSString *)homePageURL feedURL:(NSString *)feedURL; + +@property (nonatomic, readonly) NSString *title; +@property (nonatomic, readonly) NSString *feedDescription; +@property (nonatomic, readonly) NSString *homePageURL; +@property (nonatomic, readonly) NSString *feedURL; + + +@end diff --git a/Frameworks/RSParser/XML/OPML/RSOPMLFeedSpecifier.m b/Frameworks/RSParser/XML/OPML/RSOPMLFeedSpecifier.m new file mode 100755 index 000000000..e64558930 --- /dev/null +++ b/Frameworks/RSParser/XML/OPML/RSOPMLFeedSpecifier.m @@ -0,0 +1,50 @@ +// +// RSOPMLFeedSpecifier.m +// RSXML +// +// Created by Brent Simmons on 2/28/16. +// Copyright © 2016 Ranchero Software, LLC. All rights reserved. +// + +#import "RSOPMLFeedSpecifier.h" +#import "RSXMLInternal.h" + + +@implementation RSOPMLFeedSpecifier + +- (instancetype)initWithTitle:(NSString *)title feedDescription:(NSString *)feedDescription homePageURL:(NSString *)homePageURL feedURL:(NSString *)feedURL { + + NSParameterAssert(!RSXMLIsEmpty(feedURL)); + + self = [super init]; + if (!self) { + return nil; + } + + if (RSXMLIsEmpty(title)) { + _title = nil; + } + else { + _title = title; + } + + if (RSXMLIsEmpty(feedDescription)) { + _feedDescription = nil; + } + else { + _feedDescription = feedDescription; + } + + if (RSXMLIsEmpty(homePageURL)) { + _homePageURL = nil; + } + else { + _homePageURL = homePageURL; + } + + _feedURL = feedURL; + + return self; +} + +@end diff --git a/Frameworks/RSParser/XML/OPML/RSOPMLItem.h b/Frameworks/RSParser/XML/OPML/RSOPMLItem.h new file mode 100755 index 000000000..853cf9bc4 --- /dev/null +++ b/Frameworks/RSParser/XML/OPML/RSOPMLItem.h @@ -0,0 +1,26 @@ +// +// RSOPMLItem.h +// RSXML +// +// Created by Brent Simmons on 2/28/16. +// Copyright © 2016 Ranchero Software, LLC. All rights reserved. 
+// + +@import Foundation; + +@class RSOPMLFeedSpecifier; + + +@interface RSOPMLItem : NSObject + +@property (nonatomic) NSDictionary *attributes; +@property (nonatomic) NSArray *children; + +- (void)addChild:(RSOPMLItem *)child; + +@property (nonatomic, readonly) RSOPMLFeedSpecifier *OPMLFeedSpecifier; //May be nil. + +@property (nonatomic, readonly) NSString *titleFromAttributes; //May be nil. +@property (nonatomic, readonly) BOOL isFolder; + +@end diff --git a/Frameworks/RSParser/XML/OPML/RSOPMLItem.m b/Frameworks/RSParser/XML/OPML/RSOPMLItem.m new file mode 100755 index 000000000..f73efbdeb --- /dev/null +++ b/Frameworks/RSParser/XML/OPML/RSOPMLItem.m @@ -0,0 +1,86 @@ +// +// RSOPMLItem.m +// RSXML +// +// Created by Brent Simmons on 2/28/16. +// Copyright © 2016 Ranchero Software, LLC. All rights reserved. +// + +#import "RSOPMLItem.h" +#import "RSOPMLAttributes.h" +#import "RSOPMLFeedSpecifier.h" +#import "RSXMLInternal.h" + + +@interface RSOPMLItem () + +@property (nonatomic) NSMutableArray *mutableChildren; + +@end + + +@implementation RSOPMLItem + +@synthesize children = _children; +@synthesize OPMLFeedSpecifier = _OPMLFeedSpecifier; + + +- (NSArray *)children { + + return [self.mutableChildren copy]; +} + + +- (void)setChildren:(NSArray *)children { + + _children = children; + self.mutableChildren = [_children mutableCopy]; +} + + +- (void)addChild:(RSOPMLItem *)child { + + if (!self.mutableChildren) { + self.mutableChildren = [NSMutableArray new]; + } + + [self.mutableChildren addObject:child]; +} + + +- (RSOPMLFeedSpecifier *)OPMLFeedSpecifier { + + if (_OPMLFeedSpecifier) { + return _OPMLFeedSpecifier; + } + + NSString *feedURL = self.attributes.opml_xmlUrl; + if (RSXMLIsEmpty(feedURL)) { + return nil; + } + + _OPMLFeedSpecifier = [[RSOPMLFeedSpecifier alloc] initWithTitle:self.attributes.opml_title feedDescription:self.attributes.opml_description homePageURL:self.attributes.opml_htmlUrl feedURL:feedURL]; + + return _OPMLFeedSpecifier; +} + +- 
(NSString *)titleFromAttributes { + + NSString *title = self.attributes.opml_title; + if (title) { + return title; + } + title = self.attributes.opml_text; + if (title) { + return title; + } + + return nil; +} + +- (BOOL)isFolder { + + return self.mutableChildren.count > 0; +} + +@end diff --git a/Frameworks/RSParser/XML/OPML/RSOPMLParser.h b/Frameworks/RSParser/XML/OPML/RSOPMLParser.h new file mode 100755 index 000000000..825b9ac93 --- /dev/null +++ b/Frameworks/RSParser/XML/OPML/RSOPMLParser.h @@ -0,0 +1,29 @@ +// +// RSOPMLParser.h +// RSXML +// +// Created by Brent Simmons on 7/12/15. +// Copyright © 2015 Ranchero Software, LLC. All rights reserved. +// + +@import Foundation; + + +@class RSXMLData; +@class RSOPMLDocument; + + +typedef void (^RSParsedOPMLBlock)(RSOPMLDocument *OPMLDocument, NSError *error); + +void RSParseOPML(RSXMLData *xmlData, RSParsedOPMLBlock callback); //async; calls back on main thread. + + +@interface RSOPMLParser: NSObject + +- (instancetype)initWithXMLData:(RSXMLData *)xmlData; + +@property (nonatomic, readonly) RSOPMLDocument *OPMLDocument; +@property (nonatomic, readonly) NSError *error; + +@end + diff --git a/Frameworks/RSParser/XML/OPML/RSOPMLParser.m b/Frameworks/RSParser/XML/OPML/RSOPMLParser.m new file mode 100755 index 000000000..842f30a46 --- /dev/null +++ b/Frameworks/RSParser/XML/OPML/RSOPMLParser.m @@ -0,0 +1,297 @@ +// +// RSOPMLParser.m +// RSXML +// +// Created by Brent Simmons on 7/12/15. +// Copyright © 2015 Ranchero Software, LLC. All rights reserved. 
+// + +#import "RSOPMLParser.h" +#import +#import "RSXMLData.h" +#import "RSSAXParser.h" +#import "RSOPMLItem.h" +#import "RSOPMLDocument.h" +#import "RSOPMLAttributes.h" +#import "RSXMLError.h" + + +void RSParseOPML(RSXMLData *xmlData, RSParsedOPMLBlock callback) { + + NSCParameterAssert(xmlData); + NSCParameterAssert(callback); + + dispatch_async(dispatch_get_global_queue(QOS_CLASS_DEFAULT, 0), ^{ + + @autoreleasepool { + + RSOPMLParser *parser = [[RSOPMLParser alloc] initWithXMLData:xmlData]; + + RSOPMLDocument *document = parser.OPMLDocument; + NSError *error = parser.error; + + dispatch_async(dispatch_get_main_queue(), ^{ + + callback(document, error); + }); + } + }); +} + + +@interface RSOPMLParser () + +@property (nonatomic, readwrite) RSOPMLDocument *OPMLDocument; +@property (nonatomic, readwrite) NSError *error; +@property (nonatomic) NSMutableArray *itemStack; + +@end + + +@implementation RSOPMLParser + + +#pragma mark - Init + +- (instancetype)initWithXMLData:(RSXMLData *)XMLData { + + self = [super init]; + if (!self) { + return nil; + } + + [self parse:XMLData]; + + return self; +} + + +#pragma mark - Private + +- (void)parse:(RSXMLData *)XMLData { + + @autoreleasepool { + + if (![self canParseData:XMLData.data]) { + + NSString *filename = nil; + NSURL *url = [NSURL URLWithString:XMLData.urlString]; + if (url && url.isFileURL) { + filename = url.path.lastPathComponent; + } + if ([XMLData.urlString hasPrefix:@"http"]) { + filename = XMLData.urlString; + } + if (!filename) { + filename = XMLData.urlString; + } + self.error = RSOPMLWrongFormatError(filename); + return; + } + + RSSAXParser *parser = [[RSSAXParser alloc] initWithDelegate:self]; + + self.itemStack = [NSMutableArray new]; + self.OPMLDocument = [RSOPMLDocument new]; + [self pushItem:self.OPMLDocument]; + + [parser parseData:XMLData.data]; + [parser finishParsing]; + } +} + +- (BOOL)canParseData:(NSData *)d { + + // Check for 0, nil); + + /*If itemStack is empty, bad things are happening. 
+ But we still shouldn't crash in production.*/ + + if (self.itemStack.count > 0) { + [self.itemStack removeLastObject]; + } +} + + +- (RSOPMLItem *)currentItem { + + return self.itemStack.lastObject; +} + + +#pragma mark - RSSAXParserDelegate + +static const char *kOutline = "outline"; +static const char kOutlineLength = 8; + +- (void)saxParser:(RSSAXParser *)SAXParser XMLStartElement:(const xmlChar *)localName prefix:(const xmlChar *)prefix uri:(const xmlChar *)uri numberOfNamespaces:(NSInteger)numberOfNamespaces namespaces:(const xmlChar **)namespaces numberOfAttributes:(NSInteger)numberOfAttributes numberDefaulted:(int)numberDefaulted attributes:(const xmlChar **)attributes { + + if (!RSSAXEqualTags(localName, kOutline, kOutlineLength)) { + return; + } + + RSOPMLItem *item = [RSOPMLItem new]; + item.attributes = [SAXParser attributesDictionary:attributes numberOfAttributes:numberOfAttributes]; + + [[self currentItem] addChild:item]; + [self pushItem:item]; +} + + +- (void)saxParser:(RSSAXParser *)SAXParser XMLEndElement:(const xmlChar *)localName prefix:(const xmlChar *)prefix uri:(const xmlChar *)uri { + + if (RSSAXEqualTags(localName, kOutline, kOutlineLength)) { + [self popItem]; + } +} + + +static const char *kText = "text"; +static const NSInteger kTextLength = 5; + +static const char *kTitle = "title"; +static const NSInteger kTitleLength = 6; + +static const char *kDescription = "description"; +static const NSInteger kDescriptionLength = 12; + +static const char *kType = "type"; +static const NSInteger kTypeLength = 5; + +static const char *kVersion = "version"; +static const NSInteger kVersionLength = 8; + +static const char *kHTMLURL = "htmlUrl"; +static const NSInteger kHTMLURLLength = 8; + +static const char *kXMLURL = "xmlUrl"; +static const NSInteger kXMLURLLength = 7; + +- (NSString *)saxParser:(RSSAXParser *)SAXParser internedStringForName:(const xmlChar *)name prefix:(const xmlChar *)prefix { + + if (prefix) { + return nil; + } + + size_t 
nameLength = strlen((const char *)name); + + if (nameLength == kTextLength - 1) { + if (RSSAXEqualTags(name, kText, kTextLength)) { + return OPMLTextKey; + } + if (RSSAXEqualTags(name, kType, kTypeLength)) { + return OPMLTypeKey; + } + } + + else if (nameLength == kTitleLength - 1) { + if (RSSAXEqualTags(name, kTitle, kTitleLength)) { + return OPMLTitleKey; + } + } + + else if (nameLength == kXMLURLLength - 1) { + if (RSSAXEqualTags(name, kXMLURL, kXMLURLLength)) { + return OPMLXMLURLKey; + } + } + + else if (nameLength == kVersionLength - 1) { + if (RSSAXEqualTags(name, kVersion, kVersionLength)) { + return OPMLVersionKey; + } + if (RSSAXEqualTags(name, kHTMLURL, kHTMLURLLength)) { + return OPMLHMTLURLKey; + } + } + + else if (nameLength == kDescriptionLength - 1) { + if (RSSAXEqualTags(name, kDescription, kDescriptionLength)) { + return OPMLDescriptionKey; + } + } + + return nil; +} + + +static const char *kRSSUppercase = "RSS"; +static const char *kRSSLowercase = "rss"; +static const NSUInteger kRSSLength = 3; +static NSString *RSSUppercaseValue = @"RSS"; +static NSString *RSSLowercaseValue = @"rss"; +static NSString *emptyString = @""; + +static BOOL equalBytes(const void *bytes1, const void *bytes2, NSUInteger length) { + + return memcmp(bytes1, bytes2, length) == 0; +} + +- (NSString *)saxParser:(RSSAXParser *)SAXParser internedStringForValue:(const void *)bytes length:(NSUInteger)length { + + + if (length < 1) { + return emptyString; + } + + if (length == kRSSLength) { + + if (equalBytes(bytes, kRSSUppercase, kRSSLength)) { + return RSSUppercaseValue; + } + else if (equalBytes(bytes, kRSSLowercase, kRSSLength)) { + return RSSLowercaseValue; + } + + } + + return nil; +} + + +@end diff --git a/Frameworks/RSParser/XML/SAX/RSSAXParser.h b/Frameworks/RSParser/XML/SAX/RSSAXParser.h new file mode 100755 index 000000000..f31eb30ff --- /dev/null +++ b/Frameworks/RSParser/XML/SAX/RSSAXParser.h @@ -0,0 +1,68 @@ +// +// RSSAXParser.h +// RSXML +// +// Created by Brent 
Simmons on 3/25/15. +// Copyright (c) 2015 Ranchero Software, LLC. All rights reserved. +// + +@import Foundation; + +/*Thread-safe, not re-entrant. + + Calls to the delegate will happen on the same thread where the parser runs. + + This is a low-level streaming XML parser, a thin wrapper for libxml2's SAX parser. It doesn't do much Foundation-ifying quite on purpose -- because the goal is performance and low memory use. + + This class is not meant to be sub-classed. Use the delegate methods. + */ + + +@class RSSAXParser; + +@protocol RSSAXParserDelegate + +@optional + +- (void)saxParser:(RSSAXParser *)SAXParser XMLStartElement:(const unsigned char *)localName prefix:(const unsigned char *)prefix uri:(const unsigned char *)uri numberOfNamespaces:(NSInteger)numberOfNamespaces namespaces:(const unsigned char **)namespaces numberOfAttributes:(NSInteger)numberOfAttributes numberDefaulted:(int)numberDefaulted attributes:(const unsigned char **)attributes; + +- (void)saxParser:(RSSAXParser *)SAXParser XMLEndElement:(const unsigned char *)localName prefix:(const unsigned char *)prefix uri:(const unsigned char *)uri; + +- (void)saxParser:(RSSAXParser *)SAXParser XMLCharactersFound:(const unsigned char *)characters length:(NSUInteger)length; + +- (void)saxParserDidReachEndOfDocument:(RSSAXParser *)SAXParser; /*If canceled, may not get called (but might).*/ + +- (NSString *)saxParser:(RSSAXParser *)SAXParser internedStringForName:(const unsigned char *)name prefix:(const unsigned char *)prefix; /*Okay to return nil. Prefix may be nil.*/ + +- (NSString *)saxParser:(RSSAXParser *)SAXParser internedStringForValue:(const void *)bytes length:(NSUInteger)length; + +@end + + +void RSSAXInitLibXMLParser(void); // Needed by RSSAXHTMLParser. 
+ +/*For use by delegate.*/ + +BOOL RSSAXEqualTags(const unsigned char *localName, const char *tag, NSInteger tagLength); + + +@interface RSSAXParser : NSObject + +- (instancetype)initWithDelegate:(id)delegate; + +- (void)parseData:(NSData *)data; +- (void)parseBytes:(const void *)bytes numberOfBytes:(NSUInteger)numberOfBytes; +- (void)finishParsing; +- (void)cancel; + +@property (nonatomic, strong, readonly) NSData *currentCharacters; /*nil if not storing characters. UTF-8 encoded.*/ +@property (nonatomic, strong, readonly) NSString *currentString; /*Convenience to get string version of currentCharacters.*/ +@property (nonatomic, strong, readonly) NSString *currentStringWithTrimmedWhitespace; + +- (void)beginStoringCharacters; /*Delegate can call from XMLStartElement. Characters will be available in XMLEndElement as currentCharacters property. Storing characters is stopped after each XMLEndElement.*/ + +/*Delegate can call from within XMLStartElement. Returns nil if numberOfAttributes < 1.*/ + +- (NSDictionary *)attributesDictionary:(const unsigned char **)attributes numberOfAttributes:(NSInteger)numberOfAttributes; + +@end diff --git a/Frameworks/RSParser/XML/SAX/RSSAXParser.m b/Frameworks/RSParser/XML/SAX/RSSAXParser.m new file mode 100755 index 000000000..5bbbc6e67 --- /dev/null +++ b/Frameworks/RSParser/XML/SAX/RSSAXParser.m @@ -0,0 +1,346 @@ +// +// RSSAXParser.m +// RSXML +// +// Created by Brent Simmons on 3/25/15. +// Copyright (c) 2015 Ranchero Software, LLC. All rights reserved. 
+//
+
+#import <limits.h>
+#import <libxml/parser.h>
+#import <libxml/xmlstring.h>
+#import "RSSAXParser.h"
+#import "RSXMLInternal.h"
+
+
+/* Private state: the libxml2 context, the optional character buffer, and one cached
+   flag per optional delegate method. */
+@interface RSSAXParser ()
+
+@property (nonatomic, weak) id delegate;
+@property (nonatomic, assign) xmlParserCtxtPtr context; /* Created lazily by -parseBytes:numberOfBytes:, freed by -finishParsing or -dealloc. */
+@property (nonatomic, assign) BOOL storingCharacters;
+@property (nonatomic) NSMutableData *characters;
+@property (nonatomic) BOOL delegateRespondsToInternedStringMethod;
+@property (nonatomic) BOOL delegateRespondsToInternedStringForValueMethod;
+@property (nonatomic) BOOL delegateRespondsToStartElementMethod;
+@property (nonatomic) BOOL delegateRespondsToEndElementMethod;
+@property (nonatomic) BOOL delegateRespondsToCharactersFoundMethod;
+@property (nonatomic) BOOL delegateRespondsToEndOfDocumentMethod;
+
+@end
+
+
+@implementation RSSAXParser
+
+/* Makes sure libxml2 is initialized before any parser instance exists. */
++ (void)initialize {
+
+	RSSAXInitLibXMLParser();
+}
+
+
+#pragma mark - Init
+
+/* Stores the delegate (weakly) and records, once, which optional delegate methods it
+   implements so the per-callback paths don't have to ask again. */
+- (instancetype)initWithDelegate:(id)delegate {
+
+	self = [super init];
+	if (self == nil)
+		return nil;
+
+	_delegate = delegate;
+
+	/* A nil delegate answers NO to everything, leaving all flags off. */
+	_delegateRespondsToInternedStringMethod = [delegate respondsToSelector:@selector(saxParser:internedStringForName:prefix:)];
+	_delegateRespondsToInternedStringForValueMethod = [delegate respondsToSelector:@selector(saxParser:internedStringForValue:length:)];
+	_delegateRespondsToStartElementMethod = [delegate respondsToSelector:@selector(saxParser:XMLStartElement:prefix:uri:numberOfNamespaces:namespaces:numberOfAttributes:numberDefaulted:attributes:)];
+	_delegateRespondsToEndElementMethod = [delegate respondsToSelector:@selector(saxParser:XMLEndElement:prefix:uri:)];
+	_delegateRespondsToCharactersFoundMethod = [delegate respondsToSelector:@selector(saxParser:XMLCharactersFound:length:)];
+	_delegateRespondsToEndOfDocumentMethod = [delegate respondsToSelector:@selector(saxParserDidReachEndOfDocument:)];
+
+	return self;
+}
+
+
+#pragma mark - Dealloc
+
+/* Frees the libxml2 context if -finishParsing was never called. */
+- (void)dealloc {
+	if (_context != nil) {
+		xmlFreeParserCtxt(_context);
+		_context = nil;
+	}
+	_delegate = nil;
+}
+
+
+#pragma mark - API
+
+/* Forward declaration; the handler table is defined at the bottom of the file, after the
+   C callbacks it points to. */
+static xmlSAXHandler saxHandlerStruct;
+
+- (void)parseData:(NSData *)data {
+
+	[self parseBytes:data.bytes numberOfBytes:data.length];
+}
+
+
+/* Feeds one chunk of the document to libxml2, creating the push-parser context on the
+   first call. Delegate callbacks are delivered synchronously from inside this method. */
+- (void)parseBytes:(const void *)bytes numberOfBytes:(NSUInteger)numberOfBytes {
+
+	if (self.context == nil) {
+
+		self.context = xmlCreatePushParserCtxt(&saxHandlerStruct, (__bridge void *)self, nil, 0, nil);
+		if (self.context == nil) {
+			return; /* Context could not be created; there is nothing to feed. */
+		}
+		xmlCtxtUseOptions(self.context, XML_PARSE_RECOVER | XML_PARSE_NOENT);
+	}
+
+	/* xmlParseChunk takes an int length. Casting an NSUInteger larger than INT_MAX would
+	   truncate or go negative, so hand the buffer over in INT_MAX-sized pieces. The
+	   do/while keeps the original behavior of calling libxml2 once for an empty chunk. */
+	const char *cursor = (const char *)bytes;
+	NSUInteger remaining = numberOfBytes;
+
+	@autoreleasepool {
+		do {
+			int chunkLength = (int)MIN(remaining, (NSUInteger)INT_MAX);
+			xmlParseChunk(self.context, cursor, chunkLength, 0);
+			if (cursor != NULL) {
+				cursor += chunkLength;
+			}
+			remaining -= (NSUInteger)chunkLength;
+		} while (remaining > 0);
+	}
+}
+
+
+/* Signals end of input (terminate flag = 1), then releases the context and the character
+   buffer. Asserts in debug builds, and returns quietly otherwise, if nothing was parsed. */
+- (void)finishParsing {
+
+	NSAssert(self.context != nil, nil);
+	if (self.context == nil)
+		return;
+
+	@autoreleasepool {
+		xmlParseChunk(self.context, nil, 0, 1);
+		xmlFreeParserCtxt(self.context);
+		self.context = nil;
+		self.characters = nil;
+	}
+}
+
+
+/* Asks libxml2 to stop parsing the current context. */
+- (void)cancel {
+
+	@autoreleasepool {
+		xmlStopParser(self.context);
+	}
+}
+
+
+/* Starts collecting character data into a fresh buffer. */
+- (void)beginStoringCharacters {
+	self.storingCharacters = YES;
+	self.characters = [NSMutableData new];
+}
+
+
+/* Stops collecting and drops the buffer. Called after every end-element and at end of document. */
+- (void)endStoringCharacters {
+	self.storingCharacters = NO;
+	self.characters = nil;
+}
+
+
+- (NSData *)currentCharacters {
+
+	if (!self.storingCharacters) {
+		return nil;
+	}
+
+	return self.characters;
+}
+
+
+/* currentCharacters decoded as UTF-8; nil when RSXMLIsEmpty reports no data. */
+- (NSString *)currentString {
+
+	NSData *d = self.currentCharacters;
+	if (RSXMLIsEmpty(d)) {
+		return nil;
+	}
+
+	return [[NSString alloc] initWithData:d encoding:NSUTF8StringEncoding];
+}
+
+
+- (NSString *)currentStringWithTrimmedWhitespace {
+
+	return [self.currentString stringByTrimmingCharactersInSet:[NSCharacterSet whitespaceAndNewlineCharacterSet]];
+}
+
+
+#pragma mark - Attributes Dictionary
+
+/* Converts the raw attribute array handed to the start-element callback into a dictionary
+   of name -> value strings. Returns nil when there are no attributes. The delegate gets
+   the first chance to supply (interned) strings; otherwise they are built here. */
+- (NSDictionary *)attributesDictionary:(const xmlChar **)attributes numberOfAttributes:(NSInteger)numberOfAttributes {
+
+	if (numberOfAttributes < 1 || !attributes) {
+		return nil;
+	}
+
+	/* Each attribute occupies five consecutive pointers. Slot 2 is not used here. */
+	static const NSInteger slotsPerAttribute = 5;
+
+	NSMutableDictionary *d = [NSMutableDictionary new];
+
+	@autoreleasepool {
+		for (NSInteger i = 0; i < numberOfAttributes; i++) {
+
+			const xmlChar **slots = attributes + (i * slotsPerAttribute);
+			const xmlChar *localName = slots[0];
+			const xmlChar *prefix = slots[1];
+			const xmlChar *valueStart = slots[3];
+			const xmlChar *valueEnd = slots[4];
+
+			/* The value is a byte range, not a NUL-terminated string. */
+			NSUInteger lenValue = (NSUInteger)(valueEnd - valueStart);
+			NSString *value = nil;
+
+			if (self.delegateRespondsToInternedStringForValueMethod) {
+				value = [self.delegate saxParser:self internedStringForValue:(const void *)valueStart length:lenValue];
+			}
+			if (!value) {
+				value = [[NSString alloc] initWithBytes:(const void *)valueStart length:lenValue encoding:NSUTF8StringEncoding];
+			}
+
+			NSString *attributeName = nil;
+
+			if (self.delegateRespondsToInternedStringMethod) {
+				attributeName = [self.delegate saxParser:self internedStringForName:localName prefix:prefix];
+			}
+
+			if (!attributeName) {
+				attributeName = [NSString stringWithUTF8String:(const char *)localName];
+				if (prefix) {
+					NSString *attributePrefix = [NSString stringWithUTF8String:(const char *)prefix];
+					attributeName = [NSString stringWithFormat:@"%@:%@", attributePrefix, attributeName];
+				}
+			}
+
+			/* Skip anything that failed to decode rather than inserting nil. */
+			if (value && attributeName) {
+				d[attributeName] = value;
+			}
+		}
+	}
+
+	return d;
+}
+
+
+#pragma mark - Equal Tags
+
+/* YES when the first tagLength bytes of localName equal tag; NO for a NULL localName.
+   NOTE(review): bounded comparison -- callers presumably include tag's terminating NUL
+   in tagLength so that longer names don't match; confirm at the call sites. */
+BOOL RSSAXEqualTags(const xmlChar *localName, const char *tag, NSInteger tagLength) {
+
+	if (!localName) {
+		return NO;
+	}
+	return !strncmp((const char *)localName, tag, (size_t)tagLength);
+}
+
+
+#pragma mark - Callbacks
+
+/* End of document: notify the delegate, then stop storing characters. */
+- (void)xmlEndDocument {
+
+	@autoreleasepool {
+		if (self.delegateRespondsToEndOfDocumentMethod) {
+			[self.delegate saxParserDidReachEndOfDocument:self];
+		}
+
+		[self endStoringCharacters];
+	}
+}
+
+
+/* Character data: append to the buffer when storing, and forward to the delegate. */
+- (void)xmlCharactersFound:(const xmlChar *)ch length:(NSUInteger)length {
+
+	@autoreleasepool {
+		if (self.storingCharacters) {
+			[self.characters appendBytes:(const void *)ch length:length];
+		}
+
+		if (self.delegateRespondsToCharactersFoundMethod) {
+			[self.delegate saxParser:self XMLCharactersFound:ch length:length];
+		}
+	}
+}
+
+
+/* Start of element: forwarded to the delegate with libxml2's arguments untouched. */
+- (void)xmlStartElement:(const xmlChar *)localName prefix:(const xmlChar *)prefix uri:(const xmlChar *)uri numberOfNamespaces:(int)numberOfNamespaces namespaces:(const xmlChar **)namespaces numberOfAttributes:(int)numberOfAttributes numberDefaulted:(int)numberDefaulted attributes:(const xmlChar **)attributes {
+
+	@autoreleasepool {
+		if (self.delegateRespondsToStartElementMethod) {
+
+			[self.delegate saxParser:self XMLStartElement:localName prefix:prefix uri:uri numberOfNamespaces:numberOfNamespaces namespaces:namespaces numberOfAttributes:numberOfAttributes numberDefaulted:numberDefaulted attributes:attributes];
+		}
+	}
+}
+
+
+/* End of element: the delegate is called first so it can still read the stored
+   characters; only afterwards is the buffer discarded. */
+- (void)xmlEndElement:(const xmlChar *)localName prefix:(const xmlChar *)prefix uri:(const xmlChar *)uri {
+
+	@autoreleasepool {
+		if (self.delegateRespondsToEndElementMethod) {
+			[self.delegate saxParser:self XMLEndElement:localName prefix:prefix uri:uri];
+		}
+
+		[self endStoringCharacters];
+	}
+}
+
+
+@end
+
+
+/* C trampolines registered in saxHandlerStruct. `context` is the user-data pointer given
+   to xmlCreatePushParserCtxt, i.e. the RSSAXParser itself, bridged without transferring
+   ownership. */
+
+static void startElementSAX(void *context, const xmlChar *localname, const xmlChar *prefix, const xmlChar *URI, int nb_namespaces, const xmlChar **namespaces, int nb_attributes, int nb_defaulted, const xmlChar **attributes) {
+
+	[(__bridge RSSAXParser *)context xmlStartElement:localname prefix:prefix uri:URI numberOfNamespaces:nb_namespaces namespaces:namespaces numberOfAttributes:nb_attributes numberDefaulted:nb_defaulted attributes:attributes];
+}
+
+
+static void endElementSAX(void *context, const xmlChar *localname, const xmlChar *prefix, const xmlChar *URI) {
+	[(__bridge RSSAXParser *)context xmlEndElement:localname prefix:prefix uri:URI];
+}
+
+
+static void charactersFoundSAX(void *context, const xmlChar *ch, int len) {
+	[(__bridge RSSAXParser *)context xmlCharactersFound:ch length:(NSUInteger)len];
+}
+
+
+static void endDocumentSAX(void *context) {
+	[(__bridge RSSAXParser *)context xmlEndDocument];
+}
+
+
+/* SAX2 handler table. Only the callbacks this parser uses are set; every member not
+   named here is zero-initialized, which matches the explicit nil entries of a positional
+   initializer without depending on the member order of xmlSAXHandler. */
+static xmlSAXHandler saxHandlerStruct = {
+	.endDocument = endDocumentSAX,
+	.characters = charactersFoundSAX,
+	.initialized = XML_SAX2_MAGIC, /* Selects the SAX2 (namespace-aware) element callbacks below. */
+	.startElementNs = startElementSAX,
+	.endElementNs = endElementSAX
+};
+
+
+/* Initializes libxml2 once per process; later calls are no-ops. */
+void RSSAXInitLibXMLParser(void) {
+
+	static dispatch_once_t onceToken;
+	dispatch_once(&onceToken, ^{
+		xmlInitParser();
+	});
+}
+