Make RSParser a local module.

This commit is contained in:
Brent Simmons
2024-11-09 09:37:57 -08:00
parent 7751bff896
commit e2b76c1e08
126 changed files with 37555 additions and 38 deletions

View File

@@ -0,0 +1,24 @@
//
// FeedParser.h
// RSXML
//
// Created by Brent Simmons on 7/12/15.
// Copyright © 2015 Ranchero Software, LLC. All rights reserved.
//
@import Foundation;
@class RSParsedFeed;
@class RSXMLData;
/// Interface for feed parsers that are created from an RSXMLData value.
/// NOTE(review): RSXMLData and RSParsedFeed are only forward-declared in this header,
/// so their semantics cannot be confirmed from here.
@protocol FeedParser <NSObject>
/// Class-level check on the candidate data. Presumably reports whether this parser
/// type can handle it — confirm against the conforming classes.
+ (BOOL)canParseFeed:(RSXMLData * _Nonnull)xmlData;
/// Creates a parser for the given data. Argument and result are declared non-null.
- (nonnull instancetype)initWithXMLData:(RSXMLData * _Nonnull)xmlData;
/// Returns the parsed feed, or nil. The error out-parameter is optional; presumably it
/// is populated when nil is returned — confirm against the conforming classes.
- (nullable RSParsedFeed *)parseFeed:(NSError * _Nullable * _Nullable)error;
@end

View File

@@ -0,0 +1,26 @@
//
// NSData+RSParser.h
// RSParser
//
// Created by Brent Simmons on 6/24/17.
// Copyright © 2017 Ranchero Software, LLC. All rights reserved.
//
@import Foundation;
/// Cheap content-sniffing heuristics over the receiver's raw bytes.
/// All of them are substring searches; none of them parse the data.
@interface NSData (RSParser)
/// YES if the bytes contain "<html", "<HTML", "<body" or "<meta", or contain "<"
/// together with one of the HTML doctype markers.
- (BOOL)isProbablyHTML;
/// YES if the first non-whitespace content is "<?xml" (a few leading bytes are
/// tolerated to allow for a BOM).
- (BOOL)isProbablyXML;
/// YES if the first non-whitespace content is "{" (same BOM allowance).
- (BOOL)isProbablyJSON;
/// YES if the data is probably JSON and mentions the jsonfeed.org version URL,
/// either plain or with JSON-escaped slashes.
- (BOOL)isProbablyJSONFeed;
/// YES if the data is probably JSON and contains "rss", "channel" and "item".
- (BOOL)isProbablyRSSInJSON;
/// YES if the bytes contain "<rss" or "<rdf:RDF", or both "<channel>" and "<pubDate>".
- (BOOL)isProbablyRSS;
/// YES if the bytes contain "<feed".
- (BOOL)isProbablyAtom;
@end

View File

@@ -0,0 +1,139 @@
//
// NSData+RSParser.m
// RSParser
//
// Created by Brent Simmons on 6/24/17.
// Copyright © 2017 Ranchero Software, LLC. All rights reserved.
//
#import "NSData+RSParser.h"
/* TODO: find real-world cases where the isProbably* cases fail when they should succeed, and add them to tests.*/
static BOOL bytesAreProbablyHTML(const char *bytes, NSUInteger numberOfBytes);
static BOOL bytesAreProbablyXML(const char *bytes, NSUInteger numberOfBytes);
static BOOL bytesStartWithStringIgnoringWhitespace(const char *string, const char *bytes, NSUInteger numberOfBytes);
static BOOL didFindString(const char *string, const char *bytes, NSUInteger numberOfBytes);
static BOOL bytesStartWithRSS(const char *bytes, NSUInteger numberOfBytes);
static BOOL bytesStartWithRDF(const char *bytes, NSUInteger numberOfBytes);
static BOOL bytesStartWithAtom(const char *bytes, NSUInteger numberOfBytes);
@implementation NSData (RSParser)

/// YES when bytesAreProbablyHTML() finds an HTML marker in the receiver's bytes.
- (BOOL)isProbablyHTML {
    const char *rawBytes = self.bytes;
    const NSUInteger byteCount = self.length;
    return bytesAreProbablyHTML(rawBytes, byteCount);
}

/// YES when the data starts (ignoring whitespace / a short BOM) with an XML declaration.
- (BOOL)isProbablyXML {
    const char *rawBytes = self.bytes;
    const NSUInteger byteCount = self.length;
    return bytesAreProbablyXML(rawBytes, byteCount);
}

/// YES when the data starts (ignoring whitespace / a short BOM) with an opening brace.
- (BOOL)isProbablyJSON {
    const char *rawBytes = self.bytes;
    const NSUInteger byteCount = self.length;
    return bytesStartWithStringIgnoringWhitespace("{", rawBytes, byteCount);
}

/// YES when the data looks like JSON and mentions the JSON Feed version URL.
- (BOOL)isProbablyJSONFeed {
    if (![self isProbablyJSON]) {
        return NO;
    }

    const char *rawBytes = self.bytes;
    const NSUInteger byteCount = self.length;
    if (didFindString("://jsonfeed.org/version/", rawBytes, byteCount)) {
        return YES;
    }
    // Same marker, but with the forward slashes JSON-escaped.
    return didFindString(":\\/\\/jsonfeed.org\\/version\\/", rawBytes, byteCount);
}

/// YES when the data looks like JSON and contains all of "rss", "channel" and "item".
- (BOOL)isProbablyRSSInJSON {
    if (![self isProbablyJSON]) {
        return NO;
    }

    static const char *const requiredMarkers[] = {"rss", "channel", "item"};
    const char *rawBytes = self.bytes;
    const NSUInteger byteCount = self.length;
    for (size_t markerIndex = 0; markerIndex < sizeof(requiredMarkers) / sizeof(requiredMarkers[0]); markerIndex++) {
        if (!didFindString(requiredMarkers[markerIndex], rawBytes, byteCount)) {
            return NO;
        }
    }
    return YES;
}

/// YES when the data contains an RSS or RDF root tag, or other distinct RSS markers.
- (BOOL)isProbablyRSS {
    const char *rawBytes = self.bytes;
    const NSUInteger byteCount = self.length;

    BOOL hasRootTag = didFindString("<rss", rawBytes, byteCount) || didFindString("<rdf:RDF", rawBytes, byteCount);
    if (hasRootTag) {
        return YES;
    }

    // At this writing (7 Dec. 2017), https://www.natashatherobot.com/feed/ is missing an opening <rss> tag, but it should be parsed anyway. It does have some other distinct RSS markers we can find.
    if (!didFindString("<channel>", rawBytes, byteCount)) {
        return NO;
    }
    return didFindString("<pubDate>", rawBytes, byteCount);
}

/// YES when the data contains the start of a <feed tag.
- (BOOL)isProbablyAtom {
    const char *rawBytes = self.bytes;
    const NSUInteger byteCount = self.length;
    return didFindString("<feed", rawBytes, byteCount);
}

@end
/// Returns YES when the C string `string` occurs within the first `numberOfBytes`
/// bytes of `bytes`. The search is done by strnstr(), so it also stops at a NUL byte.
static BOOL didFindString(const char *string, const char *bytes, NSUInteger numberOfBytes) {
    return strnstr(bytes, string, numberOfBytes) != NULL;
}
/// Returns YES when `string` is the first thing in `bytes` after optional leading
/// whitespace (space, CR, LF, tab).
///
/// Up to four leading bytes that are neither whitespace nor the first character of
/// `string` are also skipped, to allow for a BOM.
static BOOL bytesStartWithStringIgnoringWhitespace(const char *string, const char *bytes, NSUInteger numberOfBytes) {
    const char firstCharacter = string[0];
    NSUInteger index = 0;

    while (index < numberOfBytes) {
        const char current = bytes[index];
        const BOOL isWhitespace = (current == ' ' || current == '\r' || current == '\n' || current == '\t');

        if (!isWhitespace) {
            if (current == firstCharacter) {
                // The match only counts if the first occurrence of the whole string starts right here.
                const char *firstOccurrence = strnstr(bytes, string, numberOfBytes);
                return firstOccurrence == bytes + index;
            }
            // Allow for a BOM of up to four bytes. ASSUMPTION: BOM will only be at the start of the data.
            if (index >= 4) {
                return NO;
            }
        }
        index++;
    }

    return NO;
}
/// Returns YES when the bytes contain a common HTML tag, or contain "<" together
/// with one of the recognised spellings of the HTML doctype.
static BOOL bytesAreProbablyHTML(const char *bytes, NSUInteger numberOfBytes) {
    static const char *const tagMarkers[] = {"<html", "<HTML", "<body", "<meta"};
    static const char *const doctypeMarkers[] = {"doctype html", "DOCTYPE html", "DOCTYPE HTML"};

    for (size_t i = 0; i < sizeof(tagMarkers) / sizeof(tagMarkers[0]); i++) {
        if (didFindString(tagMarkers[i], bytes, numberOfBytes)) {
            return YES;
        }
    }

    // The doctype markers only count when there is at least one '<' in the data.
    if (!didFindString("<", bytes, numberOfBytes)) {
        return NO;
    }

    for (size_t i = 0; i < sizeof(doctypeMarkers) / sizeof(doctypeMarkers[0]); i++) {
        if (didFindString(doctypeMarkers[i], bytes, numberOfBytes)) {
            return YES;
        }
    }

    return NO;
}
/// Returns YES when the data begins (after optional whitespace / a short BOM) with
/// the start of an XML declaration.
static BOOL bytesAreProbablyXML(const char *bytes, NSUInteger numberOfBytes) {
    static const char *const xmlDeclarationStart = "<?xml";
    return bytesStartWithStringIgnoringWhitespace(xmlDeclarationStart, bytes, numberOfBytes);
}

View File

@@ -0,0 +1,26 @@
//
// NSString+RSParser.h
// RSParser
//
// Created by Brent Simmons on 9/25/15.
// Copyright © 2015 Ranchero Software, LLC. All rights reserved.
//
@import Foundation;
NS_ASSUME_NONNULL_BEGIN
/// String helpers used by the feed parsers.
@interface NSString (RSParser)
/// Returns a string with named and numeric (decimal or hex) HTML entities decoded.
/// Sequences that are not recognised as entities are left as they are. Returns the
/// receiver itself when decoding changes nothing.
- (NSString *)rsparser_stringByDecodingHTMLEntities;
/// Returns a copy of \c self with <, >, and & entity-encoded.
@property (readonly, copy) NSString *rsparser_stringByEncodingRequiredEntities;
/// Returns the MD5 digest of the receiver's UTF-8 bytes as 32 lowercase hex characters.
- (NSString *)rsparser_md5Hash;
/// Returns YES when \c s occurs in the receiver (via -rangeOfString:).
- (BOOL)rsparser_contains:(NSString *)s;
@end
NS_ASSUME_NONNULL_END

View File

@@ -0,0 +1,348 @@
//
// NSString+RSParser.m
// RSParser
//
// Created by Brent Simmons on 9/25/15.
// Copyright © 2015 Ranchero Software, LLC. All rights reserved.
//
#import "NSString+RSParser.h"
#import <CommonCrypto/CommonDigest.h>
/// File-private scanner helper used by -rsparser_stringByDecodingHTMLEntities.
@interface NSScanner (RSParser)
/// Scans an entity starting at the scanner's current location, looking for the
/// terminating ';'. On success returns YES and, when \c decodedEntity is non-NULL,
/// stores the decoded text there. Returns NO when whitespace, the end of the string,
/// or the length limit is hit first, or when the entity cannot be decoded.
- (BOOL)rs_scanEntityValue:(NSString * _Nullable * _Nullable)decodedEntity;
@end
@implementation NSString (RSParser)
/// Returns YES when `s` occurs somewhere in the receiver.
- (BOOL)rsparser_contains:(NSString *)s {
    const NSRange matchRange = [self rangeOfString:s];
    return matchRange.location != NSNotFound;
}
/// Decodes HTML entities in the receiver.
///
/// Text between ampersands is copied verbatim. At each '&' an entity scan is tried:
/// on success the decoded text is appended, otherwise the '&' is kept literally and
/// scanning resumes right after it. Returns the receiver itself when nothing changed.
- (NSString *)rsparser_stringByDecodingHTMLEntities {
    @autoreleasepool {
        NSScanner *entityScanner = [[NSScanner alloc] initWithString:self];
        entityScanner.charactersToBeSkipped = nil;
        NSMutableString *decoded = [[NSMutableString alloc] init];

        while (!entityScanner.isAtEnd) {
            // Copy the literal run up to (not including) the next ampersand.
            NSString *literalRun = nil;
            if ([entityScanner scanUpToString:@"&" intoString:&literalRun]) {
                [decoded appendString:literalRun];
            }
            if (entityScanner.isAtEnd) {
                break;
            }

            const NSUInteger ampersandLocation = entityScanner.scanLocation;
            NSString *entityText = nil;
            const BOOL didDecode = [entityScanner rs_scanEntityValue:&entityText];
            if (!didDecode) {
                // Not an entity: keep the ampersand and continue just after it.
                [decoded appendString:@"&"];
                entityScanner.scanLocation = ampersandLocation + 1;
                continue;
            }
            [decoded appendString:entityText];
        }

        return [self isEqualToString:decoded] ? self : [decoded copy];
    }
}
static NSDictionary *RSEntitiesDictionary(void);
static NSString *RSParserStringWithValue(uint32_t value);
/// Decodes a single entity.
///
/// The receiver is the entity text, with or without the surrounding '&' and ';'
/// (for example "amp", "&amp;", "#8217" or "#x2019").
///
/// @return The decoded text, or nil when the entity is not a known name and is not a
///         usable decimal / hexadecimal character reference.
- (NSString * _Nullable)rs_stringByDecodingEntity {
    // self may or may not have outer & and ; characters.
    NSMutableString *s = [self mutableCopy];
    if ([s hasPrefix:@"&"]) {
        [s deleteCharactersInRange:NSMakeRange(0, 1)];
    }
    if ([s hasSuffix:@";"]) {
        [s deleteCharactersInRange:NSMakeRange(s.length - 1, 1)];
    }

    // Look up the stripped name. Looking up `self` would miss every receiver that
    // still carries its '&' / ';' delimiters.
    NSString *decodedEntity = RSEntitiesDictionary()[s];
    if (decodedEntity) {
        return decodedEntity;
    }

    if ([s hasPrefix:@"#x"] || [s hasPrefix:@"#X"]) { // Hex
        NSScanner *scanner = [[NSScanner alloc] initWithString:s];
        scanner.charactersToBeSkipped = [NSCharacterSet characterSetWithCharactersInString:@"#xX"];
        unsigned int hexValue = 0;
        if ([scanner scanHexInt:&hexValue]) {
            return RSParserStringWithValue((uint32_t)hexValue);
        }
        return nil;
    }

    if ([s hasPrefix:@"#"]) { // Decimal
        [s deleteCharactersInRange:NSMakeRange(0, 1)];
        NSInteger value = s.integerValue;
        // Reject non-positive values, and values that would be silently truncated
        // to a different code point by the 32-bit cast below.
        if (value < 1 || (unsigned long long)value > UINT32_MAX) {
            return nil;
        }
        return RSParserStringWithValue((uint32_t)value);
    }

    return nil;
}
/// Returns a copy of the receiver in which '<', '>' and '&' are replaced by
/// "&lt;", "&gt;" and "&amp;". All other characters are copied unchanged.
- (NSString *)rsparser_stringByEncodingRequiredEntities {
    const NSUInteger characterCount = self.length;
    NSMutableString *encoded = [NSMutableString string];

    for (NSUInteger index = 0; index < characterCount; index++) {
        const unichar character = [self characterAtIndex:index];

        NSString *entity = nil;
        if (character == '<') {
            entity = @"&lt;";
        }
        else if (character == '>') {
            entity = @"&gt;";
        }
        else if (character == '&') {
            entity = @"&amp;";
        }

        if (entity != nil) {
            [encoded appendString:entity];
        }
        else {
            [encoded appendFormat:@"%C", character];
        }
    }

    return [encoded copy];
}
// CC_MD5 is deprecated; the warning is silenced for this one method only.
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
/// Returns the raw MD5 digest (CC_MD5_DIGEST_LENGTH bytes) of the receiver's UTF-8 bytes.
- (NSData *)_rsparser_md5HashData {
    NSData *utf8Data = [self dataUsingEncoding:NSUTF8StringEncoding];
    unsigned char digest[CC_MD5_DIGEST_LENGTH];
    CC_MD5(utf8Data.bytes, (CC_LONG)utf8Data.length, digest);
    return [NSData dataWithBytes:(const void *)digest length:CC_MD5_DIGEST_LENGTH];
}
#pragma GCC diagnostic pop
/// Returns the MD5 digest of the receiver's UTF-8 bytes as 32 lowercase hex characters.
- (NSString *)rsparser_md5Hash {
    NSData *digestData = [self _rsparser_md5HashData];
    const Byte *digestBytes = digestData.bytes;

    NSMutableString *hex = [NSMutableString stringWithCapacity:32];
    // Format exactly 16 bytes — the size of an MD5 digest.
    for (NSUInteger byteIndex = 0; byteIndex < 16; byteIndex++) {
        [hex appendFormat:@"%02x", digestBytes[byteIndex]];
    }
    return [NSString stringWithString:hex];
}
@end
@implementation NSScanner (RSParser)

/// Tries to scan one HTML entity that starts with the '&' at the current scan location.
///
/// The scan walks forward until it finds ';'. It gives up when it meets whitespace,
/// the end of the string, or more than `maxEntityLength` characters.
///
/// @param decodedEntity Optional out-parameter. When non-NULL and a ';' is found, it
///        receives the decoded entity (or nil when the entity is unknown) and the scan
///        location is moved past the ';'. When NULL, the method only reports that a
///        terminated entity is present and leaves the scan location on the ';'.
/// @return YES when an entity was found (and, if requested, decoded); otherwise NO.
///         On NO the scan location may have advanced; callers that care must reset it.
- (BOOL)rs_scanEntityValue:(NSString * _Nullable * _Nullable)decodedEntity {
    static const NSUInteger maxEntityLength = 20; // It's probably smaller, but this is just for sanity.

    NSString *s = self.string;
    NSUInteger initialScanLocation = self.scanLocation;

    // Guard the preconditions instead of crashing: -characterAtIndex: raises at or past
    // the end, and the substring range below underflows unless we start on the '&'.
    if (initialScanLocation >= s.length || [s characterAtIndex:initialScanLocation] != '&') {
        return NO;
    }

    while (true) {
        unichar ch = [s characterAtIndex:self.scanLocation];
        if ([NSCharacterSet.whitespaceAndNewlineCharacterSet characterIsMember:ch]) {
            break;
        }

        if (ch == ';') {
            if (!decodedEntity) {
                return YES;
            }
            // Everything between the '&' and the ';', exclusive.
            NSString *rawEntity = [s substringWithRange:NSMakeRange(initialScanLocation + 1, (self.scanLocation - initialScanLocation) - 1)];
            *decodedEntity = [rawEntity rs_stringByDecodingEntity];
            self.scanLocation = self.scanLocation + 1;
            return *decodedEntity != nil;
        }

        self.scanLocation = self.scanLocation + 1;
        if (self.scanLocation - initialScanLocation > maxEntityLength) {
            break;
        }
        if (self.isAtEnd) {
            break;
        }
    }

    return NO;
}

@end
/// Builds a one-code-point string from a numeric character reference value.
///
/// Values 0x80–0x9F are first remapped through the Windows Latin-1 extension table.
/// The value is then decoded as a single little-endian UTF-32 unit.
static NSString *RSParserStringWithValue(uint32_t value) {
    // From WebCore's HTMLEntityParser
    static const uint32_t windowsLatin1ExtensionArray[32] = {
        0x20AC, 0x0081, 0x201A, 0x0192, 0x201E, 0x2026, 0x2020, 0x2021, // 80-87
        0x02C6, 0x2030, 0x0160, 0x2039, 0x0152, 0x008D, 0x017D, 0x008F, // 88-8F
        0x0090, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014, // 90-97
        0x02DC, 0x2122, 0x0161, 0x203A, 0x0153, 0x009D, 0x017E, 0x0178  // 98-9F
    };

    uint32_t codePoint = value;
    if (codePoint >= 0x80u && codePoint <= 0x9Fu) {
        codePoint = windowsLatin1ExtensionArray[codePoint - 0x80u];
    }

    const uint32_t littleEndianUnit = CFSwapInt32HostToLittle(codePoint);
    return [[NSString alloc] initWithBytes:&littleEndianUnit
                                    length:sizeof(littleEndianUnit)
                                  encoding:NSUTF32LittleEndianStringEncoding];
}
/// Returns the shared table mapping entity names (without '&' and ';') to their
/// replacement text. The table is built once, on first use.
static NSDictionary *RSEntitiesDictionary(void) {
    static NSDictionary *entitiesDictionary = nil;
    static dispatch_once_t onceToken;
    dispatch_once(&onceToken, ^{
        entitiesDictionary = @{
            // Named entities
            @"AElig": @"Æ",
            @"Aacute": @"Á",
            @"Acirc": @"Â",
            @"Agrave": @"À",
            @"Aring": @"Å",
            @"Atilde": @"Ã",
            @"Auml": @"Ä",
            @"Ccedil": @"Ç",
            @"Dstrok": @"Ð",
            @"ETH": @"Ð",
            @"Eacute": @"É",
            @"Ecirc": @"Ê",
            @"Egrave": @"È",
            @"Euml": @"Ë",
            @"Iacute": @"Í",
            @"Icirc": @"Î",
            @"Igrave": @"Ì",
            @"Iuml": @"Ï",
            @"Ntilde": @"Ñ",
            @"Oacute": @"Ó",
            @"Ocirc": @"Ô",
            @"Ograve": @"Ò",
            @"Oslash": @"Ø",
            @"Otilde": @"Õ",
            @"Ouml": @"Ö",
            @"Pi": @"Π",
            @"THORN": @"Þ",
            @"Uacute": @"Ú",
            @"Ucirc": @"Û",
            @"Ugrave": @"Ù",
            @"Uuml": @"Ü",
            @"Yacute": @"\u00DD", // Y with acute accent (was a plain "Y")
            @"aacute": @"á",
            @"acirc": @"â",
            @"acute": @"´",
            @"aelig": @"æ",
            @"agrave": @"à",
            @"amp": @"&",
            @"apos": @"'",
            @"aring": @"å",
            @"atilde": @"ã",
            @"auml": @"ä",
            @"brkbar": @"¦",
            @"brvbar": @"¦",
            @"ccedil": @"ç",
            @"cedil": @"¸",
            @"cent": @"¢",
            @"copy": @"©",
            @"curren": @"¤",
            @"deg": @"°",
            @"die": @"¨",
            @"divide": @"÷",
            @"eacute": @"é",
            @"ecirc": @"ê",
            @"egrave": @"è",
            @"eth": @"ð",
            @"euml": @"ë",
            @"euro": @"€",
            @"frac12": @"½",
            @"frac14": @"¼",
            @"frac34": @"¾",
            @"gt": @">",
            @"hearts": @"♥",
            @"hellip": @"…",
            @"iacute": @"í",
            @"icirc": @"î",
            @"iexcl": @"¡",
            @"igrave": @"ì",
            @"iquest": @"¿",
            @"iuml": @"ï",
            @"laquo": @"«",
            @"ldquo": @"“",
            @"lsquo": @"\u2018", // left single quotation mark (was an empty string)
            @"lt": @"<",
            @"macr": @"¯",
            @"mdash": @"—",
            @"micro": @"µ",
            @"middot": @"·",
            @"ndash": @"\u2013", // en dash (was an empty string)
            @"not": @"¬",
            @"ntilde": @"ñ",
            @"oacute": @"ó",
            @"ocirc": @"ô",
            @"ograve": @"ò",
            @"ordf": @"ª",
            @"ordm": @"º",
            @"oslash": @"ø",
            @"otilde": @"õ",
            @"ouml": @"ö",
            @"para": @"¶",
            @"pi": @"π",
            @"plusmn": @"±",
            @"pound": @"£",
            @"quot": @"\"",
            @"raquo": @"»",
            @"rdquo": @"”",
            @"reg": @"®",
            @"rsquo": @"\u2019", // right single quotation mark (was an empty string)
            @"sect": @"§",
            @"shy": RSParserStringWithValue(173),
            @"sup1": @"¹",
            @"sup2": @"²",
            @"sup3": @"³",
            @"szlig": @"ß",
            @"thorn": @"þ",
            @"times": @"×",
            @"trade": @"™",
            @"uacute": @"ú",
            @"ucirc": @"û",
            @"ugrave": @"ù",
            @"uml": @"¨",
            @"uuml": @"ü",
            @"yacute": @"\u00FD", // y with acute accent (was a plain "y")
            @"yen": @"¥",
            @"yuml": @"ÿ",
            @"infin": @"∞",
            @"nbsp": RSParserStringWithValue(160)
        };
    });

    return entitiesDictionary;
}

View File

@@ -0,0 +1,23 @@
//
// ParserData.h
// RSParser
//
// Created by Brent Simmons on 10/4/17.
// Copyright © 2017 Ranchero Software, LLC. All rights reserved.
//
@import Foundation;
NS_ASSUME_NONNULL_BEGIN
/// Value object pairing a feed's URL string with the raw bytes to parse.
@interface ParserData : NSObject
/// The URL string passed to the initializer.
@property (nonatomic, readonly) NSString *url;
/// The data passed to the initializer.
@property (nonatomic, readonly) NSData *data;
/// Stores both arguments as given (no copy is made).
- (instancetype)initWithURL:(NSString *)url data:(NSData *)data;
@end
NS_ASSUME_NONNULL_END

View File

@@ -0,0 +1,26 @@
//
// ParserData.m
// RSParser
//
// Created by Brent Simmons on 10/4/17.
// Copyright © 2017 Ranchero Software, LLC. All rights reserved.
//
#import "ParserData.h"
@implementation ParserData

/// Stores the URL string and the data exactly as passed in.
- (instancetype)initWithURL:(NSString *)url data:(NSData *)data {
    if ((self = [super init])) {
        _url = url;
        _data = data;
    }
    return self;
}

@end

View File

@@ -0,0 +1,18 @@
//
// RSAtomParser.h
// RSParser
//
// Created by Brent Simmons on 1/15/15.
// Copyright (c) 2015 Ranchero Software LLC. All rights reserved.
//
@import Foundation;
@class ParserData;
@class RSParsedFeed;
/// Parser for Atom feeds.
@interface RSAtomParser : NSObject
/// Parses \c parserData.data and returns an RSParsedFeed built from \c parserData.url
/// plus the title, link, language and articles found while parsing.
+ (RSParsedFeed *)parseFeedWithData:(ParserData *)parserData;
@end

View File

@@ -0,0 +1,679 @@
//
// RSAtomParser.m
// RSParser
//
// Created by Brent Simmons on 1/15/15.
// Copyright (c) 2015 Ranchero Software LLC. All rights reserved.
//
#import "RSAtomParser.h"
#import "RSSAXParser.h"
#import "RSParsedFeed.h"
#import "RSParsedArticle.h"
#import "NSString+RSParser.h"
#import "RSDateParser.h"
#import "ParserData.h"
#import "RSParsedEnclosure.h"
#import "RSParsedAuthor.h"
#import <libxml/xmlstring.h>
/// Private parsing state. The parser acts as the delegate of its own RSSAXParser.
@interface RSAtomParser () <RSSAXParserDelegate>
// Input, taken from the ParserData passed to the initializer.
@property (nonatomic) NSData *feedData;
@property (nonatomic) NSString *urlString;
// Set when the closing feed tag is seen; later SAX callbacks are ignored.
@property (nonatomic) BOOL endFeedFound;
// YES while inside a content/summary element whose type is "xhtml".
@property (nonatomic) BOOL parsingXHTML;
// YES between the start and end of a source element.
@property (nonatomic) BOOL parsingSource;
// YES between the start and end of an entry element.
@property (nonatomic) BOOL parsingArticle;
// YES between the start and end of an author element.
@property (nonatomic) BOOL parsingAuthor;
// One attributes dictionary per open element; pushed on start, popped on end.
@property (nonatomic) NSMutableArray *attributesStack;
// Top of attributesStack.
@property (nonatomic, readonly) NSDictionary *currentAttributes;
// Accumulates re-serialized markup while parsingXHTML is YES.
@property (nonatomic) NSMutableString *xhtmlString;
// Feed-level results.
@property (nonatomic) NSString *link;
@property (nonatomic) NSString *title;
@property (nonatomic) NSMutableArray *articles;
// Timestamp taken when parsing starts; copied to every article.
@property (nonatomic) NSDate *dateParsed;
@property (nonatomic) RSSAXParser *parser;
// Last object in articles.
@property (nonatomic, readonly) RSParsedArticle *currentArticle;
// Author being collected while parsingAuthor is YES.
@property (nonatomic) RSParsedAuthor *currentAuthor;
// Date parsed from the SAX parser's current character data.
@property (nonatomic, readonly) NSDate *currentDate;
@property (nonatomic) NSString *language;
@end
@implementation RSAtomParser
#pragma mark - Class Methods
/// Convenience entry point: creates a parser for `parserData` and returns its parsed feed.
+ (RSParsedFeed *)parseFeedWithData:(ParserData *)parserData {
    return [[[[self class] alloc] initWithParserData:parserData] parseFeed];
}
#pragma mark - Init
/// Captures the feed bytes and URL, creates the SAX parser with self as delegate,
/// and sets up the empty attributes stack and article list.
- (instancetype)initWithParserData:(ParserData *)parserData {
    if ((self = [super init])) {
        _feedData = parserData.data;
        _urlString = parserData.url;
        _parser = [[RSSAXParser alloc] initWithDelegate:self];
        _attributesStack = [[NSMutableArray alloc] init];
        _articles = [[NSMutableArray alloc] init];
    }
    return self;
}
#pragma mark - API
/// Runs the parse and packages the collected feed-level values and articles.
- (RSParsedFeed *)parseFeed {
    [self parse];

    return [[RSParsedFeed alloc] initWithURLString:self.urlString
                                             title:self.title
                                              link:self.link
                                          language:self.language
                                          articles:self.articles];
}
#pragma mark - Constants
// NSString constants. These are the objects returned by the interned-string delegate
// methods below, and the keys/values used when reading attribute dictionaries.
static NSString *kTypeKey = @"type";
static NSString *kXHTMLType = @"xhtml";
static NSString *kRelKey = @"rel";
static NSString *kAlternateValue = @"alternate";
static NSString *kHrefKey = @"href";
static NSString *kXMLKey = @"xml";
static NSString *kBaseKey = @"base";
static NSString *kLangKey = @"lang";
static NSString *kXMLBaseKey = @"xml:base";
static NSString *kXMLLangKey = @"xml:lang";
static NSString *kTextHTMLValue = @"text/html";
static NSString *kRelatedValue = @"related";
static NSString *kEnclosureValue = @"enclosure";
static NSString *kShortURLValue = @"shorturl";
static NSString *kHTMLValue = @"html";
static NSString *kEnValue = @"en";
static NSString *kTextValue = @"text";
static NSString *kSelfValue = @"self";
static NSString *kLengthKey = @"length";
static NSString *kTitleKey = @"title";
// C-string names paired with a length for use with RSSAXEqualTags().
// Each *Length is the character count plus one (e.g. "id" -> 3); internedStringForValue
// subtracts one before comparing raw bytes.
// NOTE(review): presumably the extra byte covers the NUL terminator so that
// RSSAXEqualTags matches whole names only — confirm in RSSAXParser.
static const char *kID = "id";
static const NSInteger kIDLength = 3;
static const char *kTitle = "title";
static const NSInteger kTitleLength = 6;
static const char *kContent = "content";
static const NSInteger kContentLength = 8;
static const char *kSummary = "summary";
static const NSInteger kSummaryLength = 8;
static const char *kLink = "link";
static const NSInteger kLinkLength = 5;
static const char *kPublished = "published";
static const NSInteger kPublishedLength = 10;
static const char *kIssued = "issued";
static const NSInteger kIssuedLength = 7;
static const char *kUpdated = "updated";
static const NSInteger kUpdatedLength = 8;
static const char *kModified = "modified";
static const NSInteger kModifiedLength = 9;
static const char *kAuthor = "author";
static const NSInteger kAuthorLength = 7;
static const char *kName = "name";
static const NSInteger kNameLength = 5;
static const char *kEmail = "email";
static const NSInteger kEmailLength = 6;
static const char *kURI = "uri";
static const NSInteger kURILength = 4;
static const char *kEntry = "entry";
static const NSInteger kEntryLength = 6;
static const char *kSource = "source";
static const NSInteger kSourceLength = 7;
static const char *kFeed = "feed";
static const NSInteger kFeedLength = 5;
static const char *kType = "type";
static const NSInteger kTypeLength = 5;
static const char *kRel = "rel";
static const NSInteger kRelLength = 4;
static const char *kAlternate = "alternate";
static const NSInteger kAlternateLength = 10;
static const char *kHref = "href";
static const NSInteger kHrefLength = 5;
static const char *kXML = "xml";
static const NSInteger kXMLLength = 4;
static const char *kBase = "base";
static const NSInteger kBaseLength = 5;
static const char *kLang = "lang";
static const NSInteger kLangLength = 5;
static const char *kTextHTML = "text/html";
static const NSInteger kTextHTMLLength = 10;
static const char *kRelated = "related";
static const NSInteger kRelatedLength = 8;
static const char *kShortURL = "shorturl";
static const NSInteger kShortURLLength = 9;
static const char *kHTML = "html";
static const NSInteger kHTMLLength = 5;
static const char *kEn = "en";
static const NSInteger kEnLength = 3;
static const char *kText = "text";
static const NSInteger kTextLength = 5;
static const char *kSelf = "self";
static const NSInteger kSelfLength = 5;
static const char *kEnclosure = "enclosure";
static const NSInteger kEnclosureLength = 10;
static const char *kLength = "length";
static const NSInteger kLengthLength = 7;
#pragma mark - Parsing
/// Stamps the parse time, then feeds the whole document to the SAX parser.
- (void)parse {
    self.dateParsed = [NSDate date];

    @autoreleasepool {
        RSSAXParser *saxParser = self.parser;
        [saxParser parseData:self.feedData];
        [saxParser finishParsing];
    }
}
/// Appends a new, empty article for the current feed URL; it becomes currentArticle.
- (void)addArticle {
    RSParsedArticle *newArticle = [[RSParsedArticle alloc] initWithFeedURL:self.urlString];
    newArticle.dateParsed = self.dateParsed;
    [self.articles addObject:newArticle];
}
/// The most recently added article, or nil when none has been added yet.
- (RSParsedArticle *)currentArticle {
    return [self.articles lastObject];
}
/// The attributes of the innermost open element (top of the attributes stack).
- (NSDictionary *)currentAttributes {
    return [self.attributesStack lastObject];
}
/// Parses the SAX parser's current character data as a date.
- (NSDate *)currentDate {
    NSData *characters = self.parser.currentCharacters;
    return RSDateWithBytes(characters.bytes, characters.length);
}
/// Records the feed-level link from the attributes of the current link element.
///
/// Only the first link is kept: once `link` is non-empty this is a no-op. The href
/// is taken when the element's rel is "alternate".
- (void)addFeedLink {
    if (self.link.length > 0) {
        return;
    }

    NSDictionary *attributes = self.currentAttributes;
    NSString *rel = attributes[kRelKey];
    // RFC 4287 section 4.2.7.2: a link without a rel attribute is an "alternate" link.
    // -addLink applies the same default for entry-level links.
    if (rel.length < 1) {
        rel = kAlternateValue;
    }

    // Compare by value, not by pointer, so this does not depend on the SAX parser
    // handing back the interned constant.
    if ([rel isEqualToString:kAlternateValue]) {
        self.link = attributes[kHrefKey];
    }
}
/// Sets the feed title from the current text, unless a non-empty title is already set.
- (void)addFeedTitle {
    if (self.title.length >= 1) {
        return;
    }
    self.title = [self currentString];
}
/// Sets the feed language from the current element's xml:lang attribute, unless a
/// non-empty language is already set.
- (void)addFeedLanguage {
    // `length` is unsigned, so the previous `< 0` test could never be true and the
    // language was never recorded.
    if (self.language.length < 1) {
        self.language = self.currentAttributes[kXMLLangKey];
    }
}
/// Applies the current link element to the current article, according to its rel:
/// - "related": sets the article's link, if not already set;
/// - "alternate" (also the default when rel is missing or empty): sets the permalink,
///   if not already set;
/// - "enclosure": adds an enclosure built from the attributes.
/// Links without an href are ignored, as are other rel values.
- (void)addLink {
    NSDictionary *attributes = self.currentAttributes;

    NSString *urlString = attributes[kHrefKey];
    if (urlString.length < 1) {
        return;
    }

    RSParsedArticle *article = self.currentArticle;

    NSString *rel = attributes[kRelKey];
    if (rel.length < 1) {
        rel = kAlternateValue;
    }

    // Compare by value, not by pointer, so matching does not depend on the attribute
    // value having been interned to these exact constants.
    if ([rel isEqualToString:kRelatedValue]) {
        if (!article.link) {
            article.link = urlString;
        }
    }
    else if ([rel isEqualToString:kAlternateValue]) {
        if (!article.permalink) {
            article.permalink = urlString;
        }
    }
    else if ([rel isEqualToString:kEnclosureValue]) {
        RSParsedEnclosure *enclosure = [self enclosureWithURLString:urlString attributes:attributes];
        [article addEnclosure:enclosure];
    }
}
/// Builds an enclosure for `urlString`, taking title, MIME type and length from the
/// link element's title, type and length attributes.
- (RSParsedEnclosure *)enclosureWithURLString:(NSString *)urlString attributes:(NSDictionary *)attributes {
    NSString *lengthText = attributes[kLengthKey];

    RSParsedEnclosure *result = [[RSParsedEnclosure alloc] init];
    result.url = urlString;
    result.title = attributes[kTitleKey];
    result.mimeType = attributes[kTypeKey];
    result.length = [lengthText integerValue];
    return result;
}
/// Sets the current article's body from the current text, replacing any existing body.
- (void)addContent {
    RSParsedArticle *article = self.currentArticle;
    article.body = [self currentString];
}
/// Uses the current text as the article body only when no body has been set yet.
- (void)addSummary {
    RSParsedArticle *article = self.currentArticle;
    if (article.body) {
        return;
    }
    article.body = [self currentString];
}
/// The SAX parser's current character data, as a whitespace-trimmed string.
- (NSString *)currentString {
    return [self.parser currentStringWithTrimmedWhitespace];
}
/// Applies one just-closed, un-prefixed entry child element to the current article.
///
/// Handles id, title, content, summary, link, published and updated, plus the Atom 0.3
/// issued and modified dates (which only fill in a date that is still missing).
/// Elements with a namespace prefix, and any other names, are ignored.
- (void)addArticleElement:(const xmlChar *)localName prefix:(const xmlChar *)prefix {
    if (prefix != NULL) {
        return;
    }

    RSParsedArticle *article = self.currentArticle;

    if (RSSAXEqualTags(localName, kID, kIDLength)) {
        article.guid = [self currentString];
        return;
    }
    if (RSSAXEqualTags(localName, kTitle, kTitleLength)) {
        article.title = [self currentString];
        return;
    }
    if (RSSAXEqualTags(localName, kContent, kContentLength)) {
        [self addContent];
        return;
    }
    if (RSSAXEqualTags(localName, kSummary, kSummaryLength)) {
        [self addSummary];
        return;
    }
    if (RSSAXEqualTags(localName, kLink, kLinkLength)) {
        [self addLink];
        return;
    }
    if (RSSAXEqualTags(localName, kPublished, kPublishedLength)) {
        article.datePublished = self.currentDate;
        return;
    }
    if (RSSAXEqualTags(localName, kUpdated, kUpdatedLength)) {
        article.dateModified = self.currentDate;
        return;
    }

    // Atom 0.3 dates
    if (RSSAXEqualTags(localName, kIssued, kIssuedLength)) {
        if (!article.datePublished) {
            article.datePublished = self.currentDate;
        }
        return;
    }
    if (RSSAXEqualTags(localName, kModified, kModifiedLength)) {
        if (!article.dateModified) {
            article.dateModified = self.currentDate;
        }
    }
}
/// Appends an opening tag for `localName` to the XHTML buffer, including the current
/// element's attributes. Double quotes inside attribute values are written as &quot;.
- (void)addXHTMLTag:(const xmlChar *)localName {
    if (localName == NULL) {
        return;
    }

    NSMutableString *markup = self.xhtmlString;
    NSDictionary *attributes = self.currentAttributes;

    [markup appendString:@"<"];
    [markup appendString:[NSString stringWithUTF8String:(const char *)localName]];

    // With no attributes the loop body never runs and the tag is closed right away.
    for (NSString *attributeName in attributes) {
        NSString *rawValue = attributes[attributeName];
        NSString *escapedValue = [rawValue stringByReplacingOccurrencesOfString:@"\"" withString:@"&quot;"];

        [markup appendString:@" "];
        [markup appendString:attributeName];
        [markup appendString:@"=\""];
        [markup appendString:escapedValue];
        [markup appendString:@"\""];
    }

    [markup appendString:@">"];
}
#pragma mark - RSSAXParserDelegate
/// SAX callback for an opening tag. Pushes the element's attributes and updates the
/// parsing state (article / author / source / XHTML capture) according to the tag name.
- (void)saxParser:(RSSAXParser *)SAXParser XMLStartElement:(const xmlChar *)localName prefix:(const xmlChar *)prefix uri:(const xmlChar *)uri numberOfNamespaces:(NSInteger)numberOfNamespaces namespaces:(const xmlChar **)namespaces numberOfAttributes:(NSInteger)numberOfAttributes numberDefaulted:(int)numberDefaulted attributes:(const xmlChar **)attributes {
// Nothing after the closing feed tag is processed.
if (self.endFeedFound) {
return;
}
// Push this element's attributes (an empty dictionary when there are none);
// the end-element callback pops them again.
NSDictionary *xmlAttributes = [self.parser attributesDictionary:attributes numberOfAttributes:numberOfAttributes];
if (!xmlAttributes) {
xmlAttributes = [NSDictionary dictionary];
}
[self.attributesStack addObject:xmlAttributes];
// Inside XHTML content every tag is re-serialized into xhtmlString, not interpreted.
if (self.parsingXHTML) {
[self addXHTMLTag:localName];
return;
}
if (RSSAXEqualTags(localName, kEntry, kEntryLength)) {
self.parsingArticle = YES;
[self addArticle];
return;
}
if (RSSAXEqualTags(localName, kAuthor, kAuthorLength)) {
self.parsingAuthor = YES;
self.currentAuthor = [[RSParsedAuthor alloc] init];
return;
}
if (RSSAXEqualTags(localName, kSource, kSourceLength)) {
self.parsingSource = YES;
return;
}
BOOL isContentTag = RSSAXEqualTags(localName, kContent, kContentLength);
BOOL isSummaryTag = RSSAXEqualTags(localName, kSummary, kSummaryLength);
if (self.parsingArticle && (isContentTag || isSummaryTag)) {
// Only content (not summary) supplies the article language.
if (isContentTag) {
self.currentArticle.language = xmlAttributes[kXMLLangKey];
}
// type="xhtml" switches to markup capture until the matching end tag.
NSString *contentType = xmlAttributes[kTypeKey];
if ([contentType isEqualToString:kXHTMLType]) {
self.parsingXHTML = YES;
self.xhtmlString = [NSMutableString stringWithString:@""];
return;
}
}
// A link outside of any entry is a feed-level link.
if (!self.parsingArticle && RSSAXEqualTags(localName, kLink, kLinkLength)) {
[self addFeedLink];
return;
}
if (RSSAXEqualTags(localName, kFeed, kFeedLength)) {
[self addFeedLanguage];
}
// Reached only when none of the branches above returned early.
// NOTE(review): presumably this makes the SAX parser accumulate this element's
// character data — confirm in RSSAXParser.
[self.parser beginStoringCharacters];
}
/// SAX callback for a closing tag. Finishes whatever the element was collecting
/// (XHTML body, author fields, article fields, feed title) and pops its attributes.
- (void)saxParser:(RSSAXParser *)SAXParser XMLEndElement:(const xmlChar *)localName prefix:(const xmlChar *)prefix uri:(const xmlChar *)uri {
// The closing feed tag ends all processing; later callbacks return immediately.
if (RSSAXEqualTags(localName, kFeed, kFeedLength)) {
self.endFeedFound = YES;
return;
}
if (self.endFeedFound) {
return;
}
if (self.parsingXHTML) {
BOOL isContentTag = RSSAXEqualTags(localName, kContent, kContentLength);
BOOL isSummaryTag = RSSAXEqualTags(localName, kSummary, kSummaryLength);
if (self.parsingArticle && (isContentTag || isSummaryTag)) {
// Content always becomes the body; summary only fills in a missing body.
if (isContentTag) {
self.currentArticle.body = [self.xhtmlString copy];
}
else if (isSummaryTag) {
if (self.currentArticle.body.length < 1) {
self.currentArticle.body = [self.xhtmlString copy];
}
}
}
if (isContentTag || isSummaryTag) {
self.parsingXHTML = NO;
}
// The closing tag is appended after the body was copied above, so the enclosing
// content/summary tag itself does not end up in the article body.
[self.xhtmlString appendString:@"</"];
[self.xhtmlString appendString:[NSString stringWithUTF8String:(const char *)localName]];
[self.xhtmlString appendString:@">"];
}
else if (self.parsingAuthor) {
if (RSSAXEqualTags(localName, kAuthor, kAuthorLength)) {
self.parsingAuthor = NO;
// Only authors with at least one field set are attached to the current article.
RSParsedAuthor *author = self.currentAuthor;
if (author.name || author.emailAddress || author.url) {
[self.currentArticle addAuthor:author];
}
self.currentAuthor = nil;
}
else if (RSSAXEqualTags(localName, kName, kNameLength)) {
self.currentAuthor.name = [self currentString];
}
else if (RSSAXEqualTags(localName, kEmail, kEmailLength)) {
self.currentAuthor.emailAddress = [self currentString];
}
else if (RSSAXEqualTags(localName, kURI, kURILength)) {
self.currentAuthor.url = [self currentString];
}
}
else if (RSSAXEqualTags(localName, kEntry, kEntryLength)) {
self.parsingArticle = NO;
}
// Children of a source element are not applied to the article.
else if (self.parsingArticle && !self.parsingSource) {
[self addArticleElement:localName prefix:prefix];
}
else if (RSSAXEqualTags(localName, kSource, kSourceLength)) {
self.parsingSource = NO;
}
else if (!self.parsingArticle && !self.parsingSource && RSSAXEqualTags(localName, kTitle, kTitleLength)) {
[self addFeedTitle];
}
// Balances the push done in the start-element callback.
[self.attributesStack removeLastObject];
}
/// Returns a shared constant for well-known attribute names, or nil for all others.
///
/// With a prefix, only xml:base and xml:lang are recognised. Without one, the known
/// names are rel, type, href, alternate, length and title.
- (NSString *)saxParser:(RSSAXParser *)SAXParser internedStringForName:(const xmlChar *)name prefix:(const xmlChar *)prefix {
    if (prefix) {
        if (RSSAXEqualTags(prefix, kXML, kXMLLength)) {
            if (RSSAXEqualTags(name, kBase, kBaseLength)) {
                return kXMLBaseKey;
            }
            if (RSSAXEqualTags(name, kLang, kLangLength)) {
                return kXMLLangKey;
            }
        }
        // Any other prefixed name is not interned.
        return nil;
    }

    NSString *internedName = nil;
    if (RSSAXEqualTags(name, kRel, kRelLength)) {
        internedName = kRelKey;
    }
    else if (RSSAXEqualTags(name, kType, kTypeLength)) {
        internedName = kTypeKey;
    }
    else if (RSSAXEqualTags(name, kHref, kHrefLength)) {
        internedName = kHrefKey;
    }
    else if (RSSAXEqualTags(name, kAlternate, kAlternateLength)) {
        internedName = kAlternateValue;
    }
    else if (RSSAXEqualTags(name, kLength, kLengthLength)) {
        internedName = kLengthKey;
    }
    else if (RSSAXEqualTags(name, kTitle, kTitleLength)) {
        internedName = kTitleKey;
    }
    return internedName;
}
/// Returns YES when the first `length` bytes of the two buffers are identical.
static BOOL equalBytes(const void *bytes1, const void *bytes2, NSUInteger length) {
    const int comparison = memcmp(bytes1, bytes2, length);
    return comparison == 0;
}
/// Returns a shared constant when the attribute value bytes exactly match one of the
/// well-known values (alternate, enclosure, text/html, related, shorturl, html, en,
/// text, self); returns nil otherwise.
- (NSString *)saxParser:(RSSAXParser *)SAXParser internedStringForValue:(const void *)bytes length:(NSUInteger)length {
    // Parallel tables, checked in this order. The k*Length constants are one larger
    // than the character count, hence the "- 1" below.
    const char *candidateBytes[] = {kAlternate, kEnclosure, kTextHTML, kRelated, kShortURL, kHTML, kEn, kText, kSelf};
    const NSInteger candidateLengths[] = {kAlternateLength, kEnclosureLength, kTextHTMLLength, kRelatedLength, kShortURLLength, kHTMLLength, kEnLength, kTextLength, kSelfLength};
    NSString *candidateValues[] = {kAlternateValue, kEnclosureValue, kTextHTMLValue, kRelatedValue, kShortURLValue, kHTMLValue, kEnValue, kTextValue, kSelfValue};

    const size_t candidateCount = sizeof(candidateBytes) / sizeof(candidateBytes[0]);
    for (size_t candidateIndex = 0; candidateIndex < candidateCount; candidateIndex++) {
        const NSUInteger valueLength = (NSUInteger)(candidateLengths[candidateIndex] - 1);
        if (length == valueLength && equalBytes(bytes, candidateBytes[candidateIndex], valueLength)) {
            return candidateValues[candidateIndex];
        }
    }

    return nil;
}
/// SAX callback for character data. Only used while capturing XHTML content: the text
/// is appended to the XHTML buffer with <, > and & re-encoded as entities.
- (void)saxParser:(RSSAXParser *)SAXParser XMLCharactersFound:(const unsigned char *)characters length:(NSUInteger)length {
    if (!self.parsingXHTML) {
        return;
    }

    // Wrap the parser's buffer without copying; the string is only used within this call.
    NSString *text = [[NSString alloc] initWithBytesNoCopy:(void *)characters length:length encoding:NSUTF8StringEncoding freeWhenDone:NO];
    if (text == nil) {
        return;
    }

    // libxml decodes all entities; we need to re-encode certain characters
    // (<, >, and &) when inside XHTML text content.
    NSString *encodedText = [text rsparser_stringByEncodingRequiredEntities];
    [self.xhtmlString appendString:encodedText];
}
@end

View File

@@ -0,0 +1,22 @@
//
// RSDateParser.h
// RSParser
//
// Created by Brent Simmons on 3/25/15.
// Copyright (c) 2015 Ranchero Software, LLC. All rights reserved.
//
@import Foundation;
// Common web dates -- RFC 822 and ISO 8601 -- are handled here: the formats you find in JSON and XML feeds.
// These functions may return nil. Given bad input, they may also return a garbage date.

// Parses a date from a string.
NSDate *RSDateWithString(NSString *dateString);

// Parses a date directly from bytes. If you're using a SAX parser, you already have the
// bytes and don't need to convert them to a string first: it's faster and uses less memory.
// (Assumes bytes are UTF-8 or ASCII. If you're using the libxml SAX parser, this will work.)
NSDate *RSDateWithBytes(const char *bytes, NSUInteger numberOfBytes);

View File

@@ -0,0 +1,461 @@
//
// RSDateParser.m
// RSParser
//
// Created by Brent Simmons on 3/25/15.
// Copyright (c) 2015 Ranchero Software, LLC. All rights reserved.
//
#import "RSDateParser.h"
#import <time.h>
// One row of the time zone lookup table: an abbreviation and its offset,
// stored as separate hour and minute parts.
typedef struct {
const char *abbreviation; // e.g. "PDT"
const NSInteger offsetHours; // signed hour part of the offset
const NSInteger offsetMinutes; // minute part; never negative in the table below
} RSTimeZoneAbbreviationAndOffset;
// Number of rows in timeZoneTable; must be kept in sync with the initializer below.
#define kNumberOfTimeZones 96
// Time zone abbreviations and their offsets (GMT is the zero entry).
// For negative offsets the minutes are still stored as a positive number
// (e.g. {"NT", -3, 30}).
// NOTE(review): how hours and minutes are combined is not visible in this part of the
// file — confirm that the lookup code applies the sign of the hours to the minutes.
static const RSTimeZoneAbbreviationAndOffset timeZoneTable[kNumberOfTimeZones] = {
{"GMT", 0, 0}, //Most common at top, for performance
{"PDT", -7, 0}, {"PST", -8, 0}, {"EST", -5, 0}, {"EDT", -4, 0},
{"MDT", -6, 0}, {"MST", -7, 0}, {"CST", -6, 0}, {"CDT", -5, 0},
{"ACT", -8, 0}, {"AFT", 4, 30}, {"AMT", 4, 0}, {"ART", -3, 0},
{"AST", 3, 0}, {"AZT", 4, 0}, {"BIT", -12, 0}, {"BDT", 8, 0},
{"ACST", 9, 30}, {"AEST", 10, 0}, {"AKST", -9, 0}, {"AMST", 5, 0},
{"AWST", 8, 0}, {"AZOST", -1, 0}, {"BIOT", 6, 0}, {"BRT", -3, 0},
{"BST", 6, 0}, {"BTT", 6, 0}, {"CAT", 2, 0}, {"CCT", 6, 30},
{"CET", 1, 0}, {"CEST", 2, 0}, {"CHAST", 12, 45}, {"ChST", 10, 0},
{"CIST", -8, 0}, {"CKT", -10, 0}, {"CLT", -4, 0}, {"CLST", -3, 0},
{"COT", -5, 0}, {"COST", -4, 0}, {"CVT", -1, 0}, {"CXT", 7, 0},
{"EAST", -6, 0}, {"EAT", 3, 0}, {"ECT", -4, 0}, {"EEST", 3, 0},
{"EET", 2, 0}, {"FJT", 12, 0}, {"FKST", -4, 0}, {"GALT", -6, 0},
{"GET", 4, 0}, {"GFT", -3, 0}, {"GILT", 7, 0}, {"GIT", -9, 0},
{"GST", -2, 0}, {"GYT", -4, 0}, {"HAST", -10, 0}, {"HKT", 8, 0},
{"HMT", 5, 0}, {"IRKT", 8, 0}, {"IRST", 3, 30}, {"IST", 2, 0},
{"JST", 9, 0}, {"KRAT", 7, 0}, {"KST", 9, 0}, {"LHST", 10, 30},
{"LINT", 14, 0}, {"MAGT", 11, 0}, {"MIT", -9, 30}, {"MSK", 3, 0},
{"MUT", 4, 0}, {"NDT", -2, 30}, {"NFT", 11, 30}, {"NPT", 5, 45},
{"NT", -3, 30}, {"OMST", 6, 0}, {"PETT", 12, 0}, {"PHOT", 13, 0},
{"PKT", 5, 0}, {"RET", 4, 0}, {"SAMT", 4, 0}, {"SAST", 2, 0},
{"SBT", 11, 0}, {"SCT", 4, 0}, {"SLT", 5, 30}, {"SST", 8, 0},
{"TAHT", -10, 0}, {"THA", 7, 0}, {"UYT", -3, 0}, {"UYST", -2, 0},
{"VET", -4, 30}, {"VLAT", 10, 0}, {"WAT", 1, 0}, {"WET", 0, 0},
{"WEST", 1, 0}, {"YAKT", 9, 0}, {"YEKT", 5, 0}
}; /*See http://en.wikipedia.org/wiki/List_of_time_zone_abbreviations for list*/
#pragma mark - Parser
/// 1-based month numbers (January is 1, December is 12).
enum {
	RSJanuary   = 1,
	RSFebruary  = 2,
	RSMarch     = 3,
	RSApril     = 4,
	RSMay       = 5,
	RSJune      = 6,
	RSJuly      = 7,
	RSAugust    = 8,
	RSSeptember = 9,
	RSOctober   = 10,
	RSNovember  = 11,
	RSDecember  = 12
};
/// Scans forward from startingIndex for the next run of letters and maps it to a month.
///
/// Months are 1-based -- January is 1, Dec is 12.
/// Lots of short-circuits here. Not strict. GIGO: months with a unique first letter
/// (F, S, O, N, D) are decided on that letter alone; J, M and A look at up to three letters.
///
/// @param bytes          The date bytes (need not be NUL-terminated).
/// @param numberOfBytes  Length of bytes.
/// @param startingIndex  Index at which to start scanning; leading non-letters are skipped.
/// @param finalIndex     On return, the index of the last byte examined. Left untouched
///                       when startingIndex >= numberOfBytes.
/// @return The month number, or NSNotFound when fewer than two letters were found
///         (and the first letter did not decide the month by itself).
static NSInteger nextMonthValue(const char *bytes, NSUInteger numberOfBytes, NSUInteger startingIndex, NSUInteger *finalIndex) {
	NSUInteger numberOfAlphaCharactersFound = 0;
	char monthCharacters[3] = {0, 0, 0};

	for (NSUInteger i = startingIndex; i < numberOfBytes; i++) {
		*finalIndex = i;
		const char character = bytes[i];

		// <ctype.h> functions require a value representable as unsigned char; a plain
		// (possibly negative) char from UTF-8 input is not. Compare against 0 instead of
		// casting the int result to BOOL, which could truncate a nonzero value to NO.
		const BOOL isAlphaCharacter = isalpha((unsigned char)character) != 0;

		if (!isAlphaCharacter) {
			if (numberOfAlphaCharactersFound < 1) {
				continue; // Still skipping leading separators.
			}
			break; // End of the run of letters.
		}

		numberOfAlphaCharactersFound++;

		if (numberOfAlphaCharactersFound == 1) {
			// These first letters identify the month unambiguously.
			switch (character) {
				case 'F': case 'f':
					return RSFebruary;
				case 'S': case 's':
					return RSSeptember;
				case 'O': case 'o':
					return RSOctober;
				case 'N': case 'n':
					return RSNovember;
				case 'D': case 'd':
					return RSDecember;
				default:
					break;
			}
		}

		monthCharacters[numberOfAlphaCharactersFound - 1] = character;
		if (numberOfAlphaCharactersFound >= 3) {
			break;
		}
	}

	if (numberOfAlphaCharactersFound < 2) {
		return NSNotFound;
	}

	const char first = monthCharacters[0];
	const char second = monthCharacters[1];
	const char third = monthCharacters[2]; // 0 when only two letters were found

	if (first == 'J' || first == 'j') { //Jan, Jun, Jul
		if (second == 'a' || second == 'A') {
			return RSJanuary;
		}
		if (second == 'u' || second == 'U') {
			if (third == 'n' || third == 'N') {
				return RSJune;
			}
			return RSJuly;
		}
		return RSJanuary;
	}

	if (first == 'M' || first == 'm') { //March, May
		if (third == 'y' || third == 'Y') {
			return RSMay;
		}
		return RSMarch;
	}

	if (first == 'A' || first == 'a') { //April, August
		if (second == 'u' || second == 'U') {
			return RSAugust;
		}
		return RSApril;
	}

	// Reached when the letters don't start any month name (garbage input): fall back to January.
	return RSJanuary;
}
/// Scans forward from startingIndex for the next run of decimal digits and returns its value.
///
/// @param bytes                  The date bytes (need not be NUL-terminated).
/// @param numberOfBytes          Length of bytes.
/// @param startingIndex          Index at which to start scanning; leading non-digits are skipped.
/// @param maximumNumberOfDigits  Stop after this many digits. Clamped to 4 (enough for years
///                               and time zone offsets).
/// @param finalIndex             On return, the index of the last byte examined: the last digit
///                               consumed when the digit limit was hit, otherwise the non-digit
///                               that ended the run (or the last byte of input). Left untouched
///                               when startingIndex >= numberOfBytes.
/// @return The parsed value, or NSNotFound when no digit was found.
static NSInteger nextNumericValue(const char *bytes, NSUInteger numberOfBytes, NSUInteger startingIndex, NSUInteger maximumNumberOfDigits, NSUInteger *finalIndex) {
	if (maximumNumberOfDigits > 4) {
		maximumNumberOfDigits = 4;
	}

	NSUInteger numberOfDigitsFound = 0;
	NSInteger value = 0;

	for (NSUInteger i = startingIndex; i < numberOfBytes; i++) {
		*finalIndex = i;

		// <ctype.h> functions require a value representable as unsigned char; compare the
		// int result against 0 rather than truncating it to BOOL.
		const BOOL isDigit = isdigit((unsigned char)bytes[i]) != 0;

		if (!isDigit) {
			if (numberOfDigitsFound < 1) {
				continue; // Still skipping leading separators.
			}
			break; // End of the run of digits.
		}

		value = (value * 10) + (bytes[i] - '0');
		numberOfDigitsFound++;
		if (numberOfDigitsFound >= maximumNumberOfDigits) {
			break;
		}
	}

	if (numberOfDigitsFound < 1) {
		return NSNotFound;
	}
	return value;
}
/// Returns YES when the NUL-terminated C string s contains at least one alphabetic character.
static BOOL hasAtLeastOneAlphaCharacter(const char *s) {
	for (const char *cursor = s; *cursor != '\0'; cursor++) {
		// <ctype.h> functions require a value representable as unsigned char.
		if (isalpha((unsigned char)*cursor)) {
			return YES;
		}
	}
	return NO;
}
#pragma mark - Time Zones and offsets
/// Looks up a time zone abbreviation (exact, case-sensitive match) and returns its offset
/// from GMT in seconds. Returns 0 when the abbreviation is not in the table.
///
/// Linear search should be fine. It's a C array, and short (under 100 items).
/// Most common time zones are at the beginning of the array. (We can tweak this as needed.)
static NSInteger offsetInSecondsForTimeZoneAbbreviation(const char *abbreviation) {
	for (NSUInteger zoneIndex = 0; zoneIndex < kNumberOfTimeZones; zoneIndex++) {
		const RSTimeZoneAbbreviationAndOffset *zone = &timeZoneTable[zoneIndex];
		if (strcmp(abbreviation, zone->abbreviation) != 0) {
			continue;
		}

		const NSInteger hourSeconds = zone->offsetHours * 60 * 60;
		const NSInteger minuteSeconds = zone->offsetMinutes * 60;

		// Minutes are stored unsigned, so they extend the offset away from zero.
		return (zone->offsetHours < 0) ? (hourSeconds - minuteSeconds) : (hourSeconds + minuteSeconds);
	}

	return 0;
}
/// Converts a numeric offset such as "+0700" or "-0530" (NUL-terminated, colons already
/// removed) to seconds from GMT. The first two digits are hours, the next two are minutes.
/// Anything not starting with '+' is treated as a negative offset.
static NSInteger offsetInSecondsForOffsetCharacters(const char *timeZoneCharacters) {
	const NSUInteger length = strlen(timeZoneCharacters);
	NSUInteger finalIndex = 0;

	NSInteger hours = nextNumericValue(timeZoneCharacters, length, 0, 2, &finalIndex);
	NSInteger minutes = nextNumericValue(timeZoneCharacters, length, finalIndex + 1, 2, &finalIndex);

	if (hours == NSNotFound) {
		hours = 0;
	}
	if (minutes == NSNotFound) {
		minutes = 0;
	}

	const NSInteger magnitude = (hours * 60 * 60) + (minutes * 60);
	if (magnitude == 0) {
		return 0;
	}

	const BOOL isPlus = (timeZoneCharacters[0] == '+');
	return isPlus ? magnitude : (0 - magnitude);
}
static const char *rs_GMT = "GMT";
static const char *rs_UTC = "UTC";

/// Parses the time zone that starts at startingIndex and returns its offset from GMT in seconds.
///
/// Examples: GMT Z +0000 -0000 +07:00 -0700 PDT EST
/// Up to five significant characters (digits, letters, '+', '-') are collected; colons and
/// spaces are dropped and any other byte is ignored. If numeric, the offset is calculated
/// from the digits. If alphabetic, Z, GMT and UTC are special-cased to 0; otherwise the
/// abbreviation is looked up in the time zone table.
///
/// @return Seconds from GMT; 0 when nothing was found or the zone is unknown.
static NSInteger parsedTimeZoneOffset(const char *bytes, NSUInteger numberOfBytes, NSUInteger startingIndex) {
	char timeZoneCharacters[6] = {0, 0, 0, 0, 0, 0}; // five characters plus the NUL terminator
	NSUInteger numberOfCharactersFound = 0;

	for (NSUInteger i = startingIndex; i < numberOfBytes; i++) {
		const char ch = bytes[i];
		if (ch == ':' || ch == ' ') {
			continue;
		}

		// <ctype.h> functions require a value representable as unsigned char.
		const unsigned char uch = (unsigned char)ch;
		if (isdigit(uch) || isalpha(uch) || ch == '+' || ch == '-') {
			timeZoneCharacters[numberOfCharactersFound] = ch;
			numberOfCharactersFound++;
		}
		if (numberOfCharactersFound >= 5) {
			break;
		}
	}

	if (numberOfCharactersFound < 1 || timeZoneCharacters[0] == 'Z' || timeZoneCharacters[0] == 'z') {
		return 0;
	}
	if (strcasestr(timeZoneCharacters, rs_GMT) != NULL || strcasestr(timeZoneCharacters, rs_UTC) != NULL) {
		return 0;
	}

	if (hasAtLeastOneAlphaCharacter(timeZoneCharacters)) {
		return offsetInSecondsForTimeZoneAbbreviation(timeZoneCharacters);
	}
	return offsetInSecondsForOffsetCharacters(timeZoneCharacters);
}
#pragma mark - Date Creation
/// Builds an NSDate from broken-down date fields expressed in a zone that is
/// timeZoneOffset seconds east of GMT.
///
/// @param month           1-based (January is 1).
/// @param milliseconds    Fraction of a second, 0-999.
/// @param timeZoneOffset  Seconds from GMT of the zone the other fields are expressed in.
static NSDate *dateWithYearMonthDayHourMinuteSecondAndTimeZoneOffset(NSInteger year, NSInteger month, NSInteger day, NSInteger hour, NSInteger minute, NSInteger second, NSInteger milliseconds, NSInteger timeZoneOffset) {
	struct tm timeInfo;
	timeInfo.tm_sec = (int)second;
	timeInfo.tm_min = (int)minute;
	timeInfo.tm_hour = (int)hour;
	timeInfo.tm_mday = (int)day;
	timeInfo.tm_mon = (int)(month - 1); //It's 1-based coming in
	timeInfo.tm_year = (int)(year - 1900); //see time.h -- it's years since 1900
	timeInfo.tm_wday = -1;
	timeInfo.tm_yday = -1;
	timeInfo.tm_isdst = -1;
	timeInfo.tm_gmtoff = 0;
	timeInfo.tm_zone = NULL;

	//timegm instead of mktime (which uses local time zone)
	const time_t utcTime = timegm(&timeInfo);

	// timegm reports failure as (time_t)-1. Check the raw return value, before the time
	// zone offset is applied -- otherwise the failure is only detected when the offset is 0.
	if (utcTime == (time_t)-1) {
		/*NSCalendar is super-amazingly-slow (which is partly why RSDateParser exists), so this is used only when the date is far enough in the future (19 January 2038 03:14:08Z on 32-bit systems) that timegm fails. If profiling says that this is a performance issue, then you've got a weird app that needs to work with dates far in the future.*/
		NSDateComponents *dateComponents = [NSDateComponents new];
		dateComponents.timeZone = [NSTimeZone timeZoneForSecondsFromGMT:timeZoneOffset];
		dateComponents.year = year;
		dateComponents.month = month;
		dateComponents.day = day;
		dateComponents.hour = hour;
		dateComponents.minute = minute;
		dateComponents.second = second;
		if (milliseconds > 0) {
			dateComponents.nanosecond = milliseconds * 1000000;
		}

		// Feed dates are always Gregorian; the user's current calendar may not be.
		NSCalendar *gregorianCalendar = [NSCalendar calendarWithIdentifier:NSCalendarIdentifierGregorian];
		return [gregorianCalendar dateFromComponents:dateComponents];
	}

	NSTimeInterval rawTime = (NSTimeInterval)(utcTime - timeZoneOffset);
	if (milliseconds > 0) {
		rawTime += ((float)milliseconds / 1000.0f);
	}
	return [NSDate dateWithTimeIntervalSince1970:rawTime];
}
#pragma mark - Standard Formats
/// Parses an RFC 822-style date (RSS pubDate). Lenient; handles shapes such as:
///
///   @"EEE',' dd MMM yyyy HH':'mm':'ss ZZZ"
///   @"EEE, dd MMM yyyy HH:mm:ss zzz"
///   @"dd MMM yyyy HH:mm zzz"
///   @"dd MMM yyyy HH:mm ZZZ"
///   @"EEE, dd MMM yyyy"
///   @"EEE, dd MMM yyyy HH:mm zzz"
///   etc.
///
/// Fields are read left to right: day, month name, year, hour, minute, optional ":seconds",
/// optional " zone". A field that cannot be found falls back to its default
/// (1 January 1970, 00:00:00, GMT) instead of leaking NSNotFound into the date math.
static NSDate *RSParsePubDateWithBytes(const char *bytes, NSUInteger numberOfBytes) {
	NSUInteger finalIndex = 0;

	NSInteger day = nextNumericValue(bytes, numberOfBytes, 0, 2, &finalIndex);
	if (day < 1 || day == NSNotFound) {
		day = 1;
	}

	NSInteger month = nextMonthValue(bytes, numberOfBytes, finalIndex + 1, &finalIndex);
	if (month == NSNotFound) {
		month = RSJanuary;
	}

	NSInteger year = nextNumericValue(bytes, numberOfBytes, finalIndex + 1, 4, &finalIndex);
	if (year == NSNotFound) {
		year = 1970;
	}

	NSInteger hour = nextNumericValue(bytes, numberOfBytes, finalIndex + 1, 2, &finalIndex);
	if (hour == NSNotFound) {
		hour = 0;
	}

	NSInteger minute = nextNumericValue(bytes, numberOfBytes, finalIndex + 1, 2, &finalIndex);
	if (minute == NSNotFound) {
		minute = 0;
	}

	// Seconds are optional: only present when a ':' directly follows the minutes.
	NSInteger second = 0;
	NSUInteger currentIndex = finalIndex + 1;
	const BOOL hasSeconds = (currentIndex < numberOfBytes) && (bytes[currentIndex] == ':');
	if (hasSeconds) {
		second = nextNumericValue(bytes, numberOfBytes, currentIndex, 2, &finalIndex);
		if (second == NSNotFound) {
			second = 0;
		}
	}

	// The time zone is optional: only present when a space directly follows the time.
	NSInteger timeZoneOffset = 0;
	currentIndex = finalIndex + 1;
	const BOOL hasTimeZone = (currentIndex < numberOfBytes) && (bytes[currentIndex] == ' ');
	if (hasTimeZone) {
		timeZoneOffset = parsedTimeZoneOffset(bytes, numberOfBytes, currentIndex);
	}

	return dateWithYearMonthDayHourMinuteSecondAndTimeZoneOffset(year, month, day, hour, minute, second, 0, timeZoneOffset);
}
/// Parses a W3C / ISO 8601-style date. Lenient; handles shapes such as:
///
///   @"yyyy'-'MM'-'dd'T'HH':'mm':'ss"
///   @"yyyy-MM-dd'T'HH:mm:sszzz"
///   @"yyyy-MM-dd'T'HH:mm:ss'.'SSSzzz"
///   etc.
///
/// Fields are read left to right: year, month, day, hour, minute, second, optional
/// ".fraction", then the time zone. A field that cannot be found falls back to its default
/// (1 January 1970, 00:00:00, GMT), so a date-only string such as "2010-11-17" yields
/// midnight GMT on that day.
static NSDate *RSParseW3CWithBytes(const char *bytes, NSUInteger numberOfBytes) {
	NSUInteger finalIndex = 0;

	NSInteger year = nextNumericValue(bytes, numberOfBytes, 0, 4, &finalIndex);
	if (year == NSNotFound) {
		year = 1970;
	}

	NSInteger month = nextNumericValue(bytes, numberOfBytes, finalIndex + 1, 2, &finalIndex);
	if (month == NSNotFound) {
		month = RSJanuary;
	}

	NSInteger day = nextNumericValue(bytes, numberOfBytes, finalIndex + 1, 2, &finalIndex);
	if (day == NSNotFound) {
		day = 1;
	}

	NSInteger hour = nextNumericValue(bytes, numberOfBytes, finalIndex + 1, 2, &finalIndex);
	if (hour == NSNotFound) {
		hour = 0;
	}

	NSInteger minute = nextNumericValue(bytes, numberOfBytes, finalIndex + 1, 2, &finalIndex);
	if (minute == NSNotFound) {
		minute = 0;
	}

	NSInteger second = nextNumericValue(bytes, numberOfBytes, finalIndex + 1, 2, &finalIndex);
	if (second == NSNotFound) {
		second = 0;
	}

	// Optional fractional seconds: ".5" is 500 ms, ".516941" is 516 ms. The fraction is
	// scanned by hand so that (a) fewer than three digits are scaled correctly, (b) digits
	// beyond millisecond precision are skipped rather than mistaken for the time zone,
	// and (c) currentIndex always ends on the byte after the last fraction digit, so the
	// sign of a following offset is never swallowed.
	NSInteger milliseconds = 0;
	NSUInteger currentIndex = finalIndex + 1;
	const BOOL hasMilliseconds = (currentIndex < numberOfBytes) && (bytes[currentIndex] == '.');
	if (hasMilliseconds) {
		currentIndex++; // skip the '.'
		NSInteger placeValue = 100;
		while (currentIndex < numberOfBytes && isdigit((unsigned char)bytes[currentIndex])) {
			milliseconds += (bytes[currentIndex] - '0') * placeValue;
			placeValue /= 10; // becomes 0 after three digits, so extra digits add nothing
			currentIndex++;
		}
	}

	const NSInteger timeZoneOffset = parsedTimeZoneOffset(bytes, numberOfBytes, currentIndex);

	return dateWithYearMonthDayHourMinuteSecondAndTimeZoneOffset(year, month, day, hour, minute, second, milliseconds, timeZoneOffset);
}
/// Heuristic: the bytes look like an RFC 822 pubDate when they contain a space or a comma.
static BOOL dateIsPubDate(const char *bytes, NSUInteger numberOfBytes) {
	NSUInteger remaining = numberOfBytes;
	const char *cursor = bytes;

	while (remaining > 0) {
		const char ch = *cursor;
		if (ch == ',' || ch == ' ') {
			return YES;
		}
		cursor++;
		remaining--;
	}

	return NO;
}
/// Heuristic: the bytes look like a W3C date when the first non-whitespace bytes are
/// four digits in a row followed by a '-'.
///
/// Something like 2010-11-17T08:40:07-05:00
/// But might be missing T character in the middle.
static BOOL dateIsW3CDate(const char *bytes, NSUInteger numberOfBytes) {
	for (NSUInteger i = 0; i < numberOfBytes; i++) {
		const char ch = bytes[i];
		if (ch == ' ' || ch == '\r' || ch == '\n' || ch == '\t') {
			continue; // Skip leading whitespace.
		}

		// Only the first non-whitespace position is examined.
		if (numberOfBytes - i < 5) {
			return NO;
		}

		// <ctype.h> functions require a value representable as unsigned char.
		for (NSUInteger digitOffset = 0; digitOffset < 4; digitOffset++) {
			if (!isdigit((unsigned char)bytes[i + digitOffset])) {
				return NO;
			}
		}
		return bytes[i + 4] == '-';
	}

	return NO;
}
/// Returns YES when a date string is too short (under 6 bytes) or too long (over 150 bytes)
/// to be worth parsing.
static BOOL numberOfBytesIsOutsideReasonableRange(NSUInteger numberOfBytes) {
	const NSUInteger minimumLength = 6;
	const NSUInteger maximumLength = 150;
	const BOOL isWithinRange = (numberOfBytes >= minimumLength) && (numberOfBytes <= maximumLength);
	return !isWithinRange;
}
#pragma mark - API
/// Parses a web date from raw bytes. Returns nil when the length is outside 6...150 bytes.
/// W3C detection wins; otherwise input with a space or comma is parsed as a pubDate;
/// everything else falls back to the W3C parser.
NSDate *RSDateWithBytes(const char *bytes, NSUInteger numberOfBytes) {
	if (numberOfBytesIsOutsideReasonableRange(numberOfBytes)) {
		return nil;
	}

	const BOOL shouldParseAsPubDate = !dateIsW3CDate(bytes, numberOfBytes) && dateIsPubDate(bytes, numberOfBytes);
	if (shouldParseAsPubDate) {
		return RSParsePubDateWithBytes(bytes, numberOfBytes);
	}

	// Either detected as W3C, or the fallback in case our detection fails.
	return RSParseW3CWithBytes(bytes, numberOfBytes);
}
/// Parses a web date from an NSString by handing its UTF-8 bytes to RSDateWithBytes.
/// Returns nil when dateString is nil or has no UTF-8 representation.
NSDate *RSDateWithString(NSString *dateString) {
	const char *utf8String = [dateString UTF8String];
	if (utf8String == NULL) {
		// Messaging nil (or a failed conversion) yields NULL; strlen(NULL) would crash.
		return nil;
	}
	return RSDateWithBytes(utf8String, strlen(utf8String));
}

View File

@@ -0,0 +1,35 @@
//
// RSHTMLLinkParser.h
// RSParser
//
// Created by Brent Simmons on 8/7/16.
// Copyright © 2016 Ranchero Software, LLC. All rights reserved.
//
@import Foundation;
NS_ASSUME_NONNULL_BEGIN
/*Returns all <a href="some_url">some_text</a> as RSHTMLLink object array.*/
@class ParserData;
@class RSHTMLLink;
// Extracts the anchor (<a>) tags from an HTML document.
@interface RSHTMLLinkParser : NSObject
// Parses parserData.data synchronously and returns one RSHTMLLink per <a> start tag,
// in document order. href values are resolved against parserData.url.
// parserData.data and parserData.url must be non-nil (checked with NSParameterAssert).
+ (NSArray <RSHTMLLink *> *)htmlLinksWithParserData:(ParserData *)parserData;
@end
// One <a> tag found by RSHTMLLinkParser.
@interface RSHTMLLink : NSObject
// Any of these, even urlString, may be nil, because HTML can be bad.
// href attribute, resolved against the page URL.
@property (nonatomic, nullable, readonly) NSString *urlString; //absolute
// Character data collected between the start and end tag, with surrounding whitespace trimmed.
@property (nonatomic, nullable, readonly) NSString *text;
@property (nonatomic, nullable, readonly) NSString *title; //title attribute inside anchor tag
@end
NS_ASSUME_NONNULL_END

View File

@@ -0,0 +1,154 @@
//
// RSHTMLLinkParser.m
// RSParser
//
// Created by Brent Simmons on 8/7/16.
// Copyright © 2016 Ranchero Software, LLC. All rights reserved.
//
#import "RSHTMLLinkParser.h"
#import "RSSAXHTMLParser.h"
#import "RSSAXParser.h"
#import "RSParserInternal.h"
#import "ParserData.h"
#import <libxml/xmlstring.h>
// Private state. The parser is its own SAX delegate.
@interface RSHTMLLinkParser() <RSSAXHTMLParserDelegate>
// Links found so far; the last object is the link currently being filled in.
@property (nonatomic, readonly) NSMutableArray *links;
// The input (HTML data plus page URL).
@property (nonatomic, readonly) ParserData *parserData;
// Allocated in the initializer; not otherwise used by the methods in this class.
@property (nonatomic, readonly) NSMutableArray *dictionaries;
// parserData.url as an NSURL; base for resolving relative href values.
@property (nonatomic, readonly) NSURL *baseURL;
@end
// Redeclares the public readonly properties as readwrite so RSHTMLLinkParser can fill them in.
@interface RSHTMLLink()
@property (nonatomic, readwrite) NSString *urlString; //absolute
@property (nonatomic, readwrite) NSString *text;
@property (nonatomic, readwrite) NSString *title; //title attribute inside anchor tag
@end
@implementation RSHTMLLinkParser

#pragma mark - Class Methods

/// Parsing happens synchronously inside the initializer, so the links are ready on return.
+ (NSArray *)htmlLinksWithParserData:(ParserData *)parserData {
	RSHTMLLinkParser *linkParser = [[self alloc] initWithParserData:parserData];
	return linkParser.links;
}

#pragma mark - Init

/// Sets up state and immediately parses parserData.data.
- (instancetype)initWithParserData:(ParserData *)parserData {
	NSParameterAssert(parserData.data);
	NSParameterAssert(parserData.url);

	self = [super init];
	if (self) {
		_links = [NSMutableArray new];
		_parserData = parserData;
		_dictionaries = [NSMutableArray new];
		_baseURL = [NSURL URLWithString:parserData.url];

		[self parse];
	}
	return self;
}

#pragma mark - Parse

/// Feeds the HTML data through a SAX HTML parser with self as the delegate.
- (void)parse {
	RSSAXHTMLParser *htmlParser = [[RSSAXHTMLParser alloc] initWithDelegate:self];
	[htmlParser parseData:self.parserData.data];
	[htmlParser finishParsing];
}

/// The most recently added link, or nil when none has been found yet.
- (RSHTMLLink *)currentLink {
	return [self.links lastObject];
}

static NSString *kHrefKey = @"href";

/// Returns the href attribute (key matched case-insensitively) resolved against baseURL,
/// or nil when there is no href.
- (NSString *)urlStringFromDictionary:(NSDictionary *)d {
	NSString *href = [d rsparser_objectForCaseInsensitiveKey:kHrefKey];
	if (href == nil) {
		return nil;
	}
	return [[NSURL URLWithString:href relativeToURL:self.baseURL] absoluteString];
}

static NSString *kTitleKey = @"title";

/// Returns the title attribute (key matched case-insensitively), or nil.
- (NSString *)titleFromDictionary:(NSDictionary *)d {
	NSString *title = [d rsparser_objectForCaseInsensitiveKey:kTitleKey];
	return title;
}

/// Copies href and title from the attributes into the current link.
- (void)handleLinkAttributes:(NSDictionary *)d {
	RSHTMLLink *currentLink = [self currentLink];
	currentLink.urlString = [self urlStringFromDictionary:d];
	currentLink.title = [self titleFromDictionary:d];
}

static const char *kAnchor = "a";
static const NSInteger kAnchorLength = 2;

#pragma mark - RSSAXHTMLParserDelegate

/// On every <a> start tag: push a new link, fill in its attributes, and start collecting text.
- (void)saxParser:(RSSAXHTMLParser *)SAXParser XMLStartElement:(const xmlChar *)localName attributes:(const xmlChar **)attributes {
	if (RSSAXEqualTags(localName, kAnchor, kAnchorLength)) {
		[self.links addObject:[RSHTMLLink new]];

		NSDictionary *attributesDictionary = [SAXParser attributesDictionary:attributes];
		if (!RSParserObjectIsEmpty(attributesDictionary)) {
			[self handleLinkAttributes:attributesDictionary];
		}

		[SAXParser beginStoringCharacters];
	}
}

/// On every </a> end tag: store the collected, whitespace-trimmed text on the current link.
- (void)saxParser:(RSSAXParser *)SAXParser XMLEndElement:(const xmlChar *)localName {
	if (RSSAXEqualTags(localName, kAnchor, kAnchorLength)) {
		[self currentLink].text = SAXParser.currentStringWithTrimmedWhitespace;
	}
}

@end
// Plain value holder: no behavior beyond the properties.
@implementation RSHTMLLink
@end

View File

@@ -0,0 +1,97 @@
//
// RSHTMLMetadata.h
// RSParser
//
// Created by Brent Simmons on 3/6/16.
// Copyright © 2016 Ranchero Software, LLC. All rights reserved.
//
@import Foundation;
@import CoreGraphics;
@class RSHTMLMetadataFeedLink;
@class RSHTMLMetadataAppleTouchIcon;
@class RSHTMLMetadataFavicon;
@class RSHTMLOpenGraphProperties;
@class RSHTMLOpenGraphImage;
@class RSHTMLTag;
@class RSHTMLTwitterProperties;
NS_ASSUME_NONNULL_BEGIN
// Metadata derived from the tags of an HTML page.
// All properties are computed once, in the initializer.
@interface RSHTMLMetadata : NSObject
// urlString is kept as baseURLString and used to resolve relative href/src values.
- (instancetype)initWithURLString:(NSString *)urlString tags:(NSArray <RSHTMLTag *> *)tags;
@property (nonatomic, readonly) NSString *baseURLString;
// The tags passed to the initializer.
@property (nonatomic, readonly) NSArray <RSHTMLTag *> *tags;
// The urlString of each entry in favicons, in the same order.
@property (nonatomic, readonly) NSArray <NSString *> *faviconLinks DEPRECATED_MSG_ATTRIBUTE("Use the favicons property instead.");
// Link tags whose rel contains the word "icon" (case-insensitive) and that have a non-empty
// href or src; entries with an unresolvable or duplicate URL are dropped.
@property (nonatomic, readonly) NSArray <RSHTMLMetadataFavicon *> *favicons;
// Link tags whose rel is apple-touch-icon or apple-touch-icon-precomposed (case-insensitive).
@property (nonatomic, readonly) NSArray <RSHTMLMetadataAppleTouchIcon *> *appleTouchIcons;
// Link tags with rel "alternate", a type ending in /rss+xml, /atom+xml or /json,
// and a non-empty href or src.
@property (nonatomic, readonly) NSArray <RSHTMLMetadataFeedLink *> *feedLinks;
@property (nonatomic, readonly) RSHTMLOpenGraphProperties *openGraphProperties;
@property (nonatomic, readonly) RSHTMLTwitterProperties *twitterProperties;
@end
// One apple-touch-icon link tag.
@interface RSHTMLMetadataAppleTouchIcon : NSObject
// The tag's rel attribute, as written.
@property (nonatomic, readonly) NSString *rel;
// The tag's sizes attribute, as written.
@property (nonatomic, nullable, readonly) NSString *sizes;
// Parsed from sizes when it splits on a lowercase "x" into exactly two parts (e.g. "180x180");
// CGSizeZero otherwise.
@property (nonatomic, readonly) CGSize size;
@property (nonatomic, nullable, readonly) NSString *urlString; // Absolute.
@end
// One feed-discovery link tag (rel="alternate" with a feed type).
@interface RSHTMLMetadataFeedLink : NSObject
// The tag's title attribute.
@property (nonatomic, nullable, readonly) NSString *title;
// The tag's type attribute, as written.
@property (nonatomic, nullable, readonly) NSString *type;
@property (nonatomic, nullable, readonly) NSString *urlString; // Absolute.
@end
// One favicon link tag.
@interface RSHTMLMetadataFavicon : NSObject
// The tag's type attribute, as written.
@property (nonatomic, nullable, readonly) NSString *type;
// href (or src) resolved against the page URL.
@property (nonatomic, nullable, readonly) NSString *urlString;
@end
// Open Graph data collected from meta tags whose property attribute starts with "og:".
@interface RSHTMLOpenGraphProperties : NSObject
// TODO: the rest. At this writing (Nov. 26, 2017) I just care about og:image.
// See http://ogp.me/
// urlString is accepted but not used by the current implementation.
- (instancetype)initWithURLString:(NSString *)urlString tags:(NSArray <RSHTMLTag *> *)tags;
// Images in document order. A new image starts at each og:image tag (unless the current
// image has no url yet); og:image:* tags refine the current image.
@property (nonatomic, readonly) NSArray <RSHTMLOpenGraphImage *> *images;
@end
// One Open Graph image and its structured properties. Values are the meta tags' content
// attributes, stored as written (URLs are not resolved).
@interface RSHTMLOpenGraphImage : NSObject
// og:image or og:image:url
@property (nonatomic, nullable, readonly) NSString *url;
// og:image:secure_url
@property (nonatomic, nullable, readonly) NSString *secureURL;
// og:image:type
@property (nonatomic, nullable, readonly) NSString *mimeType;
// og:image:width (0 when absent)
@property (nonatomic, readonly) CGFloat width;
// og:image:height (0 when absent)
@property (nonatomic, readonly) CGFloat height;
// og:image:alt
@property (nonatomic, nullable, readonly) NSString *altText;
@end
// Twitter card data collected from meta tags.
@interface RSHTMLTwitterProperties : NSObject
// TODO: the rest. At this writing (Nov. 26, 2017) I just care about twitter:image:src.
// urlString is accepted but not used by the current implementation.
- (instancetype)initWithURLString:(NSString *)urlString tags:(NSArray <RSHTMLTag *> *)tags;
// content of the first meta tag named twitter:image:src that has non-empty content.
@property (nonatomic, nullable, readonly) NSString *imageURL; // twitter:image:src
@end
NS_ASSUME_NONNULL_END

View File

@@ -0,0 +1,483 @@
//
// RSHTMLMetadata.m
// RSParser
//
// Created by Brent Simmons on 3/6/16.
// Copyright © 2016 Ranchero Software, LLC. All rights reserved.
//
#import "RSHTMLMetadata.h"
#import "RSParserInternal.h"
#import "RSHTMLTag.h"
// Forward declarations for the file-private helpers defined after the RSHTMLMetadata implementation.
// href attribute, or src when there is no href.
static NSString *urlStringFromDictionary(NSDictionary *d);
// Resolves relativeURLString against baseURLString; nil when the base is not a valid URL.
static NSString *absoluteURLStringWithRelativeURLString(NSString *relativeURLString, NSString *baseURLString);
// Absolute URL for the href/src in d; nil when there is none.
static NSString *absoluteURLStringWithDictionary(NSDictionary *d, NSString *baseURLString);
// Builds one object of the given class per tag via initWithTag:baseURLString:.
static NSArray *objectsOfClassWithTags(Class class, NSArray *tags, NSString *baseURLString);
// rel attribute of d.
static NSString *relValue(NSDictionary *d);
// YES when type ends in /rss+xml, /atom+xml or /json (case-insensitive).
static BOOL typeIsFeedType(NSString *type);
// Attribute names and values used when matching tags. Declared `* const` so the
// pointers themselves cannot be reassigned by accident.

// rel values
static NSString * const kIconRelValue = @"icon";
static NSString * const kAppleTouchIconValue = @"apple-touch-icon";
static NSString * const kAppleTouchIconPrecomposedValue = @"apple-touch-icon-precomposed";
static NSString * const kAlternateKey = @"alternate";

// Attribute names
static NSString * const kHrefKey = @"href";
static NSString * const kSrcKey = @"src";
static NSString * const kSizesKey = @"sizes";
static NSString * const kTitleKey = @"title";
static NSString * const kRelKey = @"rel";
static NSString * const kTypeKey = @"type";

// MIME type suffixes that identify a feed
static NSString * const kRSSSuffix = @"/rss+xml";
static NSString * const kAtomSuffix = @"/atom+xml";
static NSString * const kJSONSuffix = @"/json";
// Private initializers. All three share the initWithTag:baseURLString: signature so that
// objectsOfClassWithTags can create any of them generically.
@interface RSHTMLMetadataAppleTouchIcon ()
- (instancetype)initWithTag:(RSHTMLTag *)tag baseURLString:(NSString *)baseURLString;
@end
@interface RSHTMLMetadataFeedLink ()
- (instancetype)initWithTag:(RSHTMLTag *)tag baseURLString:(NSString *)baseURLString;
@end
@interface RSHTMLMetadataFavicon ()
- (instancetype)initWithTag:(RSHTMLTag *)tag baseURLString:(NSString *)baseURLString;
@end
@implementation RSHTMLMetadata

#pragma mark - Init

/// Stores the inputs, then derives favicons, touch icons, feed links, Open Graph and
/// Twitter properties from the tags. The base URL and tags must be set first because
/// the derivation methods read them.
- (instancetype)initWithURLString:(NSString *)urlString tags:(NSArray <RSHTMLTag *> *)tags {
	self = [super init];
	if (self) {
		_baseURLString = urlString;
		_tags = tags;

		_favicons = [self resolvedFaviconLinks];

		NSArray *touchIconTags = [self appleTouchIconTags];
		_appleTouchIcons = objectsOfClassWithTags([RSHTMLMetadataAppleTouchIcon class], touchIconTags, urlString);

		NSArray *feedTags = [self feedLinkTags];
		_feedLinks = objectsOfClassWithTags([RSHTMLMetadataFeedLink class], feedTags, urlString);

		_openGraphProperties = [[RSHTMLOpenGraphProperties alloc] initWithURLString:urlString tags:tags];
		_twitterProperties = [[RSHTMLTwitterProperties alloc] initWithURLString:urlString tags:tags];
	}
	return self;
}

#pragma mark - Private

/// Link tags that have a non-empty href/src and whose rel contains valueToMatch as a
/// whitespace-delimited word (case-insensitive).
- (NSArray<RSHTMLTag *> *)linkTagsWithMatchingRel:(NSString *)valueToMatch {
	NSMutableArray<RSHTMLTag *> *matchingTags = [NSMutableArray array];

	for (RSHTMLTag *tag in self.tags) {
		if (tag.type != RSHTMLTagTypeLink || RSParserStringIsEmpty(urlStringFromDictionary(tag.attributes))) {
			continue;
		}

		NSString *rel = relValue(tag.attributes);
		if (!rel) {
			continue;
		}

		NSArray *relWords = [rel componentsSeparatedByCharactersInSet:NSCharacterSet.whitespaceAndNewlineCharacterSet];
		for (NSString *relWord in relWords) {
			if ([relWord compare:valueToMatch options:NSCaseInsensitiveSearch] == NSOrderedSame) {
				[matchingTags addObject:tag];
				break;
			}
		}
	}

	return matchingTags;
}

/// Link tags whose entire rel value is apple-touch-icon or apple-touch-icon-precomposed
/// (case-insensitive).
- (NSArray<RSHTMLTag *> *)appleTouchIconTags {
	NSMutableArray *touchIconTags = [NSMutableArray new];

	for (RSHTMLTag *tag in self.tags) {
		if (tag.type == RSHTMLTagTypeLink) {
			NSString *lowercasedRel = [relValue(tag.attributes) lowercaseString];
			BOOL isTouchIcon = [lowercasedRel isEqualToString:kAppleTouchIconValue] || [lowercasedRel isEqualToString:kAppleTouchIconPrecomposedValue];
			if (isTouchIcon) {
				[touchIconTags addObject:tag];
			}
		}
	}

	return touchIconTags;
}

/// Link tags with rel "alternate", a feed MIME type, and a non-empty href/src.
- (NSArray<RSHTMLTag *> *)feedLinkTags {
	NSMutableArray *feedTags = [NSMutableArray new];

	for (RSHTMLTag *tag in self.tags) {
		if (tag.type != RSHTMLTagTypeLink) {
			continue;
		}

		NSDictionary *attributes = tag.attributes;
		NSString *lowercasedRel = [relValue(attributes) lowercaseString];
		BOOL isFeedLink = [lowercasedRel isEqualToString:kAlternateKey]
			&& typeIsFeedType([attributes rsparser_objectForCaseInsensitiveKey:kTypeKey])
			&& !RSParserStringIsEmpty(urlStringFromDictionary(attributes));

		if (isFeedLink) {
			[feedTags addObject:tag];
		}
	}

	return feedTags;
}

/// Deprecated accessor: the URL string of every favicon, in order.
- (NSArray<NSString *> *)faviconLinks {
	NSMutableArray *urlStrings = [NSMutableArray array];
	for (RSHTMLMetadataFavicon *oneFavicon in self.favicons) {
		// Never nil: resolvedFaviconLinks drops favicons without a URL.
		[urlStrings addObject:oneFavicon.urlString];
	}
	return urlStrings;
}

/// Favicons for all rel="icon" link tags, skipping unresolvable and duplicate URLs.
- (NSArray<RSHTMLMetadataFavicon *> *)resolvedFaviconLinks {
	NSArray<RSHTMLTag *> *iconTags = [self linkTagsWithMatchingRel:kIconRelValue];
	NSMutableArray *favicons = [NSMutableArray array];
	NSMutableSet<NSString *> *seenURLStrings = [NSMutableSet setWithCapacity:iconTags.count];

	for (RSHTMLTag *tag in iconTags) {
		RSHTMLMetadataFavicon *favicon = [[RSHTMLMetadataFavicon alloc] initWithTag:tag baseURLString:self.baseURLString];
		NSString *faviconURLString = favicon.urlString;

		if (faviconURLString == nil || [seenURLStrings containsObject:faviconURLString]) {
			continue;
		}

		[favicons addObject:favicon];
		[seenURLStrings addObject:faviconURLString];
	}

	return favicons;
}

@end
/// The rel attribute of a tag's attribute dictionary (key matched case-insensitively).
static NSString *relValue(NSDictionary *d) {
	NSString *rel = [d rsparser_objectForCaseInsensitiveKey:kRelKey];
	return rel;
}
/// The tag's URL as written: href when present, otherwise src (keys matched case-insensitively).
static NSString *urlStringFromDictionary(NSDictionary *d) {
	NSString *href = [d rsparser_objectForCaseInsensitiveKey:kHrefKey];
	return href ? href : [d rsparser_objectForCaseInsensitiveKey:kSrcKey];
}
/// Resolves relativeURLString against baseURLString and returns the standardized absolute
/// URL string. Returns nil when baseURLString is not a valid URL.
static NSString *absoluteURLStringWithRelativeURLString(NSString *relativeURLString, NSString *baseURLString) {
	NSURL *baseURL = [NSURL URLWithString:baseURLString];
	if (baseURL == nil) {
		return nil;
	}

	NSURL *relativeURL = [NSURL URLWithString:relativeURLString relativeToURL:baseURL];
	NSURL *resolvedURL = [relativeURL absoluteURL];
	NSURL *standardizedURL = [resolvedURL standardizedURL];
	return [standardizedURL absoluteString];
}
/// The tag's href/src resolved against baseURLString; nil when the tag has no (or an empty) URL.
static NSString *absoluteURLStringWithDictionary(NSDictionary *d, NSString *baseURLString) {
	NSString *rawURLString = urlStringFromDictionary(d);
	if (!RSParserStringIsEmpty(rawURLString)) {
		return absoluteURLStringWithRelativeURLString(rawURLString, baseURLString);
	}
	return nil;
}
/// Creates one instance of `class` per tag with initWithTag:baseURLString:, skipping any
/// tag for which the initializer returns nil.
static NSArray *objectsOfClassWithTags(Class class, NSArray *tags, NSString *baseURLString) {
	NSMutableArray *createdObjects = [NSMutableArray new];

	for (RSHTMLTag *tag in tags) {
		id createdObject = [[class alloc] initWithTag:tag baseURLString:baseURLString];
		if (createdObject == nil) {
			continue;
		}
		[createdObjects addObject:createdObject];
	}

	return createdObjects;
}
/// YES when the MIME type ends (case-insensitively) in /rss+xml, /atom+xml or /json.
/// A nil type is not a feed type.
static BOOL typeIsFeedType(NSString *type) {
	NSString *lowercasedType = [type lowercaseString];

	if ([lowercasedType hasSuffix:kRSSSuffix]) {
		return YES;
	}
	if ([lowercasedType hasSuffix:kAtomSuffix]) {
		return YES;
	}
	return [lowercasedType hasSuffix:kJSONSuffix];
}
@implementation RSHTMLMetadataAppleTouchIcon

/// Reads url, sizes and rel from the tag's attributes. `size` is parsed from `sizes` when
/// it has exactly two "x"-separated parts; otherwise it stays CGSizeZero.
- (instancetype)initWithTag:(RSHTMLTag *)tag baseURLString:(NSString *)baseURLString {
	self = [super init];
	if (self) {
		NSDictionary *attributes = tag.attributes;

		_urlString = absoluteURLStringWithDictionary(attributes, baseURLString);
		_sizes = [attributes rsparser_objectForCaseInsensitiveKey:kSizesKey];
		_rel = [attributes rsparser_objectForCaseInsensitiveKey:kRelKey];
		_size = CGSizeZero;

		// A nil _sizes yields a nil array, whose count is 0.
		NSArray *dimensions = [_sizes componentsSeparatedByString:@"x"];
		if (dimensions.count == 2) {
			_size = CGSizeMake([dimensions[0] floatValue], [dimensions[1] floatValue]);
		}
	}
	return self;
}

@end
@implementation RSHTMLMetadataFeedLink

/// Reads the absolute URL, title and type from the tag's attributes.
- (instancetype)initWithTag:(RSHTMLTag *)tag baseURLString:(NSString *)baseURLString {
	self = [super init];
	if (self) {
		NSDictionary *attributes = tag.attributes;
		_urlString = absoluteURLStringWithDictionary(attributes, baseURLString);
		_title = [attributes rsparser_objectForCaseInsensitiveKey:kTitleKey];
		_type = [attributes rsparser_objectForCaseInsensitiveKey:kTypeKey];
	}
	return self;
}

@end
@implementation RSHTMLMetadataFavicon

/// Reads the absolute URL and type from the tag's attributes.
- (instancetype)initWithTag:(RSHTMLTag *)tag baseURLString:(NSString *)baseURLString {
	self = [super init];
	if (self) {
		NSDictionary *attributes = tag.attributes;
		_urlString = absoluteURLStringWithDictionary(attributes, baseURLString);
		_type = [attributes rsparser_objectForCaseInsensitiveKey:kTypeKey];
	}
	return self;
}

@end
// Redeclares the public readonly properties as readwrite so that
// RSHTMLOpenGraphProperties can fill them in while scanning meta tags.
@interface RSHTMLOpenGraphImage ()
@property (nonatomic, readwrite) NSString *url;
@property (nonatomic, readwrite) NSString *secureURL;
@property (nonatomic, readwrite) NSString *mimeType;
@property (nonatomic, readwrite) CGFloat width;
@property (nonatomic, readwrite) CGFloat height;
@property (nonatomic, readwrite) NSString *altText;
@end
// Plain value holder: no behavior beyond the properties.
@implementation RSHTMLOpenGraphImage
@end
@interface RSHTMLOpenGraphProperties ()

// Backing store for `images`; the last object is the image currently being filled in.
@property (nonatomic) NSMutableArray *ogImages;

@end

@implementation RSHTMLOpenGraphProperties

/// Collects Open Graph images from the meta tags. urlString is not used.
- (instancetype)initWithURLString:(NSString *)urlString tags:(NSArray <RSHTMLTag *> *)tags {
	self = [super init];
	if (self) {
		_ogImages = [NSMutableArray new];
		[self parseTags:tags];
	}
	return self;
}

/// The image currently being filled in, or nil when none exists yet.
- (RSHTMLOpenGraphImage *)currentImage {
	return [self.ogImages lastObject];
}

/// Appends a new, empty image and returns it.
- (RSHTMLOpenGraphImage *)pushImage {
	RSHTMLOpenGraphImage *newImage = [RSHTMLOpenGraphImage new];
	[self.ogImages addObject:newImage];
	return newImage;
}

/// The current image, creating the first one when there is none.
- (RSHTMLOpenGraphImage *)ensureImage {
	RSHTMLOpenGraphImage *existingImage = [self currentImage];
	return (existingImage != nil) ? existingImage : [self pushImage];
}

- (NSArray *)images {
	return self.ogImages;
}

static NSString *ogPrefix = @"og:";
static NSString *ogImage = @"og:image";
static NSString *ogImageURL = @"og:image:url";
static NSString *ogImageSecureURL = @"og:image:secure_url";
static NSString *ogImageType = @"og:image:type";
static NSString *ogImageWidth = @"og:image:width";
static NSString *ogImageHeight = @"og:image:height";
static NSString *ogImageAlt = @"og:image:alt";
static NSString *ogPropertyKey = @"property";
static NSString *ogContentKey = @"content";

/// Walks the meta tags in order. Only tags with an "og:"-prefixed property and a content
/// attribute are considered. og:image starts a new image unless the current one still
/// lacks a url; the og:image:* properties are applied to the current image.
- (void)parseTags:(NSArray *)tags {
	for (RSHTMLTag *tag in tags) {
		if (tag.type != RSHTMLTagTypeMeta) {
			continue;
		}

		NSString *property = tag.attributes[ogPropertyKey];
		if (property == nil || ![property hasPrefix:ogPrefix]) {
			continue;
		}

		NSString *content = tag.attributes[ogContentKey];
		if (content == nil) {
			continue;
		}

		if ([property isEqualToString:ogImage]) {
			RSHTMLOpenGraphImage *image = [self currentImage];
			// Most likely case, since og:image will probably appear before other image attributes.
			BOOL needsNewImage = (image == nil) || (image.url != nil);
			if (needsNewImage) {
				image = [self pushImage];
			}
			image.url = content;
			continue;
		}

		if ([property isEqualToString:ogImageURL]) {
			[self ensureImage].url = content;
		}
		else if ([property isEqualToString:ogImageSecureURL]) {
			[self ensureImage].secureURL = content;
		}
		else if ([property isEqualToString:ogImageType]) {
			[self ensureImage].mimeType = content;
		}
		else if ([property isEqualToString:ogImageAlt]) {
			[self ensureImage].altText = content;
		}
		else if ([property isEqualToString:ogImageWidth]) {
			[self ensureImage].width = [content floatValue];
		}
		else if ([property isEqualToString:ogImageHeight]) {
			[self ensureImage].height = [content floatValue];
		}
	}
}

@end
@implementation RSHTMLTwitterProperties

static NSString *twitterNameKey = @"name";
static NSString *twitterContentKey = @"content";
static NSString *twitterImageSrc = @"twitter:image:src";

/// Sets imageURL to the content of the first meta tag named twitter:image:src that has
/// non-empty content. urlString is not used.
- (instancetype)initWithURLString:(NSString *)urlString tags:(NSArray <RSHTMLTag *> *)tags {
	self = [super init];
	if (self) {
		for (RSHTMLTag *tag in tags) {
			if (tag.type != RSHTMLTagTypeMeta) {
				continue;
			}

			// A nil name compares as not-equal, so it is skipped too.
			NSString *name = tag.attributes[twitterNameKey];
			if (![name isEqualToString:twitterImageSrc]) {
				continue;
			}

			// A nil content has length 0.
			NSString *content = tag.attributes[twitterContentKey];
			if (content.length < 1) {
				continue;
			}

			_imageURL = content;
			break;
		}
	}
	return self;
}

@end

View File

@@ -0,0 +1,24 @@
//
// RSHTMLMetadataParser.h
// RSParser
//
// Created by Brent Simmons on 3/6/16.
// Copyright © 2016 Ranchero Software, LLC. All rights reserved.
//
@import Foundation;
@class RSHTMLMetadata;
@class ParserData;
NS_ASSUME_NONNULL_BEGIN
// Extracts metadata from an HTML document.
@interface RSHTMLMetadataParser : NSObject
// Parses parserData and returns the resulting metadata object.
+ (RSHTMLMetadata *)HTMLMetadataWithParserData:(ParserData *)parserData;
@end
NS_ASSUME_NONNULL_END

View File

@@ -0,0 +1,151 @@
//
// RSHTMLMetadataParser.m
// RSParser
//
// Created by Brent Simmons on 3/6/16.
// Copyright © 2016 Ranchero Software, LLC. All rights reserved.
//
#import "RSHTMLMetadataParser.h"
#import "RSHTMLMetadata.h"
#import "RSSAXHTMLParser.h"
#import "RSSAXHTMLParser.h"
#import "RSSAXParser.h"
#import "RSParserInternal.h"
#import "ParserData.h"
#import "RSHTMLTag.h"
#import <libxml/xmlstring.h>
// Private state for RSHTMLMetadataParser; also declares conformance to RSSAXHTMLParserDelegate.
@interface RSHTMLMetadataParser () <RSSAXHTMLParserDelegate>
// Input being parsed.
@property (nonatomic, readonly) ParserData *parserData;
// Result; assigned once parsing is done.
@property (nonatomic, readwrite) RSHTMLMetadata *metadata;
// Link and meta tags collected while parsing.
@property (nonatomic) NSMutableArray *tags;
// Once YES, further start-element callbacks are ignored.
@property (nonatomic) BOOL didFinishParsing;
// When YES, reaching the body element does not stop the scan.
@property (nonatomic) BOOL shouldScanPastHeadSection;
@end
@implementation RSHTMLMetadataParser

#pragma mark - Class Methods

// Creates a parser for parserData (parsing runs inside the initializer) and
// hands back the metadata it produced.
+ (RSHTMLMetadata *)HTMLMetadataWithParserData:(ParserData *)parserData {
    RSHTMLMetadataParser *metadataParser = [[self alloc] initWithParserData:parserData];
    return metadataParser.metadata;
}

#pragma mark - Init

// Stores the input, decides whether to scan beyond the head section, and parses right away.
- (instancetype)initWithParserData:(ParserData *)parserData {
    NSParameterAssert(parserData.data);
    NSParameterAssert(parserData.url);

    self = [super init];
    if (self) {
        _parserData = parserData;
        _tags = [NSMutableArray new];

        // YouTube has a weird bug where, on some pages, it puts the feed link tag after the head section, in the body section.
        // This allows for a special case where we continue to scan after the head section.
        // (Yes, this match could yield false positives, but it's harmless.)
        NSRange youTubeRange = [parserData.url rangeOfString:@"youtube" options:NSCaseInsensitiveSearch];
        _shouldScanPastHeadSection = (youTubeRange.location != NSNotFound);

        [self parse];
    }
    return self;
}

#pragma mark - Parse

// Runs the SAX HTML parser over the data, then builds the metadata from the collected tags.
- (void)parse {
    RSSAXHTMLParser *htmlParser = [[RSSAXHTMLParser alloc] initWithDelegate:self];
    [htmlParser parseData:self.parserData.data];
    [htmlParser finishParsing];

    self.metadata = [[RSHTMLMetadata alloc] initWithURLString:self.parserData.url tags:self.tags];
}

static NSString *kHrefKey = @"href";
static NSString *kSrcKey = @"src";
static NSString *kRelKey = @"rel";

// Returns the href attribute if present, otherwise the src attribute (both looked up case-insensitively).
- (NSString *)linkForDictionary:(NSDictionary *)d {
    NSString *href = [d rsparser_objectForCaseInsensitiveKey:kHrefKey];
    return href ?: [d rsparser_objectForCaseInsensitiveKey:kSrcKey];
}

// Records a link tag, but only when it has both a non-empty rel and a non-empty link.
- (void)handleLinkAttributes:(NSDictionary *)d {
    BOOL missingRel = RSParserStringIsEmpty([d rsparser_objectForCaseInsensitiveKey:kRelKey]);
    if (missingRel || RSParserStringIsEmpty([self linkForDictionary:d])) {
        return;
    }
    [self.tags addObject:[RSHTMLTag linkTagWithAttributes:d]];
}

// Records a meta tag with the given attributes.
- (void)handleMetaAttributes:(NSDictionary *)d {
    [self.tags addObject:[RSHTMLTag metaTagWithAttributes:d]];
}

#pragma mark - RSSAXHTMLParserDelegate

static const char *kBody = "body";
static const NSInteger kBodyLength = 5;
static const char *kLink = "link";
static const NSInteger kLinkLength = 5;
static const char *kMeta = "meta";
static const NSInteger kMetaLength = 5;

// Start-element callback: stops at the body element (unless scanning past the head
// is enabled) and collects link and meta tags that have attributes.
- (void)saxParser:(RSSAXHTMLParser *)SAXParser XMLStartElement:(const xmlChar *)localName attributes:(const xmlChar **)attributes {
    if (self.didFinishParsing) {
        return;
    }

    if (RSSAXEqualTags(localName, kBody, kBodyLength) && !self.shouldScanPastHeadSection) {
        self.didFinishParsing = YES;
        return;
    }

    BOOL isLinkTag = RSSAXEqualTags(localName, kLink, kLinkLength);
    BOOL isMetaTag = !isLinkTag && RSSAXEqualTags(localName, kMeta, kMetaLength);
    if (!isLinkTag && !isMetaTag) {
        return;
    }

    NSDictionary *attributesDictionary = [SAXParser attributesDictionary:attributes];
    if (RSParserObjectIsEmpty(attributesDictionary)) {
        return;
    }

    if (isLinkTag) {
        [self handleLinkAttributes:attributesDictionary];
    }
    else {
        [self handleMetaAttributes:attributesDictionary];
    }
}

@end

View File

@@ -0,0 +1,33 @@
//
// RSHTMLTag.h
// RSParser
//
// Created by Brent Simmons on 11/26/17.
// Copyright © 2017 Ranchero Software, LLC. All rights reserved.
//
@import Foundation;
NS_ASSUME_NONNULL_BEGIN
extern NSString *RSHTMLTagNameLink; // @"link"
extern NSString *RSHTMLTagNameMeta; // @"meta"
// The kinds of HTML tag that RSHTMLTag can represent.
typedef NS_ENUM(NSInteger, RSHTMLTagType) {
RSHTMLTagTypeLink,
RSHTMLTagTypeMeta
};
// A tag type paired with the tag's attributes dictionary.
@interface RSHTMLTag : NSObject
- (instancetype)initWithType:(RSHTMLTagType)type attributes:(NSDictionary *)attributes;
// Convenience constructors for the two supported tag types.
+ (RSHTMLTag *)linkTagWithAttributes:(NSDictionary *)attributes;
+ (RSHTMLTag *)metaTagWithAttributes:(NSDictionary *)attributes;
@property (nonatomic, readonly) RSHTMLTagType type;
@property (nonatomic, readonly) NSDictionary *attributes;
@end
NS_ASSUME_NONNULL_END

View File

@@ -0,0 +1,43 @@
//
// RSHTMLTag.m
// RSParser
//
// Created by Brent Simmons on 11/26/17.
// Copyright © 2017 Ranchero Software, LLC. All rights reserved.
//
#import "RSHTMLTag.h"
NSString *RSHTMLTagNameLink = @"link";
NSString *RSHTMLTagNameMeta = @"meta";
@implementation RSHTMLTag

// Stores the tag type and its attributes as given.
- (instancetype)initWithType:(RSHTMLTagType)type attributes:(NSDictionary *)attributes {
    self = [super init];
    if (self) {
        _type = type;
        _attributes = attributes;
    }
    return self;
}

// Builds a tag of type RSHTMLTagTypeLink.
+ (RSHTMLTag *)linkTagWithAttributes:(NSDictionary *)attributes {
    RSHTMLTag *linkTag = [[self alloc] initWithType:RSHTMLTagTypeLink attributes:attributes];
    return linkTag;
}

// Builds a tag of type RSHTMLTagTypeMeta.
+ (RSHTMLTag *)metaTagWithAttributes:(NSDictionary *)attributes {
    RSHTMLTag *metaTag = [[self alloc] initWithType:RSHTMLTagTypeMeta attributes:attributes];
    return metaTag;
}

// Debug description: class name, pointer, numeric type and the attributes dictionary.
- (NSString *)description {
    NSString *className = NSStringFromClass([self class]);
    long typeValue = (long)self.type;
    return [NSString stringWithFormat:@"<%@: %p> type: %ld attributes: %@", className, self, typeValue, self.attributes];
}

@end

View File

@@ -0,0 +1,36 @@
//
// RSOPMLAttributes.h
// RSParser
//
// Created by Brent Simmons on 2/28/16.
// Copyright © 2016 Ranchero Software, LLC. All rights reserved.
//
@import Foundation;
// OPML allows for arbitrary attributes.
// These are the common attributes in OPML files used as RSS subscription lists.
extern NSString *OPMLTextKey; //text
extern NSString *OPMLTitleKey; //title
extern NSString *OPMLDescriptionKey; //description
extern NSString *OPMLTypeKey; //type
extern NSString *OPMLVersionKey; //version
extern NSString *OPMLHMTLURLKey; //htmlUrl
extern NSString *OPMLXMLURLKey; //xmlUrl
// Convenience accessors for the common OPML attributes stored in an attributes dictionary.
@interface NSDictionary (RSOPMLAttributes)
// A frequent error in OPML files is to mess up the capitalization,
// so these do a case-insensitive lookup.
@property (nonatomic, readonly) NSString *opml_text;
@property (nonatomic, readonly) NSString *opml_title;
@property (nonatomic, readonly) NSString *opml_description;
@property (nonatomic, readonly) NSString *opml_type;
@property (nonatomic, readonly) NSString *opml_version;
@property (nonatomic, readonly) NSString *opml_htmlUrl;
@property (nonatomic, readonly) NSString *opml_xmlUrl;
@end

View File

@@ -0,0 +1,68 @@
//
// RSOPMLAttributes.m
// RSParser
//
// Created by Brent Simmons on 2/28/16.
// Copyright © 2016 Ranchero Software, LLC. All rights reserved.
//
#import "RSOPMLAttributes.h"
#import "RSParserInternal.h"
NSString *OPMLTextKey = @"text";
NSString *OPMLTitleKey = @"title";
NSString *OPMLDescriptionKey = @"description";
NSString *OPMLTypeKey = @"type";
NSString *OPMLVersionKey = @"version";
NSString *OPMLHMTLURLKey = @"htmlUrl";
NSString *OPMLXMLURLKey = @"xmlUrl";
// Shared lookup used by every accessor below: a case-insensitive fetch of one attribute.
static NSString *RSOPMLAttributeValue(NSDictionary *attributes, NSString *key) {
    return [attributes rsparser_objectForCaseInsensitiveKey:key];
}

@implementation NSDictionary (RSOPMLAttributes)

// Each accessor returns the value stored under the matching OPML key, ignoring key case.

- (NSString *)opml_text {
    return RSOPMLAttributeValue(self, OPMLTextKey);
}

- (NSString *)opml_title {
    return RSOPMLAttributeValue(self, OPMLTitleKey);
}

- (NSString *)opml_description {
    return RSOPMLAttributeValue(self, OPMLDescriptionKey);
}

- (NSString *)opml_type {
    return RSOPMLAttributeValue(self, OPMLTypeKey);
}

- (NSString *)opml_version {
    return RSOPMLAttributeValue(self, OPMLVersionKey);
}

- (NSString *)opml_htmlUrl {
    return RSOPMLAttributeValue(self, OPMLHMTLURLKey);
}

- (NSString *)opml_xmlUrl {
    return RSOPMLAttributeValue(self, OPMLXMLURLKey);
}

@end

View File

@@ -0,0 +1,21 @@
//
// RSOPMLDocument.h
// RSParser
//
// Created by Brent Simmons on 2/28/16.
// Copyright © 2016 Ranchero Software, LLC. All rights reserved.
//
@import Foundation;
#import "RSOPMLItem.h"
// The root item of a parsed OPML file: an RSOPMLItem that additionally carries a title and a URL.
@interface RSOPMLDocument : RSOPMLItem
@property (nonatomic) NSString *title;
@property (nonatomic) NSString *url;
@end

View File

@@ -0,0 +1,14 @@
//
// RSOPMLDocument.m
// RSParser
//
// Created by Brent Simmons on 2/28/16.
// Copyright © 2016 Ranchero Software, LLC. All rights reserved.
//
#import "RSOPMLDocument.h"
// No custom behavior: the declared properties are auto-synthesized.
@implementation RSOPMLDocument
@end

View File

@@ -0,0 +1,19 @@
//
// RSOPMLError.h
// RSParser
//
// Created by Brent Simmons on 2/28/16.
// Copyright © 2016 Ranchero Software, LLC. All rights reserved.
//
@import Foundation;
extern NSString *RSOPMLErrorDomain;
// Error codes used with RSOPMLErrorDomain.
typedef NS_ENUM(NSInteger, RSOPMLErrorCode) {
RSOPMLErrorCodeDataIsWrongFormat = 1024
};
// Builds the error reported when data is not OPML; fileName is inserted into the localized description.
NSError *RSOPMLWrongFormatError(NSString *fileName);

View File

@@ -0,0 +1,22 @@
//
// RSOPMLError.m
// RSParser
//
// Created by Brent Simmons on 2/28/16.
// Copyright © 2016 Ranchero Software, LLC. All rights reserved.
//
#import "RSOPMLError.h"
NSString *RSOPMLErrorDomain = @"com.ranchero.OPML";
// Creates an NSError in RSOPMLErrorDomain with code RSOPMLErrorCodeDataIsWrongFormat.
// The localized description names the file; the failure reason is a fixed sentence.
NSError *RSOPMLWrongFormatError(NSString *fileName) {
    NSString *descriptionFormat = NSLocalizedString(@"The file %@ cant be parsed because its not an OPML file.", @"OPML wrong format");
    NSString *failureReason = NSLocalizedString(@"The file is not an OPML file.", @"OPML wrong format");

    NSDictionary *userInfo = @{
        NSLocalizedDescriptionKey: [NSString stringWithFormat:descriptionFormat, fileName],
        NSLocalizedFailureReasonErrorKey: failureReason
    };

    return [NSError errorWithDomain:RSOPMLErrorDomain code:RSOPMLErrorCodeDataIsWrongFormat userInfo:userInfo];
}

View File

@@ -0,0 +1,24 @@
//
// RSOPMLFeedSpecifier.h
// RSParser
//
// Created by Brent Simmons on 2/28/16.
// Copyright © 2016 Ranchero Software, LLC. All rights reserved.
//
@import Foundation;
NS_ASSUME_NONNULL_BEGIN
// Describes one feed listed in an OPML file. Only feedURL is required; the other values may be nil.
@interface RSOPMLFeedSpecifier : NSObject
- (instancetype)initWithTitle:(NSString * _Nullable)title feedDescription:(NSString * _Nullable)feedDescription homePageURL:(NSString * _Nullable)homePageURL feedURL:(NSString *)feedURL;
@property (nonatomic, nullable, readonly) NSString *title;
@property (nonatomic, nullable, readonly) NSString *feedDescription;
@property (nonatomic, nullable, readonly) NSString *homePageURL;
@property (nonatomic, readonly) NSString *feedURL;
@end
NS_ASSUME_NONNULL_END

View File

@@ -0,0 +1,51 @@
//
// RSOPMLFeedSpecifier.m
// RSParser
//
// Created by Brent Simmons on 2/28/16.
// Copyright © 2016 Ranchero Software, LLC. All rights reserved.
//
#import "RSOPMLFeedSpecifier.h"
#import "RSParserInternal.h"
@implementation RSOPMLFeedSpecifier

// Asserts that feedURL is non-empty, then stores the values.
// Empty optional strings are normalized to nil.
- (instancetype)initWithTitle:(NSString *)title feedDescription:(NSString *)feedDescription homePageURL:(NSString *)homePageURL feedURL:(NSString *)feedURL {
    NSParameterAssert(!RSParserStringIsEmpty(feedURL));

    self = [super init];
    if (self) {
        _title = RSParserStringIsEmpty(title) ? nil : title;
        _feedDescription = RSParserStringIsEmpty(feedDescription) ? nil : feedDescription;
        _homePageURL = RSParserStringIsEmpty(homePageURL) ? nil : homePageURL;
        _feedURL = feedURL;
    }
    return self;
}

@end

View File

@@ -0,0 +1,30 @@
//
// RSOPMLItem.h
// RSParser
//
// Created by Brent Simmons on 2/28/16.
// Copyright © 2016 Ranchero Software, LLC. All rights reserved.
//
@import Foundation;
@class RSOPMLFeedSpecifier;
NS_ASSUME_NONNULL_BEGIN
// One node of an OPML outline: its attributes plus any child items.
@interface RSOPMLItem : NSObject
@property (nonatomic, nullable) NSDictionary *attributes;
@property (nonatomic, nullable) NSArray <RSOPMLItem *> *children;
- (void)addChild:(RSOPMLItem *)child;
// nil when the attributes contain no feed URL.
@property (nonatomic, nullable, readonly) RSOPMLFeedSpecifier *feedSpecifier;
// The title attribute if present, otherwise the text attribute.
@property (nonatomic, nullable, readonly) NSString *titleFromAttributes;
// YES when the item has at least one child.
@property (nonatomic, readonly) BOOL isFolder;
@end
NS_ASSUME_NONNULL_END

View File

@@ -0,0 +1,87 @@
//
// RSOPMLItem.m
// RSParser
//
// Created by Brent Simmons on 2/28/16.
// Copyright © 2016 Ranchero Software, LLC. All rights reserved.
//
#import "RSOPMLItem.h"
#import "RSOPMLAttributes.h"
#import "RSOPMLFeedSpecifier.h"
#import "RSParserInternal.h"
// Private storage for RSOPMLItem.
@interface RSOPMLItem ()
// Backing store for children; created lazily by addChild: or replaced by setChildren:.
@property (nonatomic) NSMutableArray *mutableChildren;
@end
@implementation RSOPMLItem

@synthesize children = _children;
@synthesize feedSpecifier = _feedSpecifier;

// Returns an immutable copy of the mutable backing array (nil when there is no backing array).
- (NSArray *)children {
    NSMutableArray *backingStore = self.mutableChildren;
    return [backingStore copy];
}

// Keeps the given array and rebuilds the mutable backing array from it.
- (void)setChildren:(NSArray *)children {
    _children = children;
    self.mutableChildren = [children mutableCopy];
}

// Appends a child, creating the backing array on first use.
- (void)addChild:(RSOPMLItem *)child {
    if (self.mutableChildren == nil) {
        self.mutableChildren = [NSMutableArray new];
    }
    [self.mutableChildren addObject:child];
}

// Lazily builds and caches the feed specifier; stays nil while there is no xmlUrl attribute.
- (RSOPMLFeedSpecifier *)feedSpecifier {
    if (_feedSpecifier == nil) {
        NSString *xmlURL = self.attributes.opml_xmlUrl;
        if (!RSParserObjectIsEmpty(xmlURL)) {
            _feedSpecifier = [[RSOPMLFeedSpecifier alloc] initWithTitle:self.titleFromAttributes
                                                        feedDescription:self.attributes.opml_description
                                                            homePageURL:self.attributes.opml_htmlUrl
                                                                feedURL:xmlURL];
        }
    }
    return _feedSpecifier;
}

// Prefers the title attribute and falls back to the text attribute; nil if neither exists.
- (NSString *)titleFromAttributes {
    NSString *titleAttribute = self.attributes.opml_title;
    return titleAttribute ?: self.attributes.opml_text;
}

// An item counts as a folder as soon as it has at least one child.
- (BOOL)isFolder {
    NSUInteger childCount = self.mutableChildren.count;
    return childCount > 0;
}

@end

View File

@@ -0,0 +1,26 @@
//
// RSOPMLParser.h
// RSParser
//
// Created by Brent Simmons on 7/12/15.
// Copyright © 2015 Ranchero Software, LLC. All rights reserved.
//
@import Foundation;
@class ParserData;
@class RSOPMLDocument;
// Receives the parsed document and the error reported by the parser (either may be nil).
typedef void (^OPMLParserCallback)(RSOPMLDocument *opmlDocument, NSError *error);
// Parses on background thread; calls back on main thread.
void RSParseOPML(ParserData *parserData, OPMLParserCallback callback);
// Synchronous OPML parser.
@interface RSOPMLParser: NSObject
// Parses parserData on the calling thread. On failure, stores the error in *error (when error is non-NULL).
+ (RSOPMLDocument *)parseOPMLWithParserData:(ParserData *)parserData error:(NSError **)error;
@end

View File

@@ -0,0 +1,310 @@
//
// RSOPMLParser.m
// RSParser
//
// Created by Brent Simmons on 7/12/15.
// Copyright © 2015 Ranchero Software, LLC. All rights reserved.
//
#import "RSOPMLParser.h"
#import "RSSAXParser.h"
#import "RSOPMLItem.h"
#import "RSOPMLDocument.h"
#import "RSOPMLAttributes.h"
#import "RSOPMLError.h"
#import "RSOPMLParser.h"
#import "ParserData.h"
#import <libxml/xmlstring.h>
// Private state for RSOPMLParser; also declares conformance to RSSAXParserDelegate.
@interface RSOPMLParser () <RSSAXParserDelegate>
// Root of the parsed outline; only created when the data looks like OPML.
@property (nonatomic, readwrite) RSOPMLDocument *OPMLDocument;
// Set when the data is rejected as not being OPML.
@property (nonatomic, readwrite) NSError *error;
// Stack of items currently open while walking the outline elements; the last object is the current item.
@property (nonatomic) NSMutableArray *itemStack;
@end
// Parses the OPML data on a global background queue, then delivers the
// document and error to the callback on the main queue.
void RSParseOPML(ParserData *parserData, OPMLParserCallback callback) {
    dispatch_queue_t backgroundQueue = dispatch_get_global_queue(DISPATCH_QUEUE_PRIORITY_DEFAULT, 0);

    dispatch_async(backgroundQueue, ^{
        @autoreleasepool {
            NSError *parseError = nil;
            RSOPMLDocument *document = [RSOPMLParser parseOPMLWithParserData:parserData error:&parseError];

            dispatch_async(dispatch_get_main_queue(), ^{
                callback(document, parseError);
            });
        }
    });
}
// SAX-based OPML parser. Parsing happens inside the initializer; the results are
// exposed through the OPMLDocument and error properties.
@implementation RSOPMLParser
#pragma mark - Class Methods
// Parses parserData and returns the document with its url set to parserData.url.
// When the parser recorded an error and the caller supplied an error pointer,
// the error is stored there and nil is returned.
+ (RSOPMLDocument *)parseOPMLWithParserData:(ParserData *)parserData error:(NSError **)error {
RSOPMLParser *parser = [[RSOPMLParser alloc] initWithParserData:parserData];
RSOPMLDocument *document = parser.OPMLDocument;
document.url = parserData.url;
if (parser.error && error) {
*error = parser.error;
return nil;
}
return document;
}
#pragma mark - Init
// Initializes the parser and immediately parses parserData.
- (instancetype)initWithParserData:(ParserData *)parserData {
self = [super init];
if (!self) {
return nil;
}
[self parse:parserData];
return self;
}
#pragma mark - Private
// Rejects data that does not look like OPML (setting self.error); otherwise runs
// the SAX parser with the document as the root of the item stack.
- (void)parse:(ParserData *)parserData {
@autoreleasepool {
if (![self canParseData:parserData.data]) {
// Choose the name shown in the error: the last path component for file URLs,
// otherwise the URL string itself.
NSString *filename = nil;
NSURL *url = [NSURL URLWithString:parserData.url];
if (url && url.isFileURL) {
filename = url.path.lastPathComponent;
}
if ([parserData.url hasPrefix:@"http"]) {
filename = parserData.url;
}
if (!filename) {
filename = parserData.url;
}
self.error = RSOPMLWrongFormatError(filename);
return;
}
RSSAXParser *parser = [[RSSAXParser alloc] initWithDelegate:self];
self.itemStack = [NSMutableArray new];
self.OPMLDocument = [RSOPMLDocument new];
[self pushItem:self.OPMLDocument];
[parser parseData:parserData.data];
[parser finishParsing];
}
}
// Returns YES when "<opml" (case-insensitive) appears within the first 4096 characters of the data.
- (BOOL)canParseData:(NSData *)d {
// Check for <opml near the top. (Only <opml is searched for here.)
@autoreleasepool {
// Try UTF-8 first without copying the bytes; if that fails, fall back to encoding detection.
NSString *s = [[NSString alloc] initWithBytesNoCopy:(void *)d.bytes length:d.length encoding:NSUTF8StringEncoding freeWhenDone:NO];
if (!s) {
NSDictionary *options = @{NSStringEncodingDetectionSuggestedEncodingsKey : @[@(NSUTF8StringEncoding)]};
(void)[NSString stringEncodingForData:d encodingOptions:options convertedString:&s usedLossyConversion:nil];
}
if (!s) {
return NO;
}
static const NSInteger numberOfCharactersToSearch = 4096;
NSRange rangeToSearch = NSMakeRange(0, numberOfCharactersToSearch);
// Clamp the search range for strings shorter than the limit.
if (s.length < numberOfCharactersToSearch) {
rangeToSearch.length = s.length;
}
NSRange opmlRange = [s rangeOfString:@"<opml" options:NSCaseInsensitiveSearch range:rangeToSearch];
if (opmlRange.length < 1) {
return NO;
}
}
return YES;
}
// Makes item the current item.
- (void)pushItem:(RSOPMLItem *)item {
[self.itemStack addObject:item];
}
// Removes the current item from the stack.
- (void)popItem {
NSAssert(self.itemStack.count > 0, nil);
/*If itemStack is empty, bad things are happening.
But we still shouldn't crash in production.*/
if (self.itemStack.count > 0) {
[self.itemStack removeLastObject];
}
}
// The item on top of the stack, or nil when the stack is empty.
- (RSOPMLItem *)currentItem {
return self.itemStack.lastObject;
}
#pragma mark - RSSAXParserDelegate
// The tag length constants in this file are one more than the visible character count
// (presumably to include the terminating NUL; confirm against RSSAXEqualTags).
static const char *kOutline = "outline";
static const char kOutlineLength = 8;
// Start-element callback. A title element starts character storage; an outline element
// becomes a new child of the current item and is pushed as the new current item.
// Note: kTitle and kTitleLength are defined further down in this @implementation.
- (void)saxParser:(RSSAXParser *)SAXParser XMLStartElement:(const xmlChar *)localName prefix:(const xmlChar *)prefix uri:(const xmlChar *)uri numberOfNamespaces:(NSInteger)numberOfNamespaces namespaces:(const xmlChar **)namespaces numberOfAttributes:(NSInteger)numberOfAttributes numberDefaulted:(int)numberDefaulted attributes:(const xmlChar **)attributes {
if (RSSAXEqualTags(localName, kTitle, kTitleLength)) {
[SAXParser beginStoringCharacters];
return;
}
if (!RSSAXEqualTags(localName, kOutline, kOutlineLength)) {
return;
}
RSOPMLItem *item = [RSOPMLItem new];
item.attributes = [SAXParser attributesDictionary:attributes numberOfAttributes:numberOfAttributes];
[[self currentItem] addChild:item];
[self pushItem:item];
}
// End-element callback. A title element sets the document title, but only while the
// document itself is the current item; an outline element pops the current item.
- (void)saxParser:(RSSAXParser *)SAXParser XMLEndElement:(const xmlChar *)localName prefix:(const xmlChar *)prefix uri:(const xmlChar *)uri {
if (RSSAXEqualTags(localName, kTitle, kTitleLength)) {
RSOPMLItem* item = [self currentItem];
if ([item isKindOfClass:[RSOPMLDocument class]]) {
((RSOPMLDocument *)item).title = SAXParser.currentStringWithTrimmedWhitespace;
}
return;
}
if (RSSAXEqualTags(localName, kOutline, kOutlineLength)) {
[self popItem];
}
}
static const char *kText = "text";
static const NSInteger kTextLength = 5;
static const char *kTitle = "title";
static const NSInteger kTitleLength = 6;
static const char *kDescription = "description";
static const NSInteger kDescriptionLength = 12;
static const char *kType = "type";
static const NSInteger kTypeLength = 5;
static const char *kVersion = "version";
static const NSInteger kVersionLength = 8;
static const char *kHTMLURL = "htmlUrl";
static const NSInteger kHTMLURLLength = 8;
static const char *kXMLURL = "xmlUrl";
static const NSInteger kXMLURLLength = 7;
// Returns the shared key string for the common unprefixed OPML attribute names,
// or nil for prefixed or unrecognized names.
// Candidates are bucketed by name length first so at most two comparisons run per name.
- (NSString *)saxParser:(RSSAXParser *)SAXParser internedStringForName:(const xmlChar *)name prefix:(const xmlChar *)prefix {
if (prefix) {
return nil;
}
size_t nameLength = strlen((const char *)name);
// 4 characters: "text" or "type".
if (nameLength == kTextLength - 1) {
if (RSSAXEqualTags(name, kText, kTextLength)) {
return OPMLTextKey;
}
if (RSSAXEqualTags(name, kType, kTypeLength)) {
return OPMLTypeKey;
}
}
else if (nameLength == kTitleLength - 1) {
if (RSSAXEqualTags(name, kTitle, kTitleLength)) {
return OPMLTitleKey;
}
}
else if (nameLength == kXMLURLLength - 1) {
if (RSSAXEqualTags(name, kXMLURL, kXMLURLLength)) {
return OPMLXMLURLKey;
}
}
// 7 characters: "version" or "htmlUrl".
else if (nameLength == kVersionLength - 1) {
if (RSSAXEqualTags(name, kVersion, kVersionLength)) {
return OPMLVersionKey;
}
if (RSSAXEqualTags(name, kHTMLURL, kHTMLURLLength)) {
return OPMLHMTLURLKey;
}
}
else if (nameLength == kDescriptionLength - 1) {
if (RSSAXEqualTags(name, kDescription, kDescriptionLength)) {
return OPMLDescriptionKey;
}
}
return nil;
}
static const char *kRSSUppercase = "RSS";
static const char *kRSSLowercase = "rss";
static const NSUInteger kRSSLength = 3;
static NSString *RSSUppercaseValue = @"RSS";
static NSString *RSSLowercaseValue = @"rss";
static NSString *emptyString = @"";
// YES when the first length bytes of both buffers are identical.
static BOOL equalBytes(const void *bytes1, const void *bytes2, NSUInteger length) {
return memcmp(bytes1, bytes2, length) == 0;
}
// Returns a shared string for empty values and for the exact values "RSS" and "rss";
// returns nil for every other value.
- (NSString *)saxParser:(RSSAXParser *)SAXParser internedStringForValue:(const void *)bytes length:(NSUInteger)length {
if (length < 1) {
return emptyString;
}
if (length == kRSSLength) {
if (equalBytes(bytes, kRSSUppercase, kRSSLength)) {
return RSSUppercaseValue;
}
else if (equalBytes(bytes, kRSSLowercase, kRSSLength)) {
return RSSLowercaseValue;
}
}
return nil;
}
@end

View File

@@ -0,0 +1,37 @@
//
// RSParsedArticle.h
// RSParser
//
// Created by Brent Simmons on 12/6/14.
// Copyright (c) 2014 Ranchero Software LLC. All rights reserved.
//
@import Foundation;
@class RSParsedEnclosure;
@class RSParsedAuthor;
// One article parsed from a feed.
@interface RSParsedArticle : NSObject
- (nonnull instancetype)initWithFeedURL:(NSString * _Nonnull)feedURL;
@property (nonatomic, readonly, nonnull) NSString *feedURL;
@property (nonatomic, nonnull) NSString *articleID; //guid, if present, or calculated from other attributes. Should be unique to the feed, but not necessarily unique across different feeds. (Not suitable for a database ID.)
@property (nonatomic, nullable) NSString *guid;
@property (nonatomic, nullable) NSString *title;
@property (nonatomic, nullable) NSString *body;
@property (nonatomic, nullable) NSString *link;
@property (nonatomic, nullable) NSString *permalink;
@property (nonatomic, nullable) NSSet<RSParsedAuthor *> *authors;
@property (nonatomic, nullable) NSSet<RSParsedEnclosure *> *enclosures;
@property (nonatomic, nullable) NSDate *datePublished;
@property (nonatomic, nullable) NSDate *dateModified;
// Set to the current date when the article object is created.
@property (nonatomic, nonnull) NSDate *dateParsed;
@property (nonatomic, nullable) NSString *language;
// These add to the enclosures and authors sets, creating the set when it is still nil.
- (void)addEnclosure:(RSParsedEnclosure *_Nonnull)enclosure;
- (void)addAuthor:(RSParsedAuthor *_Nonnull)author;
@end

View File

@@ -0,0 +1,134 @@
//
// RSParsedArticle.m
// RSParser
//
// Created by Brent Simmons on 12/6/14.
// Copyright (c) 2014 Ranchero Software LLC. All rights reserved.
//
#import "RSParsedArticle.h"
#import "RSParserInternal.h"
#import "NSString+RSParser.h"
#import "RSParsedAuthor.h"
#import "RSParsedEnclosure.h"
@implementation RSParsedArticle

#pragma mark - Init

// Records the owning feed URL and stamps the article with the current date.
- (instancetype)initWithFeedURL:(NSString *)feedURL {
    NSParameterAssert(feedURL != nil);

    self = [super init];
    if (self) {
        _feedURL = feedURL;
        _dateParsed = [NSDate date];
    }
    return self;
}

#pragma mark - Enclosures

// Adds an enclosure, creating the set on first use.
- (void)addEnclosure:(RSParsedEnclosure *)enclosure {
    NSSet *existing = self.enclosures;
    self.enclosures = existing ? [existing setByAddingObject:enclosure] : [NSSet setWithObject:enclosure];
}

#pragma mark - Authors

// Adds an author, creating the set on first use.
- (void)addAuthor:(RSParsedAuthor *)author {
    NSSet *existing = self.authors;
    self.authors = existing ? [existing setByAddingObject:author] : [NSSet setWithObject:author];
}

#pragma mark - articleID

// The guid when there is one; otherwise a calculated ID that is computed once and cached.
- (NSString *)articleID {
    NSString *guid = self.guid;
    if (guid) {
        return guid;
    }
    if (_articleID == nil) {
        _articleID = [self calculatedArticleID];
    }
    return _articleID;
}

// Builds an ID for articles without a guid by hashing a combination of properties.
// Feeds should have guids; without one there is no fully reliable way to determine
// identity, so re-runs are likely. The result is meant to be unique within a feed only
// and is not suitable as a database ID.
- (NSString *)calculatedArticleID {
    NSString *timestamp = nil;
    if (self.datePublished) {
        timestamp = [NSString stringWithFormat:@"%.0f", self.datePublished.timeIntervalSince1970];
    }

    // Preferred identifying string: permalink, then link, then title.
    NSString *primary = nil;
    if (!RSParserStringIsEmpty(self.permalink)) {
        primary = self.permalink;
    }
    else if (!RSParserStringIsEmpty(self.link)) {
        primary = self.link;
    }
    else if (!RSParserStringIsEmpty(self.title)) {
        primary = self.title;
    }

    NSMutableString *hashSource = [NSMutableString string];
    if (timestamp) {
        // Ideally there is both an identifying string and a publication date; together
        // they should be solid. With a date alone, the date is used by itself.
        if (primary) {
            [hashSource appendString:primary];
        }
        [hashSource appendString:timestamp];
    }
    else if (primary) {
        [hashSource appendString:primary];
    }
    else if (!RSParserStringIsEmpty(self.body)) {
        // Last resort: the body text.
        [hashSource appendString:self.body];
    }

    return [hashSource rsparser_md5Hash];
}

@end

View File

@@ -0,0 +1,19 @@
//
// RSParsedAuthor.h
// RSParserTests
//
// Created by Brent Simmons on 12/19/17.
// Copyright © 2017 Ranchero Software, LLC. All rights reserved.
//
@import Foundation;
// An author parsed from a feed; any of the properties may be nil.
@interface RSParsedAuthor : NSObject
@property (nonatomic, nullable) NSString *name;
@property (nonatomic, nullable) NSString *emailAddress;
@property (nonatomic, nullable) NSString *url;
+ (instancetype _Nonnull )authorWithSingleString:(NSString *_Nonnull)s; // Don't know which property it is. Guess based on contents of the string. Common with RSS.
@end

View File

@@ -0,0 +1,34 @@
//
// RSParsedAuthor.m
// RSParserTests
//
// Created by Brent Simmons on 12/19/17.
// Copyright © 2017 Ranchero Software, LLC. All rights reserved.
//
#import "NSString+RSParser.h"
#import "RSParsedAuthor.h"
@implementation RSParsedAuthor

// Creates an author from a single string by guessing what the string is.
// The author element in RSS is supposed to be an email address, but often it's a name
// and sometimes a URL: a string containing "@" is treated as an email address, a string
// starting with "http" (ignoring case) as a URL, and anything else as a name.
+ (instancetype)authorWithSingleString:(NSString *)s {
    RSParsedAuthor *result = [[self alloc] init];

    if ([s rsparser_contains:@"@"]) {
        result.emailAddress = s;
        return result;
    }

    if ([s.lowercaseString hasPrefix:@"http"]) {
        result.url = s;
        return result;
    }

    result.name = s;
    return result;
}

@end

View File

@@ -0,0 +1,22 @@
//
// RSParsedEnclosure.h
// RSParser
//
// Created by Brent Simmons on 12/18/17.
// Copyright © 2017 Ranchero Software, LLC. All rights reserved.
//
@import Foundation;
NS_ASSUME_NONNULL_BEGIN
// An enclosure attached to a parsed article. url is non-null; mimeType and title are optional.
@interface RSParsedEnclosure : NSObject
@property (nonatomic) NSString *url;
@property (nonatomic) NSInteger length;
@property (nonatomic, nullable) NSString *mimeType;
@property (nonatomic, nullable) NSString *title;
@end
NS_ASSUME_NONNULL_END

View File

@@ -0,0 +1,13 @@
//
// RSParsedEnclosure.m
// RSParser
//
// Created by Brent Simmons on 12/18/17.
// Copyright © 2017 Ranchero Software, LLC. All rights reserved.
//
#import "RSParsedEnclosure.h"
// No custom behavior: the declared properties are auto-synthesized.
@implementation RSParsedEnclosure
@end

View File

@@ -0,0 +1,23 @@
//
// RSParsedFeed.h
// RSParser
//
// Created by Brent Simmons on 7/12/15.
// Copyright © 2015 Ranchero Software, LLC. All rights reserved.
//
@import Foundation;
@class RSParsedArticle;
// The result of parsing a feed: feed-level values plus the parsed articles.
@interface RSParsedFeed : NSObject
// Note that articles is passed in as an array while the articles property is a set.
- (nonnull instancetype)initWithURLString:(NSString * _Nonnull)urlString title:(NSString * _Nullable)title link:(NSString * _Nullable)link language:(NSString * _Nullable)language articles:(NSArray <RSParsedArticle *>* _Nonnull)articles;
@property (nonatomic, readonly, nonnull) NSString *urlString;
@property (nonatomic, readonly, nullable) NSString *title;
@property (nonatomic, readonly, nullable) NSString *link;
@property (nonatomic, readonly, nullable) NSString *language;
@property (nonatomic, readonly, nonnull) NSSet <RSParsedArticle *>*articles;
@end

View File

@@ -0,0 +1,32 @@
//
// RSParsedFeed.m
// RSParser
//
// Created by Brent Simmons on 7/12/15.
// Copyright © 2015 Ranchero Software, LLC. All rights reserved.
//
#import "RSParsedFeed.h"
@implementation RSParsedFeed

// Stores the feed-level values and the parsed articles.
//
// The public header declares the articles parameter as an NSArray and the articles
// property as an NSSet, and callers pass an array. The array is therefore converted
// to a set here, so the property always holds a real NSSet rather than an array
// stored behind an NSSet-typed pointer.
- (instancetype)initWithURLString:(NSString *)urlString title:(NSString *)title link:(NSString *)link language:(NSString *)language articles:(NSArray <RSParsedArticle *>*)articles {
    self = [super init];
    if (!self) {
        return nil;
    }

    _urlString = urlString;
    _title = title;
    _link = link;
    _language = language;

    // Tolerate a caller that still hands over a set, as the previous
    // implementation signature allowed; everything else is treated as an array.
    id articlesObject = articles;
    if ([articlesObject isKindOfClass:[NSSet class]]) {
        _articles = [(NSSet *)articlesObject copy];
    }
    else {
        _articles = [NSSet setWithArray:articles];
    }

    return self;
}

@end

View File

@@ -0,0 +1,24 @@
//
// RSParserInternal.h
// RSParser
//
// Created by Brent Simmons on 12/26/16.
// Copyright © 2016 Ranchero Software, LLC. All rights reserved.
//
@import Foundation;
NS_ASSUME_NONNULL_BEGIN
// YES for nil, NSNull, and objects whose count or length is zero.
BOOL RSParserObjectIsEmpty(id _Nullable obj);
// YES for nil, NSNull, and zero-length strings.
BOOL RSParserStringIsEmpty(NSString * _Nullable s);
@interface NSDictionary (RSParserInternal)
// Looks up key exactly first, then falls back to a case-insensitive match against the string keys.
- (nullable id)rsparser_objectForCaseInsensitiveKey:(NSString *)key;
@end
NS_ASSUME_NONNULL_END

View File

@@ -0,0 +1,61 @@
//
// RSParserInternal.m
// RSParser
//
// Created by Brent Simmons on 12/26/16.
// Copyright © 2016 Ranchero Software, LLC. All rights reserved.
//
#import "RSParserInternal.h"
#import <CommonCrypto/CommonDigest.h>
// Treats both nil and the NSNull singleton as "no value".
static BOOL RSParserIsNil(id obj) {
    if (obj == nil) {
        return YES;
    }
    return obj == [NSNull null];
}
// Reports whether obj is nil/NSNull or an empty container or string-like object.
// Objects responding to count are judged by count; otherwise objects responding
// to length are judged by length.
BOOL RSParserObjectIsEmpty(id obj) {
    if (RSParserIsNil(obj)) {
        return YES;
    }

    BOOL isEmpty = NO; /*Objects with neither count nor length are never empty. Shouldn't happen very often.*/
    if ([obj respondsToSelector:@selector(count)]) {
        isEmpty = [obj count] < 1;
    }
    else if ([obj respondsToSelector:@selector(length)]) {
        isEmpty = [obj length] < 1;
    }
    return isEmpty;
}
// Reports whether s is nil, NSNull, or has no characters.
BOOL RSParserStringIsEmpty(NSString *s) {
    if (RSParserIsNil(s)) {
        return YES;
    }
    return s.length < 1;
}
@implementation NSDictionary (RSParserInternal)

// Returns the value for key, trying an exact lookup first and then scanning the
// string keys for one that matches when case is ignored. Returns nil when nothing matches.
- (nullable id)rsparser_objectForCaseInsensitiveKey:(NSString *)key {
    id exactMatch = self[key];
    if (exactMatch != nil) {
        return exactMatch;
    }

    for (NSString *candidateKey in self.allKeys) {
        // Keys that are not strings cannot be compared; skip them.
        if (![candidateKey isKindOfClass:[NSString class]]) {
            continue;
        }
        if ([key caseInsensitiveCompare:candidateKey] == NSOrderedSame) {
            return self[candidateKey];
        }
    }

    return nil;
}

@end

View File

@@ -0,0 +1,19 @@
//
// RSRSSParser.h
// RSParser
//
// Created by Brent Simmons on 1/6/15.
// Copyright (c) 2015 Ranchero Software LLC. All rights reserved.
//
@import Foundation;
@class ParserData;
@class RSParsedFeed;
// Parser for RSS feeds.
@interface RSRSSParser : NSObject
// Parses the feed data in parserData and returns the parsed feed.
+ (RSParsedFeed *)parseFeedWithData:(ParserData *)parserData;
@end

View File

@@ -0,0 +1,523 @@
//
// RSRSSParser.m
// RSParser
//
// Created by Brent Simmons on 1/6/15.
// Copyright (c) 2015 Ranchero Software LLC. All rights reserved.
//
#import "RSRSSParser.h"
#import "RSSAXParser.h"
#import "RSParsedFeed.h"
#import "RSParsedArticle.h"
#import "RSParserInternal.h"
#import "NSString+RSParser.h"
#import "RSDateParser.h"
#import "ParserData.h"
#import "RSParsedEnclosure.h"
#import "RSParsedAuthor.h"
#import <libxml/xmlstring.h>
// Private parsing state for RSRSSParser; also declares conformance to RSSAXParserDelegate.
@interface RSRSSParser () <RSSAXParserDelegate>
// Raw feed data and feed URL, both taken from the ParserData passed to the initializer.
@property (nonatomic) NSData *feedData;
@property (nonatomic) NSString *urlString;
@property (nonatomic) NSDictionary *currentAttributes;
// SAX parser created in the initializer with this object as its delegate.
@property (nonatomic) RSSAXParser *parser;
// Articles collected so far; handed to RSParsedFeed when parsing finishes.
@property (nonatomic) NSMutableArray *articles;
@property (nonatomic) BOOL parsingArticle;
@property (nonatomic) BOOL parsingAuthor;
@property (nonatomic, readonly) RSParsedArticle *currentArticle;
@property (nonatomic) BOOL parsingChannelImage;
@property (nonatomic, readonly) NSDate *currentDate;
@property (nonatomic) BOOL endRSSFound;
// Feed-level values passed to RSParsedFeed together with language.
@property (nonatomic) NSString *link;
@property (nonatomic) NSString *title;
@property (nonatomic) NSDate *dateParsed;
@property (nonatomic) BOOL isRDF;
@property (nonatomic) NSString *language;
@end
@implementation RSRSSParser
#pragma mark - Class Methods
/// Convenience entry point: creates a one-shot parser for `parserData` and returns its parsed feed.
+ (RSParsedFeed *)parseFeedWithData:(ParserData *)parserData {
    // In a class method `self` already is the class, so `[self class]` is redundant.
    RSRSSParser *feedParser = [[self alloc] initWithParserData:parserData];
    RSParsedFeed *feed = [feedParser parseFeed];
    return feed;
}
#pragma mark - Init
/// Captures the feed bytes and URL from `parserData` and creates the SAX parser
/// (with this object as its delegate) and the empty article list.
- (instancetype)initWithParserData:(ParserData *)parserData {
    self = [super init];
    if (self != nil) {
        _feedData = parserData.data;
        _urlString = parserData.url;
        _articles = [[NSMutableArray alloc] init];
        _parser = [[RSSAXParser alloc] initWithDelegate:self];
    }
    return self;
}
#pragma mark - API
/// Runs the parse and packages the collected feed-level values and articles into an RSParsedFeed.
- (RSParsedFeed *)parseFeed {
    [self parse];

    return [[RSParsedFeed alloc] initWithURLString:self.urlString
                                             title:self.title
                                              link:self.link
                                          language:self.language
                                          articles:self.articles];
}
#pragma mark - Constants
// Attribute-name and attribute-value strings. The interned-string delegate
// methods below return these shared instances instead of allocating new strings.
static NSString *kIsPermaLinkKey = @"isPermaLink";
static NSString *kURLKey = @"url";
static NSString *kLengthKey = @"length";
static NSString *kTypeKey = @"type";
static NSString *kFalseValue = @"false";
static NSString *kTrueValue = @"true";
static NSString *kContentEncodedKey = @"content:encoded";
static NSString *kDCDateKey = @"dc:date";
static NSString *kDCCreatorKey = @"dc:creator";
static NSString *kRDFAboutKey = @"rdf:about";

// Element/attribute names as C strings for RSSAXEqualTags.
// NOTE: every k…Length value is strlen + 1, i.e. it counts the terminating NUL.
// RSSAXEqualTags passes it to strncmp, so including the NUL makes the comparison
// an exact match rather than a prefix match ("item" does not match "items").
static const char *kItem = "item";
static const NSInteger kItemLength = 5;
static const char *kImage = "image";
static const NSInteger kImageLength = 6;
static const char *kLink = "link";
static const NSInteger kLinkLength = 5;
static const char *kTitle = "title";
static const NSInteger kTitleLength = 6;
static const char *kDC = "dc";
static const NSInteger kDCLength = 3;
static const char *kCreator = "creator";
static const NSInteger kCreatorLength = 8;
static const char *kDate = "date";
static const NSInteger kDateLength = 5;
static const char *kContent = "content";
static const NSInteger kContentLength = 8;
static const char *kEncoded = "encoded";
static const NSInteger kEncodedLength = 8;
static const char *kGuid = "guid";
static const NSInteger kGuidLength = 5;
static const char *kPubDate = "pubDate";
static const NSInteger kPubDateLength = 8;
static const char *kAuthor = "author";
static const NSInteger kAuthorLength = 7;
static const char *kDescription = "description";
static const NSInteger kDescriptionLength = 12;
static const char *kRSS = "rss";
static const NSInteger kRSSLength = 4;
static const char *kURL = "url";
static const NSInteger kURLLength = 4;
static const char *kLength = "length";
static const NSInteger kLengthLength = 7;
static const char *kType = "type";
static const NSInteger kTypeLength = 5;
static const char *kIsPermaLink = "isPermaLink";
static const NSInteger kIsPermaLinkLength = 12;
static const char *kRDF = "rdf";
static const NSInteger kRDFlength = 4;
static const char *kAbout = "about";
static const NSInteger kAboutLength = 6;
static const char *kFalse = "false";
static const NSInteger kFalseLength = 6;
static const char *kTrue = "true";
static const NSInteger kTrueLength = 5;
static const char *kUppercaseRDF = "RDF";
static const NSInteger kUppercaseRDFLength = 4;
static const char *kEnclosure = "enclosure";
static const NSInteger kEnclosureLength = 10;
static const char *kLanguage = "language";
static const NSInteger kLanguageLength = 9;
#pragma mark - Parsing
/// Records the parse timestamp, then pushes the whole feed through the SAX parser.
- (void)parse {
    self.dateParsed = [NSDate date];

    @autoreleasepool {
        RSSAXParser *saxParser = self.parser;
        [saxParser parseData:self.feedData];
        [saxParser finishParsing];
    }
}
/// Appends a fresh article (stamped with the feed URL and parse date); it becomes `currentArticle`.
- (void)addArticle {
    RSParsedArticle *newArticle = [[RSParsedArticle alloc] initWithFeedURL:self.urlString];
    newArticle.dateParsed = self.dateParsed;

    [self.articles addObject:newArticle];
}
/// The most recently added article, or nil when no item has been started yet.
- (RSParsedArticle *)currentArticle {
    return [self.articles lastObject];
}
/// Applies a just-closed, unprefixed feed-level element (link, title or language) to the feed.
/// Prefixed elements are ignored.
- (void)addFeedElement:(const xmlChar *)localName prefix:(const xmlChar *)prefix {
    if (prefix != NULL) {
        return;
    }

    if (RSSAXEqualTags(localName, kLink, kLinkLength)) {
        // Only the first link is kept.
        if (self.link == nil) {
            self.link = [self currentString];
        }
        return;
    }

    if (RSSAXEqualTags(localName, kTitle, kTitleLength)) {
        self.title = [self currentString];
        return;
    }

    if (RSSAXEqualTags(localName, kLanguage, kLanguageLength)) {
        self.language = [self currentString];
    }
}
/// Adds an author built from `authorString` to the current article.
///
/// @param authorString The author text. Empty or nil strings are ignored.
- (void)addAuthorWithString:(NSString *)authorString {
    if (RSParserStringIsEmpty(authorString)) {
        return;
    }

    // Use the validated argument. Re-reading [self currentString] here ignored the
    // parameter and repeated the data-to-string conversion and trimming.
    RSParsedAuthor *author = [RSParsedAuthor authorWithSingleString:authorString];
    [self.currentArticle addAuthor:author];
}
/// Handles a just-closed `dc:`-prefixed element inside an item:
/// `creator` adds an author, `date` sets the publication date.
- (void)addDCElement:(const xmlChar *)localName {
    if (RSSAXEqualTags(localName, kCreator, kCreatorLength)) {
        [self addAuthorWithString:[self currentString]];
        return;
    }

    if (RSSAXEqualTags(localName, kDate, kDateLength)) {
        self.currentArticle.datePublished = self.currentDate;
    }
}
/// Stores the current element text as the article guid and, unless the element
/// says isPermaLink="false", also uses it as the permalink when it looks like a
/// URL or relative path.
- (void)addGuid {
    NSString *guid = [self currentString];
    self.currentArticle.guid = guid;

    // kIsPermaLinkKey is the interned attribute name, so the exact-match lookup
    // succeeds directly; the case-insensitive fallback only runs for odd casings.
    NSString *isPermaLinkValue = [self.currentAttributes rsparser_objectForCaseInsensitiveKey:kIsPermaLinkKey];

    // The nil check is required: messaging nil returns 0, which equals NSOrderedSame.
    // The value is compared case-insensitively to match the tolerant key lookup.
    BOOL explicitlyNotPermalink = (isPermaLinkValue != nil) && ([isPermaLinkValue caseInsensitiveCompare:kFalseValue] == NSOrderedSame);
    if (explicitlyNotPermalink) {
        return;
    }

    if ([self stringIsProbablyAURLOrRelativePath:guid]) {
        self.currentArticle.permalink = [self urlString:guid];
    }
}
/// Builds an enclosure from the current element's `url`, `length` and `type`
/// attributes and adds it to the current article. Does nothing without a URL.
- (void)addEnclosure {
    NSDictionary *enclosureAttributes = self.currentAttributes;

    NSString *enclosureURL = enclosureAttributes[kURLKey];
    if (enclosureURL.length == 0) { // Also covers nil: messaging nil yields 0.
        return;
    }

    RSParsedEnclosure *parsedEnclosure = [[RSParsedEnclosure alloc] init];
    parsedEnclosure.url = enclosureURL;
    parsedEnclosure.length = [enclosureAttributes[kLengthKey] integerValue];
    parsedEnclosure.mimeType = enclosureAttributes[kTypeKey];

    [self.currentArticle addEnclosure:parsedEnclosure];
}
/// Heuristic for whether a guid can serve as a permalink.
///
/// An RSS guid is a permalink by default; only
///     <guid isPermaLink="false">someidentifier</guid>
/// opts out. Many feeds omit the attribute even though the guid is not a link,
/// so values that cannot be a URL or a relative path are rejected here.
/// This may need to evolve as differently broken feeds turn up.
- (BOOL)stringIsProbablyAURLOrRelativePath:(NSString *)s {
    // Bad guids are often plain integers; requiring a slash filters those out.
    BOOL containsSlash = [s rsparser_contains:@"/"];
    if (!containsSlash) {
        return NO;
    }

    // "tag:" URIs are a common non-URL guid form.
    BOOL isTagURI = [s.lowercaseString hasPrefix:@"tag:"];
    if (isTagURI) {
        return NO;
    }

    return YES;
}
/// Returns `s` resolved against the feed's home page link when `s` is relative.
/// Strings starting with "http" (any case) are returned unchanged, as is anything
/// that cannot be resolved.
- (NSString *)urlString:(NSString *)s {
    if ([[s lowercaseString] hasPrefix:@"http"]) {
        return s;
    }

    NSString *homePageLink = self.link;
    if (homePageLink == nil) {
        // TODO: fall back to the feed URL as the base for resolving.
        return s;
    }

    NSURL *baseURL = [NSURL URLWithString:homePageLink];
    if (baseURL == nil) {
        return s;
    }

    NSString *resolved = [NSURL URLWithString:s relativeToURL:baseURL].absoluteString;
    return resolved ? resolved : s;
}
/// Text of the element being closed, with surrounding whitespace trimmed by the SAX parser.
- (NSString *)currentString {
    RSSAXParser *saxParser = self.parser;
    return saxParser.currentStringWithTrimmedWhitespace;
}
/// Applies a just-closed element inside an item to the current article.
/// Handles `dc:*`, `content:encoded`, and the unprefixed guid, pubDate, author,
/// link, description, title and enclosure elements; everything else is ignored.
- (void)addArticleElement:(const xmlChar *)localName prefix:(const xmlChar *)prefix {
    if (RSSAXEqualTags(prefix, kDC, kDCLength)) {
        [self addDCElement:localName];
        return;
    }

    BOOL isContentEncoded = RSSAXEqualTags(prefix, kContent, kContentLength) && RSSAXEqualTags(localName, kEncoded, kEncodedLength);
    if (isContentEncoded) {
        // content:encoded wins over description, but only when it has text.
        NSString *encodedBody = [self currentString];
        if (!RSParserStringIsEmpty(encodedBody)) {
            self.currentArticle.body = encodedBody;
        }
        return;
    }

    // Any other namespaced element is not part of core RSS handling.
    if (prefix != NULL) {
        return;
    }

    RSParsedArticle *article = self.currentArticle;

    if (RSSAXEqualTags(localName, kGuid, kGuidLength)) {
        [self addGuid];
        return;
    }
    if (RSSAXEqualTags(localName, kPubDate, kPubDateLength)) {
        article.datePublished = self.currentDate;
        return;
    }
    if (RSSAXEqualTags(localName, kAuthor, kAuthorLength)) {
        [self addAuthorWithString:[self currentString]];
        return;
    }
    if (RSSAXEqualTags(localName, kLink, kLinkLength)) {
        article.link = [self urlString:[self currentString]];
        return;
    }
    if (RSSAXEqualTags(localName, kDescription, kDescriptionLength)) {
        // Do not overwrite a body already set (e.g. by content:encoded).
        if (!article.body) {
            article.body = [self currentString];
        }
        return;
    }
    // A title nested inside an author element is not the article's title.
    if (!self.parsingAuthor && RSSAXEqualTags(localName, kTitle, kTitleLength)) {
        NSString *articleTitle = [self currentString];
        if (articleTitle != nil) {
            article.title = articleTitle;
        }
        return;
    }
    if (RSSAXEqualTags(localName, kEnclosure, kEnclosureLength)) {
        [self addEnclosure];
    }
}
/// Parses the SAX parser's currently stored characters as a date via RSDateWithBytes.
- (NSDate *)currentDate {
    NSData *characters = self.parser.currentCharacters;
    return RSDateWithBytes(characters.bytes, characters.length);
}
#pragma mark - RSSAXParserDelegate
/// Start-tag callback. Tracks RDF detection, captures attributes for the elements
/// that need them, updates the item/image/author state flags, and starts character
/// storage for every element that is not inside a channel image.
- (void)saxParser:(RSSAXParser *)SAXParser XMLStartElement:(const xmlChar *)localName prefix:(const xmlChar *)prefix uri:(const xmlChar *)uri numberOfNamespaces:(NSInteger)numberOfNamespaces namespaces:(const xmlChar **)namespaces numberOfAttributes:(NSInteger)numberOfAttributes numberDefaulted:(int)numberDefaulted attributes:(const xmlChar **)attributes {
    if (self.endRSSFound) {
        return;
    }

    if (RSSAXEqualTags(localName, kUppercaseRDF, kUppercaseRDFLength)) {
        self.isRDF = YES;
        return;
    }

    // Building the attributes dictionary is only worthwhile for elements that read it.
    BOOL isItem = RSSAXEqualTags(localName, kItem, kItemLength);
    BOOL needsAttributes = (self.isRDF && isItem)
        || RSSAXEqualTags(localName, kGuid, kGuidLength)
        || RSSAXEqualTags(localName, kEnclosure, kEnclosureLength);

    NSDictionary *xmlAttributes = nil;
    if (needsAttributes) {
        xmlAttributes = [self.parser attributesDictionary:attributes numberOfAttributes:numberOfAttributes];
    }
    if (self.currentAttributes != xmlAttributes) {
        self.currentAttributes = xmlAttributes;
    }

    if (!prefix) {
        if (isItem) {
            [self addArticle];
            self.parsingArticle = YES;

            NSString *rdfAbout = (self.isRDF && xmlAttributes) ? xmlAttributes[kRDFAboutKey] : nil;
            if (rdfAbout) { /*RSS 1.0 guid*/
                self.currentArticle.guid = xmlAttributes[kRDFAboutKey];
                self.currentArticle.permalink = self.currentArticle.guid;
            }
        }
        else if (RSSAXEqualTags(localName, kImage, kImageLength)) {
            self.parsingChannelImage = YES;
        }
        else if (RSSAXEqualTags(localName, kAuthor, kAuthorLength)) {
            if (self.parsingArticle) {
                self.parsingAuthor = YES;
            }
        }
    }

    if (!self.parsingChannelImage) {
        [self.parser beginStoringCharacters];
    }
}
/// End-tag callback. Detects the end of the feed, clears the image/item state
/// flags, and routes every other closed element to the article or feed handlers.
- (void)saxParser:(RSSAXParser *)SAXParser XMLEndElement:(const xmlChar *)localName prefix:(const xmlChar *)prefix uri:(const xmlChar *)uri {
    if (self.endRSSFound) {
        return;
    }

    BOOL isClosingRoot = (self.isRDF && RSSAXEqualTags(localName, kUppercaseRDF, kUppercaseRDFLength))
        || RSSAXEqualTags(localName, kRSS, kRSSLength);
    if (isClosingRoot) {
        self.endRSSFound = YES;
        return;
    }

    if (RSSAXEqualTags(localName, kImage, kImageLength)) {
        self.parsingChannelImage = NO;
        return;
    }

    if (RSSAXEqualTags(localName, kItem, kItemLength)) {
        self.parsingArticle = NO;
        return;
    }

    if (self.parsingArticle) {
        [self addArticleElement:localName prefix:prefix];
        if (RSSAXEqualTags(localName, kAuthor, kAuthorLength)) {
            self.parsingAuthor = NO;
        }
        return;
    }

    if (!self.parsingChannelImage) {
        [self addFeedElement:localName prefix:prefix];
    }
}
/// Returns a shared NSString for the attribute names this parser reads
/// (`rdf:about`, and unprefixed `isPermaLink`, `url`, `length`, `type`).
/// Returns nil for any other name, leaving string creation to the SAX parser.
- (NSString *)saxParser:(RSSAXParser *)SAXParser internedStringForName:(const xmlChar *)name prefix:(const xmlChar *)prefix {
    if (RSSAXEqualTags(prefix, kRDF, kRDFlength)) {
        return RSSAXEqualTags(name, kAbout, kAboutLength) ? kRDFAboutKey : nil;
    }

    if (prefix != NULL) {
        return nil;
    }

    NSString *interned = nil;
    if (RSSAXEqualTags(name, kIsPermaLink, kIsPermaLinkLength)) {
        interned = kIsPermaLinkKey;
    }
    else if (RSSAXEqualTags(name, kURL, kURLLength)) {
        interned = kURLKey;
    }
    else if (RSSAXEqualTags(name, kLength, kLengthLength)) {
        interned = kLengthKey;
    }
    else if (RSSAXEqualTags(name, kType, kTypeLength)) {
        interned = kTypeKey;
    }
    return interned;
}
/// Returns YES when the first `length` bytes of both buffers are identical.
static BOOL equalBytes(const void *bytes1, const void *bytes2, NSUInteger length) {
    int difference = memcmp(bytes1, bytes2, length);
    return (difference == 0) ? YES : NO;
}
/// Returns the shared @"false" / @"true" strings for attribute values that are
/// exactly those bytes; returns nil for any other value.
- (NSString *)saxParser:(RSSAXParser *)SAXParser internedStringForValue:(const void *)bytes length:(NSUInteger)length {
    // The k…Length constants count the terminating NUL; attribute values are not
    // NUL-terminated, so compare against the length without it.
    static const NSUInteger falseLength = kFalseLength - 1;
    static const NSUInteger trueLength = kTrueLength - 1;

    NSString *interned = nil;
    if (length == falseLength && equalBytes(bytes, kFalse, falseLength)) {
        interned = kFalseValue;
    }
    else if (length == trueLength && equalBytes(bytes, kTrue, trueLength)) {
        interned = kTrueValue;
    }
    return interned;
}
@end

View File

@@ -0,0 +1,55 @@
//
// RSSAXHTMLParser.h
// RSParser
//
// Created by Brent Simmons on 3/6/16.
// Copyright © 2016 Ranchero Software, LLC. All rights reserved.
//
@import Foundation;
NS_ASSUME_NONNULL_BEGIN
@class RSSAXHTMLParser;
/// Callbacks issued by RSSAXHTMLParser while it parses. All methods are optional;
/// the parser checks once, at init time, which ones the delegate implements.
@protocol RSSAXHTMLParserDelegate <NSObject>
@optional
/// Called for each start tag. `attributes` is the raw attribute array, which can be
/// converted with -[RSSAXHTMLParser attributesDictionary:].
- (void)saxParser:(RSSAXHTMLParser *)SAXParser XMLStartElement:(const unsigned char *)localName attributes:(const unsigned char *_Nullable*_Nullable)attributes;
/// Called for each end tag. Stored characters are discarded right after this returns.
- (void)saxParser:(RSSAXHTMLParser *)SAXParser XMLEndElement:(nullable const unsigned char *)localName;
// Length is guaranteed to be greater than 0.
- (void)saxParser:(RSSAXHTMLParser *)SAXParser XMLCharactersFound:(nullable const unsigned char *)characters length:(NSUInteger)length;
- (void)saxParserDidReachEndOfDocument:(RSSAXHTMLParser *)SAXParser; // If canceled, may not get called (but might).
@end
/// Streaming HTML parser built on libxml2's HTML push parser. Events are
/// reported to the delegate as they occur.
@interface RSSAXHTMLParser : NSObject
/// Creates a parser that reports to `delegate`.
- (instancetype)initWithDelegate:(id<RSSAXHTMLParserDelegate>)delegate;
/// Convenience for -parseBytes:numberOfBytes: using the data's bytes and length.
- (void)parseData:(NSData *)data;
/// Feeds a chunk of bytes to the parser. The parsing context is created on the first call.
- (void)parseBytes:(const void *)bytes numberOfBytes:(NSUInteger)numberOfBytes;
/// Signals end of input and releases the parsing context. Asserts (debug builds)
/// that a parse call has created the context first.
- (void)finishParsing;
/// Asks libxml2 to stop parsing.
- (void)cancel;
@property (nullable, nonatomic, strong, readonly) NSData *currentCharacters; // nil if not storing characters. UTF-8 encoded.
@property (nullable, nonatomic, strong, readonly) NSString *currentString; // Convenience to get string version of currentCharacters.
@property (nullable, nonatomic, strong, readonly) NSString *currentStringWithTrimmedWhitespace;
- (void)beginStoringCharacters; // Delegate can call from XMLStartElement. Characters will be available in XMLEndElement as currentCharacters property. Storing characters is stopped after each XMLEndElement.
// Delegate can call from within XMLStartElement.
// Returns nil when `attributes` is NULL. Attributes without a value map to @"".
- (nullable NSDictionary *)attributesDictionary:(const unsigned char *_Nullable*_Nullable)attributes;
@end
NS_ASSUME_NONNULL_END

View File

@@ -0,0 +1,321 @@
//
// RSSAXHTMLParser.m
// RSParser
//
// Created by Brent Simmons on 3/6/16.
// Copyright © 2016 Ranchero Software, LLC. All rights reserved.
//
#import "RSSAXHTMLParser.h"
#import "RSSAXParser.h"
#import "RSParserInternal.h"
#import <libxml/tree.h>
#import <libxml/xmlstring.h>
#import <libxml/HTMLparser.h>
/// Private state for RSSAXHTMLParser.
@interface RSSAXHTMLParser ()
/// Receiver of parse events. Held weakly, like RSSAXParser's delegate: a delegate
/// commonly owns its parser, and a strong back-reference would form a retain cycle.
@property (nonatomic, weak) id<RSSAXHTMLParserDelegate> delegate;
/// libxml2 HTML push-parser context; created lazily by the first parse call and
/// freed in -finishParsing or -dealloc.
@property (nonatomic, assign) htmlParserCtxtPtr context;
/// YES between -beginStoringCharacters and the next end-element / end-document callback.
@property (nonatomic, assign) BOOL storingCharacters;
/// Accumulated UTF-8 bytes for the element being stored; nil when not storing.
@property (nonatomic) NSMutableData *characters;
// Cached -respondsToSelector: results for the optional delegate methods.
@property (nonatomic) BOOL delegateRespondsToStartElementMethod;
@property (nonatomic) BOOL delegateRespondsToEndElementMethod;
@property (nonatomic) BOOL delegateRespondsToCharactersFoundMethod;
@property (nonatomic) BOOL delegateRespondsToEndOfDocumentMethod;
@end
@implementation RSSAXHTMLParser
/// Makes sure libxml2 is initialized before this class is first used.
+ (void)initialize {
RSSAXInitLibXMLParser();
}
#pragma mark - Init
/// Stores the delegate and caches which optional delegate methods it implements,
/// so the callbacks need not query -respondsToSelector: on every event.
- (instancetype)initWithDelegate:(id<RSSAXHTMLParserDelegate>)delegate {
    self = [super init];
    if (self != nil) {
        _delegate = delegate;

        _delegateRespondsToStartElementMethod = [_delegate respondsToSelector:@selector(saxParser:XMLStartElement:attributes:)];
        _delegateRespondsToEndElementMethod = [_delegate respondsToSelector:@selector(saxParser:XMLEndElement:)];
        _delegateRespondsToCharactersFoundMethod = [_delegate respondsToSelector:@selector(saxParser:XMLCharactersFound:length:)];
        _delegateRespondsToEndOfDocumentMethod = [_delegate respondsToSelector:@selector(saxParserDidReachEndOfDocument:)];
    }
    return self;
}
#pragma mark - Dealloc
/// Frees the libxml2 context if parsing was never finished.
- (void)dealloc {
    htmlParserCtxtPtr leftoverContext = _context;
    _context = nil;
    if (leftoverContext != nil) {
        htmlFreeParserCtxt(leftoverContext);
    }

    _delegate = nil;
}
#pragma mark - API
static xmlSAXHandler saxHandlerStruct;
/// Feeds the contents of `data` to the parser.
- (void)parseData:(NSData *)data {
    const void *bytes = data.bytes;
    NSUInteger numberOfBytes = data.length;
    [self parseBytes:bytes numberOfBytes:numberOfBytes];
}
/// Feeds `numberOfBytes` bytes to the HTML push parser, creating the parsing
/// context (with a detected character encoding) on the first call.
///
/// libxml2 takes chunk sizes as `int`, so inputs larger than INT_MAX are fed in
/// several chunks instead of being truncated by a cast.
- (void)parseBytes:(const void *)bytes numberOfBytes:(NSUInteger)numberOfBytes {
    if (self.context == nil) {
        // Clamp the length so the int conversion cannot overflow.
        int detectionLength = (int)MIN(numberOfBytes, (NSUInteger)INT_MAX);
        xmlCharEncoding characterEncoding = xmlDetectCharEncoding(bytes, detectionLength);
        self.context = htmlCreatePushParserCtxt(&saxHandlerStruct, (__bridge void *)self, nil, 0, nil, characterEncoding);
        htmlCtxtUseOptions(self.context, XML_PARSE_RECOVER | XML_PARSE_NONET | HTML_PARSE_COMPACT);
    }

    @autoreleasepool {
        NSUInteger offset = 0;
        // do/while: the parser is still called once for empty input, as before.
        do {
            NSUInteger remaining = numberOfBytes - offset;
            int chunkSize = (int)MIN(remaining, (NSUInteger)INT_MAX);
            // Avoid pointer arithmetic on a possibly-NULL buffer for the first chunk.
            const char *chunk = (offset == 0) ? (const char *)bytes : (const char *)bytes + offset;
            htmlParseChunk(self.context, chunk, chunkSize, 0);
            offset += (NSUInteger)chunkSize;
        } while (offset < numberOfBytes);
    }
}
/// Tells libxml2 that input is complete, then frees the context and drops stored characters.
/// Asserts that a context exists; with assertions disabled it simply returns when there is none.
- (void)finishParsing {
    NSAssert(self.context != nil, nil);

    htmlParserCtxtPtr activeContext = self.context;
    if (activeContext == nil) {
        return;
    }

    @autoreleasepool {
        htmlParseChunk(activeContext, nil, 0, 1); // terminate = 1
        htmlFreeParserCtxt(activeContext);
        self.context = nil;
        self.characters = nil;
    }
}
/// Asks libxml2 to stop parsing on the current context.
- (void)cancel {
    @autoreleasepool {
        htmlParserCtxtPtr activeContext = self.context;
        xmlStopParser(activeContext);
    }
}
/// Starts collecting character data into a fresh, empty buffer.
- (void)beginStoringCharacters {
    self.characters = [[NSMutableData alloc] init];
    self.storingCharacters = YES;
}
/// Stops collecting character data and discards the buffer.
- (void)endStoringCharacters {
    self.characters = nil;
    self.storingCharacters = NO;
}
/// The bytes collected so far, or nil when character storage is not active.
- (NSData *)currentCharacters {
    return self.storingCharacters ? self.characters : nil;
}
/// The collected bytes decoded as UTF-8, or nil when RSParserObjectIsEmpty rejects them.
- (NSString *)currentString {
    NSData *utf8Data = self.currentCharacters;
    if (!RSParserObjectIsEmpty(utf8Data)) {
        return [[NSString alloc] initWithData:utf8Data encoding:NSUTF8StringEncoding];
    }
    return nil;
}
/// `currentString` with leading and trailing whitespace and newlines removed.
- (NSString *)currentStringWithTrimmedWhitespace {
    NSString *untrimmed = self.currentString;
    NSCharacterSet *whitespace = [NSCharacterSet whitespaceAndNewlineCharacterSet];
    return [untrimmed stringByTrimmingCharactersInSet:whitespace];
}
#pragma mark - Attributes Dictionary
/// Converts the raw attribute array handed to the start-element callback into a dictionary.
///
/// The array is read as consecutive name/value pairs and ends at the first NULL
/// name. A NULL (or undecodable) value is stored as @"".
///
/// @param attributes The raw attribute array; may be NULL.
/// @return An immutable name → value dictionary, or nil when `attributes` is NULL.
- (NSDictionary *)attributesDictionary:(const xmlChar **)attributes {
    if (!attributes) {
        return nil;
    }

    NSMutableDictionary *d = [NSMutableDictionary new];

    // Step strictly in pairs. Tracking "do I have a key yet?" instead would
    // misread the value slot as a name whenever a name fails to decode,
    // shifting every later pair.
    for (NSUInteger ix = 0; attributes[ix] != NULL; ix += 2) {
        NSString *name = [NSString stringWithUTF8String:(const char *)attributes[ix]];

        const xmlChar *rawValue = attributes[ix + 1];
        NSString *value = nil;
        if (rawValue) {
            value = [NSString stringWithUTF8String:(const char *)rawValue];
        }

        if (!name) {
            continue; // Name was not valid UTF-8; skip only this pair.
        }
        d[name] = value ? value : @"";
    }

    return [d copy];
}
#pragma mark - Callbacks
/// End-of-document handler: notifies the delegate (if it listens), then stops character storage.
- (void)xmlEndDocument {
    @autoreleasepool {
        BOOL shouldNotify = self.delegateRespondsToEndOfDocumentMethod;
        if (shouldNotify) {
            id<RSSAXHTMLParserDelegate> receiver = self.delegate;
            [receiver saxParserDidReachEndOfDocument:self];
        }

        [self endStoringCharacters];
    }
}
/// Character-data handler: appends to the storage buffer while storing, and
/// forwards the run to the delegate when it listens. Empty runs are ignored.
- (void)xmlCharactersFound:(const xmlChar *)ch length:(NSUInteger)length {
    if (length == 0) {
        return;
    }

    @autoreleasepool {
        if (self.storingCharacters) {
            NSMutableData *buffer = self.characters;
            [buffer appendBytes:(const void *)ch length:length];
        }

        if (self.delegateRespondsToCharactersFoundMethod) {
            id<RSSAXHTMLParserDelegate> receiver = self.delegate;
            [receiver saxParser:self XMLCharactersFound:ch length:length];
        }
    }
}
/// Start-element handler: forwards the event to the delegate when it listens.
- (void)xmlStartElement:(const xmlChar *)localName attributes:(const xmlChar **)attributes {
    if (!self.delegateRespondsToStartElementMethod) {
        return;
    }

    @autoreleasepool {
        id<RSSAXHTMLParserDelegate> receiver = self.delegate;
        [receiver saxParser:self XMLStartElement:localName attributes:attributes];
    }
}
/// End-element handler: forwards the event to the delegate when it listens, then
/// always stops character storage.
- (void)xmlEndElement:(const xmlChar *)localName {
    @autoreleasepool {
        BOOL shouldNotify = self.delegateRespondsToEndElementMethod;
        if (shouldNotify) {
            id<RSSAXHTMLParserDelegate> receiver = self.delegate;
            [receiver saxParser:self XMLEndElement:localName];
        }

        [self endStoringCharacters];
    }
}
@end
// libxml2 start-element trampoline: `context` is the RSSAXHTMLParser passed as user data.
static void startElementSAX(void *context, const xmlChar *localname, const xmlChar **attributes) {
    RSSAXHTMLParser *parser = (__bridge RSSAXHTMLParser *)context;
    [parser xmlStartElement:localname attributes:attributes];
}
// libxml2 end-element trampoline: `context` is the RSSAXHTMLParser passed as user data.
static void endElementSAX(void *context, const xmlChar *localname) {
    RSSAXHTMLParser *parser = (__bridge RSSAXHTMLParser *)context;
    [parser xmlEndElement:localname];
}
// libxml2 characters trampoline: `context` is the RSSAXHTMLParser passed as user data.
static void charactersFoundSAX(void *context, const xmlChar *ch, int len) {
    RSSAXHTMLParser *parser = (__bridge RSSAXHTMLParser *)context;
    NSUInteger length = (NSUInteger)len;
    [parser xmlCharactersFound:ch length:length];
}
// libxml2 end-document trampoline: `context` is the RSSAXHTMLParser passed as user data.
static void endDocumentSAX(void *context) {
    RSSAXHTMLParser *parser = (__bridge RSSAXHTMLParser *)context;
    [parser xmlEndDocument];
}
// SAX callback table for the HTML parser. Only the four events this class handles
// are installed; every field not named here is zero-initialized (no handler).
static htmlSAXHandler saxHandlerStruct = {
    .endDocument = endDocumentSAX,
    .startElement = startElementSAX,
    .endElement = endElementSAX,
    .characters = charactersFoundSAX,
    .initialized = XML_SAX2_MAGIC,
};

View File

@@ -0,0 +1,69 @@
//
// RSSAXParser.h
// RSParser
//
// Created by Brent Simmons on 3/25/15.
// Copyright (c) 2015 Ranchero Software, LLC. All rights reserved.
//
@import Foundation;
/*Thread-safe, not re-entrant.
Calls to the delegate will happen on the same thread where the parser runs.
This is a low-level streaming XML parser, a thin wrapper for libxml2's SAX parser. It doesn't do much Foundation-ifying quite on purpose -- because the goal is performance and low memory use.
This class is not meant to be sub-classed. Use the delegate methods.
*/
@class RSSAXParser;
/// Callbacks issued by RSSAXParser while it parses. All methods are optional;
/// the parser checks once, at init time, which ones the delegate implements.
@protocol RSSAXParserDelegate <NSObject>
@optional
/// Called for each start tag. The arguments are passed through unchanged from
/// libxml2's namespace-aware start-element callback.
- (void)saxParser:(RSSAXParser *)SAXParser XMLStartElement:(const unsigned char *)localName prefix:(const unsigned char *)prefix uri:(const unsigned char *)uri numberOfNamespaces:(NSInteger)numberOfNamespaces namespaces:(const unsigned char **)namespaces numberOfAttributes:(NSInteger)numberOfAttributes numberDefaulted:(int)numberDefaulted attributes:(const unsigned char **)attributes;
/// Called for each end tag. Stored characters are discarded right after this returns.
- (void)saxParser:(RSSAXParser *)SAXParser XMLEndElement:(const unsigned char *)localName prefix:(const unsigned char *)prefix uri:(const unsigned char *)uri;
// Length is guaranteed to be greater than 0.
- (void)saxParser:(RSSAXParser *)SAXParser XMLCharactersFound:(const unsigned char *)characters length:(NSUInteger)length;
- (void)saxParserDidReachEndOfDocument:(RSSAXParser *)SAXParser; /*If canceled, may not get called (but might).*/
/// Lets the delegate supply a shared string for an attribute name while the
/// attributes dictionary is built.
- (NSString *)saxParser:(RSSAXParser *)SAXParser internedStringForName:(const unsigned char *)name prefix:(const unsigned char *)prefix; /*Okay to return nil. Prefix may be nil.*/
/// Lets the delegate supply a shared string for an attribute value. `bytes` is
/// `length` bytes long; the parser falls back to creating a string when nil is returned.
- (NSString *)saxParser:(RSSAXParser *)SAXParser internedStringForValue:(const void *)bytes length:(NSUInteger)length;
@end
void RSSAXInitLibXMLParser(void); // Needed by RSSAXHTMLParser.
/*For use by delegate.
Returns NO when localName is NULL; otherwise compares the first tagLength bytes
with strncmp. Pass strlen(tag) + 1 (i.e. include the terminating NUL) to require
an exact match instead of a prefix match.*/
BOOL RSSAXEqualTags(const unsigned char *localName, const char *tag, NSInteger tagLength);
/// Streaming XML parser built on libxml2's push parser. Events are reported to
/// the delegate as they occur.
@interface RSSAXParser : NSObject
/// Creates a parser that reports to `delegate`. The delegate is held weakly.
- (instancetype)initWithDelegate:(id<RSSAXParserDelegate>)delegate;
/// Convenience for -parseBytes:numberOfBytes: using the data's bytes and length.
- (void)parseData:(NSData *)data;
/// Feeds a chunk of bytes to the parser. The parsing context is created on the first call.
- (void)parseBytes:(const void *)bytes numberOfBytes:(NSUInteger)numberOfBytes;
/// Signals end of input and releases the parsing context. Asserts (debug builds)
/// that a parse call has created the context first.
- (void)finishParsing;
/// Asks libxml2 to stop parsing.
- (void)cancel;
@property (nonatomic, strong, readonly) NSData *currentCharacters; /*nil if not storing characters. UTF-8 encoded.*/
@property (nonatomic, strong, readonly) NSString *currentString; /*Convenience to get string version of currentCharacters.*/
@property (nonatomic, strong, readonly) NSString *currentStringWithTrimmedWhitespace;
- (void)beginStoringCharacters; /*Delegate can call from XMLStartElement. Characters will be available in XMLEndElement as currentCharacters property. Storing characters is stopped after each XMLEndElement.*/
/*Delegate can call from within XMLStartElement. Returns nil if numberOfAttributes < 1.*/
- (NSDictionary *)attributesDictionary:(const unsigned char **)attributes numberOfAttributes:(NSInteger)numberOfAttributes;
@end

View File

@@ -0,0 +1,353 @@
//
// RSSAXParser.m
// RSParser
//
// Created by Brent Simmons on 3/25/15.
// Copyright (c) 2015 Ranchero Software, LLC. All rights reserved.
//
#import "RSSAXParser.h"
#import "RSParserInternal.h"
#import <libxml/parser.h>
#import <libxml/tree.h>
#import <libxml/xmlstring.h>
/// Private state for RSSAXParser.
@interface RSSAXParser ()
/// Receiver of parse events; weak so a delegate that owns its parser does not form a retain cycle.
@property (nonatomic, weak) id<RSSAXParserDelegate> delegate;
/// libxml2 push-parser context; created lazily by the first parse call and freed
/// in -finishParsing or -dealloc.
@property (nonatomic, assign) xmlParserCtxtPtr context;
/// YES between -beginStoringCharacters and the next end-element / end-document callback.
@property (nonatomic, assign) BOOL storingCharacters;
/// Accumulated UTF-8 bytes for the element being stored; nil when not storing.
@property (nonatomic) NSMutableData *characters;
// Cached -respondsToSelector: results for the optional delegate methods.
@property (nonatomic) BOOL delegateRespondsToInternedStringMethod;
@property (nonatomic) BOOL delegateRespondsToInternedStringForValueMethod;
@property (nonatomic) BOOL delegateRespondsToStartElementMethod;
@property (nonatomic) BOOL delegateRespondsToEndElementMethod;
@property (nonatomic) BOOL delegateRespondsToCharactersFoundMethod;
@property (nonatomic) BOOL delegateRespondsToEndOfDocumentMethod;
@end
@implementation RSSAXParser
/// Makes sure libxml2 is initialized before this class is first used.
+ (void)initialize {
RSSAXInitLibXMLParser();
}
#pragma mark - Init
/// Stores the delegate and caches which optional delegate methods it implements,
/// so the callbacks need not query -respondsToSelector: on every event.
- (instancetype)initWithDelegate:(id<RSSAXParserDelegate>)delegate {
    self = [super init];
    if (self != nil) {
        _delegate = delegate;

        _delegateRespondsToInternedStringMethod = [_delegate respondsToSelector:@selector(saxParser:internedStringForName:prefix:)];
        _delegateRespondsToInternedStringForValueMethod = [_delegate respondsToSelector:@selector(saxParser:internedStringForValue:length:)];
        _delegateRespondsToStartElementMethod = [_delegate respondsToSelector:@selector(saxParser:XMLStartElement:prefix:uri:numberOfNamespaces:namespaces:numberOfAttributes:numberDefaulted:attributes:)];
        _delegateRespondsToEndElementMethod = [_delegate respondsToSelector:@selector(saxParser:XMLEndElement:prefix:uri:)];
        _delegateRespondsToCharactersFoundMethod = [_delegate respondsToSelector:@selector(saxParser:XMLCharactersFound:length:)];
        _delegateRespondsToEndOfDocumentMethod = [_delegate respondsToSelector:@selector(saxParserDidReachEndOfDocument:)];
    }
    return self;
}
#pragma mark - Dealloc
/// Frees the libxml2 context if parsing was never finished.
- (void)dealloc {
    xmlParserCtxtPtr leftoverContext = _context;
    _context = nil;
    if (leftoverContext != nil) {
        xmlFreeParserCtxt(leftoverContext);
    }

    _delegate = nil;
}
#pragma mark - API
static xmlSAXHandler saxHandlerStruct;
/// Feeds the contents of `data` to the parser.
- (void)parseData:(NSData *)data {
    const void *bytes = data.bytes;
    NSUInteger numberOfBytes = data.length;
    [self parseBytes:bytes numberOfBytes:numberOfBytes];
}
/// Feeds `numberOfBytes` bytes to the XML push parser, creating the parsing
/// context on the first call.
///
/// libxml2 takes chunk sizes as `int`, so inputs larger than INT_MAX are fed in
/// several chunks instead of being truncated by a cast.
- (void)parseBytes:(const void *)bytes numberOfBytes:(NSUInteger)numberOfBytes {
    if (self.context == nil) {
        self.context = xmlCreatePushParserCtxt(&saxHandlerStruct, (__bridge void *)self, nil, 0, nil);
        xmlCtxtUseOptions(self.context, XML_PARSE_RECOVER | XML_PARSE_NOENT);
    }

    @autoreleasepool {
        NSUInteger offset = 0;
        // do/while: the parser is still called once for empty input, as before.
        do {
            NSUInteger remaining = numberOfBytes - offset;
            int chunkSize = (int)MIN(remaining, (NSUInteger)INT_MAX);
            // Avoid pointer arithmetic on a possibly-NULL buffer for the first chunk.
            const char *chunk = (offset == 0) ? (const char *)bytes : (const char *)bytes + offset;
            xmlParseChunk(self.context, chunk, chunkSize, 0);
            offset += (NSUInteger)chunkSize;
        } while (offset < numberOfBytes);
    }
}
/// Tells libxml2 that input is complete, then frees the context and drops stored characters.
/// Asserts that a context exists; with assertions disabled it simply returns when there is none.
- (void)finishParsing {
    NSAssert(self.context != nil, nil);

    xmlParserCtxtPtr activeContext = self.context;
    if (activeContext == nil) {
        return;
    }

    @autoreleasepool {
        xmlParseChunk(activeContext, nil, 0, 1); // terminate = 1
        xmlFreeParserCtxt(activeContext);
        self.context = nil;
        self.characters = nil;
    }
}
/// Asks libxml2 to stop parsing on the current context.
- (void)cancel {
    @autoreleasepool {
        xmlParserCtxtPtr activeContext = self.context;
        xmlStopParser(activeContext);
    }
}
/// Starts collecting character data into a fresh, empty buffer.
- (void)beginStoringCharacters {
    self.characters = [[NSMutableData alloc] init];
    self.storingCharacters = YES;
}
/// Stops collecting character data and discards the buffer.
- (void)endStoringCharacters {
    self.characters = nil;
    self.storingCharacters = NO;
}
/// The bytes collected so far, or nil when character storage is not active.
- (NSData *)currentCharacters {
    return self.storingCharacters ? self.characters : nil;
}
/// The collected bytes decoded as UTF-8, or nil when RSParserObjectIsEmpty rejects them.
- (NSString *)currentString {
    NSData *utf8Data = self.currentCharacters;
    if (!RSParserObjectIsEmpty(utf8Data)) {
        return [[NSString alloc] initWithData:utf8Data encoding:NSUTF8StringEncoding];
    }
    return nil;
}
/// `currentString` with leading and trailing whitespace and newlines removed.
- (NSString *)currentStringWithTrimmedWhitespace {
    NSString *untrimmed = self.currentString;
    NSCharacterSet *whitespace = [NSCharacterSet whitespaceAndNewlineCharacterSet];
    return [untrimmed stringByTrimmingCharactersInSet:whitespace];
}
#pragma mark - Attributes Dictionary
/// Converts the raw attribute array handed to the start-element callback into a dictionary.
///
/// Each attribute occupies five consecutive slots; this method reads slot 0
/// (local name), slot 1 (prefix, may be NULL), slot 3 (start of value) and
/// slot 4 (end of value). Prefixed names are stored as "prefix:name". The
/// delegate may supply shared strings for names and values.
///
/// @return The attributes, or nil when there are none.
- (NSDictionary *)attributesDictionary:(const xmlChar **)attributes numberOfAttributes:(NSInteger)numberOfAttributes {
    if (!attributes || numberOfAttributes < 1) {
        return nil;
    }

    static const NSInteger slotsPerAttribute = 5;
    NSMutableDictionary *result = [NSMutableDictionary new];

    @autoreleasepool {
        for (NSInteger attributeIndex = 0; attributeIndex < numberOfAttributes; attributeIndex++) {
            const xmlChar **slots = attributes + (attributeIndex * slotsPerAttribute);
            const xmlChar *localName = slots[0];
            const xmlChar *prefix = slots[1];
            const xmlChar *valueStart = slots[3];
            const xmlChar *valueEnd = slots[4];

            // The value is delimited by two pointers, not NUL-terminated.
            NSUInteger valueLength = (NSUInteger)(valueEnd - valueStart);

            NSString *value = nil;
            if (self.delegateRespondsToInternedStringForValueMethod) {
                value = [self.delegate saxParser:self internedStringForValue:(const void *)valueStart length:valueLength];
            }
            if (!value) {
                value = [[NSString alloc] initWithBytes:(const void *)valueStart length:valueLength encoding:NSUTF8StringEncoding];
            }

            NSString *attributeName = nil;
            if (self.delegateRespondsToInternedStringMethod) {
                attributeName = [self.delegate saxParser:self internedStringForName:localName prefix:prefix];
            }
            if (!attributeName) {
                attributeName = [NSString stringWithUTF8String:(const char *)localName];
                if (prefix) {
                    NSString *attributePrefix = [NSString stringWithUTF8String:(const char *)prefix];
                    attributeName = [NSString stringWithFormat:@"%@:%@", attributePrefix, attributeName];
                }
            }

            if (value && attributeName) {
                result[attributeName] = value;
            }
        }
    }

    return result;
}
#pragma mark - Equal Tags
/// Returns YES when the first `tagLength` bytes of `localName` equal those of `tag`.
/// A NULL `localName` never matches. Passing strlen(tag) + 1 includes the
/// terminating NUL and so requires an exact match.
BOOL RSSAXEqualTags(const xmlChar *localName, const char *tag, NSInteger tagLength) {
    if (localName == NULL) {
        return NO;
    }

    int comparison = strncmp((const char *)localName, tag, (size_t)tagLength);
    return comparison == 0;
}
#pragma mark - Callbacks
/// End-of-document handler: notifies the delegate (if it listens), then stops character storage.
- (void)xmlEndDocument {
    @autoreleasepool {
        BOOL shouldNotify = self.delegateRespondsToEndOfDocumentMethod;
        if (shouldNotify) {
            id<RSSAXParserDelegate> receiver = self.delegate;
            [receiver saxParserDidReachEndOfDocument:self];
        }

        [self endStoringCharacters];
    }
}
/// Character-data handler: appends to the storage buffer while storing, and
/// forwards the run to the delegate when it listens. Empty runs are ignored.
- (void)xmlCharactersFound:(const xmlChar *)ch length:(NSUInteger)length {
    if (length == 0) {
        return;
    }

    @autoreleasepool {
        if (self.storingCharacters) {
            NSMutableData *buffer = self.characters;
            [buffer appendBytes:(const void *)ch length:length];
        }

        if (self.delegateRespondsToCharactersFoundMethod) {
            id<RSSAXParserDelegate> receiver = self.delegate;
            [receiver saxParser:self XMLCharactersFound:ch length:length];
        }
    }
}
/// Start-element handler: forwards the event, unchanged, to the delegate when it listens.
- (void)xmlStartElement:(const xmlChar *)localName prefix:(const xmlChar *)prefix uri:(const xmlChar *)uri numberOfNamespaces:(int)numberOfNamespaces namespaces:(const xmlChar **)namespaces numberOfAttributes:(int)numberOfAttributes numberDefaulted:(int)numberDefaulted attributes:(const xmlChar **)attributes {
    if (!self.delegateRespondsToStartElementMethod) {
        return;
    }

    @autoreleasepool {
        id<RSSAXParserDelegate> receiver = self.delegate;
        [receiver saxParser:self
            XMLStartElement:localName
                     prefix:prefix
                        uri:uri
         numberOfNamespaces:numberOfNamespaces
                 namespaces:namespaces
         numberOfAttributes:numberOfAttributes
            numberDefaulted:numberDefaulted
                 attributes:attributes];
    }
}
/// End-element handler: forwards the event to the delegate when it listens, then
/// always stops character storage.
- (void)xmlEndElement:(const xmlChar *)localName prefix:(const xmlChar *)prefix uri:(const xmlChar *)uri {
    @autoreleasepool {
        BOOL shouldNotify = self.delegateRespondsToEndElementMethod;
        if (shouldNotify) {
            id<RSSAXParserDelegate> receiver = self.delegate;
            [receiver saxParser:self XMLEndElement:localName prefix:prefix uri:uri];
        }

        [self endStoringCharacters];
    }
}
@end
// libxml2 namespace-aware start-element trampoline: `context` is the RSSAXParser passed as user data.
static void startElementSAX(void *context, const xmlChar *localname, const xmlChar *prefix, const xmlChar *URI, int nb_namespaces, const xmlChar **namespaces, int nb_attributes, int nb_defaulted, const xmlChar **attributes) {
    RSSAXParser *parser = (__bridge RSSAXParser *)context;
    [parser xmlStartElement:localname
                     prefix:prefix
                        uri:URI
         numberOfNamespaces:nb_namespaces
                 namespaces:namespaces
         numberOfAttributes:nb_attributes
            numberDefaulted:nb_defaulted
                 attributes:attributes];
}
// libxml2 namespace-aware end-element trampoline: `context` is the RSSAXParser passed as user data.
static void endElementSAX(void *context, const xmlChar *localname, const xmlChar *prefix, const xmlChar *URI) {
    RSSAXParser *parser = (__bridge RSSAXParser *)context;
    [parser xmlEndElement:localname prefix:prefix uri:URI];
}
// libxml2 characters trampoline: `context` is the RSSAXParser passed as user data.
static void charactersFoundSAX(void *context, const xmlChar *ch, int len) {
    RSSAXParser *parser = (__bridge RSSAXParser *)context;
    NSUInteger length = (NSUInteger)len;
    [parser xmlCharactersFound:ch length:length];
}
// libxml2 end-document trampoline: `context` is the RSSAXParser passed as user data.
static void endDocumentSAX(void *context) {
    RSSAXParser *parser = (__bridge RSSAXParser *)context;
    [parser xmlEndDocument];
}
// SAX callback table for the XML parser. Element events use the namespace-aware
// (SAX2) slots; every field not named here is zero-initialized (no handler).
static xmlSAXHandler saxHandlerStruct = {
    .endDocument = endDocumentSAX,
    .characters = charactersFoundSAX,
    .initialized = XML_SAX2_MAGIC,
    .startElementNs = startElementSAX,
    .endElementNs = endElementSAX,
};
/// Initializes libxml2 exactly once per process, no matter how many times (or
/// from how many classes' +initialize methods) this is called.
void RSSAXInitLibXMLParser(void) {
static dispatch_once_t onceToken;
dispatch_once(&onceToken, ^{
xmlInitParser();
});
}

View File

@@ -0,0 +1,56 @@
//
// RSParser.h
// RSParser
//
// Created by Brent Simmons on 6/20/17.
// Copyright © 2017 Ranchero Software, LLC. All rights reserved.
//
@import Foundation;
#import "../ParserData.h"
#import "../RSDateParser.h"
// OPML
#import "../RSOPMLParser.h"
#import "../RSOPMLDocument.h"
#import "../RSOPMLItem.h"
#import "../RSOPMLAttributes.h"
#import "../RSOPMLFeedSpecifier.h"
#import "../RSOPMLError.h"
// For writing your own XML parser.
#import "../RSSAXParser.h"
// You should use FeedParser (Swift) instead of these two specific parsers
// and the objects they create.
// But they're available if you want them.
#import "../RSRSSParser.h"
#import "../RSAtomParser.h"
#import "../RSParsedFeed.h"
#import "../RSParsedArticle.h"
#import "../RSParsedEnclosure.h"
#import "../RSParsedAuthor.h"
// HTML
#import "../RSHTMLMetadataParser.h"
#import "../RSHTMLMetadata.h"
#import "../RSHTMLLinkParser.h"
#import "../RSSAXHTMLParser.h" // For writing your own HTML parser.
#import "../RSHTMLTag.h"
// Utilities
#import "../NSData+RSParser.h"
#import "../NSString+RSParser.h"

View File

@@ -0,0 +1,9 @@
//
// Exports.swift
//
//
// Created by Stuart Breckenridge on 29/7/20.
//
import Foundation
@_exported import RSParserObjC

View File

@@ -0,0 +1,91 @@
//
// FeedParser.swift
// RSParser
//
// Created by Brent Simmons on 6/20/17.
// Copyright © 2017 Ranchero Software, LLC. All rights reserved.
//
import Foundation
import RSParserObjC
// FeedParser handles RSS, Atom, JSON Feed, and RSS-in-JSON.
// You don't need to know the type of feed.
/// Completion handler for the asynchronous `FeedParser.parse(_:_:)`.
/// It receives the parsed feed (which may be nil) with a nil error on success,
/// or a nil feed and the thrown error on failure.
public typealias FeedParserCallback = (_ parsedFeed: ParsedFeed?, _ error: Error?) -> Void
/// Single entry point for feed parsing: detects the feed format and hands the
/// data to the matching format-specific parser.
public struct FeedParser {

    /// Queue on which the callback-based `parse(_:_:)` does its parsing.
    private static let parseQueue = DispatchQueue(label: "FeedParser parse queue")

    /// Returns true when the data is recognized as JSON Feed, RSS-in-JSON, RSS, or Atom.
    /// `.unknown` and `.notAFeed` both yield false.
    public static func canParse(_ parserData: ParserData) -> Bool {
        switch feedType(parserData) {
        case .unknown, .notAFeed:
            return false
        case .jsonFeed, .rssInJSON, .rss, .atom:
            return true
        }
    }

    /// Like `canParse(_:)`, but for data that may still be downloading.
    /// An `.unknown` result counts as "might be parseable"; only `.notAFeed` yields false.
    public static func mightBeAbleToParseBasedOnPartialData(_ parserData: ParserData) -> Bool {
        switch feedType(parserData, isPartialData: true) {
        case .notAFeed:
            return false
        case .jsonFeed, .rssInJSON, .rss, .atom, .unknown:
            return true
        }
    }

    /// Parses the data synchronously.
    ///
    /// This is generally fast enough to call on the main thread, but a background
    /// queue is a good idea if you might be doing a lot of parsing (such as in a
    /// feed reader).
    ///
    /// - Returns: The parsed feed, or nil when the type is `.unknown` or `.notAFeed`
    ///   (the RSS and Atom parsers may also return nil).
    /// - Throws: Whatever the JSON Feed or RSS-in-JSON parser throws.
    public static func parse(_ parserData: ParserData) throws -> ParsedFeed? {
        switch feedType(parserData) {
        case .unknown, .notAFeed:
            return nil
        case .rss:
            return RSSParser.parse(parserData)
        case .atom:
            return AtomParser.parse(parserData)
        case .jsonFeed:
            return try JSONFeedParser.parse(parserData)
        case .rssInJSON:
            return try RSSInJSONParser.parse(parserData)
        }
    }

    /// Parses the data on a private queue and calls `completion` on the main queue
    /// with either the parsed feed or the error that was thrown.
    public static func parse(_ parserData: ParserData, _ completion: @escaping FeedParserCallback) {
        parseQueue.async {
            let feed: ParsedFeed?
            let failure: Error?
            do {
                feed = try parse(parserData)
                failure = nil
            }
            catch {
                feed = nil
                failure = error
            }
            DispatchQueue.main.async {
                completion(feed, failure)
            }
        }
    }
}

View File

@@ -0,0 +1,29 @@
//
// FeedParserError.swift
// RSParser
//
// Created by Brent Simmons on 6/24/17.
// Copyright © 2017 Ranchero Software, LLC. All rights reserved.
//
import Foundation
/// Error thrown by the JSON-based feed parsers when the data cannot be
/// interpreted as the expected feed format.
public struct FeedParserError: Error {
/// The specific reason parsing failed.
public enum FeedParserErrorType {
/// RSS-in-JSON: the "rss" object or its "channel" object is missing.
case rssChannelNotFound
/// RSS-in-JSON: no "item"/"items" array was found.
case rssItemsNotFound
/// JSON Feed: "version" is missing or does not contain the jsonfeed.org marker.
case jsonFeedVersionNotFound
/// JSON Feed: the "items" array is missing.
case jsonFeedItemsNotFound
/// JSON Feed: the "title" string is missing.
case jsonFeedTitleNotFound
/// The data could not be read as a top-level JSON dictionary.
case invalidJSON
}
/// The reason for this error.
public let errorType: FeedParserErrorType
/// Creates an error of the given type.
public init(_ errorType: FeedParserErrorType) {
self.errorType = errorType
}
}

View File

@@ -0,0 +1,64 @@
//
// FeedType.swift
// RSParser
//
// Created by Brent Simmons on 6/20/17.
// Copyright © 2017 Ranchero Software, LLC. All rights reserved.
//
import Foundation
#if SWIFT_PACKAGE
import RSParserObjC
#endif
/// The feed format detected by `feedType(_:isPartialData:)`.
public enum FeedType {
case rss
case atom
case jsonFeed
case rssInJSON
/// Not enough information to decide yet (too little data, or partial JSON).
case unknown
/// The data was examined and matched none of the supported formats.
case notAFeed
}
/// `feedType(_:isPartialData:)` returns `.unknown` for data shorter than this many bytes.
private let minNumberOfBytesRequired = 128
/// Sniffs the data and reports which feed format it appears to be.
///
/// It is fine to call this with partial data while a download is still in
/// progress, and it is fast enough to call on the main thread.
///
/// - Parameters:
///   - parserData: The data to examine.
///   - isPartialData: Pass true when more data may still arrive.
/// - Returns: `.unknown` when there is not enough data to decide (ask again
///   once more has arrived), `.notAFeed` when the data matches no supported
///   format, otherwise the detected format.
public func feedType(_ parserData: ParserData, isPartialData: Bool = false) -> FeedType {

    let data = parserData.data
    guard data.count >= minNumberOfBytesRequired else {
        return .unknown
    }

    // The order of these checks matters: JSON formats first, then XML formats.
    let sniffer = data as NSData
    if sniffer.isProbablyJSONFeed() { return .jsonFeed }
    if sniffer.isProbablyRSSInJSON() { return .rssInJSON }
    if sniffer.isProbablyRSS() { return .rss }
    if sniffer.isProbablyAtom() { return .atom }

    // A JSON Feed can't always be detected without all of its data. The JSON
    // "version" element may legally appear at the very end of the feed (the
    // allthis.json / allthis-partial.json test feeds are an example), so partial
    // data that looks like JSON is reported as .unknown rather than .notAFeed.
    let mightStillBeJSONFeed = isPartialData && sniffer.isProbablyJSON()
    return mightStillBeJSONFeed ? .unknown : .notAFeed
}

View File

@@ -0,0 +1,250 @@
//
// JSONFeedParser.swift
// RSParser
//
// Created by Brent Simmons on 6/25/17.
// Copyright © 2017 Ranchero Software, LLC. All rights reserved.
//
import Foundation
#if SWIFT_PACKAGE
import RSParserObjC
#endif
// See https://jsonfeed.org/version/1.1
/// Parser for JSON Feed documents.
public struct JSONFeedParser {

    /// Names of the JSON keys read by this parser.
    struct Key {
        static let version = "version"
        static let items = "items"
        static let title = "title"
        static let homePageURL = "home_page_url"
        static let feedURL = "feed_url"
        static let feedDescription = "description"
        static let nextURL = "next_url"
        static let icon = "icon"
        static let favicon = "favicon"
        static let expired = "expired"
        static let author = "author"
        static let authors = "authors"
        static let name = "name"
        static let url = "url"
        static let avatar = "avatar"
        static let hubs = "hubs"
        static let type = "type"
        static let contentHTML = "content_html"
        static let contentText = "content_text"
        static let externalURL = "external_url"
        static let summary = "summary"
        static let image = "image"
        static let bannerImage = "banner_image"
        static let datePublished = "date_published"
        static let dateModified = "date_modified"
        static let tags = "tags"
        static let uniqueID = "id"
        static let attachments = "attachments"
        static let mimeType = "mime_type"
        static let sizeInBytes = "size_in_bytes"
        static let durationInSeconds = "duration_in_seconds"
        static let language = "language"
    }

    /// Substring the "version" value must contain. The scheme is deliberately
    /// left out to allow for feeds that don't get it exactly right.
    static let jsonFeedVersionMarker = "://jsonfeed.org/version/"

    /// Parses a JSON Feed.
    ///
    /// - Throws: `FeedParserError` with `.invalidJSON`, `.jsonFeedVersionNotFound`,
    ///   `.jsonFeedItemsNotFound`, or `.jsonFeedTitleNotFound` (checked in that order).
    /// - Returns: The parsed feed. When the feed has no "feed_url", the URL from
    ///   `parserData` is used instead.
    public static func parse(_ parserData: ParserData) throws -> ParsedFeed? {

        guard let feedObject = JSONUtilities.dictionary(with: parserData.data) else {
            throw FeedParserError(.invalidJSON)
        }
        guard let version = feedObject[Key.version] as? String,
              version.range(of: JSONFeedParser.jsonFeedVersionMarker) != nil else {
            throw FeedParserError(.jsonFeedVersionNotFound)
        }
        guard let itemObjects = feedObject[Key.items] as? JSONArray else {
            throw FeedParserError(.jsonFeedItemsNotFound)
        }
        guard let title = feedObject[Key.title] as? String else {
            throw FeedParserError(.jsonFeedTitleNotFound)
        }

        return ParsedFeed(
            type: .jsonFeed,
            title: title,
            homePageURL: feedObject[Key.homePageURL] as? String,
            feedURL: feedObject[Key.feedURL] as? String ?? parserData.url,
            language: feedObject[Key.language] as? String,
            feedDescription: feedObject[Key.feedDescription] as? String,
            nextURL: feedObject[Key.nextURL] as? String,
            iconURL: feedObject[Key.icon] as? String,
            faviconURL: feedObject[Key.favicon] as? String,
            authors: parseAuthors(feedObject),
            expired: feedObject[Key.expired] as? Bool ?? false,
            hubs: parseHubs(feedObject),
            items: parseItems(itemObjects, parserData.url)
        )
    }
}
private extension JSONFeedParser {
/// Reads authors from a feed or item dictionary.
///
/// An "authors" array takes precedence: when it is present, its valid entries
/// are returned (possibly an empty set) and "author" is not consulted.
/// Otherwise a single "author" object is used. Returns nil when neither yields
/// an author.
static func parseAuthors(_ dictionary: JSONDictionary) -> Set<ParsedAuthor>? {

    if let authorObjects = dictionary[Key.authors] as? JSONArray {
        return Set(authorObjects.compactMap { parseAuthor($0) })
    }

    if let authorObject = dictionary[Key.author] as? JSONDictionary,
       let singleAuthor = parseAuthor(authorObject) {
        return Set([singleAuthor])
    }
    return nil
}
/// Builds a ParsedAuthor from one author object.
/// Returns nil when none of "name", "url", and "avatar" is a string.
static func parseAuthor(_ dictionary: JSONDictionary) -> ParsedAuthor? {

    let authorName = dictionary[Key.name] as? String
    let authorURL = dictionary[Key.url] as? String
    let avatarURL = dictionary[Key.avatar] as? String

    guard authorName != nil || authorURL != nil || avatarURL != nil else {
        return nil
    }
    return ParsedAuthor(name: authorName, url: authorURL, avatarURL: avatarURL, emailAddress: nil)
}
/// Reads the "hubs" array. Entries lacking a string "url" or "type" are skipped.
/// Returns nil when the array is missing or yields no valid hub.
static func parseHubs(_ dictionary: JSONDictionary) -> Set<ParsedHub>? {

    guard let hubObjects = dictionary[Key.hubs] as? JSONArray else {
        return nil
    }

    var parsedHubs = Set<ParsedHub>()
    for hubObject in hubObjects {
        if let url = hubObject[Key.url] as? String, let type = hubObject[Key.type] as? String {
            parsedHubs.insert(ParsedHub(type: type, url: url))
        }
    }
    return parsedHubs.isEmpty ? nil : parsedHubs
}
/// Parses every object in the items array, dropping those `parseItem` rejects.
static func parseItems(_ itemsArray: JSONArray, _ feedURL: String) -> Set<ParsedItem> {

    var parsedItems = Set<ParsedItem>()
    for itemObject in itemsArray {
        if let item = parseItem(itemObject, feedURL) {
            parsedItems.insert(item)
        }
    }
    return parsedItems
}
/// Builds a ParsedItem from one item object.
/// Returns nil when the item has no usable id, or has neither "content_html"
/// nor "content_text".
static func parseItem(_ itemDictionary: JSONDictionary, _ feedURL: String) -> ParsedItem? {

    guard let uniqueID = parseUniqueID(itemDictionary) else {
        return nil
    }

    let html = itemDictionary[Key.contentHTML] as? String
    let text = itemDictionary[Key.contentText] as? String
    guard html != nil || text != nil else {
        return nil
    }

    // Tags are used only when the value is an array made up entirely of strings.
    let tags: Set<String>? = (itemDictionary[Key.tags] as? [String]).map { Set($0) }

    return ParsedItem(
        syncServiceID: nil,
        uniqueID: uniqueID,
        feedURL: feedURL,
        url: itemDictionary[Key.url] as? String,
        externalURL: itemDictionary[Key.externalURL] as? String,
        title: parseTitle(itemDictionary, feedURL),
        language: itemDictionary[Key.language] as? String,
        contentHTML: html,
        contentText: text,
        summary: itemDictionary[Key.summary] as? String,
        imageURL: itemDictionary[Key.image] as? String,
        bannerImageURL: itemDictionary[Key.bannerImage] as? String,
        datePublished: parseDate(itemDictionary[Key.datePublished] as? String),
        dateModified: parseDate(itemDictionary[Key.dateModified] as? String),
        authors: parseAuthors(itemDictionary),
        tags: tags,
        attachments: parseAttachments(itemDictionary)
    )
}
/// Returns the item's title, decoding HTML entities for the few feeds listed in
/// `isSpecialCaseTitleWithEntitiesFeed`. Returns nil when there is no string title.
static func parseTitle(_ itemDictionary: JSONDictionary, _ feedURL: String) -> String? {

    guard let rawTitle = itemDictionary[Key.title] as? String else {
        return nil
    }
    guard isSpecialCaseTitleWithEntitiesFeed(feedURL) else {
        return rawTitle
    }
    return (rawTitle as NSString).rsparser_stringByDecodingHTMLEntities()
}
/// Returns true when the feed URL (compared case-insensitively) contains one of
/// the hosts known to put HTML entities in item titles.
///
/// As of 16 Feb. 2018, Kottke's and Heer's feeds include HTML entities in their
/// title elements. If more feeds like this turn up, add them here; if these
/// feeds get fixed, remove them.
static func isSpecialCaseTitleWithEntitiesFeed(_ feedURL: String) -> Bool {

    let normalizedURL = feedURL.lowercased()
    let knownHosts = ["kottke.org", "pxlnv.com", "macstories.net", "macobserver.com"]
    return knownHosts.contains { normalizedURL.contains($0) }
}
/// Returns the item's "id" as a string.
///
/// The spec says the id must be a string. The version 1 spec also says that a
/// numeric id, although incorrect, should be coerced to a string, so integers
/// and then doubles are accepted as well. Anything else yields nil.
static func parseUniqueID(_ itemDictionary: JSONDictionary) -> String? {

    switch itemDictionary[Key.uniqueID] {
    case let stringID as String:
        return stringID
    case let integerID as Int:
        return "\(integerID)"
    case let floatingPointID as Double:
        return "\(floatingPointID)"
    default:
        return nil
    }
}
/// Converts a date string with RSDateWithString. A nil or empty string yields nil.
static func parseDate(_ dateString: String?) -> Date? {

    if let text = dateString, !text.isEmpty {
        return RSDateWithString(text)
    }
    return nil
}
/// Reads the item's "attachments" array, skipping invalid entries.
/// Returns nil when the array is missing; the set may be empty when no entry is valid.
static func parseAttachments(_ itemDictionary: JSONDictionary) -> Set<ParsedAttachment>? {

    guard let attachmentObjects = itemDictionary[Key.attachments] as? JSONArray else {
        return nil
    }

    var parsedAttachments = Set<ParsedAttachment>()
    for attachmentObject in attachmentObjects {
        if let attachment = parseAttachment(attachmentObject) {
            parsedAttachments.insert(attachment)
        }
    }
    return parsedAttachments
}
/// Builds a ParsedAttachment from one attachment object.
/// Both "url" and "mime_type" must be strings; otherwise nil is returned.
/// ParsedAttachment itself also rejects an empty URL.
static func parseAttachment(_ attachmentObject: JSONDictionary) -> ParsedAttachment? {

    guard let attachmentURL = attachmentObject[Key.url] as? String,
          let attachmentMimeType = attachmentObject[Key.mimeType] as? String else {
        return nil
    }

    return ParsedAttachment(
        url: attachmentURL,
        mimeType: attachmentMimeType,
        title: attachmentObject[Key.title] as? String,
        sizeInBytes: attachmentObject[Key.sizeInBytes] as? Int,
        durationInSeconds: attachmentObject[Key.durationInSeconds] as? Int
    )
}
}

View File

@@ -0,0 +1,184 @@
//
// RSSInJSONParser.swift
// RSParser
//
// Created by Brent Simmons on 6/24/17.
// Copyright © 2017 Ranchero Software, LLC. All rights reserved.
//
import Foundation
#if SWIFT_PACKAGE
import RSParserObjC
#endif
// See https://github.com/scripting/Scripting-News/blob/master/rss-in-json/README.md
// Also: http://cyber.harvard.edu/rss/rss.html
/// Parser for RSS-in-JSON documents.
public struct RSSInJSONParser {

    /// Parses an RSS-in-JSON feed.
    ///
    /// - Throws: Any error from `JSONSerialization`, or a `FeedParserError` with
    ///   `.invalidJSON` (top level is not a dictionary), `.rssChannelNotFound`
    ///   ("rss" or "channel" object missing), or `.rssItemsNotFound`.
    public static func parse(_ parserData: ParserData) throws -> ParsedFeed? {

        guard let root = try JSONSerialization.jsonObject(with: parserData.data) as? JSONDictionary else {
            throw FeedParserError(.invalidJSON)
        }
        guard let rss = root["rss"] as? JSONDictionary,
              let channel = rss["channel"] as? JSONDictionary else {
            throw FeedParserError(.rssChannelNotFound)
        }

        // In practice the items array won't always appear correctly inside the
        // channel object, and it is sometimes called "items" instead of "item".
        // Try the likely places in order of preference.
        guard let itemObjects = (channel["item"] as? JSONArray)
                ?? (root["item"] as? JSONArray)
                ?? (channel["items"] as? JSONArray)
                ?? (root["items"] as? JSONArray) else {
            throw FeedParserError(.rssItemsNotFound)
        }

        return ParsedFeed(
            type: .rssInJSON,
            title: channel["title"] as? String,
            homePageURL: channel["link"] as? String,
            feedURL: parserData.url,
            language: channel["language"] as? String,
            feedDescription: channel["description"] as? String,
            nextURL: nil,
            iconURL: nil,
            faviconURL: nil,
            authors: nil,
            expired: false,
            hubs: nil,
            items: parseItems(itemObjects, parserData.url)
        )
    }
}
private extension RSSInJSONParser {
/// Parses every object in the items array, dropping those that yield no item.
static func parseItems(_ itemsObject: JSONArray, _ feedURL: String) -> Set<ParsedItem> {

    var parsedItems = Set<ParsedItem>()
    for itemObject in itemsObject {
        if let item = parsedItemWithDictionary(itemObject, feedURL) {
            parsedItems.insert(item)
        }
    }
    return parsedItems
}
/// Builds a ParsedItem from one RSS-in-JSON item object.
///
/// The "description" is treated as HTML when it contains "<", otherwise as plain
/// text. An item with neither a description nor a title is rejected. When the
/// item has no string "guid", a unique ID is calculated.
static func parsedItemWithDictionary(_ itemDictionary: JSONDictionary, _ feedURL: String) -> ParsedItem? {

    let link = itemDictionary["link"] as? String
    let title = itemDictionary["title"] as? String
    let description = itemDictionary["description"] as? String

    guard description != nil || title != nil else {
        return nil
    }

    let descriptionHasMarkup = description?.contains("<") ?? false
    let contentHTML = descriptionHasMarkup ? description : nil
    let contentText = descriptionHasMarkup ? nil : description

    let datePublished = (itemDictionary["pubDate"] as? String).flatMap { RSDateWithString($0) }
    let authors = parseAuthors(itemDictionary)
    let tags = parseTags(itemDictionary)
    let attachments = parseAttachments(itemDictionary)

    var uniqueID = itemDictionary["guid"] as? String
    if uniqueID == nil {
        // Calculate a uniqueID from a combination of the available elements, then
        // hash the result. Items should have guids. When they don't, re-runs are
        // very likely, because there is no other 100% reliable way to determine
        // identity. The calculated ID is valid only within this particular feed.
        let fragments: [String?] = [
            datePublished.map { "\($0.timeIntervalSince1970)" },
            title,
            link,
            authors?.first?.emailAddress,
            attachments?.first?.url
        ]
        var seed = fragments.compactMap { $0 }.joined()
        if seed.isEmpty {
            // Tough case: nothing but the content itself is left to hash.
            seed = contentText ?? contentHTML ?? seed
        }
        uniqueID = (seed as NSString).rsparser_md5Hash()
    }

    guard let resolvedID = uniqueID else {
        return nil
    }

    return ParsedItem(
        syncServiceID: nil,
        uniqueID: resolvedID,
        feedURL: feedURL,
        url: nil,
        externalURL: link,
        title: title,
        language: nil,
        contentHTML: contentHTML,
        contentText: contentText,
        summary: nil,
        imageURL: nil,
        bannerImageURL: nil,
        datePublished: datePublished,
        dateModified: nil,
        authors: authors,
        tags: tags,
        attachments: attachments
    )
}
/// Wraps the item's "author" string, stored as an email address, in a
/// one-element set. Returns nil when "author" is not a string.
static func parseAuthors(_ itemDictionary: JSONDictionary) -> Set<ParsedAuthor>? {

    if let emailAddress = itemDictionary["author"] as? String {
        let author = ParsedAuthor(name: nil, url: nil, avatarURL: nil, emailAddress: emailAddress)
        return Set([author])
    }
    return nil
}
/// Reads tags from "category", which may be a single object or an array of
/// objects, each carrying its text under "#value".
/// Returns nil when "category" is neither form, or is a single object without a
/// string "#value".
static func parseTags(_ itemDictionary: JSONDictionary) -> Set<String>? {

    let category = itemDictionary["category"]

    if let singleCategory = category as? JSONDictionary {
        guard let tag = singleCategory["#value"] as? String else {
            return nil
        }
        return Set([tag])
    }

    if let categories = category as? JSONArray {
        return Set(categories.compactMap { $0["#value"] as? String })
    }
    return nil
}
/// Builds a one-element attachment set from the item's "enclosure" object.
///
/// The enclosure needs a string "url". "length" is accepted as an integer or as
/// a string (converted with NSString's integerValue). Returns nil when the
/// enclosure or its URL is missing, or when ParsedAttachment rejects the URL.
static func parseAttachments(_ itemDictionary: JSONDictionary) -> Set<ParsedAttachment>? {

    guard let enclosure = itemDictionary["enclosure"] as? JSONDictionary,
          let enclosureURL = enclosure["url"] as? String else {
        return nil
    }

    let rawLength = enclosure["length"]
    let sizeInBytes: Int?
    if let numericLength = rawLength as? Int {
        sizeInBytes = numericLength
    }
    else if let lengthString = rawLength as? String {
        sizeInBytes = (lengthString as NSString).integerValue
    }
    else {
        sizeInBytes = nil
    }

    let attachment = ParsedAttachment(
        url: enclosureURL,
        mimeType: enclosure["type"] as? String,
        title: nil,
        sizeInBytes: sizeInBytes,
        durationInSeconds: nil
    )
    return attachment.map { Set([$0]) }
}
}

View File

@@ -0,0 +1,36 @@
//
// ParsedAttachment.swift
// RSParser
//
// Created by Brent Simmons on 6/20/17.
// Copyright © 2017 Ranchero Software, LLC. All rights reserved.
//
import Foundation
/// A file attached to a feed item (an RSS enclosure or a JSON Feed attachment).
public struct ParsedAttachment: Hashable {

    public let url: String
    public let mimeType: String?
    public let title: String?
    public let sizeInBytes: Int?
    public let durationInSeconds: Int?

    /// Creates an attachment. Fails (returns nil) when `url` is empty.
    public init?(url: String, mimeType: String?, title: String?, sizeInBytes: Int?, durationInSeconds: Int?) {
        guard !url.isEmpty else {
            return nil
        }

        self.url = url
        self.mimeType = mimeType
        self.title = title
        self.sizeInBytes = sizeInBytes
        self.durationInSeconds = durationInSeconds
    }

    // MARK: - Hashable

    /// Only the URL is fed to the hasher.
    public func hash(into hasher: inout Hasher) {
        url.hash(into: &hasher)
    }
}

View File

@@ -0,0 +1,44 @@
//
// ParsedAuthor.swift
// RSParser
//
// Created by Brent Simmons on 6/20/17.
// Copyright © 2017 Ranchero Software, LLC. All rights reserved.
//
import Foundation
/// An author of a feed or of a feed item. Every field is optional.
public struct ParsedAuthor: Hashable, Codable {

    public let name: String?
    public let url: String?
    public let avatarURL: String?
    public let emailAddress: String?

    public init(name: String?, url: String?, avatarURL: String?, emailAddress: String?) {
        self.name = name
        self.url = url
        self.avatarURL = avatarURL
        self.emailAddress = emailAddress
    }

    // MARK: - Hashable

    /// Hashes a single field: the first non-nil value among name, url,
    /// emailAddress, and avatarURL (in that order), or the empty string when
    /// all four are nil.
    public func hash(into hasher: inout Hasher) {
        let identifyingValue = name ?? url ?? emailAddress ?? avatarURL ?? ""
        hasher.combine(identifyingValue)
    }
}

View File

@@ -0,0 +1,42 @@
//
// ParsedFeed.swift
// RSParser
//
// Created by Brent Simmons on 6/20/17.
// Copyright © 2017 Ranchero Software, LLC. All rights reserved.
//
import Foundation
/// The format-independent result of parsing a feed: feed-level metadata plus
/// the set of parsed items.
public struct ParsedFeed {
public let type: FeedType
public let title: String?
/// Stored as nil when the supplied value is empty or whitespace-only.
public let homePageURL: String?
public let feedURL: String?
public let language: String?
public let feedDescription: String?
public let nextURL: String?
public let iconURL: String?
public let faviconURL: String?
public let authors: Set<ParsedAuthor>?
public let expired: Bool
public let hubs: Set<ParsedHub>?
public let items: Set<ParsedItem>
/// Stores every argument unchanged, except `homePageURL`, which is normalized
/// with `nilIfEmptyOrWhitespace`.
public init(type: FeedType, title: String?, homePageURL: String?, feedURL: String?, language: String?, feedDescription: String?, nextURL: String?, iconURL: String?, faviconURL: String?, authors: Set<ParsedAuthor>?, expired: Bool, hubs: Set<ParsedHub>?, items: Set<ParsedItem>) {
self.type = type
self.title = title
// An empty or whitespace-only home page URL is treated as missing.
self.homePageURL = homePageURL?.nilIfEmptyOrWhitespace
self.feedURL = feedURL
self.language = language
self.feedDescription = feedDescription
self.nextURL = nextURL
self.iconURL = iconURL
self.faviconURL = faviconURL
self.authors = authors
self.expired = expired
self.hubs = hubs
self.items = items
}
}

View File

@@ -0,0 +1,15 @@
//
// ParsedHub.swift
// RSParser
//
// Created by Brent Simmons on 6/20/17.
// Copyright © 2017 Ranchero Software, LLC. All rights reserved.
//
import Foundation
/// One entry from a JSON Feed "hubs" array: the hub's type and its URL.
public struct ParsedHub: Hashable {
public let type: String
public let url: String
}

View File

@@ -0,0 +1,67 @@
//
// ParsedItem.swift
// RSParser
//
// Created by Brent Simmons on 6/20/17.
// Copyright © 2017 Ranchero Software, LLC. All rights reserved.
//
import Foundation
/// One parsed article/entry, in the common form every feed parser produces.
public struct ParsedItem: Hashable {
public let syncServiceID: String? // Nil when not syncing
public let uniqueID: String // RSS guid, for instance; may be calculated
public let feedURL: String
public let url: String?
public let externalURL: String?
public let title: String?
public let language: String?
public let contentHTML: String?
public let contentText: String?
public let summary: String?
public let imageURL: String?
public let bannerImageURL: String?
public let datePublished: Date?
public let dateModified: Date?
public let authors: Set<ParsedAuthor>?
public let tags: Set<String>?
public let attachments: Set<ParsedAttachment>?
/// Memberwise initializer; every argument is stored unchanged.
public init(syncServiceID: String?, uniqueID: String, feedURL: String, url: String?, externalURL: String?, title: String?,
language: String?, contentHTML: String?, contentText: String?, summary: String?, imageURL: String?,
bannerImageURL: String?,datePublished: Date?, dateModified: Date?, authors: Set<ParsedAuthor>?,
tags: Set<String>?, attachments: Set<ParsedAttachment>?) {
self.syncServiceID = syncServiceID
self.uniqueID = uniqueID
self.feedURL = feedURL
self.url = url
self.externalURL = externalURL
self.title = title
self.language = language
self.contentHTML = contentHTML
self.contentText = contentText
self.summary = summary
self.imageURL = imageURL
self.bannerImageURL = bannerImageURL
self.datePublished = datePublished
self.dateModified = dateModified
self.authors = authors
self.tags = tags
self.attachments = attachments
}
// MARK: - Hashable
/// Hashes the sync-service ID when there is one; otherwise hashes the
/// unique ID together with the feed URL.
public func hash(into hasher: inout Hasher) {
if let syncServiceID = syncServiceID {
hasher.combine(syncServiceID)
}
else {
hasher.combine(uniqueID)
hasher.combine(feedURL)
}
}
}

View File

@@ -0,0 +1,32 @@
//
// AtomParser.swift
// RSParser
//
// Created by Brent Simmons on 6/25/17.
// Copyright © 2017 Ranchero Software, LLC. All rights reserved.
//
import Foundation
#if SWIFT_PACKAGE
import RSParserObjC
#endif
// AtomParser wraps the Objective-C RSAtomParser.
//
// The Objective-C parser creates RSParsedFeed, RSParsedArticle, etc.
// This wrapper then creates ParsedFeed, ParsedItem, etc. so that it creates
// the same things that JSONFeedParser and RSSInJSONParser create.
//
// In general, you should see FeedParser.swift for all your feed-parsing needs.
/// Swift wrapper around the Objective-C RSAtomParser.
public struct AtomParser {

    /// Parses an Atom feed and converts the Objective-C result to a ParsedFeed.
    /// Returns nil when RSAtomParser produces no feed.
    public static func parse(_ parserData: ParserData) -> ParsedFeed? {

        guard let objCFeed = RSAtomParser.parseFeed(with: parserData) else {
            return nil
        }
        return RSParsedFeedTransformer.parsedFeed(objCFeed)
    }
}

View File

@@ -0,0 +1,80 @@
//
// RSParsedFeedTransformer.swift
// RSParser
//
// Created by Brent Simmons on 6/25/17.
// Copyright © 2017 Ranchero Software, LLC. All rights reserved.
//
import Foundation
#if SWIFT_PACKAGE
import RSParserObjC
#endif
// RSRSSParser and RSAtomParser were written in Objective-C quite a while ago.
// They create an RSParsedFeed object and related Objective-C objects.
// These functions take an RSParsedFeed and return a Swift-y ParsedFeed,
// which is part of providing a single API for feed parsing.
/// Converts the Objective-C RSParsedFeed object graph into the Swift ParsedFeed types.
struct RSParsedFeedTransformer {

    /// Builds a ParsedFeed from an RSParsedFeed. Only the title, link, URL string,
    /// language, and articles are carried over; the remaining fields are nil/false.
    ///
    /// NOTE(review): the resulting type is always `.rss`, including when this is
    /// called from AtomParser. Confirm whether Atom feeds should report `.atom`.
    static func parsedFeed(_ rsParsedFeed: RSParsedFeed) -> ParsedFeed {

        return ParsedFeed(
            type: .rss,
            title: rsParsedFeed.title,
            homePageURL: rsParsedFeed.link,
            feedURL: rsParsedFeed.urlString,
            language: rsParsedFeed.language,
            feedDescription: nil,
            nextURL: nil,
            iconURL: nil,
            faviconURL: nil,
            authors: nil,
            expired: false,
            hubs: nil,
            items: parsedItems(rsParsedFeed.articles)
        )
    }
}
private extension RSParsedFeedTransformer {
/// Converts each RSParsedArticle to a ParsedItem and collects the results in a set.
static func parsedItems(_ parsedArticles: Set<RSParsedArticle>) -> Set<ParsedItem> {

    var items = Set<ParsedItem>()
    for article in parsedArticles {
        items.insert(parsedItem(article))
    }
    return items
}
/// Converts one RSParsedArticle to a ParsedItem.
/// The article's permalink becomes `url`, its link becomes `externalURL`, and
/// its body becomes `contentHTML`; fields the article does not supply are nil.
static func parsedItem(_ parsedArticle: RSParsedArticle) -> ParsedItem {

    return ParsedItem(
        syncServiceID: nil,
        uniqueID: parsedArticle.articleID,
        feedURL: parsedArticle.feedURL,
        url: parsedArticle.permalink,
        externalURL: parsedArticle.link,
        title: parsedArticle.title,
        language: parsedArticle.language,
        contentHTML: parsedArticle.body,
        contentText: nil,
        summary: nil,
        imageURL: nil,
        bannerImageURL: nil,
        datePublished: parsedArticle.datePublished,
        dateModified: parsedArticle.dateModified,
        authors: parsedAuthors(parsedArticle.authors),
        tags: nil,
        attachments: parsedAttachments(parsedArticle.enclosures)
    )
}
/// Converts RSParsedAuthor objects to ParsedAuthor values (no avatar URL is
/// available). Returns nil for a nil or empty input set.
static func parsedAuthors(_ authors: Set<RSParsedAuthor>?) -> Set<ParsedAuthor>? {

    guard let objCAuthors = authors, !objCAuthors.isEmpty else {
        return nil
    }

    var converted = Set<ParsedAuthor>()
    for objCAuthor in objCAuthors {
        converted.insert(ParsedAuthor(name: objCAuthor.name, url: objCAuthor.url, avatarURL: nil, emailAddress: objCAuthor.emailAddress))
    }
    return converted.isEmpty ? nil : converted
}
/// Converts RSParsedEnclosure objects to ParsedAttachment values.
/// A length that is not positive is passed on as nil. Enclosures that
/// ParsedAttachment rejects are skipped. Returns nil when the input is nil or
/// empty, or when no enclosure converts.
static func parsedAttachments(_ enclosures: Set<RSParsedEnclosure>?) -> Set<ParsedAttachment>? {

    guard let objCEnclosures = enclosures, !objCEnclosures.isEmpty else {
        return nil
    }

    var converted = Set<ParsedAttachment>()
    for objCEnclosure in objCEnclosures {
        let length = objCEnclosure.length
        let sizeInBytes: Int? = length > 0 ? length : nil
        if let attachment = ParsedAttachment(url: objCEnclosure.url, mimeType: objCEnclosure.mimeType, title: nil, sizeInBytes: sizeInBytes, durationInSeconds: nil) {
            converted.insert(attachment)
        }
    }
    return converted.isEmpty ? nil : converted
}
}

View File

@@ -0,0 +1,29 @@
//
// RSSParser.swift
// RSParser
//
// Created by Brent Simmons on 6/25/17.
// Copyright © 2017 Ranchero Software, LLC. All rights reserved.
//
import Foundation
import RSParserObjC
// RSSParser wraps the Objective-C RSRSSParser.
//
// The Objective-C parser creates RSParsedFeed, RSParsedArticle, etc.
// This wrapper then creates ParsedFeed, ParsedItem, etc. so that it creates
// the same things that JSONFeedParser and RSSInJSONParser create.
//
// In general, you should see FeedParser.swift for all your feed-parsing needs.
/// Swift wrapper around the Objective-C RSRSSParser.
public struct RSSParser {

    /// Parses an RSS feed and converts the Objective-C result to a ParsedFeed.
    /// Returns nil when RSRSSParser produces no feed.
    public static func parse(_ parserData: ParserData) -> ParsedFeed? {

        guard let objCFeed = RSRSSParser.parseFeed(with: parserData) else {
            return nil
        }
        return RSParsedFeedTransformer.parsedFeed(objCFeed)
    }
}

View File

@@ -0,0 +1,12 @@
//
// JSONDictionary.swift
// RSParser
//
// Created by Brent Simmons on 6/24/17.
// Copyright © 2017 Ranchero Software, LLC. All rights reserved.
//
import Foundation
/// A JSON object: string keys mapped to values of any type.
public typealias JSONDictionary = [String: Any]
/// A JSON array whose elements are all JSON objects.
public typealias JSONArray = [JSONDictionary]

View File

@@ -0,0 +1,27 @@
//
// JSONUtilities.swift
// RSParser
//
// Created by Brent Simmons on 12/10/17.
// Copyright © 2017 Ranchero Software, LLC. All rights reserved.
//
import Foundation
/// Convenience wrappers around JSONSerialization that return nil instead of throwing.
public struct JSONUtilities {
/// Deserializes the data as JSON. Returns nil when JSONSerialization throws.
public static func object(with data: Data) -> Any? {
return try? JSONSerialization.jsonObject(with: data)
}
/// Deserializes the data and returns it only when the top level is a JSON object.
public static func dictionary(with data: Data) -> JSONDictionary? {
return object(with: data) as? JSONDictionary
}
/// Deserializes the data and returns it only when the top level is an array of JSON objects.
public static func array(with data: Data) -> JSONArray? {
return object(with: data) as? JSONArray
}
}

View File

@@ -0,0 +1,17 @@
//
// String+RSParser.swift
// RSParser
//
// Created by Nate Weaver on 2020-01-19.
// Copyright © 2020 Ranchero Software, LLC. All rights reserved.
//
import Foundation
extension String {

    /// The string itself (untrimmed), or nil when it is empty or consists only
    /// of whitespace and newline characters.
    var nilIfEmptyOrWhitespace: String? {
        let trimmed = trimmingCharacters(in: .whitespacesAndNewlines)
        guard !trimmed.isEmpty else {
            return nil
        }
        return self
    }
}