Make it build. Add a README.

This commit is contained in:
Brent Simmons
2017-06-25 16:32:07 -07:00
parent 1ad4474b29
commit e9c19e427a
41 changed files with 454 additions and 485 deletions

View File

@@ -0,0 +1,28 @@
//
// AtomParser.swift
// RSParser
//
// Created by Brent Simmons on 6/25/17.
// Copyright © 2017 Ranchero Software, LLC. All rights reserved.
//
import Foundation
// AtomParser wraps the Objective-C RSAtomParser.
//
// The Objective-C parser creates RSParsedFeed, RSParsedArticle, etc.
// This wrapper then creates ParsedFeed, ParsedItem, etc. so that it creates
// the same things that JSONFeedParser and RSSInJSONParser create.
//
// In general, you should see FeedParser.swift for all your feed-parsing needs.
public struct AtomParser {

    /// Parses the given data with the Objective-C `RSAtomParser` and converts
    /// the result into a `ParsedFeed`.
    ///
    /// - Parameter parserData: The data handed to `RSAtomParser`.
    /// - Returns: The transformed feed, or `nil` when `RSAtomParser` returns no feed.
    public static func parse(_ parserData: ParserData) -> ParsedFeed? {
        guard let objcFeed = RSAtomParser.parseFeed(with: parserData) else {
            return nil
        }
        return RSParsedFeedTransformer.parsedFeed(objcFeed)
    }
}

View File

@@ -6,8 +6,13 @@
// Copyright (c) 2015 Ranchero Software LLC. All rights reserved.
//
#import "FeedParser.h"
@import Foundation;
@interface RSAtomParser : NSObject <FeedParser>
@class ParserData;
@class RSParsedFeed;
@interface RSAtomParser : NSObject
/// Creates a parser for parserData and returns the feed produced by parsing it.
+ (RSParsedFeed *)parseFeedWithData:(ParserData *)parserData;
@end

View File

@@ -9,12 +9,11 @@
#import <libxml/xmlstring.h>
#import "RSAtomParser.h"
#import "RSSAXParser.h"
#import "FeedParser.h"
#import "RSParsedFeed.h"
#import "RSParsedArticle.h"
#import "RSXMLData.h"
#import "NSString+RSXML.h"
#import "NSString+RSParser.h"
#import "RSDateParser.h"
#import <RSParser/RSParser-Swift.h>
@interface RSAtomParser () <RSSAXParserDelegate>
@@ -44,57 +43,24 @@
#pragma mark - Class Methods
+ (BOOL)canParseFeed:(RSXMLData *)xmlData {
+ (RSParsedFeed *)parseFeedWithData:(ParserData *)parserData {
// Checking for '<feed' and '<entry' within first n characters should do it.
@autoreleasepool {
NSData *feedData = xmlData.data;
NSString *s = [[NSString alloc] initWithBytesNoCopy:(void *)feedData.bytes length:feedData.length encoding:NSUTF8StringEncoding freeWhenDone:NO];
if (!s) {
s = [[NSString alloc] initWithData:feedData encoding:NSUTF8StringEncoding];
}
if (!s) {
s = [[NSString alloc] initWithData:feedData encoding:NSUnicodeStringEncoding];
}
if (!s) {
return NO;
}
static const NSInteger numberOfCharactersToSearch = 4096;
NSRange rangeToSearch = NSMakeRange(0, numberOfCharactersToSearch);
if (s.length < numberOfCharactersToSearch) {
rangeToSearch.length = s.length;
}
NSRange feedRange = [s rangeOfString:@"<feed" options:NSLiteralSearch range:rangeToSearch];
NSRange entryRange = [s rangeOfString:@"<entry" options:NSLiteralSearch range:rangeToSearch];
if (feedRange.length < 1 || entryRange.length < 1) {
return NO;
}
if (feedRange.location > entryRange.location) {
return NO; // Wrong order.
}
}
return YES;
RSAtomParser *parser = [[[self class] alloc] initWithParserData:parserData];
return [parser parseFeed];
}
#pragma mark - Init
- (instancetype)initWithXMLData:(RSXMLData *)xmlData {
- (instancetype)initWithParserData:(ParserData *)parserData {
self = [super init];
if (!self) {
return nil;
}
_feedData = xmlData.data;
_urlString = xmlData.urlString;
_feedData = parserData.data;
_urlString = parserData.url;
_parser = [[RSSAXParser alloc] initWithDelegate:self];
_attributesStack = [NSMutableArray new];
_articles = [NSMutableArray new];
@@ -105,7 +71,7 @@
#pragma mark - API
- (RSParsedFeed *)parseFeed:(NSError **)error {
- (RSParsedFeed *)parseFeed {
[self parse];
@@ -315,7 +281,7 @@ static const NSInteger kSelfLength = 5;
- (NSString *)currentStringWithHTMLEntitiesDecoded {
return [self.parser.currentStringWithTrimmedWhitespace rs_stringByDecodingHTMLEntities];
return [self.parser.currentStringWithTrimmedWhitespace rsparser_stringByDecodingHTMLEntities];
}

View File

@@ -1,28 +0,0 @@
//
// RSFeedParser.h
// RSParser
//
// Created by Brent Simmons on 1/4/15.
// Copyright (c) 2015 Ranchero Software LLC. All rights reserved.
//
#import "FeedParser.h"
// If you have a feed and don't know or care what it is (RSS or Atom),
// then call RSParseFeed or RSParseFeedSync.
@class RSXMLData;
@class RSParsedFeed;
NS_ASSUME_NONNULL_BEGIN
/// Returns YES when a parser class can be chosen for xmlData.
BOOL RSCanParseFeed(RSXMLData *xmlData);
/// Completion block for RSParseFeed; receives the parsed feed and the error from parsing.
typedef void (^RSParsedFeedBlock)(RSParsedFeed * _Nullable parsedFeed, NSError * _Nullable error);
/// Parses xmlData asynchronously on a utility-QoS global queue.
// callback is called on main queue.
void RSParseFeed(RSXMLData *xmlData, RSParsedFeedBlock callback);
/// Parses xmlData on the calling thread. Returns nil when no parser is found for the data.
RSParsedFeed * _Nullable RSParseFeedSync(RSXMLData *xmlData, NSError * _Nullable * _Nullable error);
NS_ASSUME_NONNULL_END

View File

@@ -1,216 +0,0 @@
//
// FeedParser.m
// RSParser
//
// Created by Brent Simmons on 1/4/15.
// Copyright (c) 2015 Ranchero Software LLC. All rights reserved.
//
#import "RSFeedParser.h"
#import "FeedParser.h"
#import "RSXMLData.h"
#import "RSRSSParser.h"
#import "RSAtomParser.h"
// Returns the parser classes that are asked, in order, whether they can parse a feed.
static NSArray *parserClasses(void) {
    static NSArray *sharedParserClasses = nil;
    static dispatch_once_t buildOnce;
    // Build the list a single time; later calls return the same array.
    dispatch_once(&buildOnce, ^{
        sharedParserClasses = @[[RSRSSParser class], [RSAtomParser class]];
    });
    return sharedParserClasses;
}
// Sanity check run before any content sniffing: the feed must carry data.
static BOOL feedMayBeParseable(RSXMLData *xmlData) {
    /*TODO: check size, type, etc.*/
    return xmlData.data != nil;
}
// Forward declarations of the content sniffers defined later in this file.
// Each one searches the first numberOfBytes of bytes for marker strings.
static BOOL optimisticCanParseRSSData(const char *bytes, NSUInteger numberOfBytes);
static BOOL optimisticCanParseAtomData(const char *bytes, NSUInteger numberOfBytes);
static BOOL optimisticCanParseRDF(const char *bytes, NSUInteger numberOfBytes);
static BOOL dataIsProbablyHTML(const char *bytes, NSUInteger numberOfBytes);
static BOOL dataIsSomeWeirdException(const char *bytes, NSUInteger numberOfBytes);
static BOOL dataHasLeftCaret(const char *bytes, NSUInteger numberOfBytes);
// parserClassForXMLData sniffs at most this many leading bytes.
static const NSUInteger maxNumberOfBytesToSearch = 4096;
// Data must be longer than this many bytes for sniffing to be attempted at all.
static const NSUInteger minNumberOfBytesToSearch = 20;
// Chooses the parser class for xmlData.
//
// Returns a Class (never an instance), or nil when the data does not look
// like a feed this library can parse. Callers are expected to alloc/init the
// returned class themselves.
static Class parserClassForXMLData(RSXMLData *xmlData) {
    if (!feedMayBeParseable(xmlData)) {
        return nil;
    }
    // TODO: check for things like images and movies and return nil.
    const char *bytes = xmlData.data.bytes;
    NSUInteger numberOfBytes = xmlData.data.length;
    // Fast path: sniff the leading bytes for well-known markers.
    if (numberOfBytes > minNumberOfBytesToSearch) {
        if (numberOfBytes > maxNumberOfBytesToSearch) {
            numberOfBytes = maxNumberOfBytesToSearch;
        }
        if (!dataHasLeftCaret(bytes, numberOfBytes)) {
            return nil;
        }
        if (optimisticCanParseRSSData(bytes, numberOfBytes)) {
            return [RSRSSParser class];
        }
        if (optimisticCanParseAtomData(bytes, numberOfBytes)) {
            return [RSAtomParser class];
        }
        if (optimisticCanParseRDF(bytes, numberOfBytes)) {
            return nil; //TODO: parse RDF feeds
        }
        if (dataIsProbablyHTML(bytes, numberOfBytes)) {
            return nil;
        }
        if (dataIsSomeWeirdException(bytes, numberOfBytes)) {
            return nil;
        }
    }
    // Slow path: ask each parser class whether it can handle the data.
    for (Class parserClass in parserClasses()) {
        if ([parserClass canParseFeed:xmlData]) {
            // Return the class itself. Returning an allocated parser here would
            // make callers send +alloc to an instance, which is not a valid message.
            return parserClass;
        }
    }
    return nil;
}
// Instantiates the parser chosen for xmlData, or returns nil when no parser class applies.
static id<FeedParser> parserForXMLData(RSXMLData *xmlData) {
    Class chosenClass = parserClassForXMLData(xmlData);
    return chosenClass ? [[chosenClass alloc] initWithXMLData:xmlData] : nil;
}
// YES when a parser class can be chosen for xmlData.
static BOOL canParseXMLData(RSXMLData *xmlData) {
    Class chosenClass = parserClassForXMLData(xmlData);
    return chosenClass != nil;
}
// YES when the C string `string` occurs within the first numberOfBytes of bytes.
// The search is delegated to strnstr.
static BOOL didFindString(const char *string, const char *bytes, NSUInteger numberOfBytes) {
    return strnstr(bytes, string, numberOfBytes) != NULL;
}
// YES when a '<' appears in the searched bytes.
static BOOL dataHasLeftCaret(const char *bytes, NSUInteger numberOfBytes) {
    const char *leftCaret = "<";
    BOOL found = didFindString(leftCaret, bytes, numberOfBytes);
    return found;
}
// YES when any of a handful of HTML markers appears in the searched bytes.
static BOOL dataIsProbablyHTML(const char *bytes, NSUInteger numberOfBytes) {
    // Won't catch every single case, which is fine.
    static const char * const htmlMarkers[] = {
        "<html",
        "<body",
        "doctype html",
        "DOCTYPE html",
        "DOCTYPE HTML",
        "<meta",
        "<HTML"
    };
    const size_t markerCount = sizeof(htmlMarkers) / sizeof(htmlMarkers[0]);
    for (size_t i = 0; i < markerCount; i++) {
        if (didFindString(htmlMarkers[i], bytes, numberOfBytes)) {
            return YES;
        }
    }
    return NO;
}
// YES when the bytes contain the opening of a Google "errors" XML document.
static BOOL dataIsSomeWeirdException(const char *bytes, NSUInteger numberOfBytes) {
    return didFindString("<errors xmlns='http://schemas.google", bytes, numberOfBytes);
}
// YES when an "<rdf:RDF" opening tag appears in the searched bytes.
static BOOL optimisticCanParseRDF(const char *bytes, NSUInteger numberOfBytes) {
    BOOL foundRDFTag = didFindString("<rdf:RDF", bytes, numberOfBytes);
    return foundRDFTag;
}
// YES when both "<rss" and "<channel" appear in the searched bytes.
static BOOL optimisticCanParseRSSData(const char *bytes, NSUInteger numberOfBytes) {
    return didFindString("<rss", bytes, numberOfBytes)
        && didFindString("<channel", bytes, numberOfBytes);
}
// YES when a "<feed" opening tag appears in the searched bytes.
static BOOL optimisticCanParseAtomData(const char *bytes, NSUInteger numberOfBytes) {
    BOOL foundFeedTag = didFindString("<feed", bytes, numberOfBytes);
    return foundFeedTag;
}
// Delivers parsedFeed and error to callback on the main queue.
// A nil callback is tolerated: the block is still dispatched but does nothing.
static void callCallback(RSParsedFeedBlock callback, RSParsedFeed *parsedFeed, NSError *error) {
    dispatch_block_t deliverResult = ^{
        @autoreleasepool {
            if (!callback) {
                return;
            }
            callback(parsedFeed, error);
        }
    };
    dispatch_async(dispatch_get_main_queue(), deliverResult);
}
#pragma mark - API
// Public entry point: YES when a parser class can be chosen for xmlData.
BOOL RSCanParseFeed(RSXMLData *xmlData) {
    BOOL parseable = canParseXMLData(xmlData);
    return parseable;
}
// Parses xmlData on a utility-QoS global queue, then hands the result to
// callCallback, which invokes callback on the main queue.
void RSParseFeed(RSXMLData *xmlData, RSParsedFeedBlock callback) {
    dispatch_queue_t backgroundQueue = dispatch_get_global_queue(QOS_CLASS_UTILITY, 0);
    dispatch_async(backgroundQueue, ^{
        NSError *parseError = nil;
        RSParsedFeed *feed = RSParseFeedSync(xmlData, &parseError);
        callCallback(callback, feed, parseError);
    });
}
// Parses xmlData on the calling thread.
// Returns nil when no parser is available for the data; otherwise returns
// whatever the chosen parser's parseFeed: returns.
RSParsedFeed *RSParseFeedSync(RSXMLData *xmlData, NSError **error) {
    id<FeedParser> feedParser = parserForXMLData(xmlData);
    if (!feedParser) {
        return nil;
    }
    return [feedParser parseFeed:error];
}

View File

@@ -1,6 +1,6 @@
//
// RSParsedArticle.h
// RSXML
// RSParser
//
// Created by Brent Simmons on 12/6/14.
// Copyright (c) 2014 Ranchero Software LLC. All rights reserved.

View File

@@ -1,13 +1,14 @@
//
// RSParsedArticle.m
// RSXML
// RSParser
//
// Created by Brent Simmons on 12/6/14.
// Copyright (c) 2014 Ranchero Software LLC. All rights reserved.
//
#import "RSParsedArticle.h"
#import "RSXMLInternal.h"
#import "RSParserInternal.h"
#import "NSString+RSParser.h"
@implementation RSParsedArticle
@@ -89,7 +90,7 @@
NSAssert(!RSParserStringIsEmpty(self.feedURL), nil);
[s appendString:self.feedURL];
return [s rsxml_md5HashString];
return [s rsparser_md5Hash];
}
- (void)calculateArticleID {

View File

@@ -1,6 +1,6 @@
//
// RSParsedFeed.h
// RSXML
// RSParser
//
// Created by Brent Simmons on 7/12/15.
// Copyright © 2015 Ranchero Software, LLC. All rights reserved.

View File

@@ -1,6 +1,6 @@
//
// RSParsedFeed.m
// RSXML
// RSParser
//
// Created by Brent Simmons on 7/12/15.
// Copyright © 2015 Ranchero Software, LLC. All rights reserved.

View File

@@ -0,0 +1,60 @@
//
// RSParsedFeedTransformer.swift
// RSParser
//
// Created by Brent Simmons on 6/25/17.
// Copyright © 2017 Ranchero Software, LLC. All rights reserved.
//
import Foundation
// RSRSSParser and RSAtomParser were written in Objective-C quite a while ago.
// They create an RSParsedFeed object and related Objective-C objects.
// These functions take an RSParsedFeed and return a Swift-y ParsedFeed,
// which is part of providing a single API for feed parsing.
struct RSParsedFeedTransformer {

    /// Builds a `ParsedFeed` from an Objective-C `RSParsedFeed`.
    ///
    /// Only the title, link, URL string, and articles are carried over; every
    /// other `ParsedFeed` field is passed as `nil` (or `false` for `expired`).
    ///
    /// NOTE(review): the feed type is always `.rss`, even though `AtomParser`
    /// also routes its results through this function — confirm that is intended.
    static func parsedFeed(_ rsParsedFeed: RSParsedFeed) -> ParsedFeed {
        let convertedItems = parsedItems(rsParsedFeed.articles)
        return ParsedFeed(
            type: .rss,
            title: rsParsedFeed.title,
            homePageURL: rsParsedFeed.link,
            feedURL: rsParsedFeed.urlString,
            feedDescription: nil,
            nextURL: nil,
            iconURL: nil,
            faviconURL: nil,
            authors: nil,
            expired: false,
            hubs: nil,
            items: convertedItems)
    }
}
private extension RSParsedFeedTransformer {

    /// Converts every `RSParsedArticle` in the set into a `ParsedItem`.
    static func parsedItems(_ parsedArticles: Set<RSParsedArticle>) -> [ParsedItem] {
        return parsedArticles.map { parsedItem($0) }
    }

    /// Converts one `RSParsedArticle` into a `ParsedItem`.
    ///
    /// The article's permalink becomes `url`, its link becomes `externalURL`,
    /// and its body becomes `contentHTML`. Fields with no counterpart on the
    /// article are passed as `nil`.
    static func parsedItem(_ parsedArticle: RSParsedArticle) -> ParsedItem {
        return ParsedItem(
            uniqueID: parsedArticle.articleID,
            url: parsedArticle.permalink,
            externalURL: parsedArticle.link,
            title: parsedArticle.title,
            contentHTML: parsedArticle.body,
            contentText: nil,
            summary: nil,
            imageURL: nil,
            bannerImageURL: nil,
            datePublished: parsedArticle.datePublished,
            dateModified: parsedArticle.dateModified,
            authors: parsedAuthors(parsedArticle.author),
            tags: nil,
            attachments: nil)
    }

    /// Wraps the given value in a single-element author array, stored as the
    /// author's email address. Returns `nil` when no value is given.
    static func parsedAuthors(_ authorEmailAddress: String?) -> [ParsedAuthor]? {
        return authorEmailAddress.map { emailAddress in
            [ParsedAuthor(name: nil, url: nil, avatarURL: nil, emailAddress: emailAddress)]
        }
    }
}

View File

@@ -6,8 +6,14 @@
// Copyright (c) 2015 Ranchero Software LLC. All rights reserved.
//
#import "FeedParser.h"
@import Foundation;
@class ParserData;
@class RSParsedFeed;
@interface RSRSSParser : NSObject
+ (RSParsedFeed *)parseFeedWithData:(ParserData *)parserData;
@interface RSRSSParser : NSObject <FeedParser>
@end

View File

@@ -11,10 +11,10 @@
#import "RSSAXParser.h"
#import "RSParsedFeed.h"
#import "RSParsedArticle.h"
#import "RSXMLData.h"
#import "RSXMLInternal.h"
#import "NSString+RSXML.h"
#import "RSParserInternal.h"
#import "NSString+RSParser.h"
#import "RSDateParser.h"
#import <RSParser/RSParser-Swift.h>
@interface RSRSSParser () <RSSAXParserDelegate>
@@ -40,57 +40,23 @@
#pragma mark - Class Methods
+ (BOOL)canParseFeed:(RSXMLData *)xmlData {
+ (RSParsedFeed *)parseFeedWithData:(ParserData *)parserData {
// Checking for '<rss' and '<channel>' within first n characters should do it.
// TODO: handle RSS 1.0
@autoreleasepool {
NSData *feedData = xmlData.data;
NSString *s = [[NSString alloc] initWithBytesNoCopy:(void *)feedData.bytes length:feedData.length encoding:NSUTF8StringEncoding freeWhenDone:NO];
if (!s) {
s = [[NSString alloc] initWithData:feedData encoding:NSUTF8StringEncoding];
}
if (!s) {
s = [[NSString alloc] initWithData:feedData encoding:NSUnicodeStringEncoding];
}
if (!s) {
return NO;
}
static const NSInteger numberOfCharactersToSearch = 4096;
NSRange rangeToSearch = NSMakeRange(0, numberOfCharactersToSearch);
if (s.length < numberOfCharactersToSearch) {
rangeToSearch.length = s.length;
}
NSRange rssRange = [s rangeOfString:@"<rss" options:NSLiteralSearch range:rangeToSearch];
NSRange channelRange = [s rangeOfString:@"<channel>" options:NSLiteralSearch range:rangeToSearch];
if (rssRange.length < 1 || channelRange.length < 1) {
return NO;
}
if (rssRange.location > channelRange.location) {
return NO; // Wrong order.
}
}
return YES;
RSRSSParser *parser = [[[self class] alloc] initWithParserData:parserData];
return [parser parseFeed];
}
#pragma mark - Init
- (instancetype)initWithXMLData:(RSXMLData *)xmlData {
- (instancetype)initWithParserData:(ParserData *)parserData {
self = [super init];
if (!self) {
return nil;
}
_feedData = xmlData.data;
_urlString = xmlData.urlString;
_feedData = parserData.data;
_urlString = parserData.url;
_parser = [[RSSAXParser alloc] initWithDelegate:self];
_articles = [NSMutableArray new];
@@ -100,7 +66,7 @@
#pragma mark - API
- (RSParsedFeed *)parseFeed:(NSError **)error {
- (RSParsedFeed *)parseFeed {
[self parse];
@@ -292,7 +258,7 @@ static const NSInteger kTrueLength = 5;
- (NSString *)currentStringWithHTMLEntitiesDecoded {
return [self.parser.currentStringWithTrimmedWhitespace rs_stringByDecodingHTMLEntities];
return [self.parser.currentStringWithTrimmedWhitespace rsparser_stringByDecodingHTMLEntities];
}
- (void)addArticleElement:(const xmlChar *)localName prefix:(const xmlChar *)prefix {

View File

@@ -0,0 +1,28 @@
//
// RSSParser.swift
// RSParser
//
// Created by Brent Simmons on 6/25/17.
// Copyright © 2017 Ranchero Software, LLC. All rights reserved.
//
import Foundation
// RSSParser wraps the Objective-C RSRSSParser.
//
// The Objective-C parser creates RSParsedFeed, RSParsedArticle, etc.
// This wrapper then creates ParsedFeed, ParsedItem, etc. so that it creates
// the same things that JSONFeedParser and RSSInJSONParser create.
//
// In general, you should see FeedParser.swift for all your feed-parsing needs.
public struct RSSParser {

    /// Parses the given data with the Objective-C `RSRSSParser` and converts
    /// the result into a `ParsedFeed`.
    ///
    /// - Parameter parserData: The data handed to `RSRSSParser`.
    /// - Returns: The transformed feed, or `nil` when `RSRSSParser` returns no feed.
    public static func parse(_ parserData: ParserData) -> ParsedFeed? {
        guard let objcFeed = RSRSSParser.parseFeed(with: parserData) else {
            return nil
        }
        return RSParsedFeedTransformer.parsedFeed(objcFeed)
    }
}