diff --git a/Modules/Account/Package.swift b/Modules/Account/Package.swift index 48ee3e33e..539cb3ea5 100644 --- a/Modules/Account/Package.swift +++ b/Modules/Account/Package.swift @@ -12,7 +12,6 @@ let package = Package( ], dependencies: [ .package(path: "../Parser"), - .package(path: "../ParserObjC"), .package(path: "../Articles"), .package(path: "../ArticlesDatabase"), .package(path: "../Web"), @@ -35,7 +34,6 @@ let package = Package( name: "Account", dependencies: [ "Parser", - "ParserObjC", "Web", "Articles", "ArticlesDatabase", diff --git a/Modules/Account/Sources/Account/Account.swift b/Modules/Account/Sources/Account/Account.swift index 278fe5cfe..efc42748b 100644 --- a/Modules/Account/Sources/Account/Account.swift +++ b/Modules/Account/Sources/Account/Account.swift @@ -13,7 +13,6 @@ import UIKit import Foundation import Articles import Parser -import ParserObjC import Database import ArticlesDatabase import Web diff --git a/Modules/Account/Sources/Account/AccountDelegates/CloudKitAccountDelegate.swift b/Modules/Account/Sources/Account/AccountDelegates/CloudKitAccountDelegate.swift index 3e83b9da9..544a30e93 100644 --- a/Modules/Account/Sources/Account/AccountDelegates/CloudKitAccountDelegate.swift +++ b/Modules/Account/Sources/Account/AccountDelegates/CloudKitAccountDelegate.swift @@ -12,7 +12,6 @@ import SystemConfiguration import os.log import SyncDatabase import Parser -import ParserObjC import Articles import ArticlesDatabase import Web diff --git a/Modules/Account/Sources/Account/AccountDelegates/LocalAccountDelegate.swift b/Modules/Account/Sources/Account/AccountDelegates/LocalAccountDelegate.swift index 01ec34b3c..00a9b05f0 100644 --- a/Modules/Account/Sources/Account/AccountDelegates/LocalAccountDelegate.swift +++ b/Modules/Account/Sources/Account/AccountDelegates/LocalAccountDelegate.swift @@ -9,7 +9,6 @@ import Foundation import os.log import Parser -import ParserObjC import Articles import ArticlesDatabase import Web diff --git a/Modules/Account/Sources/Account/OPMLFile.swift b/Modules/Account/Sources/Account/OPMLFile.swift index c44b15d69..a8fe0593f 100644 --- a/Modules/Account/Sources/Account/OPMLFile.swift +++ b/Modules/Account/Sources/Account/OPMLFile.swift @@ -9,7 +9,6 @@ import Foundation import os import Parser -import ParserObjC import Core @MainActor final class OPMLFile { diff --git a/Modules/Account/Sources/Account/OPMLNormalizer.swift b/Modules/Account/Sources/Account/OPMLNormalizer.swift index bfad43687..ab6111f89 100644 --- a/Modules/Account/Sources/Account/OPMLNormalizer.swift +++ b/Modules/Account/Sources/Account/OPMLNormalizer.swift @@ -8,7 +8,6 @@ import Foundation import Parser -import ParserObjC final class OPMLNormalizer { diff --git a/Modules/CloudKitSync/Sources/CloudKitSync/CloudKitAccountZone.swift b/Modules/CloudKitSync/Sources/CloudKitSync/CloudKitAccountZone.swift index 8f3b566a5..e4a18cc51 100644 --- a/Modules/CloudKitSync/Sources/CloudKitSync/CloudKitAccountZone.swift +++ b/Modules/CloudKitSync/Sources/CloudKitSync/CloudKitAccountZone.swift @@ -10,7 +10,6 @@ import Foundation import os.log import Web import Parser -import ParserObjC import CloudKit import FoundationExtras diff --git a/Modules/CloudKitSync/Sources/CloudKitSync/CloudKitArticlesZone.swift b/Modules/CloudKitSync/Sources/CloudKitSync/CloudKitArticlesZone.swift index e0c98cccd..c5aba8fa4 100644 --- a/Modules/CloudKitSync/Sources/CloudKitSync/CloudKitArticlesZone.swift +++ b/Modules/CloudKitSync/Sources/CloudKitSync/CloudKitArticlesZone.swift @@ -9,7 +9,6 @@ import Foundation import os.log import Parser -import ParserObjC import Web import CloudKit import Articles diff --git a/Modules/FeedFinder/Sources/FeedFinder/FeedFinder.swift b/Modules/FeedFinder/Sources/FeedFinder/FeedFinder.swift index 738f7b465..d2dd7cfc1 100644 --- a/Modules/FeedFinder/Sources/FeedFinder/FeedFinder.swift +++ b/Modules/FeedFinder/Sources/FeedFinder/FeedFinder.swift @@ -8,7 +8,6 @@ import Foundation import Parser -import ParserObjC import Web import CommonErrors import os.log diff --git a/Modules/FeedFinder/Sources/FeedFinder/HTMLFeedFinder.swift b/Modules/FeedFinder/Sources/FeedFinder/HTMLFeedFinder.swift index f755dbc20..805bc047b 100644 --- a/Modules/FeedFinder/Sources/FeedFinder/HTMLFeedFinder.swift +++ b/Modules/FeedFinder/Sources/FeedFinder/HTMLFeedFinder.swift @@ -9,7 +9,6 @@ import Foundation import FoundationExtras import Parser -import ParserObjC private let feedURLWordsToMatch = ["feed", "xml", "rss", "atom", "json"] diff --git a/Modules/Feedbin/Sources/Feedbin/FeedbinEntry.swift b/Modules/Feedbin/Sources/Feedbin/FeedbinEntry.swift index 809ea799c..03e90215a 100644 --- a/Modules/Feedbin/Sources/Feedbin/FeedbinEntry.swift +++ b/Modules/Feedbin/Sources/Feedbin/FeedbinEntry.swift @@ -8,7 +8,6 @@ import Foundation import Parser -import ParserObjC public final class FeedbinEntry: Decodable, @unchecked Sendable { diff --git a/Modules/Images/Sources/Images/Favicons/FaviconDownloader.swift b/Modules/Images/Sources/Images/Favicons/FaviconDownloader.swift index 31670d7df..4055b7b73 100644 --- a/Modules/Images/Sources/Images/Favicons/FaviconDownloader.swift +++ b/Modules/Images/Sources/Images/Favicons/FaviconDownloader.swift @@ -12,7 +12,6 @@ import Articles import Account import UniformTypeIdentifiers import Core -import ParserObjC public extension Notification.Name { static let FaviconDidBecomeAvailable = Notification.Name("FaviconDidBecomeAvailableNotification") // userInfo key: FaviconDownloader.UserInfoKey.faviconURL diff --git a/Modules/Images/Sources/Images/Favicons/FaviconURLFinder.swift b/Modules/Images/Sources/Images/Favicons/FaviconURLFinder.swift index b3e99fa13..f959ff3b1 100644 --- a/Modules/Images/Sources/Images/Favicons/FaviconURLFinder.swift +++ b/Modules/Images/Sources/Images/Favicons/FaviconURLFinder.swift @@ -9,7 +9,6 @@ import Foundation import CoreServices import Parser -import ParserObjC import UniformTypeIdentifiers // The favicon URLs may be specified in the head section of the home page. diff --git a/Modules/Images/Sources/Images/FeedIconDownloader.swift b/Modules/Images/Sources/Images/FeedIconDownloader.swift index 35066ff42..55a66363c 100644 --- a/Modules/Images/Sources/Images/FeedIconDownloader.swift +++ b/Modules/Images/Sources/Images/FeedIconDownloader.swift @@ -11,7 +11,6 @@ import Articles import Account import Web import Parser -import ParserObjC import Core public extension Notification.Name { diff --git a/Modules/Images/Sources/Images/RSHTMLMetadata+Extension.swift b/Modules/Images/Sources/Images/RSHTMLMetadata+Extension.swift index d421d7f33..fd5da45d7 100644 --- a/Modules/Images/Sources/Images/RSHTMLMetadata+Extension.swift +++ b/Modules/Images/Sources/Images/RSHTMLMetadata+Extension.swift @@ -8,7 +8,6 @@ import Foundation import Parser -import ParserObjC extension RSHTMLMetadata { diff --git a/Modules/LocalAccount/Sources/LocalAccount/InitialFeedDownloader.swift b/Modules/LocalAccount/Sources/LocalAccount/InitialFeedDownloader.swift index e3a408108..142833b02 100644 --- a/Modules/LocalAccount/Sources/LocalAccount/InitialFeedDownloader.swift +++ b/Modules/LocalAccount/Sources/LocalAccount/InitialFeedDownloader.swift @@ -8,7 +8,6 @@ import Foundation import Parser -import ParserObjC import Web public struct InitialFeedDownloader { diff --git a/Modules/ParserObjC/.gitignore b/Modules/ParserObjC/.gitignore deleted file mode 100644 index 0023a5340..000000000 --- a/Modules/ParserObjC/.gitignore +++ /dev/null @@ -1,8 +0,0 @@ -.DS_Store -/.build -/Packages -xcuserdata/ -DerivedData/ -.swiftpm/configuration/registries.json -.swiftpm/xcode/package.xcworkspace/contents.xcworkspacedata -.netrc diff --git a/Modules/ParserObjC/Package.swift b/Modules/ParserObjC/Package.swift deleted file mode 100644 index 69281ca1d..000000000 --- a/Modules/ParserObjC/Package.swift +++ /dev/null @@ -1,26 +0,0 @@ -// swift-tools-version: 5.10 -// The swift-tools-version declares the minimum version of Swift required to build this package. - -import PackageDescription - -let package = Package( - name: "ParserObjC", - platforms: [.macOS(.v14), .iOS(.v17)], - products: [ - // Products define the executables and libraries a package produces, making them visible to other packages. - .library( - name: "ParserObjC", - type: .dynamic, - targets: ["ParserObjC"]), - ], - targets: [ - // Targets are the basic building blocks of a package, defining a module or a test suite. - // Targets can depend on other targets in this package and products from dependencies. - .target( - name: "ParserObjC", - cSettings: [ - .headerSearchPath("include") - ] - ), - ] -) diff --git a/Modules/ParserObjC/Sources/ParserObjC/NSData+RSParser.h b/Modules/ParserObjC/Sources/ParserObjC/NSData+RSParser.h deleted file mode 100644 index be2d892f8..000000000 --- a/Modules/ParserObjC/Sources/ParserObjC/NSData+RSParser.h +++ /dev/null @@ -1,26 +0,0 @@ -// -// NSData+RSParser.h -// RSParser -// -// Created by Brent Simmons on 6/24/17. -// Copyright © 2017 Ranchero Software, LLC. All rights reserved. -// - -@import Foundation; - - -@interface NSData (RSParser) - -- (BOOL)isProbablyHTML; -- (BOOL)isProbablyXML; -- (BOOL)isProbablyJSON; - -- (BOOL)isProbablyJSONFeed; -- (BOOL)isProbablyRSSInJSON; -- (BOOL)isProbablyRSS; -- (BOOL)isProbablyAtom; - -@end - - - diff --git a/Modules/ParserObjC/Sources/ParserObjC/NSData+RSParser.m b/Modules/ParserObjC/Sources/ParserObjC/NSData+RSParser.m deleted file mode 100644 index 8ac9aa167..000000000 --- a/Modules/ParserObjC/Sources/ParserObjC/NSData+RSParser.m +++ /dev/null @@ -1,139 +0,0 @@ -// -// NSData+RSParser.m -// RSParser -// -// Created by Brent Simmons on 6/24/17. -// Copyright © 2017 Ranchero Software, LLC. All rights reserved. -// - -#import "NSData+RSParser.h" - - - - -/* TODO: find real-world cases where the isProbably* cases fail when they should succeed, and add them to tests.*/ - -static BOOL bytesAreProbablyHTML(const char *bytes, NSUInteger numberOfBytes); -static BOOL bytesAreProbablyXML(const char *bytes, NSUInteger numberOfBytes); -static BOOL bytesStartWithStringIgnoringWhitespace(const char *string, const char *bytes, NSUInteger numberOfBytes); -static BOOL didFindString(const char *string, const char *bytes, NSUInteger numberOfBytes); -static BOOL bytesStartWithRSS(const char *bytes, NSUInteger numberOfBytes); -static BOOL bytesStartWithRDF(const char *bytes, NSUInteger numberOfBytes); -static BOOL bytesStartWithAtom(const char *bytes, NSUInteger numberOfBytes); - -@implementation NSData (RSParser) - -- (BOOL)isProbablyHTML { - - return bytesAreProbablyHTML(self.bytes, self.length); -} - -- (BOOL)isProbablyXML { - - return bytesAreProbablyXML(self.bytes, self.length); -} - -- (BOOL)isProbablyJSON { - - return bytesStartWithStringIgnoringWhitespace("{", self.bytes, self.length); -} - -- (BOOL)isProbablyJSONFeed { - - if (![self isProbablyJSON]) { - return NO; - } - return didFindString("://jsonfeed.org/version/", self.bytes, self.length) || didFindString(":\\/\\/jsonfeed.org\\/version\\/", self.bytes, self.length); -} - -- (BOOL)isProbablyRSSInJSON { - - if (![self isProbablyJSON]) { - return NO; - } - const char *bytes = self.bytes; - NSUInteger length = self.length; - return didFindString("rss", bytes, length) && didFindString("channel", bytes, length) && didFindString("item", bytes, length); -} - -- (BOOL)isProbablyRSS { - - if (didFindString(" tag, but it should be parsed anyway. It does have some other distinct RSS markers we can find. - return (didFindString("", self.bytes, self.length) && didFindString("", self.bytes, self.length)); -} - -- (BOOL)isProbablyAtom { - - return didFindString(", and & entity-encoded. -@property (readonly, copy) NSString *rsparser_stringByEncodingRequiredEntities; - -- (NSString *)rsparser_md5Hash; - -- (BOOL)rsparser_contains:(NSString *)s; - -@end - -NS_ASSUME_NONNULL_END diff --git a/Modules/ParserObjC/Sources/ParserObjC/NSString+RSParser.m b/Modules/ParserObjC/Sources/ParserObjC/NSString+RSParser.m deleted file mode 100755 index 8a4e7d114..000000000 --- a/Modules/ParserObjC/Sources/ParserObjC/NSString+RSParser.m +++ /dev/null @@ -1,348 +0,0 @@ -// -// NSString+RSParser.m -// RSParser -// -// Created by Brent Simmons on 9/25/15. -// Copyright © 2015 Ranchero Software, LLC. All rights reserved. -// - -#import "NSString+RSParser.h" -#import - - - - -@interface NSScanner (RSParser) - -- (BOOL)rs_scanEntityValue:(NSString * _Nullable * _Nullable)decodedEntity; - -@end - - -@implementation NSString (RSParser) - -- (BOOL)rsparser_contains:(NSString *)s { - - return [self rangeOfString:s].location != NSNotFound; -} - -- (NSString *)rsparser_stringByDecodingHTMLEntities { - - @autoreleasepool { - - NSScanner *scanner = [[NSScanner alloc] initWithString:self]; - scanner.charactersToBeSkipped = nil; - NSMutableString *result = [[NSMutableString alloc] init]; - - while (true) { - - NSString *scannedString = nil; - if ([scanner scanUpToString:@"&" intoString:&scannedString]) { - [result appendString:scannedString]; - } - if (scanner.isAtEnd) { - break; - } - NSUInteger savedScanLocation = scanner.scanLocation; - - NSString *decodedEntity = nil; - if ([scanner rs_scanEntityValue:&decodedEntity]) { - [result appendString:decodedEntity]; - } - else { - [result appendString:@"&"]; - scanner.scanLocation = savedScanLocation + 1; - } - - if (scanner.isAtEnd) { - break; - } - } - - if ([self isEqualToString:result]) { - return self; - } - return [result copy]; - } -} - - -static NSDictionary *RSEntitiesDictionary(void); -static NSString *RSParserStringWithValue(uint32_t value); - -- (NSString * _Nullable)rs_stringByDecodingEntity { - - // self may or may not have outer & and ; characters. - - NSMutableString *s = [self mutableCopy]; - - if ([s hasPrefix:@"&"]) { - [s deleteCharactersInRange:NSMakeRange(0, 1)]; - } - if ([s hasSuffix:@";"]) { - [s deleteCharactersInRange:NSMakeRange(s.length - 1, 1)]; - } - - NSDictionary *entitiesDictionary = RSEntitiesDictionary(); - - NSString *decodedEntity = entitiesDictionary[self]; - if (decodedEntity) { - return decodedEntity; - } - - if ([s hasPrefix:@"#x"] || [s hasPrefix:@"#X"]) { // Hex - NSScanner *scanner = [[NSScanner alloc] initWithString:s]; - scanner.charactersToBeSkipped = [NSCharacterSet characterSetWithCharactersInString:@"#xX"]; - unsigned int hexValue = 0; - if ([scanner scanHexInt:&hexValue]) { - return RSParserStringWithValue((uint32_t)hexValue); - } - return nil; - } - - else if ([s hasPrefix:@"#"]) { - [s deleteCharactersInRange:NSMakeRange(0, 1)]; - NSInteger value = s.integerValue; - if (value < 1) { - return nil; - } - return RSParserStringWithValue((uint32_t)value); - } - - return nil; -} - -- (NSString *)rsparser_stringByEncodingRequiredEntities { - NSMutableString *result = [NSMutableString string]; - - for (NSUInteger i = 0; i < self.length; ++i) { - unichar c = [self characterAtIndex:i]; - - switch (c) { - case '<': - [result appendString:@"<"]; - break; - case '>': - [result appendString:@">"]; - break; - case '&': - [result appendString:@"&"]; - break; - default: - [result appendFormat:@"%C", c]; - break; - } - } - - return [result copy]; -} - -#pragma GCC diagnostic push -#pragma GCC diagnostic ignored "-Wdeprecated-declarations" -- (NSData *)_rsparser_md5HashData { - - NSData *data = [self dataUsingEncoding:NSUTF8StringEncoding]; - unsigned char hash[CC_MD5_DIGEST_LENGTH]; - CC_MD5(data.bytes, (CC_LONG)data.length, hash); - - return [NSData dataWithBytes:(const void *)hash length:CC_MD5_DIGEST_LENGTH]; -} -#pragma GCC diagnostic pop - -- (NSString *)rsparser_md5Hash { - - NSData *md5Data = [self _rsparser_md5HashData]; - const Byte *bytes = md5Data.bytes; - return [NSString stringWithFormat:@"%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x", bytes[0], bytes[1], bytes[2], bytes[3], bytes[4], bytes[5], bytes[6], bytes[7], bytes[8], bytes[9], bytes[10], bytes[11], bytes[12], bytes[13], bytes[14], bytes[15]]; -} - - -@end - -@implementation NSScanner (RSParser) - -- (BOOL)rs_scanEntityValue:(NSString * _Nullable * _Nullable)decodedEntity { - - NSString *s = self.string; - NSUInteger initialScanLocation = self.scanLocation; - static NSUInteger maxEntityLength = 20; // It’s probably smaller, but this is just for sanity. - - while (true) { - - unichar ch = [s characterAtIndex:self.scanLocation]; - if ([NSCharacterSet.whitespaceAndNewlineCharacterSet characterIsMember:ch]) { - break; - } - if (ch == ';') { - if (!decodedEntity) { - return YES; - } - NSString *rawEntity = [s substringWithRange:NSMakeRange(initialScanLocation + 1, (self.scanLocation - initialScanLocation) - 1)]; - *decodedEntity = [rawEntity rs_stringByDecodingEntity]; - self.scanLocation = self.scanLocation + 1; - return *decodedEntity != nil; - } - - self.scanLocation = self.scanLocation + 1; - if (self.scanLocation - initialScanLocation > maxEntityLength) { - break; - } - if (self.isAtEnd) { - break; - } - } - - return NO; -} - -@end - -static NSString *RSParserStringWithValue(uint32_t value) { - // From WebCore's HTMLEntityParser - static const uint32_t windowsLatin1ExtensionArray[32] = { - 0x20AC, 0x0081, 0x201A, 0x0192, 0x201E, 0x2026, 0x2020, 0x2021, // 80-87 - 0x02C6, 0x2030, 0x0160, 0x2039, 0x0152, 0x008D, 0x017D, 0x008F, // 88-8F - 0x0090, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014, // 90-97 - 0x02DC, 0x2122, 0x0161, 0x203A, 0x0153, 0x009D, 0x017E, 0x0178 // 98-9F - }; - - if ((value & ~0x1Fu) == 0x80u) { // value >= 128 && value < 160 - value = windowsLatin1ExtensionArray[value - 0x80]; - } - - value = CFSwapInt32HostToLittle(value); - - return [[NSString alloc] initWithBytes:&value length:sizeof(value) encoding:NSUTF32LittleEndianStringEncoding]; -} - -static NSDictionary *RSEntitiesDictionary(void) { - - static NSDictionary *entitiesDictionary = nil; - - static dispatch_once_t onceToken; - dispatch_once(&onceToken, ^{ - - entitiesDictionary = @{ - // Named entities - @"AElig": @"Æ", - @"Aacute": @"Á", - @"Acirc": @"Â", - @"Agrave": @"À", - @"Aring": @"Å", - @"Atilde": @"Ã", - @"Auml": @"Ä", - @"Ccedil": @"Ç", - @"Dstrok": @"Ð", - @"ETH": @"Ð", - @"Eacute": @"É", - @"Ecirc": @"Ê", - @"Egrave": @"È", - @"Euml": @"Ë", - @"Iacute": @"Í", - @"Icirc": @"Î", - @"Igrave": @"Ì", - @"Iuml": @"Ï", - @"Ntilde": @"Ñ", - @"Oacute": @"Ó", - @"Ocirc": @"Ô", - @"Ograve": @"Ò", - @"Oslash": @"Ø", - @"Otilde": @"Õ", - @"Ouml": @"Ö", - @"Pi": @"Π", - @"THORN": @"Þ", - @"Uacute": @"Ú", - @"Ucirc": @"Û", - @"Ugrave": @"Ù", - @"Uuml": @"Ü", - @"Yacute": @"Y", - @"aacute": @"á", - @"acirc": @"â", - @"acute": @"´", - @"aelig": @"æ", - @"agrave": @"à", - @"amp": @"&", - @"apos": @"'", - @"aring": @"å", - @"atilde": @"ã", - @"auml": @"ä", - @"brkbar": @"¦", - @"brvbar": @"¦", - @"ccedil": @"ç", - @"cedil": @"¸", - @"cent": @"¢", - @"copy": @"©", - @"curren": @"¤", - @"deg": @"°", - @"die": @"¨", - @"divide": @"÷", - @"eacute": @"é", - @"ecirc": @"ê", - @"egrave": @"è", - @"eth": @"ð", - @"euml": @"ë", - @"euro": @"€", - @"frac12": @"½", - @"frac14": @"¼", - @"frac34": @"¾", - @"gt": @">", - @"hearts": @"♥", - @"hellip": @"…", - @"iacute": @"í", - @"icirc": @"î", - @"iexcl": @"¡", - @"igrave": @"ì", - @"iquest": @"¿", - @"iuml": @"ï", - @"laquo": @"«", - @"ldquo": @"“", - @"lsquo": @"‘", - @"lt": @"<", - @"macr": @"¯", - @"mdash": @"—", - @"micro": @"µ", - @"middot": @"·", - @"ndash": @"–", - @"not": @"¬", - @"ntilde": @"ñ", - @"oacute": @"ó", - @"ocirc": @"ô", - @"ograve": @"ò", - @"ordf": @"ª", - @"ordm": @"º", - @"oslash": @"ø", - @"otilde": @"õ", - @"ouml": @"ö", - @"para": @"¶", - @"pi": @"π", - @"plusmn": @"±", - @"pound": @"£", - @"quot": @"\"", - @"raquo": @"»", - @"rdquo": @"”", - @"reg": @"®", - @"rsquo": @"’", - @"sect": @"§", - @"shy": RSParserStringWithValue(173), - @"sup1": @"¹", - @"sup2": @"²", - @"sup3": @"³", - @"szlig": @"ß", - @"thorn": @"þ", - @"times": @"×", - @"trade": @"™", - @"uacute": @"ú", - @"ucirc": @"û", - @"ugrave": @"ù", - @"uml": @"¨", - @"uuml": @"ü", - @"yacute": @"y", - @"yen": @"¥", - @"yuml": @"ÿ", - @"infin": @"∞", - @"nbsp": RSParserStringWithValue(160) - }; - }); - - return entitiesDictionary; -} diff --git a/Modules/ParserObjC/Sources/ParserObjC/RSAtomParser.h b/Modules/ParserObjC/Sources/ParserObjC/RSAtomParser.h deleted file mode 100755 index 27b5d80e4..000000000 --- a/Modules/ParserObjC/Sources/ParserObjC/RSAtomParser.h +++ /dev/null @@ -1,18 +0,0 @@ -// -// RSAtomParser.h -// RSParser -// -// Created by Brent Simmons on 1/15/15. -// Copyright (c) 2015 Ranchero Software LLC. All rights reserved. -// - -@import Foundation; - -@class ParserData; -@class RSParsedFeed; - -@interface RSAtomParser : NSObject - -+ (RSParsedFeed *)parseFeedWithData:(ParserData *)parserData; - -@end diff --git a/Modules/ParserObjC/Sources/ParserObjC/RSAtomParser.m b/Modules/ParserObjC/Sources/ParserObjC/RSAtomParser.m deleted file mode 100755 index eaaeeb638..000000000 --- a/Modules/ParserObjC/Sources/ParserObjC/RSAtomParser.m +++ /dev/null @@ -1,679 +0,0 @@ -// -// RSAtomParser.m -// RSParser -// -// Created by Brent Simmons on 1/15/15. -// Copyright (c) 2015 Ranchero Software LLC. All rights reserved. -// - - -#import "RSAtomParser.h" -#import "RSSAXParser.h" -#import "RSParsedFeed.h" -#import "RSParsedArticle.h" -#import "NSString+RSParser.h" -#import "RSDateParser.h" -#import "ParserData.h" -#import "RSParsedEnclosure.h" -#import "RSParsedAuthor.h" - -#import - -@interface RSAtomParser () - -@property (nonatomic) NSData *feedData; -@property (nonatomic) NSString *urlString; -@property (nonatomic) BOOL endFeedFound; -@property (nonatomic) BOOL parsingXHTML; -@property (nonatomic) BOOL parsingSource; -@property (nonatomic) BOOL parsingArticle; -@property (nonatomic) BOOL parsingAuthor; -@property (nonatomic) NSMutableArray *attributesStack; -@property (nonatomic, readonly) NSDictionary *currentAttributes; -@property (nonatomic) NSMutableString *xhtmlString; -@property (nonatomic) NSString *link; -@property (nonatomic) NSString *title; -@property (nonatomic) NSMutableArray *articles; -@property (nonatomic) NSDate *dateParsed; -@property (nonatomic) RSSAXParser *parser; -@property (nonatomic, readonly) RSParsedArticle *currentArticle; -@property (nonatomic) RSParsedAuthor *currentAuthor; -@property (nonatomic, readonly) NSDate *currentDate; -@property (nonatomic) NSString *language; - -@end - - -@implementation RSAtomParser - -#pragma mark - Class Methods - -+ (RSParsedFeed *)parseFeedWithData:(ParserData *)parserData { - - RSAtomParser *parser = [[[self class] alloc] initWithParserData:parserData]; - return [parser parseFeed]; -} - - -#pragma mark - Init - -- (instancetype)initWithParserData:(ParserData *)parserData { - - self = [super init]; - if (!self) { - return nil; - } - - _feedData = parserData.data; - _urlString = parserData.url; - _parser = [[RSSAXParser alloc] initWithDelegate:self]; - _attributesStack = [NSMutableArray new]; - _articles = [NSMutableArray new]; - - return self; -} - - -#pragma mark - API - -- (RSParsedFeed *)parseFeed { - - [self parse]; - - RSParsedFeed *parsedFeed = [[RSParsedFeed alloc] initWithURLString:self.urlString title:self.title link:self.link language:self.language articles:self.articles]; - - return parsedFeed; -} - - -#pragma mark - Constants - -static NSString *kTypeKey = @"type"; -static NSString *kXHTMLType = @"xhtml"; -static NSString *kRelKey = @"rel"; -static NSString *kAlternateValue = @"alternate"; -static NSString *kHrefKey = @"href"; -static NSString *kXMLKey = @"xml"; -static NSString *kBaseKey = @"base"; -static NSString *kLangKey = @"lang"; -static NSString *kXMLBaseKey = @"xml:base"; -static NSString *kXMLLangKey = @"xml:lang"; -static NSString *kTextHTMLValue = @"text/html"; -static NSString *kRelatedValue = @"related"; -static NSString *kEnclosureValue = @"enclosure"; -static NSString *kShortURLValue = @"shorturl"; -static NSString *kHTMLValue = @"html"; -static NSString *kEnValue = @"en"; -static NSString *kTextValue = @"text"; -static NSString *kSelfValue = @"self"; -static NSString *kLengthKey = @"length"; -static NSString *kTitleKey = @"title"; - -static const char *kID = "id"; -static const NSInteger kIDLength = 3; - -static const char *kTitle = "title"; -static const NSInteger kTitleLength = 6; - -static const char *kContent = "content"; -static const NSInteger kContentLength = 8; - -static const char *kSummary = "summary"; -static const NSInteger kSummaryLength = 8; - -static const char *kLink = "link"; -static const NSInteger kLinkLength = 5; - -static const char *kPublished = "published"; -static const NSInteger kPublishedLength = 10; - -static const char *kIssued = "issued"; -static const NSInteger kIssuedLength = 7; - -static const char *kUpdated = "updated"; -static const NSInteger kUpdatedLength = 8; - -static const char *kModified = "modified"; -static const NSInteger kModifiedLength = 9; - -static const char *kAuthor = "author"; -static const NSInteger kAuthorLength = 7; - -static const char *kName = "name"; -static const NSInteger kNameLength = 5; - -static const char *kEmail = "email"; -static const NSInteger kEmailLength = 6; - -static const char *kURI = "uri"; -static const NSInteger kURILength = 4; - -static const char *kEntry = "entry"; -static const NSInteger kEntryLength = 6; - -static const char *kSource = "source"; -static const NSInteger kSourceLength = 7; - -static const char *kFeed = "feed"; -static const NSInteger kFeedLength = 5; - -static const char *kType = "type"; -static const NSInteger kTypeLength = 5; - -static const char *kRel = "rel"; -static const NSInteger kRelLength = 4; - -static const char *kAlternate = "alternate"; -static const NSInteger kAlternateLength = 10; - -static const char *kHref = "href"; -static const NSInteger kHrefLength = 5; - -static const char *kXML = "xml"; -static const NSInteger kXMLLength = 4; - -static const char *kBase = "base"; -static const NSInteger kBaseLength = 5; - -static const char *kLang = "lang"; -static const NSInteger kLangLength = 5; - -static const char *kTextHTML = "text/html"; -static const NSInteger kTextHTMLLength = 10; - -static const char *kRelated = "related"; -static const NSInteger kRelatedLength = 8; - -static const char *kShortURL = "shorturl"; -static const NSInteger kShortURLLength = 9; - -static const char *kHTML = "html"; -static const NSInteger kHTMLLength = 5; - -static const char *kEn = "en"; -static const NSInteger kEnLength = 3; - -static const char *kText = "text"; -static const NSInteger kTextLength = 5; - -static const char *kSelf = "self"; -static const NSInteger kSelfLength = 5; - -static const char *kEnclosure = "enclosure"; -static const NSInteger kEnclosureLength = 10; - -static const char *kLength = "length"; -static const NSInteger kLengthLength = 7; - -#pragma mark - Parsing - -- (void)parse { - - self.dateParsed = [NSDate date]; - - @autoreleasepool { - [self.parser parseData:self.feedData]; - [self.parser finishParsing]; - } -} - - -- (void)addArticle { - - RSParsedArticle *article = [[RSParsedArticle alloc] initWithFeedURL:self.urlString]; - article.dateParsed = self.dateParsed; - - [self.articles addObject:article]; -} - - -- (RSParsedArticle *)currentArticle { - - return self.articles.lastObject; -} - - -- (NSDictionary *)currentAttributes { - - return self.attributesStack.lastObject; -} - - -- (NSDate *)currentDate { - - return RSDateWithBytes(self.parser.currentCharacters.bytes, self.parser.currentCharacters.length); -} - - -- (void)addFeedLink { - - if (self.link && self.link.length > 0) { - return; - } - - NSString *related = self.currentAttributes[kRelKey]; - if (related == kAlternateValue) { - self.link = self.currentAttributes[kHrefKey]; - } -} - - -- (void)addFeedTitle { - - if (self.title.length < 1) { - self.title = [self currentString]; - } -} - -- (void)addFeedLanguage { - - if (self.language.length < 0) { - self.language = self.currentAttributes[kXMLLangKey] -; - } -} - -- (void)addLink { - - NSDictionary *attributes = self.currentAttributes; - - NSString *urlString = attributes[kHrefKey]; - if (urlString.length < 1) { - return; - } - - RSParsedArticle *article = self.currentArticle; - - NSString *rel = attributes[kRelKey]; - if (rel.length < 1) { - rel = kAlternateValue; - } - - if (rel == kRelatedValue) { - if (!article.link) { - article.link = urlString; - } - } - else if (rel == kAlternateValue) { - if (!article.permalink) { - article.permalink = urlString; - } - } - else if (rel == kEnclosureValue) { - RSParsedEnclosure *enclosure = [self enclosureWithURLString:urlString attributes:attributes]; - [article addEnclosure:enclosure]; - } -} - -- (RSParsedEnclosure *)enclosureWithURLString:(NSString *)urlString attributes:(NSDictionary *)attributes { - - RSParsedEnclosure *enclosure = [[RSParsedEnclosure alloc] init]; - enclosure.url = urlString; - enclosure.title = attributes[kTitleKey]; - enclosure.mimeType = attributes[kTypeKey]; - enclosure.length = [attributes[kLengthKey] integerValue]; - - return enclosure; -} - -- (void)addContent { - - self.currentArticle.body = [self currentString]; -} - - -- (void)addSummary { - - if (!self.currentArticle.body) { - self.currentArticle.body = [self currentString]; - } -} - - -- (NSString *)currentString { - - return self.parser.currentStringWithTrimmedWhitespace; -} - - -- (void)addArticleElement:(const xmlChar *)localName prefix:(const xmlChar *)prefix { - - if (prefix) { - return; - } - - if (RSSAXEqualTags(localName, kID, kIDLength)) { - self.currentArticle.guid = [self currentString]; - } - - else if (RSSAXEqualTags(localName, kTitle, kTitleLength)) { - self.currentArticle.title = [self currentString]; - } - - else if (RSSAXEqualTags(localName, kContent, kContentLength)) { - [self addContent]; - } - - else if (RSSAXEqualTags(localName, kSummary, kSummaryLength)) { - [self addSummary]; - } - - else if (RSSAXEqualTags(localName, kLink, kLinkLength)) { - [self addLink]; - } - - else if (RSSAXEqualTags(localName, kPublished, kPublishedLength)) { - self.currentArticle.datePublished = self.currentDate; - } - - else if (RSSAXEqualTags(localName, kUpdated, kUpdatedLength)) { - self.currentArticle.dateModified = self.currentDate; - } - - // Atom 0.3 dates - else if (RSSAXEqualTags(localName, kIssued, kIssuedLength)) { - if (!self.currentArticle.datePublished) { - self.currentArticle.datePublished = self.currentDate; - } - } - else if (RSSAXEqualTags(localName, kModified, kModifiedLength)) { - if (!self.currentArticle.dateModified) { - self.currentArticle.dateModified = self.currentDate; - } - } -} - - -- (void)addXHTMLTag:(const xmlChar *)localName { - - if (!localName) { - return; - } - - [self.xhtmlString appendString:@"<"]; - [self.xhtmlString appendString:[NSString stringWithUTF8String:(const char *)localName]]; - - if (self.currentAttributes.count < 1) { - [self.xhtmlString appendString:@">"]; - return; - } - - for (NSString *oneKey in self.currentAttributes) { - - [self.xhtmlString appendString:@" "]; - - NSString *oneValue = self.currentAttributes[oneKey]; - [self.xhtmlString appendString:oneKey]; - - [self.xhtmlString appendString:@"=\""]; - - oneValue = [oneValue stringByReplacingOccurrencesOfString:@"\"" withString:@"""]; - [self.xhtmlString appendString:oneValue]; - - [self.xhtmlString appendString:@"\""]; - } - - [self.xhtmlString appendString:@">"]; -} - - -#pragma mark - RSSAXParserDelegate - -- (void)saxParser:(RSSAXParser *)SAXParser XMLStartElement:(const xmlChar *)localName prefix:(const xmlChar *)prefix uri:(const xmlChar *)uri numberOfNamespaces:(NSInteger)numberOfNamespaces namespaces:(const xmlChar **)namespaces numberOfAttributes:(NSInteger)numberOfAttributes numberDefaulted:(int)numberDefaulted attributes:(const xmlChar **)attributes { - - if (self.endFeedFound) { - return; - } - - NSDictionary *xmlAttributes = [self.parser attributesDictionary:attributes numberOfAttributes:numberOfAttributes]; - if (!xmlAttributes) { - xmlAttributes = [NSDictionary dictionary]; - } - [self.attributesStack addObject:xmlAttributes]; - - if (self.parsingXHTML) { - [self addXHTMLTag:localName]; - return; - } - - if (RSSAXEqualTags(localName, kEntry, kEntryLength)) { - self.parsingArticle = YES; - [self addArticle]; - return; - } - - if (RSSAXEqualTags(localName, kAuthor, kAuthorLength)) { - self.parsingAuthor = YES; - self.currentAuthor = [[RSParsedAuthor alloc] init]; - return; - } - - if (RSSAXEqualTags(localName, kSource, kSourceLength)) { - self.parsingSource = YES; - return; - } - - BOOL isContentTag = RSSAXEqualTags(localName, kContent, kContentLength); - BOOL isSummaryTag = RSSAXEqualTags(localName, kSummary, kSummaryLength); - if (self.parsingArticle && (isContentTag || isSummaryTag)) { - - if (isContentTag) { - self.currentArticle.language = xmlAttributes[kXMLLangKey]; - } - - NSString *contentType = xmlAttributes[kTypeKey]; - if ([contentType isEqualToString:kXHTMLType]) { - self.parsingXHTML = YES; - self.xhtmlString = [NSMutableString stringWithString:@""]; - return; - } - } - - if (!self.parsingArticle && RSSAXEqualTags(localName, kLink, kLinkLength)) { - [self addFeedLink]; - return; - } - - if (RSSAXEqualTags(localName, kFeed, kFeedLength)) { - [self addFeedLanguage]; - } - - [self.parser beginStoringCharacters]; -} - - -- (void)saxParser:(RSSAXParser *)SAXParser XMLEndElement:(const xmlChar *)localName prefix:(const xmlChar *)prefix uri:(const xmlChar *)uri { - - if (RSSAXEqualTags(localName, kFeed, kFeedLength)) { - self.endFeedFound = YES; - return; - } - - if (self.endFeedFound) { - return; - } - - if (self.parsingXHTML) { - - BOOL isContentTag = RSSAXEqualTags(localName, kContent, kContentLength); - BOOL isSummaryTag = RSSAXEqualTags(localName, kSummary, kSummaryLength); - - if (self.parsingArticle && (isContentTag || isSummaryTag)) { - - if (isContentTag) { - self.currentArticle.body = [self.xhtmlString copy]; - } - - else if (isSummaryTag) { - if (self.currentArticle.body.length < 1) { - self.currentArticle.body = [self.xhtmlString copy]; - } - } - } - - if (isContentTag || isSummaryTag) { - self.parsingXHTML = NO; - } - - [self.xhtmlString appendString:@""]; - } - - else if (self.parsingAuthor) { - - if (RSSAXEqualTags(localName, kAuthor, kAuthorLength)) { - self.parsingAuthor = NO; - RSParsedAuthor *author = self.currentAuthor; - if (author.name || author.emailAddress || author.url) { - [self.currentArticle addAuthor:author]; - } - self.currentAuthor = nil; - } - else if (RSSAXEqualTags(localName, kName, kNameLength)) { - self.currentAuthor.name = [self currentString]; - } - else if (RSSAXEqualTags(localName, kEmail, kEmailLength)) { - self.currentAuthor.emailAddress = [self currentString]; - } - else if (RSSAXEqualTags(localName, kURI, kURILength)) { - self.currentAuthor.url = [self currentString]; - } - } - - else if (RSSAXEqualTags(localName, kEntry, kEntryLength)) { - self.parsingArticle = NO; - } - - else if (self.parsingArticle && !self.parsingSource) { - [self addArticleElement:localName prefix:prefix]; - } - - else if (RSSAXEqualTags(localName, kSource, kSourceLength)) { - self.parsingSource = NO; - } - - else if (!self.parsingArticle && !self.parsingSource && RSSAXEqualTags(localName, kTitle, kTitleLength)) { - [self addFeedTitle]; - } - - [self.attributesStack removeLastObject]; -} - - -- (NSString *)saxParser:(RSSAXParser *)SAXParser internedStringForName:(const xmlChar *)name prefix:(const xmlChar *)prefix { - - if (prefix && RSSAXEqualTags(prefix, kXML, kXMLLength)) { - - if (RSSAXEqualTags(name, kBase, kBaseLength)) { - return kXMLBaseKey; - } - if (RSSAXEqualTags(name, kLang, kLangLength)) { - return kXMLLangKey; - } - } - - if (prefix) { - return nil; - } - - if (RSSAXEqualTags(name, kRel, kRelLength)) { - return kRelKey; - } - - if (RSSAXEqualTags(name, kType, kTypeLength)) { - return kTypeKey; - } - - if (RSSAXEqualTags(name, kHref, kHrefLength)) { - return kHrefKey; - } - - if (RSSAXEqualTags(name, kAlternate, kAlternateLength)) { - return kAlternateValue; - } - - if (RSSAXEqualTags(name, kLength, kLengthLength)) { - return kLengthKey; - } - - if (RSSAXEqualTags(name, kTitle, kTitleLength)) { - return kTitleKey; - } - - return nil; -} - - -static BOOL equalBytes(const void *bytes1, const void *bytes2, NSUInteger length) { - - return memcmp(bytes1, bytes2, length) == 0; -} - - -- (NSString *)saxParser:(RSSAXParser *)SAXParser internedStringForValue:(const void *)bytes length:(NSUInteger)length { - - static const NSUInteger alternateLength = kAlternateLength - 1; - static const NSUInteger textHTMLLength = kTextHTMLLength - 1; - static const NSUInteger relatedLength = kRelatedLength - 1; - static const NSUInteger shortURLLength = kShortURLLength - 1; - static const NSUInteger htmlLength = kHTMLLength - 1; - static const NSUInteger enLength = kEnLength - 1; - static const NSUInteger textLength = kTextLength - 1; - static const NSUInteger selfLength = kSelfLength - 1; - static const NSUInteger enclosureLength = kEnclosureLength - 1; - - if (length == alternateLength && equalBytes(bytes, kAlternate, alternateLength)) { - return kAlternateValue; - } - - if (length == enclosureLength && equalBytes(bytes, kEnclosure, enclosureLength)) { - return kEnclosureValue; - } - - if (length == textHTMLLength && equalBytes(bytes, kTextHTML, textHTMLLength)) { - return kTextHTMLValue; - } - - if (length == relatedLength && equalBytes(bytes, kRelated, relatedLength)) { - return kRelatedValue; - } - - if (length == shortURLLength && equalBytes(bytes, kShortURL, shortURLLength)) { - return kShortURLValue; - } - - if (length == htmlLength && equalBytes(bytes, kHTML, htmlLength)) { - return kHTMLValue; - } - - if (length == enLength && equalBytes(bytes, kEn, enLength)) { - return kEnValue; - } - - if (length == textLength && equalBytes(bytes, kText, textLength)) { - return kTextValue; - } - - if (length == selfLength && equalBytes(bytes, kSelf, selfLength)) { - return kSelfValue; - } - - return nil; -} - - -- (void)saxParser:(RSSAXParser *)SAXParser XMLCharactersFound:(const unsigned char *)characters length:(NSUInteger)length { - - if (self.parsingXHTML) { - NSString *s = [[NSString alloc] initWithBytesNoCopy:(void *)characters length:length encoding:NSUTF8StringEncoding freeWhenDone:NO]; - if (s == nil) { - return; - } - // libxml decodes all entities; we need to re-encode certain characters - // (<, >, and &) when inside XHTML text content. - [self.xhtmlString appendString:s.rsparser_stringByEncodingRequiredEntities]; - } -} - -@end diff --git a/Modules/ParserObjC/Sources/ParserObjC/RSDateParser.h b/Modules/ParserObjC/Sources/ParserObjC/RSDateParser.h deleted file mode 100755 index 5c3745a32..000000000 --- a/Modules/ParserObjC/Sources/ParserObjC/RSDateParser.h +++ /dev/null @@ -1,22 +0,0 @@ -// -// RSDateParser.h -// RSParser -// -// Created by Brent Simmons on 3/25/15. -// Copyright (c) 2015 Ranchero Software, LLC. All rights reserved. -// - -@import Foundation; - - -// Common web dates -- RFC 822 and 8601 -- are handled here: the formats you find in JSON and XML feeds. -// These may return nil. They may also return garbage, given bad input. - -NSDate *RSDateWithString(NSString *dateString); - -// If you're using a SAX parser, you have the bytes and don't need to convert to a string first. -// It's faster and uses less memory. -// (Assumes bytes are UTF-8 or ASCII. If you're using the libxml SAX parser, this will work.) - -NSDate *RSDateWithBytes(const char *bytes, NSUInteger numberOfBytes); - diff --git a/Modules/ParserObjC/Sources/ParserObjC/RSDateParser.m b/Modules/ParserObjC/Sources/ParserObjC/RSDateParser.m deleted file mode 100755 index cb9c572d3..000000000 --- a/Modules/ParserObjC/Sources/ParserObjC/RSDateParser.m +++ /dev/null @@ -1,461 +0,0 @@ -// -// RSDateParser.m -// RSParser -// -// Created by Brent Simmons on 3/25/15. -// Copyright (c) 2015 Ranchero Software, LLC. All rights reserved. -// - - -#import "RSDateParser.h" -#import - - -typedef struct { - const char *abbreviation; - const NSInteger offsetHours; - const NSInteger offsetMinutes; -} RSTimeZoneAbbreviationAndOffset; - - -#define kNumberOfTimeZones 96 - -static const RSTimeZoneAbbreviationAndOffset timeZoneTable[kNumberOfTimeZones] = { - {"GMT", 0, 0}, //Most common at top, for performance - {"PDT", -7, 0}, {"PST", -8, 0}, {"EST", -5, 0}, {"EDT", -4, 0}, - {"MDT", -6, 0}, {"MST", -7, 0}, {"CST", -6, 0}, {"CDT", -5, 0}, - {"ACT", -8, 0}, {"AFT", 4, 30}, {"AMT", 4, 0}, {"ART", -3, 0}, - {"AST", 3, 0}, {"AZT", 4, 0}, {"BIT", -12, 0}, {"BDT", 8, 0}, - {"ACST", 9, 30}, {"AEST", 10, 0}, {"AKST", -9, 0}, {"AMST", 5, 0}, - {"AWST", 8, 0}, {"AZOST", -1, 0}, {"BIOT", 6, 0}, {"BRT", -3, 0}, - {"BST", 6, 0}, {"BTT", 6, 0}, {"CAT", 2, 0}, {"CCT", 6, 30}, - {"CET", 1, 0}, {"CEST", 2, 0}, {"CHAST", 12, 45}, {"ChST", 10, 0}, - {"CIST", -8, 0}, {"CKT", -10, 0}, {"CLT", -4, 0}, {"CLST", -3, 0}, - {"COT", -5, 0}, {"COST", -4, 0}, {"CVT", -1, 0}, {"CXT", 7, 0}, - {"EAST", -6, 0}, {"EAT", 3, 0}, {"ECT", -4, 0}, {"EEST", 3, 0}, - {"EET", 2, 0}, {"FJT", 12, 0}, {"FKST", -4, 0}, {"GALT", -6, 0}, - {"GET", 4, 0}, {"GFT", -3, 0}, {"GILT", 7, 0}, {"GIT", -9, 0}, - {"GST", -2, 0}, {"GYT", -4, 0}, {"HAST", -10, 0}, {"HKT", 8, 0}, - {"HMT", 5, 0}, {"IRKT", 8, 0}, {"IRST", 3, 30}, {"IST", 2, 0}, - {"JST", 9, 0}, {"KRAT", 7, 0}, {"KST", 9, 0}, {"LHST", 10, 30}, - {"LINT", 14, 0}, {"MAGT", 11, 0}, {"MIT", -9, 30}, {"MSK", 3, 0}, - {"MUT", 4, 0}, {"NDT", -2, 30}, {"NFT", 11, 30}, {"NPT", 5, 45}, - {"NT", -3, 30}, {"OMST", 6, 0}, {"PETT", 12, 0}, {"PHOT", 13, 0}, - {"PKT", 5, 0}, {"RET", 4, 0}, {"SAMT", 4, 0}, {"SAST", 2, 0}, - {"SBT", 11, 0}, {"SCT", 4, 0}, {"SLT", 5, 30}, {"SST", 8, 0}, - {"TAHT", -10, 0}, {"THA", 7, 0}, {"UYT", -3, 0}, {"UYST", -2, 0}, - {"VET", -4, 30}, {"VLAT", 10, 0}, {"WAT", 1, 0}, {"WET", 0, 0}, - {"WEST", 1, 0}, {"YAKT", 9, 0}, {"YEKT", 5, 0} -}; /*See http://en.wikipedia.org/wiki/List_of_time_zone_abbreviations for list*/ - - - -#pragma mark - Parser - -enum { - RSJanuary = 1, - RSFebruary, - RSMarch, - RSApril, - RSMay, - RSJune, - RSJuly, - RSAugust, - RSSeptember, - RSOctober, - RSNovember, - RSDecember -}; - -static NSInteger nextMonthValue(const char *bytes, NSUInteger numberOfBytes, NSUInteger startingIndex, NSUInteger *finalIndex) { - - /*Months are 1-based -- January is 1, Dec is 12. - Lots of short-circuits here. Not strict. GIGO.*/ - - NSUInteger i;// = startingIndex; - NSUInteger numberOfAlphaCharactersFound = 0; - char monthCharacters[3] = {0, 0, 0}; - - for (i = startingIndex; i < numberOfBytes; i++) { - - *finalIndex = i; - char character = bytes[i]; - - BOOL isAlphaCharacter = (BOOL)isalpha(character); - if (!isAlphaCharacter && numberOfAlphaCharactersFound < 1) - continue; - if (!isAlphaCharacter && numberOfAlphaCharactersFound > 0) - break; - - numberOfAlphaCharactersFound++; - if (numberOfAlphaCharactersFound == 1) { - if (character == 'F' || character == 'f') - return RSFebruary; - if (character == 'S' || character == 's') - return RSSeptember; - if (character == 'O' || character == 'o') - return RSOctober; - if (character == 'N' || character == 'n') - return RSNovember; - if (character == 'D' || character == 'd') - return RSDecember; - } - - monthCharacters[numberOfAlphaCharactersFound - 1] = character; - if (numberOfAlphaCharactersFound >=3) - break; - } - - if (numberOfAlphaCharactersFound < 2) - return NSNotFound; - - if (monthCharacters[0] == 'J' || monthCharacters[0] == 'j') { //Jan, Jun, Jul - if (monthCharacters[1] == 'a' || monthCharacters[1] == 'A') - return RSJanuary; - if (monthCharacters[1] == 'u' || monthCharacters[1] == 'U') { - if (monthCharacters[2] == 'n' || monthCharacters[2] == 'N') - return RSJune; - return RSJuly; - } - return RSJanuary; - } - - if (monthCharacters[0] == 'M' || monthCharacters[0] == 'm') { //March, May - if (monthCharacters[2] == 'y' || monthCharacters[2] == 'Y') - return RSMay; - return RSMarch; - } - - if (monthCharacters[0] == 'A' || monthCharacters[0] == 'a') { //April, August - if (monthCharacters[1] == 'u' || monthCharacters[1] == 'U') - return RSAugust; - return RSApril; - } - - return RSJanuary; //should never get here -} - - -static NSInteger nextNumericValue(const char *bytes, NSUInteger numberOfBytes, NSUInteger startingIndex, NSUInteger maximumNumberOfDigits, NSUInteger *finalIndex) { - - /*maximumNumberOfDigits has a maximum limit of 4 (for time zone offsets and years). - *finalIndex will be the index of the last character looked at.*/ - - if (maximumNumberOfDigits > 4) - maximumNumberOfDigits = 4; - - NSUInteger i = 0; - NSUInteger numberOfDigitsFound = 0; - NSInteger digits[4] = {0, 0, 0, 0}; - - for (i = startingIndex; i < numberOfBytes; i++) { - *finalIndex = i; - BOOL isDigit = (BOOL)isdigit(bytes[i]); - if (!isDigit && numberOfDigitsFound < 1) - continue; - if (!isDigit && numberOfDigitsFound > 0) - break; - digits[numberOfDigitsFound] = bytes[i] - 48; // '0' is 48 - numberOfDigitsFound++; - if (numberOfDigitsFound >= maximumNumberOfDigits) - break; - } - - if (numberOfDigitsFound < 1) - return NSNotFound; - if (numberOfDigitsFound == 1) - return digits[0]; - if (numberOfDigitsFound == 2) - return (digits[0] * 10) + digits[1]; - if (numberOfDigitsFound == 3) - return (digits[0] * 100) + (digits[1] * 10) + digits[2]; - return (digits[0] * 1000) + (digits[1] * 100) + (digits[2] * 10) + digits[3]; -} - - -static BOOL hasAtLeastOneAlphaCharacter(const char *s) { - - NSUInteger length = strlen(s); - NSUInteger i = 0; - - for (i = 0; i < length; i++) { - if (isalpha(s[i])) - return YES; - } - - return NO; -} - - -#pragma mark - Time Zones and offsets - -static NSInteger offsetInSecondsForTimeZoneAbbreviation(const char *abbreviation) { - - /*Linear search should be fine. It's a C array, and short (under 100 items). - Most common time zones are at the beginning of the array. (We can tweak this as needed.)*/ - - NSUInteger i; - - for (i = 0; i < kNumberOfTimeZones; i++) { - - RSTimeZoneAbbreviationAndOffset zone = timeZoneTable[i]; - if (strcmp(abbreviation, zone.abbreviation) == 0) { - if (zone.offsetHours < 0) - return (zone.offsetHours * 60 * 60) - (zone.offsetMinutes * 60); - return (zone.offsetHours * 60 * 60) + (zone.offsetMinutes * 60); - } - } - - return 0; -} - - -static NSInteger offsetInSecondsForOffsetCharacters(const char *timeZoneCharacters) { - - BOOL isPlus = timeZoneCharacters[0] == '+'; - NSUInteger finalIndex = 0; - NSInteger hours = nextNumericValue(timeZoneCharacters, strlen(timeZoneCharacters), 0, 2, &finalIndex); - NSInteger minutes = nextNumericValue(timeZoneCharacters, strlen(timeZoneCharacters), finalIndex + 1, 2, &finalIndex); - - if (hours == NSNotFound) - hours = 0; - if (minutes == NSNotFound) - minutes = 0; - if (hours == 0 && minutes == 0) - return 0; - - NSInteger seconds = (hours * 60 * 60) + (minutes * 60); - if (!isPlus) - seconds = 0 - seconds; - return seconds; -} - - -static const char *rs_GMT = "GMT"; -static const char *rs_UTC = "UTC"; - -static NSInteger parsedTimeZoneOffset(const char *bytes, NSUInteger numberOfBytes, NSUInteger startingIndex) { - - /*Examples: GMT Z +0000 -0000 +07:00 -0700 PDT EST - Parse into char[5] -- drop any colon characters. If numeric, calculate seconds from GMT. - If alpha, special-case GMT and Z, otherwise look up in time zone list to get offset.*/ - - char timeZoneCharacters[6] = {0, 0, 0, 0, 0, 0}; //nil-terminated last character - NSUInteger i = 0; - NSUInteger numberOfCharactersFound = 0; - - for (i = startingIndex; i < numberOfBytes; i++) { - char ch = bytes[i]; - if (ch == ':' || ch == ' ') - continue; - if (isdigit(ch) || isalpha(ch) || ch == '+' || ch == '-') { - numberOfCharactersFound++; - timeZoneCharacters[numberOfCharactersFound - 1] = ch; - } - if (numberOfCharactersFound >= 5) - break; - } - - if (numberOfCharactersFound < 1 || timeZoneCharacters[0] == 'Z' || timeZoneCharacters[0] == 'z') - return 0; - if (strcasestr(timeZoneCharacters, rs_GMT) != nil || strcasestr(timeZoneCharacters, rs_UTC)) - return 0; - - if (hasAtLeastOneAlphaCharacter(timeZoneCharacters)) - return offsetInSecondsForTimeZoneAbbreviation(timeZoneCharacters); - return offsetInSecondsForOffsetCharacters(timeZoneCharacters); -} - - -#pragma mark - Date Creation - -static NSDate *dateWithYearMonthDayHourMinuteSecondAndTimeZoneOffset(NSInteger year, NSInteger month, NSInteger day, NSInteger hour, NSInteger minute, NSInteger second, NSInteger milliseconds, NSInteger timeZoneOffset) { - - struct tm timeInfo; - timeInfo.tm_sec = (int)second; - timeInfo.tm_min = (int)minute; - timeInfo.tm_hour = (int)hour; - timeInfo.tm_mday = (int)day; - timeInfo.tm_mon = (int)(month - 1); //It's 1-based coming in - timeInfo.tm_year = (int)(year - 1900); //see time.h -- it's years since 1900 - timeInfo.tm_wday = -1; - timeInfo.tm_yday = -1; - timeInfo.tm_isdst = -1; - timeInfo.tm_gmtoff = 0;//[timeZone secondsFromGMT]; - timeInfo.tm_zone = nil; - - NSTimeInterval rawTime = (NSTimeInterval)(timegm(&timeInfo) - timeZoneOffset); //timegm instead of mktime (which uses local time zone) - if (rawTime == (time_t)ULONG_MAX) { - - /*NSCalendar is super-amazingly-slow (which is partly why RSDateParser exists), so this is used only when the date is far enough in the future (19 January 2038 03:14:08Z on 32-bit systems) that timegm fails. If profiling says that this is a performance issue, then you've got a weird app that needs to work with dates far in the future.*/ - - NSDateComponents *dateComponents = [NSDateComponents new]; - - dateComponents.timeZone = [NSTimeZone timeZoneForSecondsFromGMT:timeZoneOffset]; - dateComponents.year = year; - dateComponents.month = month; - dateComponents.day = day; - dateComponents.hour = hour; - dateComponents.minute = minute; - dateComponents.second = second + (milliseconds / 1000); - - return [[NSCalendar autoupdatingCurrentCalendar] dateFromComponents:dateComponents]; - } - - if (milliseconds > 0) { - rawTime += ((float)milliseconds / 1000.0f); - } - - return [NSDate dateWithTimeIntervalSince1970:rawTime]; -} - - -#pragma mark - Standard Formats - -static NSDate *RSParsePubDateWithBytes(const char *bytes, NSUInteger numberOfBytes) { - - /*@"EEE',' dd MMM yyyy HH':'mm':'ss ZZZ" - @"EEE, dd MMM yyyy HH:mm:ss zzz" - @"dd MMM yyyy HH:mm zzz" - @"dd MMM yyyy HH:mm ZZZ" - @"EEE, dd MMM yyyy" - @"EEE, dd MMM yyyy HH:mm zzz" - etc.*/ - - NSUInteger finalIndex = 0; - NSInteger day = 1; - NSInteger month = RSJanuary; - NSInteger year = 1970; - NSInteger hour = 0; - NSInteger minute = 0; - NSInteger second = 0; - NSInteger timeZoneOffset = 0; - - day = nextNumericValue(bytes, numberOfBytes, 0, 2, &finalIndex); - if (day < 1 || day == NSNotFound) - day = 1; - - month = nextMonthValue(bytes, numberOfBytes, finalIndex + 1, &finalIndex); - year = nextNumericValue(bytes, numberOfBytes, finalIndex + 1, 4, &finalIndex); - hour = nextNumericValue(bytes, numberOfBytes, finalIndex + 1, 2, &finalIndex); - if (hour == NSNotFound) - hour = 0; - - minute = nextNumericValue(bytes, numberOfBytes, finalIndex + 1, 2, &finalIndex); - if (minute == NSNotFound) - minute = 0; - - NSUInteger currentIndex = finalIndex + 1; - - BOOL hasSeconds = (currentIndex < numberOfBytes) && (bytes[currentIndex] == ':'); - if (hasSeconds) - second = nextNumericValue(bytes, numberOfBytes, currentIndex, 2, &finalIndex); - - currentIndex = finalIndex + 1; - BOOL hasTimeZone = (currentIndex < numberOfBytes) && (bytes[currentIndex] == ' '); - if (hasTimeZone) - timeZoneOffset = parsedTimeZoneOffset(bytes, numberOfBytes, currentIndex); - - return dateWithYearMonthDayHourMinuteSecondAndTimeZoneOffset(year, month, day, hour, minute, second, 0, timeZoneOffset); -} - - -static NSDate *RSParseW3CWithBytes(const char *bytes, NSUInteger numberOfBytes) { - - /*@"yyyy'-'MM'-'dd'T'HH':'mm':'ss" - @"yyyy-MM-dd'T'HH:mm:sszzz" - @"yyyy-MM-dd'T'HH:mm:ss'.'SSSzzz" - etc.*/ - - NSUInteger finalIndex = 0; - NSInteger day = 1; - NSInteger month = RSJanuary; - NSInteger year = 1970; - NSInteger hour = 0; - NSInteger minute = 0; - NSInteger second = 0; - NSInteger milliseconds = 0; - NSInteger timeZoneOffset = 0; - - year = nextNumericValue(bytes, numberOfBytes, 0, 4, &finalIndex); - month = nextNumericValue(bytes, numberOfBytes, finalIndex + 1, 2, &finalIndex); - day = nextNumericValue(bytes, numberOfBytes, finalIndex + 1, 2, &finalIndex); - hour = nextNumericValue(bytes, numberOfBytes, finalIndex + 1, 2, &finalIndex); - minute = nextNumericValue(bytes, numberOfBytes, finalIndex + 1, 2, &finalIndex); - second = nextNumericValue(bytes, numberOfBytes, finalIndex + 1, 2, &finalIndex); - - NSUInteger currentIndex = finalIndex + 1; - BOOL hasMilliseconds = (currentIndex < numberOfBytes) && (bytes[currentIndex] == '.'); - if (hasMilliseconds) { - milliseconds = nextNumericValue(bytes, numberOfBytes, currentIndex, 3, &finalIndex); - currentIndex = finalIndex + 1; - } - - timeZoneOffset = parsedTimeZoneOffset(bytes, numberOfBytes, currentIndex); - - return dateWithYearMonthDayHourMinuteSecondAndTimeZoneOffset(year, month, day, hour, minute, second, milliseconds, timeZoneOffset); -} - - -static BOOL dateIsPubDate(const char *bytes, NSUInteger numberOfBytes) { - - NSUInteger i = 0; - - for (i = 0; i < numberOfBytes; i++) { - if (bytes[i] == ' ' || bytes[i] == ',') - return YES; - } - - return NO; -} - - -static BOOL dateIsW3CDate(const char *bytes, NSUInteger numberOfBytes) { - - // Something like 2010-11-17T08:40:07-05:00 - // But might be missing T character in the middle. - // Looks for four digits in a row followed by a -. - - for (NSUInteger i = 0; i < numberOfBytes; i++) { - char ch = bytes[i]; - if (ch == ' ' || ch == '\r' || ch == '\n' || ch == '\t') { - continue; - } - if (numberOfBytes - i < 5) { - return NO; - } - return isdigit(ch) && isdigit(bytes[i + 1]) && isdigit(bytes[i + 2]) && isdigit(bytes[i + 3]) && bytes[i + 4] == '-'; - } - - return NO; -} - -static BOOL numberOfBytesIsOutsideReasonableRange(NSUInteger numberOfBytes) { - return numberOfBytes < 6 || numberOfBytes > 150; -} - - -#pragma mark - API - -NSDate *RSDateWithBytes(const char *bytes, NSUInteger numberOfBytes) { - - if (numberOfBytesIsOutsideReasonableRange(numberOfBytes)) - return nil; - - if (dateIsW3CDate(bytes, numberOfBytes)) { - return RSParseW3CWithBytes(bytes, numberOfBytes); - } - if (dateIsPubDate(bytes, numberOfBytes)) - return RSParsePubDateWithBytes(bytes, numberOfBytes); - - // Fallback, in case our detection fails. - return RSParseW3CWithBytes(bytes, numberOfBytes); -} - - -NSDate *RSDateWithString(NSString *dateString) { - - const char *utf8String = [dateString UTF8String]; - return RSDateWithBytes(utf8String, strlen(utf8String)); -} - diff --git a/Modules/ParserObjC/Sources/ParserObjC/RSHTMLLinkParser.h b/Modules/ParserObjC/Sources/ParserObjC/RSHTMLLinkParser.h deleted file mode 100755 index 67c7f9f6c..000000000 --- a/Modules/ParserObjC/Sources/ParserObjC/RSHTMLLinkParser.h +++ /dev/null @@ -1,35 +0,0 @@ -// -// RSHTMLLinkParser.h -// RSParser -// -// Created by Brent Simmons on 8/7/16. -// Copyright © 2016 Ranchero Software, LLC. All rights reserved. -// - -@import Foundation; - -NS_ASSUME_NONNULL_BEGIN - -/*Returns all some_text as RSHTMLLink object array.*/ - -@class ParserData; -@class RSHTMLLink; - -@interface RSHTMLLinkParser : NSObject - -+ (NSArray *)htmlLinksWithParserData:(ParserData *)parserData; - -@end - - -@interface RSHTMLLink : NSObject - -// Any of these, even urlString, may be nil, because HTML can be bad. - -@property (nonatomic, nullable, readonly) NSString *urlString; //absolute -@property (nonatomic, nullable, readonly) NSString *text; -@property (nonatomic, nullable, readonly) NSString *title; //title attribute inside anchor tag - -@end - -NS_ASSUME_NONNULL_END diff --git a/Modules/ParserObjC/Sources/ParserObjC/RSHTMLLinkParser.m b/Modules/ParserObjC/Sources/ParserObjC/RSHTMLLinkParser.m deleted file mode 100755 index 624e33569..000000000 --- a/Modules/ParserObjC/Sources/ParserObjC/RSHTMLLinkParser.m +++ /dev/null @@ -1,154 +0,0 @@ -// -// RSHTMLLinkParser.m -// RSParser -// -// Created by Brent Simmons on 8/7/16. -// Copyright © 2016 Ranchero Software, LLC. All rights reserved. -// - - -#import "RSHTMLLinkParser.h" -#import "RSSAXHTMLParser.h" -#import "RSSAXParser.h" -#import "RSParserInternal.h" -#import "ParserData.h" - -#import - - - -@interface RSHTMLLinkParser() - -@property (nonatomic, readonly) NSMutableArray *links; -@property (nonatomic, readonly) ParserData *parserData; -@property (nonatomic, readonly) NSMutableArray *dictionaries; -@property (nonatomic, readonly) NSURL *baseURL; - -@end - - -@interface RSHTMLLink() - -@property (nonatomic, readwrite) NSString *urlString; //absolute -@property (nonatomic, readwrite) NSString *text; -@property (nonatomic, readwrite) NSString *title; //title attribute inside anchor tag - -@end - - -@implementation RSHTMLLinkParser - - -#pragma mark - Class Methods - -+ (NSArray *)htmlLinksWithParserData:(ParserData *)parserData { - - RSHTMLLinkParser *parser = [[self alloc] initWithParserData:parserData]; - return parser.links; -} - - -#pragma mark - Init - -- (instancetype)initWithParserData:(ParserData *)parserData { - - NSParameterAssert(parserData.data); - NSParameterAssert(parserData.url); - - self = [super init]; - if (!self) { - return nil; - } - - _links = [NSMutableArray new]; - _parserData = parserData; - _dictionaries = [NSMutableArray new]; - _baseURL = [NSURL URLWithString:parserData.url]; - - [self parse]; - - return self; -} - - -#pragma mark - Parse - -- (void)parse { - - RSSAXHTMLParser *parser = [[RSSAXHTMLParser alloc] initWithDelegate:self]; - [parser parseData:self.parserData.data]; - [parser finishParsing]; -} - - -- (RSHTMLLink *)currentLink { - - return self.links.lastObject; -} - - -static NSString *kHrefKey = @"href"; - -- (NSString *)urlStringFromDictionary:(NSDictionary *)d { - - NSString *href = [d rsparser_objectForCaseInsensitiveKey:kHrefKey]; - if (!href) { - return nil; - } - - NSURL *absoluteURL = [NSURL URLWithString:href relativeToURL:self.baseURL]; - return absoluteURL.absoluteString; -} - - -static NSString *kTitleKey = @"title"; - -- (NSString *)titleFromDictionary:(NSDictionary *)d { - - return [d rsparser_objectForCaseInsensitiveKey:kTitleKey]; -} - - -- (void)handleLinkAttributes:(NSDictionary *)d { - - RSHTMLLink *link = self.currentLink; - link.urlString = [self urlStringFromDictionary:d]; - link.title = [self titleFromDictionary:d]; -} - - -static const char *kAnchor = "a"; -static const NSInteger kAnchorLength = 2; - -- (void)saxParser:(RSSAXHTMLParser *)SAXParser XMLStartElement:(const xmlChar *)localName attributes:(const xmlChar **)attributes { - - if (!RSSAXEqualTags(localName, kAnchor, kAnchorLength)) { - return; - } - - RSHTMLLink *link = [RSHTMLLink new]; - [self.links addObject:link]; - - NSDictionary *d = [SAXParser attributesDictionary:attributes]; - if (!RSParserObjectIsEmpty(d)) { - [self handleLinkAttributes:d]; - } - - [SAXParser beginStoringCharacters]; -} - - -- (void)saxParser:(RSSAXParser *)SAXParser XMLEndElement:(const xmlChar *)localName { - - if (!RSSAXEqualTags(localName, kAnchor, kAnchorLength)) { - return; - } - - self.currentLink.text = SAXParser.currentStringWithTrimmedWhitespace; -} - -@end - -@implementation RSHTMLLink - -@end diff --git a/Modules/ParserObjC/Sources/ParserObjC/RSHTMLMetadata.h b/Modules/ParserObjC/Sources/ParserObjC/RSHTMLMetadata.h deleted file mode 100755 index 0010740a1..000000000 --- a/Modules/ParserObjC/Sources/ParserObjC/RSHTMLMetadata.h +++ /dev/null @@ -1,98 +0,0 @@ -// -// RSHTMLMetadata.h -// RSParser -// -// Created by Brent Simmons on 3/6/16. -// Copyright © 2016 Ranchero Software, LLC. All rights reserved. -// - -@import Foundation; -@import CoreGraphics; - -@class RSHTMLMetadataFeedLink; -@class RSHTMLMetadataAppleTouchIcon; -@class RSHTMLMetadataFavicon; -@class RSHTMLOpenGraphProperties; -@class RSHTMLOpenGraphImage; -@class RSHTMLTag; -@class RSHTMLTwitterProperties; - -NS_ASSUME_NONNULL_BEGIN - -__attribute__((swift_attr("@Sendable"))) -@interface RSHTMLMetadata : NSObject - -- (instancetype)initWithURLString:(NSString *)urlString tags:(NSArray *)tags; - -@property (nonatomic, readonly) NSString *baseURLString; -@property (nonatomic, readonly) NSArray *tags; - -@property (nonatomic, readonly) NSArray *faviconLinks DEPRECATED_MSG_ATTRIBUTE("Use the favicons property instead."); -@property (nonatomic, readonly) NSArray *favicons; -@property (nonatomic, readonly) NSArray *appleTouchIcons; -@property (nonatomic, readonly) NSArray *feedLinks; - -@property (nonatomic, readonly) RSHTMLOpenGraphProperties *openGraphProperties; -@property (nonatomic, readonly) RSHTMLTwitterProperties *twitterProperties; - -@end - - -@interface RSHTMLMetadataAppleTouchIcon : NSObject - -@property (nonatomic, readonly) NSString *rel; -@property (nonatomic, nullable, readonly) NSString *sizes; -@property (nonatomic, readonly) CGSize size; -@property (nonatomic, nullable, readonly) NSString *urlString; // Absolute. - -@end - - -@interface RSHTMLMetadataFeedLink : NSObject - -@property (nonatomic, nullable, readonly) NSString *title; -@property (nonatomic, nullable, readonly) NSString *type; -@property (nonatomic, nullable, readonly) NSString *urlString; // Absolute. - -@end - -@interface RSHTMLMetadataFavicon : NSObject - -@property (nonatomic, nullable, readonly) NSString *type; -@property (nonatomic, nullable, readonly) NSString *urlString; - -@end - -@interface RSHTMLOpenGraphProperties : NSObject - -// TODO: the rest. At this writing (Nov. 26, 2017) I just care about og:image. -// See http://ogp.me/ - -- (instancetype)initWithURLString:(NSString *)urlString tags:(NSArray *)tags; - -@property (nonatomic, readonly) NSArray *images; - -@end - -@interface RSHTMLOpenGraphImage : NSObject - -@property (nonatomic, nullable, readonly) NSString *url; -@property (nonatomic, nullable, readonly) NSString *secureURL; -@property (nonatomic, nullable, readonly) NSString *mimeType; -@property (nonatomic, readonly) CGFloat width; -@property (nonatomic, readonly) CGFloat height; -@property (nonatomic, nullable, readonly) NSString *altText; - -@end - -@interface RSHTMLTwitterProperties : NSObject - -// TODO: the rest. At this writing (Nov. 26, 2017) I just care about twitter:image:src. - -- (instancetype)initWithURLString:(NSString *)urlString tags:(NSArray *)tags; - -@property (nonatomic, nullable, readonly) NSString *imageURL; // twitter:image:src - -@end - -NS_ASSUME_NONNULL_END diff --git a/Modules/ParserObjC/Sources/ParserObjC/RSHTMLMetadata.m b/Modules/ParserObjC/Sources/ParserObjC/RSHTMLMetadata.m deleted file mode 100755 index 2def0b078..000000000 --- a/Modules/ParserObjC/Sources/ParserObjC/RSHTMLMetadata.m +++ /dev/null @@ -1,483 +0,0 @@ -// -// RSHTMLMetadata.m -// RSParser -// -// Created by Brent Simmons on 3/6/16. -// Copyright © 2016 Ranchero Software, LLC. All rights reserved. -// - -#import "RSHTMLMetadata.h" -#import "RSParserInternal.h" -#import "RSHTMLTag.h" - - - -static NSString *urlStringFromDictionary(NSDictionary *d); -static NSString *absoluteURLStringWithRelativeURLString(NSString *relativeURLString, NSString *baseURLString); -static NSString *absoluteURLStringWithDictionary(NSDictionary *d, NSString *baseURLString); -static NSArray *objectsOfClassWithTags(Class class, NSArray *tags, NSString *baseURLString); -static NSString *relValue(NSDictionary *d); -static BOOL typeIsFeedType(NSString *type); - -static NSString *kIconRelValue = @"icon"; -static NSString *kHrefKey = @"href"; -static NSString *kSrcKey = @"src"; -static NSString *kAppleTouchIconValue = @"apple-touch-icon"; -static NSString *kAppleTouchIconPrecomposedValue = @"apple-touch-icon-precomposed"; -static NSString *kSizesKey = @"sizes"; -static NSString *kTitleKey = @"title"; -static NSString *kRelKey = @"rel"; -static NSString *kAlternateKey = @"alternate"; -static NSString *kRSSSuffix = @"/rss+xml"; -static NSString *kAtomSuffix = @"/atom+xml"; -static NSString *kJSONSuffix = @"/json"; -static NSString *kTypeKey = @"type"; - -@interface RSHTMLMetadataAppleTouchIcon () - -- (instancetype)initWithTag:(RSHTMLTag *)tag baseURLString:(NSString *)baseURLString; - -@end - - -@interface RSHTMLMetadataFeedLink () - -- (instancetype)initWithTag:(RSHTMLTag *)tag baseURLString:(NSString *)baseURLString; - -@end - -@interface RSHTMLMetadataFavicon () - -- (instancetype)initWithTag:(RSHTMLTag *)tag baseURLString:(NSString *)baseURLString; - -@end - -@implementation RSHTMLMetadata - -#pragma mark - Init - -- (instancetype)initWithURLString:(NSString *)urlString tags:(NSArray *)tags { - - self = [super init]; - if (!self) { - return nil; - } - - _baseURLString = urlString; - _tags = tags; - - _favicons = [self resolvedFaviconLinks]; - - NSArray *appleTouchIconTags = [self appleTouchIconTags]; - _appleTouchIcons = objectsOfClassWithTags([RSHTMLMetadataAppleTouchIcon class], appleTouchIconTags, urlString); - - NSArray *feedLinkTags = [self feedLinkTags]; - _feedLinks = objectsOfClassWithTags([RSHTMLMetadataFeedLink class], feedLinkTags, urlString); - - _openGraphProperties = [[RSHTMLOpenGraphProperties alloc] initWithURLString:urlString tags:tags]; - _twitterProperties = [[RSHTMLTwitterProperties alloc] initWithURLString:urlString tags:tags]; - - return self; -} - -#pragma mark - Private - -- (NSArray *)linkTagsWithMatchingRel:(NSString *)valueToMatch { - - // Case-insensitive; matches a whitespace-delimited word - - NSMutableArray *tags = [NSMutableArray array]; - - for (RSHTMLTag *tag in self.tags) { - - if (tag.type != RSHTMLTagTypeLink || RSParserStringIsEmpty(urlStringFromDictionary(tag.attributes))) { - continue; - } - NSString *oneRelValue = relValue(tag.attributes); - if (oneRelValue) { - NSArray *relValues = [oneRelValue componentsSeparatedByCharactersInSet:NSCharacterSet.whitespaceAndNewlineCharacterSet]; - - for (NSString *relValue in relValues) { - if ([relValue compare:valueToMatch options:NSCaseInsensitiveSearch] == NSOrderedSame) { - [tags addObject:tag]; - break; - } - } - } - } - - return tags; -} - - -- (NSArray *)appleTouchIconTags { - - NSMutableArray *tags = [NSMutableArray new]; - - for (RSHTMLTag *tag in self.tags) { - - if (tag.type != RSHTMLTagTypeLink) { - continue; - } - NSString *oneRelValue = relValue(tag.attributes).lowercaseString; - if ([oneRelValue isEqualToString:kAppleTouchIconValue] || [oneRelValue isEqualToString:kAppleTouchIconPrecomposedValue]) { - [tags addObject:tag]; - } - } - - return tags; -} - - -- (NSArray *)feedLinkTags { - - NSMutableArray *tags = [NSMutableArray new]; - - for (RSHTMLTag *tag in self.tags) { - - if (tag.type != RSHTMLTagTypeLink) { - continue; - } - - NSDictionary *oneDictionary = tag.attributes; - NSString *oneRelValue = relValue(oneDictionary).lowercaseString; - if (![oneRelValue isEqualToString:kAlternateKey]) { - continue; - } - - NSString *oneType = [oneDictionary rsparser_objectForCaseInsensitiveKey:kTypeKey]; - if (!typeIsFeedType(oneType)) { - continue; - } - - if (RSParserStringIsEmpty(urlStringFromDictionary(oneDictionary))) { - continue; - } - - [tags addObject:tag]; - } - - return tags; -} - -- (NSArray *)faviconLinks { - NSMutableArray *urls = [NSMutableArray array]; - - for (RSHTMLMetadataFavicon *favicon in self.favicons) { - [urls addObject:favicon.urlString]; - } - - return urls; -} - -- (NSArray *)resolvedFaviconLinks { - NSArray *tags = [self linkTagsWithMatchingRel:kIconRelValue]; - NSMutableArray *links = [NSMutableArray array]; - NSMutableSet *seenHrefs = [NSMutableSet setWithCapacity:tags.count]; - - for (RSHTMLTag *tag in tags) { - RSHTMLMetadataFavicon *link = [[RSHTMLMetadataFavicon alloc] initWithTag:tag baseURLString:self.baseURLString]; - NSString *urlString = link.urlString; - if (urlString == nil) { - continue; - } - if (![seenHrefs containsObject:urlString]) { - [links addObject:link]; - [seenHrefs addObject:urlString]; - } - } - - return links; -} - -@end - - -static NSString *relValue(NSDictionary *d) { - - return [d rsparser_objectForCaseInsensitiveKey:kRelKey]; -} - - -static NSString *urlStringFromDictionary(NSDictionary *d) { - - NSString *urlString = [d rsparser_objectForCaseInsensitiveKey:kHrefKey]; - if (urlString) { - return urlString; - } - - return [d rsparser_objectForCaseInsensitiveKey:kSrcKey]; -} - - -static NSString *absoluteURLStringWithRelativeURLString(NSString *relativeURLString, NSString *baseURLString) { - - NSURL *url = [NSURL URLWithString:baseURLString]; - if (!url) { - return nil; - } - - NSURL *absoluteURL = [NSURL URLWithString:relativeURLString relativeToURL:url]; - return absoluteURL.absoluteURL.standardizedURL.absoluteString; -} - - -static NSString *absoluteURLStringWithDictionary(NSDictionary *d, NSString *baseURLString) { - - NSString *urlString = urlStringFromDictionary(d); - if (RSParserStringIsEmpty(urlString)) { - return nil; - } - return absoluteURLStringWithRelativeURLString(urlString, baseURLString); -} - - -static NSArray *objectsOfClassWithTags(Class class, NSArray *tags, NSString *baseURLString) { - - NSMutableArray *objects = [NSMutableArray new]; - - for (RSHTMLTag *tag in tags) { - - id oneObject = [[class alloc] initWithTag:tag baseURLString:baseURLString]; - if (oneObject) { - [objects addObject:oneObject]; - } - } - - return objects; -} - - -static BOOL typeIsFeedType(NSString *type) { - - type = type.lowercaseString; - return [type hasSuffix:kRSSSuffix] || [type hasSuffix:kAtomSuffix] || [type hasSuffix:kJSONSuffix]; -} - - -@implementation RSHTMLMetadataAppleTouchIcon - -- (instancetype)initWithTag:(RSHTMLTag *)tag baseURLString:(NSString *)baseURLString { - - self = [super init]; - if (!self) { - return nil; - } - - NSDictionary *d = tag.attributes; - _urlString = absoluteURLStringWithDictionary(d, baseURLString); - _sizes = [d rsparser_objectForCaseInsensitiveKey:kSizesKey]; - _rel = [d rsparser_objectForCaseInsensitiveKey:kRelKey]; - - _size = CGSizeZero; - if (_sizes) { - NSArray *components = [_sizes componentsSeparatedByString:@"x"]; - if (components.count == 2) { - CGFloat width = [components[0] floatValue]; - CGFloat height = [components[1] floatValue]; - _size = CGSizeMake(width, height); - } - } - - return self; -} - -@end - - -@implementation RSHTMLMetadataFeedLink - -- (instancetype)initWithTag:(RSHTMLTag *)tag baseURLString:(NSString *)baseURLString { - - self = [super init]; - if (!self) { - return nil; - } - - NSDictionary *d = tag.attributes; - _urlString = absoluteURLStringWithDictionary(d, baseURLString); - _title = [d rsparser_objectForCaseInsensitiveKey:kTitleKey]; - _type = [d rsparser_objectForCaseInsensitiveKey:kTypeKey]; - - return self; -} - -@end - -@implementation RSHTMLMetadataFavicon - -- (instancetype)initWithTag:(RSHTMLTag *)tag baseURLString:(NSString *)baseURLString { - - self = [super init]; - if (!self) { - return nil; - } - - NSDictionary *d = tag.attributes; - _urlString = absoluteURLStringWithDictionary(d, baseURLString); - _type = [d rsparser_objectForCaseInsensitiveKey:kTypeKey]; - - return self; -} - -@end - -@interface RSHTMLOpenGraphImage () - -@property (nonatomic, readwrite) NSString *url; -@property (nonatomic, readwrite) NSString *secureURL; -@property (nonatomic, readwrite) NSString *mimeType; -@property (nonatomic, readwrite) CGFloat width; -@property (nonatomic, readwrite) CGFloat height; -@property (nonatomic, readwrite) NSString *altText; - -@end - -@implementation RSHTMLOpenGraphImage - - -@end - -@interface RSHTMLOpenGraphProperties () - -@property (nonatomic) NSMutableArray *ogImages; -@end - -@implementation RSHTMLOpenGraphProperties - -- (instancetype)initWithURLString:(NSString *)urlString tags:(NSArray *)tags { - - self = [super init]; - if (!self) { - return nil; - } - - _ogImages = [NSMutableArray new]; - - [self parseTags:tags]; - return self; -} - - -- (RSHTMLOpenGraphImage *)currentImage { - - return self.ogImages.lastObject; -} - - -- (RSHTMLOpenGraphImage *)pushImage { - - RSHTMLOpenGraphImage *image = [RSHTMLOpenGraphImage new]; - [self.ogImages addObject:image]; - return image; -} - -- (RSHTMLOpenGraphImage *)ensureImage { - - RSHTMLOpenGraphImage *image = [self currentImage]; - if (image != nil) { - return image; - } - return [self pushImage]; -} - - -- (NSArray *)images { - - return self.ogImages; -} - -static NSString *ogPrefix = @"og:"; -static NSString *ogImage = @"og:image"; -static NSString *ogImageURL = @"og:image:url"; -static NSString *ogImageSecureURL = @"og:image:secure_url"; -static NSString *ogImageType = @"og:image:type"; -static NSString *ogImageWidth = @"og:image:width"; -static NSString *ogImageHeight = @"og:image:height"; -static NSString *ogImageAlt = @"og:image:alt"; -static NSString *ogPropertyKey = @"property"; -static NSString *ogContentKey = @"content"; - -- (void)parseTags:(NSArray *)tags { - - for (RSHTMLTag *tag in tags) { - - if (tag.type != RSHTMLTagTypeMeta) { - continue; - } - - NSString *propertyName = tag.attributes[ogPropertyKey]; - if (!propertyName || ![propertyName hasPrefix:ogPrefix]) { - continue; - } - NSString *content = tag.attributes[ogContentKey]; - if (!content) { - continue; - } - - if ([propertyName isEqualToString:ogImage]) { - RSHTMLOpenGraphImage *image = [self currentImage]; - if (!image || image.url) { // Most likely case, since og:image will probably appear before other image attributes. - image = [self pushImage]; - } - image.url = content; - } - - else if ([propertyName isEqualToString:ogImageURL]) { - [self ensureImage].url = content; - } - else if ([propertyName isEqualToString:ogImageSecureURL]) { - [self ensureImage].secureURL = content; - } - else if ([propertyName isEqualToString:ogImageType]) { - [self ensureImage].mimeType = content; - } - else if ([propertyName isEqualToString:ogImageAlt]) { - [self ensureImage].altText = content; - } - else if ([propertyName isEqualToString:ogImageWidth]) { - [self ensureImage].width = [content floatValue]; - } - else if ([propertyName isEqualToString:ogImageHeight]) { - [self ensureImage].height = [content floatValue]; - } - } -} - -@end - -@implementation RSHTMLTwitterProperties - -static NSString *twitterNameKey = @"name"; -static NSString *twitterContentKey = @"content"; -static NSString *twitterImageSrc = @"twitter:image:src"; - -- (instancetype)initWithURLString:(NSString *)urlString tags:(NSArray *)tags { - - self = [super init]; - if (!self) { - return nil; - } - - for (RSHTMLTag *tag in tags) { - - if (tag.type != RSHTMLTagTypeMeta) { - continue; - } - NSString *name = tag.attributes[twitterNameKey]; - if (!name || ![name isEqualToString:twitterImageSrc]) { - continue; - } - NSString *content = tag.attributes[twitterContentKey]; - if (!content || content.length < 1) { - continue; - } - _imageURL = content; - break; - } - - return self; -} - -@end - diff --git a/Modules/ParserObjC/Sources/ParserObjC/RSHTMLMetadataParser.h b/Modules/ParserObjC/Sources/ParserObjC/RSHTMLMetadataParser.h deleted file mode 100755 index f9361905c..000000000 --- a/Modules/ParserObjC/Sources/ParserObjC/RSHTMLMetadataParser.h +++ /dev/null @@ -1,24 +0,0 @@ -// -// RSHTMLMetadataParser.h -// RSParser -// -// Created by Brent Simmons on 3/6/16. -// Copyright © 2016 Ranchero Software, LLC. All rights reserved. -// - -@import Foundation; - - -@class RSHTMLMetadata; -@class ParserData; - -NS_ASSUME_NONNULL_BEGIN - -@interface RSHTMLMetadataParser : NSObject - -+ (RSHTMLMetadata *)HTMLMetadataWithParserData:(ParserData *)parserData; - - -@end - -NS_ASSUME_NONNULL_END diff --git a/Modules/ParserObjC/Sources/ParserObjC/RSHTMLMetadataParser.m b/Modules/ParserObjC/Sources/ParserObjC/RSHTMLMetadataParser.m deleted file mode 100755 index 254fd109d..000000000 --- a/Modules/ParserObjC/Sources/ParserObjC/RSHTMLMetadataParser.m +++ /dev/null @@ -1,151 +0,0 @@ -// -// RSHTMLMetadataParser.m -// RSParser -// -// Created by Brent Simmons on 3/6/16. -// Copyright © 2016 Ranchero Software, LLC. All rights reserved. -// - -#import "RSHTMLMetadataParser.h" -#import "RSHTMLMetadata.h" -#import "RSSAXHTMLParser.h" -#import "RSSAXHTMLParser.h" -#import "RSSAXParser.h" -#import "RSParserInternal.h" -#import "ParserData.h" -#import "RSHTMLTag.h" - -#import - - -@interface RSHTMLMetadataParser () - -@property (nonatomic, readonly) ParserData *parserData; -@property (nonatomic, readwrite) RSHTMLMetadata *metadata; -@property (nonatomic) NSMutableArray *tags; -@property (nonatomic) BOOL didFinishParsing; -@property (nonatomic) BOOL shouldScanPastHeadSection; - -@end - - -@implementation RSHTMLMetadataParser - - -#pragma mark - Class Methods - -+ (RSHTMLMetadata *)HTMLMetadataWithParserData:(ParserData *)parserData { - - RSHTMLMetadataParser *parser = [[self alloc] initWithParserData:parserData]; - return parser.metadata; -} - - -#pragma mark - Init - -- (instancetype)initWithParserData:(ParserData *)parserData { - - NSParameterAssert(parserData.data); - NSParameterAssert(parserData.url); - - self = [super init]; - if (!self) { - return nil; - } - - _parserData = parserData; - _tags = [NSMutableArray new]; - - // YouTube has a weird bug where, on some pages, it puts the feed link tag after the head section, in the body section. - // This allows for a special case where we continue to scan after the head section. - // (Yes, this match could yield false positives, but it’s harmless.) - _shouldScanPastHeadSection = [parserData.url rangeOfString:@"youtube" options:NSCaseInsensitiveSearch].location != NSNotFound; - - [self parse]; - - return self; -} - - -#pragma mark - Parse - -- (void)parse { - - RSSAXHTMLParser *parser = [[RSSAXHTMLParser alloc] initWithDelegate:self]; - [parser parseData:self.parserData.data]; - [parser finishParsing]; - - self.metadata = [[RSHTMLMetadata alloc] initWithURLString:self.parserData.url tags:self.tags]; -} - - -static NSString *kHrefKey = @"href"; -static NSString *kSrcKey = @"src"; -static NSString *kRelKey = @"rel"; - -- (NSString *)linkForDictionary:(NSDictionary *)d { - - NSString *link = [d rsparser_objectForCaseInsensitiveKey:kHrefKey]; - if (link) { - return link; - } - - return [d rsparser_objectForCaseInsensitiveKey:kSrcKey]; -} - -- (void)handleLinkAttributes:(NSDictionary *)d { - - if (RSParserStringIsEmpty([d rsparser_objectForCaseInsensitiveKey:kRelKey])) { - return; - } - if (RSParserStringIsEmpty([self linkForDictionary:d])) { - return; - } - - RSHTMLTag *tag = [RSHTMLTag linkTagWithAttributes:d]; - [self.tags addObject:tag]; -} - -- (void)handleMetaAttributes:(NSDictionary *)d { - - RSHTMLTag *tag = [RSHTMLTag metaTagWithAttributes:d]; - [self.tags addObject:tag]; -} - -#pragma mark - RSSAXHTMLParserDelegate - -static const char *kBody = "body"; -static const NSInteger kBodyLength = 5; -static const char *kLink = "link"; -static const NSInteger kLinkLength = 5; -static const char *kMeta = "meta"; -static const NSInteger kMetaLength = 5; - -- (void)saxParser:(RSSAXHTMLParser *)SAXParser XMLStartElement:(const xmlChar *)localName attributes:(const xmlChar **)attributes { - - if (self.didFinishParsing) { - return; - } - - if (RSSAXEqualTags(localName, kBody, kBodyLength) && !self.shouldScanPastHeadSection) { - self.didFinishParsing = YES; - return; - } - - if (RSSAXEqualTags(localName, kLink, kLinkLength)) { - NSDictionary *d = [SAXParser attributesDictionary:attributes]; - if (!RSParserObjectIsEmpty(d)) { - [self handleLinkAttributes:d]; - } - return; - } - - if (RSSAXEqualTags(localName, kMeta, kMetaLength)) { - NSDictionary *d = [SAXParser attributesDictionary:attributes]; - if (!RSParserObjectIsEmpty(d)) { - [self handleMetaAttributes:d]; - } - } -} - -@end diff --git a/Modules/ParserObjC/Sources/ParserObjC/RSParsedArticle.h b/Modules/ParserObjC/Sources/ParserObjC/RSParsedArticle.h deleted file mode 100755 index a2bfb3175..000000000 --- a/Modules/ParserObjC/Sources/ParserObjC/RSParsedArticle.h +++ /dev/null @@ -1,37 +0,0 @@ -// -// RSParsedArticle.h -// RSParser -// -// Created by Brent Simmons on 12/6/14. -// Copyright (c) 2014 Ranchero Software LLC. All rights reserved. -// - -@import Foundation; - -@class RSParsedEnclosure; -@class RSParsedAuthor; - -@interface RSParsedArticle : NSObject - -- (nonnull instancetype)initWithFeedURL:(NSString * _Nonnull)feedURL; - -@property (nonatomic, readonly, nonnull) NSString *feedURL; -@property (nonatomic, nonnull) NSString *articleID; //guid, if present, or calculated from other attributes. Should be unique to the feed, but not necessarily unique across different feeds. (Not suitable for a database ID.) - -@property (nonatomic, nullable) NSString *guid; -@property (nonatomic, nullable) NSString *title; -@property (nonatomic, nullable) NSString *body; -@property (nonatomic, nullable) NSString *link; -@property (nonatomic, nullable) NSString *permalink; -@property (nonatomic, nullable) NSSet *authors; -@property (nonatomic, nullable) NSSet *enclosures; -@property (nonatomic, nullable) NSDate *datePublished; -@property (nonatomic, nullable) NSDate *dateModified; -@property (nonatomic, nonnull) NSDate *dateParsed; -@property (nonatomic, nullable) NSString *language; - -- (void)addEnclosure:(RSParsedEnclosure *_Nonnull)enclosure; -- (void)addAuthor:(RSParsedAuthor *_Nonnull)author; - -@end - diff --git a/Modules/ParserObjC/Sources/ParserObjC/RSParsedArticle.m b/Modules/ParserObjC/Sources/ParserObjC/RSParsedArticle.m deleted file mode 100755 index b94930a79..000000000 --- a/Modules/ParserObjC/Sources/ParserObjC/RSParsedArticle.m +++ /dev/null @@ -1,134 +0,0 @@ -// -// RSParsedArticle.m -// RSParser -// -// Created by Brent Simmons on 12/6/14. -// Copyright (c) 2014 Ranchero Software LLC. All rights reserved. -// - - -#import "RSParsedArticle.h" -#import "RSParserInternal.h" -#import "NSString+RSParser.h" -#import "RSParsedAuthor.h" -#import "RSParsedEnclosure.h" - - - -@implementation RSParsedArticle - - -#pragma mark - Init - -- (instancetype)initWithFeedURL:(NSString *)feedURL { - - NSParameterAssert(feedURL != nil); - - self = [super init]; - if (!self) { - return nil; - } - - _feedURL = feedURL; - _dateParsed = [NSDate date]; - - return self; -} - - -#pragma mark - Enclosures - -- (void)addEnclosure:(RSParsedEnclosure *)enclosure { - - if (self.enclosures) { - self.enclosures = [self.enclosures setByAddingObject:enclosure]; - } - else { - self.enclosures = [NSSet setWithObject:enclosure]; - } -} - -#pragma mark - Authors - -- (void)addAuthor:(RSParsedAuthor *)author { - - if (self.authors) { - self.authors = [self.authors setByAddingObject:author]; - } - else { - self.authors = [NSSet setWithObject:author]; - } -} - -#pragma mark - articleID - -- (NSString *)articleID { - - if (self.guid) { - return self.guid; - } - - if (!_articleID) { - _articleID = [self calculatedArticleID]; - } - - return _articleID; -} - - -- (NSString *)calculatedArticleID { - - /*Concatenate a combination of properties when no guid. Then hash the result. - In general, feeds should have guids. When they don't, re-runs are very likely, - because there's no other 100% reliable way to determine identity. - This is intended to create an ID unique inside a feed, but not globally unique. - Not suitable for a database ID, in other words.*/ - - NSMutableString *s = [NSMutableString stringWithString:@""]; - - NSString *datePublishedTimeStampString = nil; - if (self.datePublished) { - datePublishedTimeStampString = [NSString stringWithFormat:@"%.0f", self.datePublished.timeIntervalSince1970]; - } - - // Ideally we have a permalink and a pubDate. Either one would probably be a good guid, but together they should be rock-solid. (In theory. Feeds are buggy, though.) - if (!RSParserStringIsEmpty(self.permalink) && datePublishedTimeStampString) { - [s appendString:self.permalink]; - [s appendString:datePublishedTimeStampString]; - } - - else if (!RSParserStringIsEmpty(self.link) && datePublishedTimeStampString) { - [s appendString:self.link]; - [s appendString:datePublishedTimeStampString]; - } - - else if (!RSParserStringIsEmpty(self.title) && datePublishedTimeStampString) { - [s appendString:self.title]; - [s appendString:datePublishedTimeStampString]; - } - - else if (datePublishedTimeStampString) { - [s appendString:datePublishedTimeStampString]; - } - - else if (!RSParserStringIsEmpty(self.permalink)) { - [s appendString:self.permalink]; - } - - else if (!RSParserStringIsEmpty(self.link)) { - [s appendString:self.link]; - } - - else if (!RSParserStringIsEmpty(self.title)) { - [s appendString:self.title]; - } - - else if (!RSParserStringIsEmpty(self.body)) { - [s appendString:self.body]; - } - - return [s rsparser_md5Hash]; -} - -@end - diff --git a/Modules/ParserObjC/Sources/ParserObjC/RSParsedFeed.h b/Modules/ParserObjC/Sources/ParserObjC/RSParsedFeed.h deleted file mode 100755 index 80be90fed..000000000 --- a/Modules/ParserObjC/Sources/ParserObjC/RSParsedFeed.h +++ /dev/null @@ -1,23 +0,0 @@ -// -// RSParsedFeed.h -// RSParser -// -// Created by Brent Simmons on 7/12/15. -// Copyright © 2015 Ranchero Software, LLC. All rights reserved. -// - -@import Foundation; - -@class RSParsedArticle; - -@interface RSParsedFeed : NSObject - -- (nonnull instancetype)initWithURLString:(NSString * _Nonnull)urlString title:(NSString * _Nullable)title link:(NSString * _Nullable)link language:(NSString * _Nullable)language articles:(NSArray * _Nonnull)articles; - -@property (nonatomic, readonly, nonnull) NSString *urlString; -@property (nonatomic, readonly, nullable) NSString *title; -@property (nonatomic, readonly, nullable) NSString *link; -@property (nonatomic, readonly, nullable) NSString *language; -@property (nonatomic, readonly, nonnull) NSSet *articles; - -@end diff --git a/Modules/ParserObjC/Sources/ParserObjC/RSParsedFeed.m b/Modules/ParserObjC/Sources/ParserObjC/RSParsedFeed.m deleted file mode 100755 index ef0c42e76..000000000 --- a/Modules/ParserObjC/Sources/ParserObjC/RSParsedFeed.m +++ /dev/null @@ -1,32 +0,0 @@ -// -// RSParsedFeed.m -// RSParser -// -// Created by Brent Simmons on 7/12/15. -// Copyright © 2015 Ranchero Software, LLC. All rights reserved. -// - -#import "RSParsedFeed.h" - - - -@implementation RSParsedFeed - -- (instancetype)initWithURLString:(NSString *)urlString title:(NSString *)title link:(NSString *)link language:(NSString *)language articles:(NSSet *)articles { - - self = [super init]; - if (!self) { - return nil; - } - - _urlString = urlString; - _title = title; - _link = link; - _language = language; - _articles = articles; - - return self; -} - - -@end diff --git a/Modules/ParserObjC/Sources/ParserObjC/RSParserInternal.h b/Modules/ParserObjC/Sources/ParserObjC/RSParserInternal.h deleted file mode 100755 index 76209e076..000000000 --- a/Modules/ParserObjC/Sources/ParserObjC/RSParserInternal.h +++ /dev/null @@ -1,24 +0,0 @@ -// -// RSParserInternal.h -// RSParser -// -// Created by Brent Simmons on 12/26/16. -// Copyright © 2016 Ranchero Software, LLC. All rights reserved. -// - -@import Foundation; - -NS_ASSUME_NONNULL_BEGIN - -BOOL RSParserObjectIsEmpty(id _Nullable obj); -BOOL RSParserStringIsEmpty(NSString * _Nullable s); - - -@interface NSDictionary (RSParserInternal) - -- (nullable id)rsparser_objectForCaseInsensitiveKey:(NSString *)key; - -@end - -NS_ASSUME_NONNULL_END - diff --git a/Modules/ParserObjC/Sources/ParserObjC/RSParserInternal.m b/Modules/ParserObjC/Sources/ParserObjC/RSParserInternal.m deleted file mode 100755 index 4ba6f8a97..000000000 --- a/Modules/ParserObjC/Sources/ParserObjC/RSParserInternal.m +++ /dev/null @@ -1,61 +0,0 @@ -// -// RSParserInternal.m -// RSParser -// -// Created by Brent Simmons on 12/26/16. -// Copyright © 2016 Ranchero Software, LLC. All rights reserved. -// - - -#import "RSParserInternal.h" -#import - - -static BOOL RSParserIsNil(id obj) { - - return obj == nil || obj == [NSNull null]; -} - -BOOL RSParserObjectIsEmpty(id obj) { - - if (RSParserIsNil(obj)) { - return YES; - } - - if ([obj respondsToSelector:@selector(count)]) { - return [obj count] < 1; - } - - if ([obj respondsToSelector:@selector(length)]) { - return [obj length] < 1; - } - - return NO; /*Shouldn't get here very often.*/ -} - -BOOL RSParserStringIsEmpty(NSString *s) { - - return RSParserIsNil(s) || s.length < 1; -} - - -@implementation NSDictionary (RSParserInternal) - -- (nullable id)rsparser_objectForCaseInsensitiveKey:(NSString *)key { - - id obj = self[key]; - if (obj) { - return obj; - } - - for (NSString *oneKey in self.allKeys) { - - if ([oneKey isKindOfClass:[NSString class]] && [key caseInsensitiveCompare:oneKey] == NSOrderedSame) { - return self[oneKey]; - } - } - - return nil; -} - -@end diff --git a/Modules/ParserObjC/Sources/ParserObjC/RSRSSParser.h b/Modules/ParserObjC/Sources/ParserObjC/RSRSSParser.h deleted file mode 100755 index 26e97d0a2..000000000 --- a/Modules/ParserObjC/Sources/ParserObjC/RSRSSParser.h +++ /dev/null @@ -1,19 +0,0 @@ -// -// RSRSSParser.h -// RSParser -// -// Created by Brent Simmons on 1/6/15. -// Copyright (c) 2015 Ranchero Software LLC. All rights reserved. -// - -@import Foundation; - -@class ParserData; -@class RSParsedFeed; - -@interface RSRSSParser : NSObject - -+ (RSParsedFeed *)parseFeedWithData:(ParserData *)parserData; - - -@end diff --git a/Modules/ParserObjC/Sources/ParserObjC/RSRSSParser.m b/Modules/ParserObjC/Sources/ParserObjC/RSRSSParser.m deleted file mode 100755 index 455320ab2..000000000 --- a/Modules/ParserObjC/Sources/ParserObjC/RSRSSParser.m +++ /dev/null @@ -1,523 +0,0 @@ -// -// RSRSSParser.m -// RSParser -// -// Created by Brent Simmons on 1/6/15. -// Copyright (c) 2015 Ranchero Software LLC. All rights reserved. -// - -#import "RSRSSParser.h" -#import "RSSAXParser.h" -#import "RSParsedFeed.h" -#import "RSParsedArticle.h" -#import "RSParserInternal.h" -#import "NSString+RSParser.h" -#import "RSDateParser.h" -#import "ParserData.h" -#import "RSParsedEnclosure.h" -#import "RSParsedAuthor.h" - - - -#import - - -@interface RSRSSParser () - -@property (nonatomic) NSData *feedData; -@property (nonatomic) NSString *urlString; -@property (nonatomic) NSDictionary *currentAttributes; -@property (nonatomic) RSSAXParser *parser; -@property (nonatomic) NSMutableArray *articles; -@property (nonatomic) BOOL parsingArticle; -@property (nonatomic) BOOL parsingAuthor; -@property (nonatomic, readonly) RSParsedArticle *currentArticle; -@property (nonatomic) BOOL parsingChannelImage; -@property (nonatomic, readonly) NSDate *currentDate; -@property (nonatomic) BOOL endRSSFound; -@property (nonatomic) NSString *link; -@property (nonatomic) NSString *title; -@property (nonatomic) NSDate *dateParsed; -@property (nonatomic) BOOL isRDF; -@property (nonatomic) NSString *language; - -@end - - -@implementation RSRSSParser - -#pragma mark - Class Methods - -+ (RSParsedFeed *)parseFeedWithData:(ParserData *)parserData { - - RSRSSParser *parser = [[[self class] alloc] initWithParserData:parserData]; - return [parser parseFeed]; -} - -#pragma mark - Init - -- (instancetype)initWithParserData:(ParserData *)parserData { - - self = [super init]; - if (!self) { - return nil; - } - - _feedData = parserData.data; - _urlString = parserData.url; - _parser = [[RSSAXParser alloc] initWithDelegate:self]; - _articles = [NSMutableArray new]; - - return self; -} - -#pragma mark - API - -- (RSParsedFeed *)parseFeed { - - [self parse]; - - RSParsedFeed *parsedFeed = [[RSParsedFeed alloc] initWithURLString:self.urlString title:self.title link:self.link language:self.language articles:self.articles]; - - return parsedFeed; -} - - -#pragma mark - Constants - -static NSString *kIsPermaLinkKey = @"isPermaLink"; -static NSString *kURLKey = @"url"; -static NSString *kLengthKey = @"length"; -static NSString *kTypeKey = @"type"; -static NSString *kFalseValue = @"false"; -static NSString *kTrueValue = @"true"; -static NSString *kContentEncodedKey = @"content:encoded"; -static NSString *kDCDateKey = @"dc:date"; -static NSString *kDCCreatorKey = @"dc:creator"; -static NSString *kRDFAboutKey = @"rdf:about"; - -static const char *kItem = "item"; -static const NSInteger kItemLength = 5; - -static const char *kImage = "image"; -static const NSInteger kImageLength = 6; - -static const char *kLink = "link"; -static const NSInteger kLinkLength = 5; - -static const char *kTitle = "title"; -static const NSInteger kTitleLength = 6; - -static const char *kDC = "dc"; -static const NSInteger kDCLength = 3; - -static const char *kCreator = "creator"; -static const NSInteger kCreatorLength = 8; - -static const char *kDate = "date"; -static const NSInteger kDateLength = 5; - -static const char *kContent = "content"; -static const NSInteger kContentLength = 8; - -static const char *kEncoded = "encoded"; -static const NSInteger kEncodedLength = 8; - -static const char *kGuid = "guid"; -static const NSInteger kGuidLength = 5; - -static const char *kPubDate = "pubDate"; -static const NSInteger kPubDateLength = 8; - -static const char *kAuthor = "author"; -static const NSInteger kAuthorLength = 7; - -static const char *kDescription = "description"; -static const NSInteger kDescriptionLength = 12; - -static const char *kRSS = "rss"; -static const NSInteger kRSSLength = 4; - -static const char *kURL = "url"; -static const NSInteger kURLLength = 4; - -static const char *kLength = "length"; -static const NSInteger kLengthLength = 7; - -static const char *kType = "type"; -static const NSInteger kTypeLength = 5; - -static const char *kIsPermaLink = "isPermaLink"; -static const NSInteger kIsPermaLinkLength = 12; - -static const char *kRDF = "rdf"; -static const NSInteger kRDFlength = 4; - -static const char *kAbout = "about"; -static const NSInteger kAboutLength = 6; - -static const char *kFalse = "false"; -static const NSInteger kFalseLength = 6; - -static const char *kTrue = "true"; -static const NSInteger kTrueLength = 5; - -static const char *kUppercaseRDF = "RDF"; -static const NSInteger kUppercaseRDFLength = 4; - -static const char *kEnclosure = "enclosure"; -static const NSInteger kEnclosureLength = 10; - -static const char *kLanguage = "language"; -static const NSInteger kLanguageLength = 9; - -#pragma mark - Parsing - -- (void)parse { - - self.dateParsed = [NSDate date]; - - @autoreleasepool { - [self.parser parseData:self.feedData]; - [self.parser finishParsing]; - } -} - - -- (void)addArticle { - - RSParsedArticle *article = [[RSParsedArticle alloc] initWithFeedURL:self.urlString]; - article.dateParsed = self.dateParsed; - - [self.articles addObject:article]; -} - - -- (RSParsedArticle *)currentArticle { - - return self.articles.lastObject; -} - - -- (void)addFeedElement:(const xmlChar *)localName prefix:(const xmlChar *)prefix { - - if (prefix != NULL) { - return; - } - - if (RSSAXEqualTags(localName, kLink, kLinkLength)) { - if (!self.link) { - self.link = [self currentString]; - } - } - - else if (RSSAXEqualTags(localName, kTitle, kTitleLength)) { - self.title = [self currentString]; - } - - else if (RSSAXEqualTags(localName, kLanguage, kLanguageLength)) { - self.language = [self currentString]; - } -} - -- (void)addAuthorWithString:(NSString *)authorString { - - if (RSParserStringIsEmpty(authorString)) { - return; - } - - RSParsedAuthor *author = [RSParsedAuthor authorWithSingleString:[self currentString]]; - [self.currentArticle addAuthor:author]; -} - -- (void)addDCElement:(const xmlChar *)localName { - - if (RSSAXEqualTags(localName, kCreator, kCreatorLength)) { - [self addAuthorWithString:[self currentString]]; - } - else if (RSSAXEqualTags(localName, kDate, kDateLength)) { - self.currentArticle.datePublished = self.currentDate; - } -} - - -- (void)addGuid { - - NSString *guid = [self currentString]; - self.currentArticle.guid = guid; - - NSString *isPermaLinkValue = [self.currentAttributes rsparser_objectForCaseInsensitiveKey:@"ispermalink"]; - if (!isPermaLinkValue || ![isPermaLinkValue isEqualToString:@"false"]) { - if ([self stringIsProbablyAURLOrRelativePath:guid]) { - self.currentArticle.permalink = [self urlString:guid]; - } - } -} - -- (void)addEnclosure { - - NSDictionary *attributes = self.currentAttributes; - NSString *url = attributes[kURLKey]; - if (!url || url.length < 1) { - return; - } - - RSParsedEnclosure *enclosure = [[RSParsedEnclosure alloc] init]; - enclosure.url = url; - enclosure.length = [attributes[kLengthKey] integerValue]; - enclosure.mimeType = attributes[kTypeKey]; - - [self.currentArticle addEnclosure:enclosure]; -} - -- (BOOL)stringIsProbablyAURLOrRelativePath:(NSString *)s { - - /*The RSS guid is defined as a permalink, except when it appears like this: - some—identifier - However, people often seem to think it’s *not* a permalink by default, even - though it is. So we try to detect the situation where the value is not a URL string, - and not even a relative path. This may need to evolve over time as we find - feeds broken in different ways.*/ - - if (![s rsparser_contains:@"/"]) { - // This seems to be just about the best possible check. - // Bad guids are often just integers, for instance. - return NO; - } - - if ([s.lowercaseString hasPrefix:@"tag:"]) { // A common non-URL guid form - return NO; - } - return YES; -} - -- (NSString *)urlString:(NSString *)s { - - /*Resolve against home page URL (if available) or feed URL.*/ - - if ([[s lowercaseString] hasPrefix:@"http"]) { - return s; - } - - if (!self.link) { - //TODO: get feed URL and use that to resolve URL.*/ - return s; - } - - NSURL *baseURL = [NSURL URLWithString:self.link]; - if (!baseURL) { - return s; - } - - NSURL *resolvedURL = [NSURL URLWithString:s relativeToURL:baseURL]; - if (resolvedURL.absoluteString) { - return resolvedURL.absoluteString; - } - - return s; -} - - -- (NSString *)currentString { - - return self.parser.currentStringWithTrimmedWhitespace; -} - - -- (void)addArticleElement:(const xmlChar *)localName prefix:(const xmlChar *)prefix { - - if (RSSAXEqualTags(prefix, kDC, kDCLength)) { - - [self addDCElement:localName]; - return; - } - - if (RSSAXEqualTags(prefix, kContent, kContentLength) && RSSAXEqualTags(localName, kEncoded, kEncodedLength)) { - NSString *s = [self currentString]; - if (!RSParserStringIsEmpty(s)) { - self.currentArticle.body = s; - } - return; - } - - if (prefix != NULL) { - return; - } - - if (RSSAXEqualTags(localName, kGuid, kGuidLength)) { - [self addGuid]; - } - else if (RSSAXEqualTags(localName, kPubDate, kPubDateLength)) { - self.currentArticle.datePublished = self.currentDate; - } - else if (RSSAXEqualTags(localName, kAuthor, kAuthorLength)) { - [self addAuthorWithString:[self currentString]]; - } - else if (RSSAXEqualTags(localName, kLink, kLinkLength)) { - self.currentArticle.link = [self urlString:[self currentString]]; - } - else if (RSSAXEqualTags(localName, kDescription, kDescriptionLength)) { - - if (!self.currentArticle.body) { - self.currentArticle.body = [self currentString]; - } - } - else if (!self.parsingAuthor && RSSAXEqualTags(localName, kTitle, kTitleLength)) { - NSString *articleTitle = [self currentString]; - if (articleTitle != nil) { - self.currentArticle.title = articleTitle; - } - } - else if (RSSAXEqualTags(localName, kEnclosure, kEnclosureLength)) { - [self addEnclosure]; - } -} - - -- (NSDate *)currentDate { - - return RSDateWithBytes(self.parser.currentCharacters.bytes, self.parser.currentCharacters.length); -} - - -#pragma mark - RSSAXParserDelegate - -- (void)saxParser:(RSSAXParser *)SAXParser XMLStartElement:(const xmlChar *)localName prefix:(const xmlChar *)prefix uri:(const xmlChar *)uri numberOfNamespaces:(NSInteger)numberOfNamespaces namespaces:(const xmlChar **)namespaces numberOfAttributes:(NSInteger)numberOfAttributes numberDefaulted:(int)numberDefaulted attributes:(const xmlChar **)attributes { - - if (self.endRSSFound) { - return; - } - - if (RSSAXEqualTags(localName, kUppercaseRDF, kUppercaseRDFLength)) { - self.isRDF = YES; - return; - } - - NSDictionary *xmlAttributes = nil; - if ((self.isRDF && RSSAXEqualTags(localName, kItem, kItemLength)) || RSSAXEqualTags(localName, kGuid, kGuidLength) || RSSAXEqualTags(localName, kEnclosure, kEnclosureLength)) { - xmlAttributes = [self.parser attributesDictionary:attributes numberOfAttributes:numberOfAttributes]; - } - if (self.currentAttributes != xmlAttributes) { - self.currentAttributes = xmlAttributes; - } - - if (!prefix && RSSAXEqualTags(localName, kItem, kItemLength)) { - - [self addArticle]; - self.parsingArticle = YES; - - if (self.isRDF && xmlAttributes && xmlAttributes[kRDFAboutKey]) { /*RSS 1.0 guid*/ - self.currentArticle.guid = xmlAttributes[kRDFAboutKey]; - self.currentArticle.permalink = self.currentArticle.guid; - } - } - - else if (!prefix && RSSAXEqualTags(localName, kImage, kImageLength)) { - self.parsingChannelImage = YES; - } - else if (!prefix && RSSAXEqualTags(localName, kAuthor, kAuthorLength)) { - if (self.parsingArticle) { - self.parsingAuthor = true; - } - } - - if (!self.parsingChannelImage) { - [self.parser beginStoringCharacters]; - } -} - - -- (void)saxParser:(RSSAXParser *)SAXParser XMLEndElement:(const xmlChar *)localName prefix:(const xmlChar *)prefix uri:(const xmlChar *)uri { - - if (self.endRSSFound) { - return; - } - - if (self.isRDF && RSSAXEqualTags(localName, kUppercaseRDF, kUppercaseRDFLength)) { - self.endRSSFound = YES; - } - - else if (RSSAXEqualTags(localName, kRSS, kRSSLength)) { - self.endRSSFound = YES; - } - - else if (RSSAXEqualTags(localName, kImage, kImageLength)) { - self.parsingChannelImage = NO; - } - - else if (RSSAXEqualTags(localName, kItem, kItemLength)) { - self.parsingArticle = NO; - } - - else if (self.parsingArticle) { - [self addArticleElement:localName prefix:prefix]; - if (RSSAXEqualTags(localName, kAuthor, kAuthorLength)) { - self.parsingAuthor = NO; - } - } - - else if (!self.parsingChannelImage) { - [self addFeedElement:localName prefix:prefix]; - } -} - - -- (NSString *)saxParser:(RSSAXParser *)SAXParser internedStringForName:(const xmlChar *)name prefix:(const xmlChar *)prefix { - - if (RSSAXEqualTags(prefix, kRDF, kRDFlength)) { - - if (RSSAXEqualTags(name, kAbout, kAboutLength)) { - return kRDFAboutKey; - } - - return nil; - } - - if (prefix) { - return nil; - } - - if (RSSAXEqualTags(name, kIsPermaLink, kIsPermaLinkLength)) { - return kIsPermaLinkKey; - } - - if (RSSAXEqualTags(name, kURL, kURLLength)) { - return kURLKey; - } - - if (RSSAXEqualTags(name, kLength, kLengthLength)) { - return kLengthKey; - } - - if (RSSAXEqualTags(name, kType, kTypeLength)) { - return kTypeKey; - } - - return nil; -} - - -static BOOL equalBytes(const void *bytes1, const void *bytes2, NSUInteger length) { - - return memcmp(bytes1, bytes2, length) == 0; -} - - -- (NSString *)saxParser:(RSSAXParser *)SAXParser internedStringForValue:(const void *)bytes length:(NSUInteger)length { - - static const NSUInteger falseLength = kFalseLength - 1; - static const NSUInteger trueLength = kTrueLength - 1; - - if (length == falseLength && equalBytes(bytes, kFalse, falseLength)) { - return kFalseValue; - } - - if (length == trueLength && equalBytes(bytes, kTrue, trueLength)) { - return kTrueValue; - } - - return nil; -} - - -@end diff --git a/Modules/ParserObjC/Sources/ParserObjC/RSSAXHTMLParser.h b/Modules/ParserObjC/Sources/ParserObjC/RSSAXHTMLParser.h deleted file mode 100755 index f67d60cf6..000000000 --- a/Modules/ParserObjC/Sources/ParserObjC/RSSAXHTMLParser.h +++ /dev/null @@ -1,55 +0,0 @@ -// -// RSSAXHTMLParser.h -// RSParser -// -// Created by Brent Simmons on 3/6/16. -// Copyright © 2016 Ranchero Software, LLC. All rights reserved. -// - -@import Foundation; - -NS_ASSUME_NONNULL_BEGIN - -@class RSSAXHTMLParser; - -@protocol RSSAXHTMLParserDelegate - -@optional - -- (void)saxParser:(RSSAXHTMLParser *)SAXParser XMLStartElement:(const unsigned char *)localName attributes:(const unsigned char *_Nullable*_Nullable)attributes; - -- (void)saxParser:(RSSAXHTMLParser *)SAXParser XMLEndElement:(nullable const unsigned char *)localName; - -// Length is guaranteed to be greater than 0. -- (void)saxParser:(RSSAXHTMLParser *)SAXParser XMLCharactersFound:(nullable const unsigned char *)characters length:(NSUInteger)length; - -- (void)saxParserDidReachEndOfDocument:(RSSAXHTMLParser *)SAXParser; // If canceled, may not get called (but might). - -@end - - -@interface RSSAXHTMLParser : NSObject - - -- (instancetype)initWithDelegate:(id)delegate; - -- (void)parseData:(NSData *)data; -- (void)parseBytes:(const void *)bytes numberOfBytes:(NSUInteger)numberOfBytes; -- (void)finishParsing; -- (void)cancel; - -@property (nullable, nonatomic, strong, readonly) NSData *currentCharacters; // nil if not storing characters. UTF-8 encoded. -@property (nullable, nonatomic, strong, readonly) NSString *currentString; // Convenience to get string version of currentCharacters. -@property (nullable, nonatomic, strong, readonly) NSString *currentStringWithTrimmedWhitespace; - -- (void)beginStoringCharacters; // Delegate can call from XMLStartElement. Characters will be available in XMLEndElement as currentCharacters property. Storing characters is stopped after each XMLEndElement. - -// Delegate can call from within XMLStartElement. - -- (nullable NSDictionary *)attributesDictionary:(const unsigned char *_Nullable*_Nullable)attributes; - - -@end - -NS_ASSUME_NONNULL_END - diff --git a/Modules/ParserObjC/Sources/ParserObjC/RSSAXHTMLParser.m b/Modules/ParserObjC/Sources/ParserObjC/RSSAXHTMLParser.m deleted file mode 100755 index 5df2d84fd..000000000 --- a/Modules/ParserObjC/Sources/ParserObjC/RSSAXHTMLParser.m +++ /dev/null @@ -1,321 +0,0 @@ -// -// RSSAXHTMLParser.m -// RSParser -// -// Created by Brent Simmons on 3/6/16. -// Copyright © 2016 Ranchero Software, LLC. All rights reserved. -// - -#import "RSSAXHTMLParser.h" -#import "RSSAXParser.h" -#import "RSParserInternal.h" - -#import -#import -#import - - - -@interface RSSAXHTMLParser () - -@property (nonatomic) id delegate; -@property (nonatomic, assign) htmlParserCtxtPtr context; -@property (nonatomic, assign) BOOL storingCharacters; -@property (nonatomic) NSMutableData *characters; -@property (nonatomic) BOOL delegateRespondsToStartElementMethod; -@property (nonatomic) BOOL delegateRespondsToEndElementMethod; -@property (nonatomic) BOOL delegateRespondsToCharactersFoundMethod; -@property (nonatomic) BOOL delegateRespondsToEndOfDocumentMethod; - -@end - - -@implementation RSSAXHTMLParser - - -+ (void)initialize { - - RSSAXInitLibXMLParser(); -} - - -#pragma mark - Init - -- (instancetype)initWithDelegate:(id)delegate { - - self = [super init]; - if (self == nil) - return nil; - - _delegate = delegate; - - if ([_delegate respondsToSelector:@selector(saxParser:XMLStartElement:attributes:)]) { - _delegateRespondsToStartElementMethod = YES; - } - if ([_delegate respondsToSelector:@selector(saxParser:XMLEndElement:)]) { - _delegateRespondsToEndElementMethod = YES; - } - if ([_delegate respondsToSelector:@selector(saxParser:XMLCharactersFound:length:)]) { - _delegateRespondsToCharactersFoundMethod = YES; - } - if ([_delegate respondsToSelector:@selector(saxParserDidReachEndOfDocument:)]) { - _delegateRespondsToEndOfDocumentMethod = YES; - } - - return self; -} - - -#pragma mark - Dealloc - -- (void)dealloc { - - if (_context != nil) { - htmlFreeParserCtxt(_context); - _context = nil; - } - _delegate = nil; -} - - -#pragma mark - API - -static xmlSAXHandler saxHandlerStruct; - -- (void)parseData:(NSData *)data { - - [self parseBytes:data.bytes numberOfBytes:data.length]; -} - - -- (void)parseBytes:(const void *)bytes numberOfBytes:(NSUInteger)numberOfBytes { - - if (self.context == nil) { - - xmlCharEncoding characterEncoding = xmlDetectCharEncoding(bytes, (int)numberOfBytes); - self.context = htmlCreatePushParserCtxt(&saxHandlerStruct, (__bridge void *)self, nil, 0, nil, characterEncoding); - htmlCtxtUseOptions(self.context, XML_PARSE_RECOVER | XML_PARSE_NONET | HTML_PARSE_COMPACT); - } - - @autoreleasepool { - htmlParseChunk(self.context, (const char *)bytes, (int)numberOfBytes, 0); - } -} - - -- (void)finishParsing { - - NSAssert(self.context != nil, nil); - if (self.context == nil) - return; - - @autoreleasepool { - htmlParseChunk(self.context, nil, 0, 1); - htmlFreeParserCtxt(self.context); - self.context = nil; - self.characters = nil; - } -} - - -- (void)cancel { - - @autoreleasepool { - xmlStopParser(self.context); - } -} - - - -- (void)beginStoringCharacters { - self.storingCharacters = YES; - self.characters = [NSMutableData new]; -} - - -- (void)endStoringCharacters { - self.storingCharacters = NO; - self.characters = nil; -} - - -- (NSData *)currentCharacters { - - if (!self.storingCharacters) { - return nil; - } - - return self.characters; -} - - -- (NSString *)currentString { - - NSData *d = self.currentCharacters; - if (RSParserObjectIsEmpty(d)) { - return nil; - } - - return [[NSString alloc] initWithData:d encoding:NSUTF8StringEncoding]; -} - - -- (NSString *)currentStringWithTrimmedWhitespace { - - return [self.currentString stringByTrimmingCharactersInSet:[NSCharacterSet whitespaceAndNewlineCharacterSet]]; -} - - -#pragma mark - Attributes Dictionary - -- (NSDictionary *)attributesDictionary:(const xmlChar **)attributes { - - if (!attributes) { - return nil; - } - - NSMutableDictionary *d = [NSMutableDictionary new]; - - NSInteger ix = 0; - NSString *currentKey = nil; - while (true) { - - const xmlChar *oneAttribute = attributes[ix]; - ix++; - - if (!currentKey && !oneAttribute) { - break; - } - - if (!currentKey) { - currentKey = [NSString stringWithUTF8String:(const char *)oneAttribute]; - } - else { - NSString *value = nil; - if (oneAttribute) { - value = [NSString stringWithUTF8String:(const char *)oneAttribute]; - } - - d[currentKey] = value ? value : @""; - currentKey = nil; - } - } - - return [d copy]; -} - - -#pragma mark - Callbacks - -- (void)xmlEndDocument { - - @autoreleasepool { - if (self.delegateRespondsToEndOfDocumentMethod) { - [self.delegate saxParserDidReachEndOfDocument:self]; - } - - [self endStoringCharacters]; - } -} - - -- (void)xmlCharactersFound:(const xmlChar *)ch length:(NSUInteger)length { - - if (length < 1) { - return; - } - - @autoreleasepool { - if (self.storingCharacters) { - [self.characters appendBytes:(const void *)ch length:length]; - } - - if (self.delegateRespondsToCharactersFoundMethod) { - [self.delegate saxParser:self XMLCharactersFound:ch length:length]; - } - } -} - - -- (void)xmlStartElement:(const xmlChar *)localName attributes:(const xmlChar **)attributes { - - @autoreleasepool { - if (self.delegateRespondsToStartElementMethod) { - - [self.delegate saxParser:self XMLStartElement:localName attributes:attributes]; - } - } -} - - -- (void)xmlEndElement:(const xmlChar *)localName { - - @autoreleasepool { - if (self.delegateRespondsToEndElementMethod) { - [self.delegate saxParser:self XMLEndElement:localName]; - } - - [self endStoringCharacters]; - } -} - - -@end - - -static void startElementSAX(void *context, const xmlChar *localname, const xmlChar **attributes) { - - [(__bridge RSSAXHTMLParser *)context xmlStartElement:localname attributes:attributes]; -} - - -static void endElementSAX(void *context, const xmlChar *localname) { - [(__bridge RSSAXHTMLParser *)context xmlEndElement:localname]; -} - - -static void charactersFoundSAX(void *context, const xmlChar *ch, int len) { - [(__bridge RSSAXHTMLParser *)context xmlCharactersFound:ch length:(NSUInteger)len]; -} - - -static void endDocumentSAX(void *context) { - [(__bridge RSSAXHTMLParser *)context xmlEndDocument]; -} - - -static htmlSAXHandler saxHandlerStruct = { - nil, /* internalSubset */ - nil, /* isStandalone */ - nil, /* hasInternalSubset */ - nil, /* hasExternalSubset */ - nil, /* resolveEntity */ - nil, /* getEntity */ - nil, /* entityDecl */ - nil, /* notationDecl */ - nil, /* attributeDecl */ - nil, /* elementDecl */ - nil, /* unparsedEntityDecl */ - nil, /* setDocumentLocator */ - nil, /* startDocument */ - endDocumentSAX, /* endDocument */ - startElementSAX, /* startElement*/ - endElementSAX, /* endElement */ - nil, /* reference */ - charactersFoundSAX, /* characters */ - nil, /* ignorableWhitespace */ - nil, /* processingInstruction */ - nil, /* comment */ - nil, /* warning */ - nil, /* error */ - nil, /* fatalError //: unused error() get all the errors */ - nil, /* getParameterEntity */ - nil, /* cdataBlock */ - nil, /* externalSubset */ - XML_SAX2_MAGIC, - nil, - nil, /* startElementNs */ - nil, /* endElementNs */ - nil /* serror */ -}; - diff --git a/Modules/ParserObjC/Sources/ParserObjC/include/RSParser.h b/Modules/ParserObjC/Sources/ParserObjC/include/RSParser.h deleted file mode 100644 index c9bd0008f..000000000 --- a/Modules/ParserObjC/Sources/ParserObjC/include/RSParser.h +++ /dev/null @@ -1,56 +0,0 @@ -// -// RSParser.h -// RSParser -// -// Created by Brent Simmons on 6/20/17. -// Copyright © 2017 Ranchero Software, LLC. All rights reserved. -// - -@import Foundation; - - -#import "../ParserData.h" -#import "../RSDateParser.h" - -// OPML - -#import "../RSOPMLParser.h" -#import "../RSOPMLDocument.h" -#import "../RSOPMLItem.h" -#import "../RSOPMLAttributes.h" -#import "../RSOPMLFeedSpecifier.h" -#import "../RSOPMLError.h" - -// For writing your own XML parser. - -#import "../RSSAXParser.h" - -// You should use FeedParser (Swift) instead of these two specific parsers -// and the objects they create. -// But they’re available if you want them. - -#import "../RSRSSParser.h" -#import "../RSAtomParser.h" -#import "../RSParsedFeed.h" -#import "../RSParsedArticle.h" -#import "../RSParsedEnclosure.h" -#import "../RSParsedAuthor.h" - -// HTML - -#import "../RSHTMLMetadataParser.h" -#import "../RSHTMLMetadata.h" -#import "../RSHTMLLinkParser.h" -#import "../RSSAXHTMLParser.h" // For writing your own HTML parser. -#import "../RSHTMLTag.h" - -// Utilities - -#import "../NSData+RSParser.h" -#import "../NSString+RSParser.h" - - - - - - diff --git a/NetNewsWire.xcodeproj/project.pbxproj b/NetNewsWire.xcodeproj/project.pbxproj index 18d63ad1d..6b0474782 100644 --- a/NetNewsWire.xcodeproj/project.pbxproj +++ b/NetNewsWire.xcodeproj/project.pbxproj @@ -385,12 +385,8 @@ 8454C3F8263F3AD400E3F9C7 /* IconImageCache.swift in Sources */ = {isa = PBXBuildFile; fileRef = 8454C3F2263F2D8700E3F9C7 /* IconImageCache.swift */; }; 8456116B2BBD145200507B73 /* Parser in Frameworks */ = {isa = PBXBuildFile; productRef = 8456116A2BBD145200507B73 /* Parser */; }; 8456116C2BBD145200507B73 /* Parser in Embed Frameworks */ = {isa = PBXBuildFile; productRef = 8456116A2BBD145200507B73 /* Parser */; settings = {ATTRIBUTES = (CodeSignOnCopy, ); }; }; - 8456116E2BBD145200507B73 /* ParserObjC in Frameworks */ = {isa = PBXBuildFile; productRef = 8456116D2BBD145200507B73 /* ParserObjC */; }; - 8456116F2BBD145200507B73 /* ParserObjC in Embed Frameworks */ = {isa = PBXBuildFile; productRef = 8456116D2BBD145200507B73 /* ParserObjC */; settings = {ATTRIBUTES = (CodeSignOnCopy, ); }; }; 845611712BBD145D00507B73 /* Parser in Frameworks */ = {isa = PBXBuildFile; productRef = 845611702BBD145D00507B73 /* Parser */; }; 845611722BBD145D00507B73 /* Parser in Embed Frameworks */ = {isa = PBXBuildFile; productRef = 845611702BBD145D00507B73 /* Parser */; settings = {ATTRIBUTES = (CodeSignOnCopy, ); }; }; - 845611742BBD145D00507B73 /* ParserObjC in Frameworks */ = {isa = PBXBuildFile; productRef = 845611732BBD145D00507B73 /* ParserObjC */; }; - 845611752BBD145D00507B73 /* ParserObjC in Embed Frameworks */ = {isa = PBXBuildFile; productRef = 845611732BBD145D00507B73 /* ParserObjC */; settings = {ATTRIBUTES = (CodeSignOnCopy, ); }; }; 845A29221FC9251E007B49E3 /* SidebarCellLayout.swift in Sources */ = {isa = PBXBuildFile; fileRef = 845A29211FC9251E007B49E3 /* SidebarCellLayout.swift */; }; 845A29241FC9255E007B49E3 /* SidebarCellAppearance.swift in Sources */ = {isa = PBXBuildFile; fileRef = 845A29231FC9255E007B49E3 /* SidebarCellAppearance.swift */; }; 845EE7B11FC2366500854A1F /* StarredFeedDelegate.swift in Sources */ = {isa = PBXBuildFile; fileRef = 845EE7B01FC2366500854A1F /* StarredFeedDelegate.swift */; }; @@ -494,7 +490,6 @@ 84DC5FFE2BCE37A300F04682 /* AppDelegate+Shared.swift in Sources */ = {isa = PBXBuildFile; fileRef = 84DC5FFD2BCE37A300F04682 /* AppDelegate+Shared.swift */; }; 84DC60002BCE37A300F04682 /* AppDelegate+Shared.swift in Sources */ = {isa = PBXBuildFile; fileRef = 84DC5FFD2BCE37A300F04682 /* AppDelegate+Shared.swift */; }; 84DC60022BCE40B200F04682 /* Images in Frameworks */ = {isa = PBXBuildFile; productRef = 84DC60012BCE40B200F04682 /* Images */; }; - 84DC60042BCE40D000F04682 /* ParserObjC in Frameworks */ = {isa = PBXBuildFile; productRef = 84DC60032BCE40D000F04682 /* ParserObjC */; }; 84DCA5122BABB75600792720 /* FoundationExtras in Frameworks */ = {isa = PBXBuildFile; productRef = 84DCA5112BABB75600792720 /* FoundationExtras */; }; 84DCA5142BABB76100792720 /* AppKitExtras in Frameworks */ = {isa = PBXBuildFile; productRef = 84DCA5132BABB76100792720 /* AppKitExtras */; }; 84DCA51E2BABB79900792720 /* FoundationExtras in Frameworks */ = {isa = PBXBuildFile; productRef = 84DCA51D2BABB79900792720 /* FoundationExtras */; }; @@ -686,7 +681,6 @@ 8426DBC02BFDAEF200E98109 /* Web in Embed Frameworks */, 513F32782593EE6F0003048F /* Secrets in Embed Frameworks */, 513F327B2593EE6F0003048F /* SyncDatabase in Embed Frameworks */, - 845611752BBD145D00507B73 /* ParserObjC in Embed Frameworks */, 513F32722593EE6F0003048F /* Articles in Embed Frameworks */, 513F32812593EF180003048F /* Account in Embed Frameworks */, 8426DBB92BFDAD9200E98109 /* Core in Embed Frameworks */, @@ -741,7 +735,6 @@ 8426DBC32BFDAEFC00E98109 /* Web in Embed Frameworks */, 513277442590FBB60064F1E7 /* Account in Embed Frameworks */, 5132775F2590FC640064F1E7 /* Articles in Embed Frameworks */, - 8456116F2BBD145200507B73 /* ParserObjC in Embed Frameworks */, 513277662590FC780064F1E7 /* Secrets in Embed Frameworks */, 513277652590FC640064F1E7 /* SyncDatabase in Embed Frameworks */, 8426DBB82BFDAD8500E98109 /* Core in Embed Frameworks */, @@ -1097,7 +1090,6 @@ 84A059EE2C3A4A570041209B /* FMDB */ = {isa = PBXFileReference; lastKnownFileType = wrapper; path = FMDB; sourceTree = ""; }; 84A059EF2C3A4A5B0041209B /* Web */ = {isa = PBXFileReference; lastKnownFileType = wrapper; path = Web; sourceTree = ""; }; 84A059F02C3A4A5F0041209B /* Parser */ = {isa = PBXFileReference; lastKnownFileType = wrapper; path = Parser; sourceTree = ""; }; - 84A059F12C3A4A620041209B /* ParserObjC */ = {isa = PBXFileReference; lastKnownFileType = wrapper; path = ParserObjC; sourceTree = ""; }; 84A059F22C3A4A670041209B /* Core */ = {isa = PBXFileReference; lastKnownFileType = wrapper; path = Core; sourceTree = ""; }; 84A059F32C3A4A6C0041209B /* Tree */ = {isa = PBXFileReference; lastKnownFileType = wrapper; path = Tree; sourceTree = ""; }; 84A059F42C3A4AA30041209B /* UIKitExtras */ = {isa = PBXFileReference; lastKnownFileType = wrapper; path = UIKitExtras; sourceTree = ""; }; @@ -1244,7 +1236,6 @@ 841CECDE2BAD06D10001EE72 /* Tree in Frameworks */, 51BC2F3824D3439A00E90810 /* Account in Frameworks */, 8426DBC82BFDAF4300E98109 /* Web in Frameworks */, - 84DC60042BCE40D000F04682 /* ParserObjC in Frameworks */, 84D9582C2BABE53B0053E7B2 /* FoundationExtras in Frameworks */, ); runOnlyForDeploymentPostprocessing = 0; @@ -1291,7 +1282,6 @@ 84C1A8582BBBA5BD006E3E96 /* Web in Frameworks */, 516B695F24D2F33B00B5702F /* Account in Frameworks */, 84A699152BC34F3D00605AB8 /* ArticleExtractor in Frameworks */, - 845611742BBD145D00507B73 /* ParserObjC in Frameworks */, 845611712BBD145D00507B73 /* Parser in Frameworks */, 513F32712593EE6F0003048F /* Articles in Frameworks */, 513F32772593EE6F0003048F /* Secrets in Frameworks */, @@ -1329,7 +1319,6 @@ 841CECD82BAD04B20001EE72 /* Tree in Frameworks */, 8426DBC22BFDAEFC00E98109 /* Web in Frameworks */, 8456116B2BBD145200507B73 /* Parser in Frameworks */, - 8456116E2BBD145200507B73 /* ParserObjC in Frameworks */, ); runOnlyForDeploymentPostprocessing = 0; }; @@ -2052,7 +2041,6 @@ 84A059EE2C3A4A570041209B /* FMDB */, 84A059EF2C3A4A5B0041209B /* Web */, 84A059F02C3A4A5F0041209B /* Parser */, - 84A059F12C3A4A620041209B /* ParserObjC */, 84A059F22C3A4A670041209B /* Core */, 84A059F32C3A4A6C0041209B /* Tree */, 84A059F42C3A4AA30041209B /* UIKitExtras */, @@ -2493,7 +2481,6 @@ 84D9582B2BABE53B0053E7B2 /* FoundationExtras */, 841CECDD2BAD06D10001EE72 /* Tree */, 84DC60012BCE40B200F04682 /* Images */, - 84DC60032BCE40D000F04682 /* ParserObjC */, 8426DBC72BFDAF4300E98109 /* Web */, ); productName = "NetNewsWire iOS Share Extension"; @@ -2608,7 +2595,6 @@ 841CECDB2BAD04BF0001EE72 /* Tree */, 84C1A8572BBBA5BD006E3E96 /* Web */, 845611702BBD145D00507B73 /* Parser */, - 845611732BBD145D00507B73 /* ParserObjC */, 8410C4A42BC1E28200D4F799 /* ReaderAPI */, 84A699142BC34F3D00605AB8 /* ArticleExtractor */, 84DC5FFB2BCE31DB00F04682 /* Images */, @@ -2657,7 +2643,6 @@ 8438C2DA2BABE0B00040C9EE /* CoreResources */, 841CECD72BAD04B20001EE72 /* Tree */, 8456116A2BBD145200507B73 /* Parser */, - 8456116D2BBD145200507B73 /* ParserObjC */, 84A699162BC34F4400605AB8 /* ArticleExtractor */, 84DC5FF92BCE31D200F04682 /* Images */, 8426DBC12BFDAEFC00E98109 /* Web */, @@ -4200,18 +4185,10 @@ isa = XCSwiftPackageProductDependency; productName = Parser; }; - 8456116D2BBD145200507B73 /* ParserObjC */ = { - isa = XCSwiftPackageProductDependency; - productName = ParserObjC; - }; 845611702BBD145D00507B73 /* Parser */ = { isa = XCSwiftPackageProductDependency; productName = Parser; }; - 845611732BBD145D00507B73 /* ParserObjC */ = { - isa = XCSwiftPackageProductDependency; - productName = ParserObjC; - }; 8479ABE22B9E906E00F84C4D /* Database */ = { isa = XCSwiftPackageProductDependency; productName = Database; @@ -4260,10 +4237,6 @@ isa = XCSwiftPackageProductDependency; productName = Images; }; - 84DC60032BCE40D000F04682 /* ParserObjC */ = { - isa = XCSwiftPackageProductDependency; - productName = ParserObjC; - }; 84DCA5112BABB75600792720 /* FoundationExtras */ = { isa = XCSwiftPackageProductDependency; productName = FoundationExtras; diff --git a/Shared/AppDelegate+Shared.swift b/Shared/AppDelegate+Shared.swift index 626ff7f2e..ebfe1a8d4 100644 --- a/Shared/AppDelegate+Shared.swift +++ b/Shared/AppDelegate+Shared.swift @@ -8,7 +8,6 @@ import Foundation import Images -import ParserObjC import Account extension AppDelegate: FaviconDownloaderDelegate, FeedIconDownloaderDelegate { diff --git a/Shared/HTMLMetadata/HTMLMetadataDownloader.swift b/Shared/HTMLMetadata/HTMLMetadataDownloader.swift index da78fee44..87268b3cd 100644 --- a/Shared/HTMLMetadata/HTMLMetadataDownloader.swift +++ b/Shared/HTMLMetadata/HTMLMetadataDownloader.swift @@ -9,7 +9,6 @@ import Foundation import Web import Parser -import ParserObjC extension RSHTMLMetadata: @unchecked Sendable {}