mirror of
https://github.com/Ranchero-Software/NetNewsWire
synced 2025-08-12 06:26:36 +00:00
Create separate SAX target.
This commit is contained in:
@@ -12,6 +12,10 @@ let package = Package(
|
||||
name: "Parser",
|
||||
type: .dynamic,
|
||||
targets: ["Parser"]),
|
||||
.library(
|
||||
name: "SAX",
|
||||
type: .dynamic,
|
||||
targets: ["SAX"])
|
||||
],
|
||||
dependencies: [
|
||||
],
|
||||
@@ -20,6 +24,14 @@ let package = Package(
|
||||
// Targets can depend on other targets in this package, and on products in packages this package depends on.
|
||||
.target(
|
||||
name: "Parser",
|
||||
dependencies: [
|
||||
"SAX"
|
||||
],
|
||||
swiftSettings: [
|
||||
.enableExperimentalFeature("StrictConcurrency")
|
||||
]),
|
||||
.target(
|
||||
name: "SAX",
|
||||
dependencies: [],
|
||||
swiftSettings: [
|
||||
.enableExperimentalFeature("StrictConcurrency")
|
||||
|
||||
@@ -76,7 +76,7 @@ private extension OPMLParser {
|
||||
|
||||
extension OPMLParser: SAXParserDelegate {
|
||||
|
||||
func saxParser(_ saxParser: SAXParser, xmlStartElement localName: XMLPointer, prefix: XMLPointer?, uri: XMLPointer?, namespaceCount: Int, namespaces: UnsafeMutablePointer<XMLPointer?>?, attributeCount: Int, attributesDefaultedCount: Int, attributes: UnsafeMutablePointer<XMLPointer?>?) {
|
||||
func saxParser(_ saxParser: SAXParser, xmlStartElement localName: XMLPointer, prefix: XMLPointer?, uri: XMLPointer?, namespaceCount: Int, namespaces: UnsafePointer<XMLPointer?>?, attributeCount: Int, attributesDefaultedCount: Int, attributes: UnsafePointer<XMLPointer?>?) {
|
||||
|
||||
if SAXEqualStrings(localName, XMLKey.title) {
|
||||
saxParser.beginStoringCharacters()
|
||||
|
||||
54
Modules/Parser/Sources/SAX/SAXHTMLParser.swift
Normal file
54
Modules/Parser/Sources/SAX/SAXHTMLParser.swift
Normal file
@@ -0,0 +1,54 @@
|
||||
////
|
||||
//// SAXHTMLParser.swift
|
||||
////
|
||||
////
|
||||
//// Created by Brent Simmons on 8/26/24.
|
||||
////
|
||||
//
|
||||
//import Foundation
|
||||
//import libxml2
|
||||
//
|
||||
//protocol SAXHTMLParserDelegate: AnyObject {
|
||||
//
|
||||
// func saxParser(_: SAXHTMLParser, XMLStartElement localName: XMLPointer, attributes: UnsafePointer<XMLPointer?>?)
|
||||
//
|
||||
// func saxParser(_: SAXHTMLParser, XMLEndElement localName: XMLPointer?)
|
||||
//
|
||||
// // Length is guaranteed to be greater than 0.
|
||||
// func saxParser(_: SAXHTMLParser, XMLCharactersFound characters: XMLPointer?, length: Int)
|
||||
//}
|
||||
//
|
||||
//final class SAXHTMLParser {
|
||||
//
|
||||
// fileprivate let delegate: SAXHTMLParserDelegate
|
||||
// private var data: Data
|
||||
//
|
||||
// init(delegate: SAXHTMLParserDelegate, data: Data) {
|
||||
//
|
||||
// self.delegate = delegate
|
||||
// self.data = data
|
||||
// }
|
||||
//
|
||||
// func parse() {
|
||||
//
|
||||
// guard !data.isEmpty else {
|
||||
// return
|
||||
// }
|
||||
//
|
||||
// data.withUnsafeBytes { bufferPointer in
|
||||
//
|
||||
// guard let bytes = bufferPointer.bindMemory(to: xmlChar.self).baseAddress else {
|
||||
// return
|
||||
// }
|
||||
//
|
||||
// let characterEncoding = xmlDetectCharEncoding(bytes, Int32(data.count))
|
||||
// let context = htmlCreatePushParserCtxt(&saxHandlerStruct, Unmanaged.passUnretained(self).toOpaque(), nil, 0, nil, characterEncoding)
|
||||
// htmlCtxtUseOptions(context, Int32(XML_PARSE_RECOVER | XML_PARSE_NONET | HTML_PARSE_COMPACT))
|
||||
//
|
||||
// htmlParseChunk(context, bytes, Int32(data.count), 0)
|
||||
//
|
||||
// htmlParseChunk(context, nil, 0, 1)
|
||||
// htmlFreeParserCtxt(context)
|
||||
// }
|
||||
// }
|
||||
//}
|
||||
@@ -12,7 +12,7 @@ typealias XMLPointer = UnsafePointer<xmlChar>
|
||||
|
||||
protocol SAXParserDelegate {
|
||||
|
||||
func saxParser(_: SAXParser, xmlStartElement: XMLPointer, prefix: XMLPointer?, uri: XMLPointer?, namespaceCount: Int, namespaces: UnsafeMutablePointer<XMLPointer?>?, attributeCount: Int, attributesDefaultedCount: Int, attributes: UnsafeMutablePointer<XMLPointer?>?)
|
||||
func saxParser(_: SAXParser, xmlStartElement: XMLPointer, prefix: XMLPointer?, uri: XMLPointer?, namespaceCount: Int, namespaces: UnsafePointer<XMLPointer?>?, attributeCount: Int, attributesDefaultedCount: Int, attributes: UnsafePointer<XMLPointer?>?)
|
||||
|
||||
func saxParser(_: SAXParser, xmlEndElement: XMLPointer, prefix: XMLPointer?, uri: XMLPointer?)
|
||||
|
||||
@@ -69,8 +69,8 @@ final class SAXParser {
|
||||
xmlCtxtUseOptions(context, Int32(XML_PARSE_RECOVER.rawValue | XML_PARSE_NOENT.rawValue))
|
||||
|
||||
data.withUnsafeBytes { bufferPointer in
|
||||
if let bytes = bufferPointer.bindMemory(to: CChar.self).baseAddress {
|
||||
xmlParseChunk(context, bytes, CInt(data.count), 0)
|
||||
if let bytes = bufferPointer.bindMemory(to: xmlChar.self).baseAddress {
|
||||
xmlParseChunk(context, bytes, Int32(data.count), 0)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -110,16 +110,16 @@ final class SAXParser {
|
||||
var attributeName = String(cString: attribute)
|
||||
if let prefix {
|
||||
let attributePrefix = String(cString: prefix)
|
||||
attributeName = "\(attributePrefix):\(attributeName!)"
|
||||
attributeName = "\(attributePrefix):\(attributeName)"
|
||||
}
|
||||
|
||||
guard let valueStart = attributes[j + 3], let valueEnd = attributes[j + 4] else {
|
||||
continue
|
||||
}
|
||||
let valueCount = valueEnd - valueStart
|
||||
var value = String(bytes: UnsafeRawBufferPointer(start: valueStart, count: Int(valueCount)), encoding: .utf8)
|
||||
let value = String(bytes: UnsafeRawBufferPointer(start: valueStart, count: Int(valueCount)), encoding: .utf8)
|
||||
|
||||
if let value, let attributeName {
|
||||
if let value {
|
||||
dictionary[attributeName] = value
|
||||
}
|
||||
|
||||
@@ -129,12 +129,6 @@ final class SAXParser {
|
||||
|
||||
return dictionary
|
||||
}
|
||||
|
||||
func stringNoCopy(_ bytes: XMLPointer) -> String {
|
||||
|
||||
let length = strlen(bytes)
|
||||
return NSString(bytesNoCopy: bytes, length: length, encoding: .utf8, freeWhenDone: false) as String
|
||||
}
|
||||
}
|
||||
|
||||
private extension SAXParser {
|
||||
@@ -148,7 +142,7 @@ private extension SAXParser {
|
||||
delegate.saxParser(self, xmlCharactersFound: xmlCharacters, count: count)
|
||||
}
|
||||
|
||||
func startElement(_ name: XMLPointer, prefix: XMLPointer?, uri: XMLPointer?, namespaceCount: Int, namespaces: UnsafeMutablePointer<XMLPointer?>?, attributeCount: Int, attributesDefaultedCount: Int, attributes: UnsafeMutablePointer<XMLPointer?>?) {
|
||||
func startElement(_ name: XMLPointer, prefix: XMLPointer?, uri: XMLPointer?, namespaceCount: Int, namespaces: UnsafePointer<XMLPointer?>?, attributeCount: Int, attributesDefaultedCount: Int, attributes: UnsafePointer<XMLPointer?>?) {
|
||||
|
||||
delegate.saxParser(self, xmlStartElement: name, prefix: prefix, uri: uri, namespaceCount: namespaceCount, namespaces: namespaces, attributeCount: attributeCount, attributesDefaultedCount: attributesDefaultedCount, attributes: attributes)
|
||||
}
|
||||
@@ -160,7 +154,7 @@ private extension SAXParser {
|
||||
}
|
||||
}
|
||||
|
||||
private func startElement(_ context: UnsafeMutableRawPointer?, name: XMLPointer?, prefix: XMLPointer?, URI: XMLPointer?, nb_namespaces: CInt, namespaces: UnsafeMutablePointer<XMLPointer?>?, nb_attributes: CInt, nb_defaulted: CInt, attributes: UnsafeMutablePointer<XMLPointer?>?) {
|
||||
private func startElement(_ context: UnsafeMutableRawPointer?, name: XMLPointer?, prefix: XMLPointer?, URI: XMLPointer?, nb_namespaces: CInt, namespaces: UnsafePointer<XMLPointer?>?, nb_attributes: CInt, nb_defaulted: CInt, attributes: UnsafeMutablePointer<XMLPointer?>?) {
|
||||
|
||||
guard let context, let name else {
|
||||
return
|
||||
@@ -10,9 +10,9 @@ import libxml2
|
||||
|
||||
func SAXEqualStrings(_ s1: XMLPointer, _ s2: XMLPointer, length: Int? = nil) -> Bool {
|
||||
|
||||
if length == nil {
|
||||
return Bool(xmlStrEqual(s1, s2))
|
||||
if let length {
|
||||
return xmlStrncmp(s1, s2, Int32(length)) == 0
|
||||
}
|
||||
|
||||
return xmlStrncmp(s1, s2, length) == 0
|
||||
return xmlStrEqual(s1, s2) != 0
|
||||
}
|
||||
@@ -1,26 +0,0 @@
|
||||
//
|
||||
// RSOPMLParser.h
|
||||
// RSParser
|
||||
//
|
||||
// Created by Brent Simmons on 7/12/15.
|
||||
// Copyright © 2015 Ranchero Software, LLC. All rights reserved.
|
||||
//
|
||||
|
||||
@import Foundation;
|
||||
|
||||
|
||||
@class ParserData;
|
||||
@class RSOPMLDocument;
|
||||
|
||||
typedef void (^OPMLParserCallback)(RSOPMLDocument *opmlDocument, NSError *error);
|
||||
|
||||
// Parses on background thread; calls back on main thread.
|
||||
void RSParseOPML(ParserData *parserData, OPMLParserCallback callback);
|
||||
|
||||
|
||||
@interface RSOPMLParser: NSObject
|
||||
|
||||
+ (RSOPMLDocument *)parseOPMLWithParserData:(ParserData *)parserData error:(NSError **)error;
|
||||
|
||||
@end
|
||||
|
||||
@@ -1,310 +0,0 @@
|
||||
//
|
||||
// RSOPMLParser.m
|
||||
// RSParser
|
||||
//
|
||||
// Created by Brent Simmons on 7/12/15.
|
||||
// Copyright © 2015 Ranchero Software, LLC. All rights reserved.
|
||||
//
|
||||
|
||||
#import "RSOPMLParser.h"
|
||||
#import "RSSAXParser.h"
|
||||
#import "RSOPMLItem.h"
|
||||
#import "RSOPMLDocument.h"
|
||||
#import "RSOPMLAttributes.h"
|
||||
#import "RSOPMLError.h"
|
||||
#import "RSOPMLParser.h"
|
||||
#import "ParserData.h"
|
||||
|
||||
#import <libxml/xmlstring.h>
|
||||
|
||||
|
||||
|
||||
@interface RSOPMLParser () <RSSAXParserDelegate>
|
||||
|
||||
@property (nonatomic, readwrite) RSOPMLDocument *OPMLDocument;
|
||||
@property (nonatomic, readwrite) NSError *error;
|
||||
@property (nonatomic) NSMutableArray *itemStack;
|
||||
|
||||
@end
|
||||
|
||||
void RSParseOPML(ParserData *parserData, OPMLParserCallback callback) {
|
||||
|
||||
dispatch_async(dispatch_get_global_queue(DISPATCH_QUEUE_PRIORITY_DEFAULT, 0), ^{
|
||||
|
||||
@autoreleasepool {
|
||||
NSError *error = nil;
|
||||
RSOPMLDocument *opmlDocument = [RSOPMLParser parseOPMLWithParserData:parserData error:&error];
|
||||
|
||||
dispatch_async(dispatch_get_main_queue(), ^{
|
||||
callback(opmlDocument, error);
|
||||
});
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
@implementation RSOPMLParser
|
||||
|
||||
#pragma mark - Class Methods
|
||||
|
||||
+ (RSOPMLDocument *)parseOPMLWithParserData:(ParserData *)parserData error:(NSError **)error {
|
||||
|
||||
RSOPMLParser *parser = [[RSOPMLParser alloc] initWithParserData:parserData];
|
||||
|
||||
RSOPMLDocument *document = parser.OPMLDocument;
|
||||
document.url = parserData.url;
|
||||
if (parser.error && error) {
|
||||
*error = parser.error;
|
||||
return nil;
|
||||
}
|
||||
return document;
|
||||
}
|
||||
|
||||
#pragma mark - Init
|
||||
|
||||
- (instancetype)initWithParserData:(ParserData *)parserData {
|
||||
|
||||
self = [super init];
|
||||
if (!self) {
|
||||
return nil;
|
||||
}
|
||||
|
||||
[self parse:parserData];
|
||||
|
||||
return self;
|
||||
}
|
||||
|
||||
|
||||
#pragma mark - Private
|
||||
|
||||
- (void)parse:(ParserData *)parserData {
|
||||
|
||||
@autoreleasepool {
|
||||
|
||||
if (![self canParseData:parserData.data]) {
|
||||
|
||||
NSString *filename = nil;
|
||||
NSURL *url = [NSURL URLWithString:parserData.url];
|
||||
if (url && url.isFileURL) {
|
||||
filename = url.path.lastPathComponent;
|
||||
}
|
||||
if ([parserData.url hasPrefix:@"http"]) {
|
||||
filename = parserData.url;
|
||||
}
|
||||
if (!filename) {
|
||||
filename = parserData.url;
|
||||
}
|
||||
self.error = RSOPMLWrongFormatError(filename);
|
||||
return;
|
||||
}
|
||||
|
||||
RSSAXParser *parser = [[RSSAXParser alloc] initWithDelegate:self];
|
||||
|
||||
self.itemStack = [NSMutableArray new];
|
||||
self.OPMLDocument = [RSOPMLDocument new];
|
||||
[self pushItem:self.OPMLDocument];
|
||||
|
||||
[parser parseData:parserData.data];
|
||||
[parser finishParsing];
|
||||
}
|
||||
}
|
||||
|
||||
- (BOOL)canParseData:(NSData *)d {
|
||||
|
||||
// Check for <opml and <outline near the top.
|
||||
|
||||
@autoreleasepool {
|
||||
|
||||
NSString *s = [[NSString alloc] initWithBytesNoCopy:(void *)d.bytes length:d.length encoding:NSUTF8StringEncoding freeWhenDone:NO];
|
||||
if (!s) {
|
||||
NSDictionary *options = @{NSStringEncodingDetectionSuggestedEncodingsKey : @[@(NSUTF8StringEncoding)]};
|
||||
(void)[NSString stringEncodingForData:d encodingOptions:options convertedString:&s usedLossyConversion:nil];
|
||||
}
|
||||
if (!s) {
|
||||
return NO;
|
||||
}
|
||||
|
||||
static const NSInteger numberOfCharactersToSearch = 4096;
|
||||
NSRange rangeToSearch = NSMakeRange(0, numberOfCharactersToSearch);
|
||||
if (s.length < numberOfCharactersToSearch) {
|
||||
rangeToSearch.length = s.length;
|
||||
}
|
||||
|
||||
NSRange opmlRange = [s rangeOfString:@"<opml" options:NSCaseInsensitiveSearch range:rangeToSearch];
|
||||
if (opmlRange.length < 1) {
|
||||
return NO;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
return YES;
|
||||
}
|
||||
|
||||
- (void)pushItem:(RSOPMLItem *)item {
|
||||
|
||||
[self.itemStack addObject:item];
|
||||
}
|
||||
|
||||
|
||||
- (void)popItem {
|
||||
|
||||
NSAssert(self.itemStack.count > 0, nil);
|
||||
|
||||
/*If itemStack is empty, bad things are happening.
|
||||
But we still shouldn't crash in production.*/
|
||||
|
||||
if (self.itemStack.count > 0) {
|
||||
[self.itemStack removeLastObject];
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
- (RSOPMLItem *)currentItem {
|
||||
|
||||
return self.itemStack.lastObject;
|
||||
}
|
||||
|
||||
|
||||
#pragma mark - RSSAXParserDelegate
|
||||
|
||||
static const char *kOutline = "outline";
|
||||
static const char kOutlineLength = 8;
|
||||
|
||||
- (void)saxParser:(RSSAXParser *)SAXParser XMLStartElement:(const xmlChar *)localName prefix:(const xmlChar *)prefix uri:(const xmlChar *)uri numberOfNamespaces:(NSInteger)numberOfNamespaces namespaces:(const xmlChar **)namespaces numberOfAttributes:(NSInteger)numberOfAttributes numberDefaulted:(int)numberDefaulted attributes:(const xmlChar **)attributes {
|
||||
|
||||
if (RSSAXEqualTags(localName, kTitle, kTitleLength)) {
|
||||
[SAXParser beginStoringCharacters];
|
||||
return;
|
||||
}
|
||||
|
||||
if (!RSSAXEqualTags(localName, kOutline, kOutlineLength)) {
|
||||
return;
|
||||
}
|
||||
|
||||
RSOPMLItem *item = [RSOPMLItem new];
|
||||
item.attributes = [SAXParser attributesDictionary:attributes numberOfAttributes:numberOfAttributes];
|
||||
|
||||
[[self currentItem] addChild:item];
|
||||
[self pushItem:item];
|
||||
}
|
||||
|
||||
|
||||
- (void)saxParser:(RSSAXParser *)SAXParser XMLEndElement:(const xmlChar *)localName prefix:(const xmlChar *)prefix uri:(const xmlChar *)uri {
|
||||
|
||||
if (RSSAXEqualTags(localName, kTitle, kTitleLength)) {
|
||||
RSOPMLItem* item = [self currentItem];
|
||||
if ([item isKindOfClass:[RSOPMLDocument class]]) {
|
||||
((RSOPMLDocument *)item).title = SAXParser.currentStringWithTrimmedWhitespace;
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
if (RSSAXEqualTags(localName, kOutline, kOutlineLength)) {
|
||||
[self popItem];
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
static const char *kText = "text";
|
||||
static const NSInteger kTextLength = 5;
|
||||
|
||||
static const char *kTitle = "title";
|
||||
static const NSInteger kTitleLength = 6;
|
||||
|
||||
static const char *kDescription = "description";
|
||||
static const NSInteger kDescriptionLength = 12;
|
||||
|
||||
static const char *kType = "type";
|
||||
static const NSInteger kTypeLength = 5;
|
||||
|
||||
static const char *kVersion = "version";
|
||||
static const NSInteger kVersionLength = 8;
|
||||
|
||||
static const char *kHTMLURL = "htmlUrl";
|
||||
static const NSInteger kHTMLURLLength = 8;
|
||||
|
||||
static const char *kXMLURL = "xmlUrl";
|
||||
static const NSInteger kXMLURLLength = 7;
|
||||
|
||||
- (NSString *)saxParser:(RSSAXParser *)SAXParser internedStringForName:(const xmlChar *)name prefix:(const xmlChar *)prefix {
|
||||
|
||||
if (prefix) {
|
||||
return nil;
|
||||
}
|
||||
|
||||
size_t nameLength = strlen((const char *)name);
|
||||
|
||||
if (nameLength == kTextLength - 1) {
|
||||
if (RSSAXEqualTags(name, kText, kTextLength)) {
|
||||
return OPMLTextKey;
|
||||
}
|
||||
if (RSSAXEqualTags(name, kType, kTypeLength)) {
|
||||
return OPMLTypeKey;
|
||||
}
|
||||
}
|
||||
|
||||
else if (nameLength == kTitleLength - 1) {
|
||||
if (RSSAXEqualTags(name, kTitle, kTitleLength)) {
|
||||
return OPMLTitleKey;
|
||||
}
|
||||
}
|
||||
|
||||
else if (nameLength == kXMLURLLength - 1) {
|
||||
if (RSSAXEqualTags(name, kXMLURL, kXMLURLLength)) {
|
||||
return OPMLXMLURLKey;
|
||||
}
|
||||
}
|
||||
|
||||
else if (nameLength == kVersionLength - 1) {
|
||||
if (RSSAXEqualTags(name, kVersion, kVersionLength)) {
|
||||
return OPMLVersionKey;
|
||||
}
|
||||
if (RSSAXEqualTags(name, kHTMLURL, kHTMLURLLength)) {
|
||||
return OPMLHMTLURLKey;
|
||||
}
|
||||
}
|
||||
|
||||
else if (nameLength == kDescriptionLength - 1) {
|
||||
if (RSSAXEqualTags(name, kDescription, kDescriptionLength)) {
|
||||
return OPMLDescriptionKey;
|
||||
}
|
||||
}
|
||||
|
||||
return nil;
|
||||
}
|
||||
|
||||
|
||||
static const char *kRSSUppercase = "RSS";
|
||||
static const char *kRSSLowercase = "rss";
|
||||
static const NSUInteger kRSSLength = 3;
|
||||
static NSString *RSSUppercaseValue = @"RSS";
|
||||
static NSString *RSSLowercaseValue = @"rss";
|
||||
static NSString *emptyString = @"";
|
||||
|
||||
static BOOL equalBytes(const void *bytes1, const void *bytes2, NSUInteger length) {
|
||||
|
||||
return memcmp(bytes1, bytes2, length) == 0;
|
||||
}
|
||||
|
||||
- (NSString *)saxParser:(RSSAXParser *)SAXParser internedStringForValue:(const void *)bytes length:(NSUInteger)length {
|
||||
|
||||
|
||||
if (length < 1) {
|
||||
return emptyString;
|
||||
}
|
||||
|
||||
if (length == kRSSLength) {
|
||||
|
||||
if (equalBytes(bytes, kRSSUppercase, kRSSLength)) {
|
||||
return RSSUppercaseValue;
|
||||
}
|
||||
else if (equalBytes(bytes, kRSSLowercase, kRSSLength)) {
|
||||
return RSSLowercaseValue;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
return nil;
|
||||
}
|
||||
|
||||
|
||||
@end
|
||||
@@ -1,69 +0,0 @@
|
||||
//
|
||||
// RSSAXParser.h
|
||||
// RSParser
|
||||
//
|
||||
// Created by Brent Simmons on 3/25/15.
|
||||
// Copyright (c) 2015 Ranchero Software, LLC. All rights reserved.
|
||||
//
|
||||
|
||||
@import Foundation;
|
||||
|
||||
/*Thread-safe, not re-entrant.
|
||||
|
||||
Calls to the delegate will happen on the same thread where the parser runs.
|
||||
|
||||
This is a low-level streaming XML parser, a thin wrapper for libxml2's SAX parser. It doesn't do much Foundation-ifying quite on purpose -- because the goal is performance and low memory use.
|
||||
|
||||
This class is not meant to be sub-classed. Use the delegate methods.
|
||||
*/
|
||||
|
||||
|
||||
@class RSSAXParser;
|
||||
|
||||
@protocol RSSAXParserDelegate <NSObject>
|
||||
|
||||
@optional
|
||||
|
||||
- (void)saxParser:(RSSAXParser *)SAXParser XMLStartElement:(const unsigned char *)localName prefix:(const unsigned char *)prefix uri:(const unsigned char *)uri numberOfNamespaces:(NSInteger)numberOfNamespaces namespaces:(const unsigned char **)namespaces numberOfAttributes:(NSInteger)numberOfAttributes numberDefaulted:(int)numberDefaulted attributes:(const unsigned char **)attributes;
|
||||
|
||||
- (void)saxParser:(RSSAXParser *)SAXParser XMLEndElement:(const unsigned char *)localName prefix:(const unsigned char *)prefix uri:(const unsigned char *)uri;
|
||||
|
||||
// Length is guaranteed to be greater than 0.
|
||||
- (void)saxParser:(RSSAXParser *)SAXParser XMLCharactersFound:(const unsigned char *)characters length:(NSUInteger)length;
|
||||
|
||||
- (void)saxParserDidReachEndOfDocument:(RSSAXParser *)SAXParser; /*If canceled, may not get called (but might).*/
|
||||
|
||||
- (NSString *)saxParser:(RSSAXParser *)SAXParser internedStringForName:(const unsigned char *)name prefix:(const unsigned char *)prefix; /*Okay to return nil. Prefix may be nil.*/
|
||||
|
||||
- (NSString *)saxParser:(RSSAXParser *)SAXParser internedStringForValue:(const void *)bytes length:(NSUInteger)length;
|
||||
|
||||
@end
|
||||
|
||||
|
||||
void RSSAXInitLibXMLParser(void); // Needed by RSSAXHTMLParser.
|
||||
|
||||
/*For use by delegate.*/
|
||||
|
||||
BOOL RSSAXEqualTags(const unsigned char *localName, const char *tag, NSInteger tagLength);
|
||||
|
||||
|
||||
@interface RSSAXParser : NSObject
|
||||
|
||||
- (instancetype)initWithDelegate:(id<RSSAXParserDelegate>)delegate;
|
||||
|
||||
- (void)parseData:(NSData *)data;
|
||||
- (void)parseBytes:(const void *)bytes numberOfBytes:(NSUInteger)numberOfBytes;
|
||||
- (void)finishParsing;
|
||||
- (void)cancel;
|
||||
|
||||
@property (nonatomic, strong, readonly) NSData *currentCharacters; /*nil if not storing characters. UTF-8 encoded.*/
|
||||
@property (nonatomic, strong, readonly) NSString *currentString; /*Convenience to get string version of currentCharacters.*/
|
||||
@property (nonatomic, strong, readonly) NSString *currentStringWithTrimmedWhitespace;
|
||||
|
||||
- (void)beginStoringCharacters; /*Delegate can call from XMLStartElement. Characters will be available in XMLEndElement as currentCharacters property. Storing characters is stopped after each XMLEndElement.*/
|
||||
|
||||
/*Delegate can call from within XMLStartElement. Returns nil if numberOfAttributes < 1.*/
|
||||
|
||||
- (NSDictionary *)attributesDictionary:(const unsigned char **)attributes numberOfAttributes:(NSInteger)numberOfAttributes;
|
||||
|
||||
@end
|
||||
@@ -1,353 +0,0 @@
|
||||
//
|
||||
// RSSAXParser.m
|
||||
// RSParser
|
||||
//
|
||||
// Created by Brent Simmons on 3/25/15.
|
||||
// Copyright (c) 2015 Ranchero Software, LLC. All rights reserved.
|
||||
//
|
||||
|
||||
#import "RSSAXParser.h"
|
||||
#import "RSParserInternal.h"
|
||||
|
||||
#import <libxml/parser.h>
|
||||
#import <libxml/tree.h>
|
||||
#import <libxml/xmlstring.h>
|
||||
|
||||
|
||||
|
||||
@interface RSSAXParser ()
|
||||
|
||||
@property (nonatomic, weak) id<RSSAXParserDelegate> delegate;
|
||||
@property (nonatomic, assign) xmlParserCtxtPtr context;
|
||||
@property (nonatomic, assign) BOOL storingCharacters;
|
||||
@property (nonatomic) NSMutableData *characters;
|
||||
@property (nonatomic) BOOL delegateRespondsToInternedStringMethod;
|
||||
@property (nonatomic) BOOL delegateRespondsToInternedStringForValueMethod;
|
||||
@property (nonatomic) BOOL delegateRespondsToStartElementMethod;
|
||||
@property (nonatomic) BOOL delegateRespondsToEndElementMethod;
|
||||
@property (nonatomic) BOOL delegateRespondsToCharactersFoundMethod;
|
||||
@property (nonatomic) BOOL delegateRespondsToEndOfDocumentMethod;
|
||||
|
||||
@end
|
||||
|
||||
|
||||
@implementation RSSAXParser
|
||||
|
||||
+ (void)initialize {
|
||||
|
||||
RSSAXInitLibXMLParser();
|
||||
}
|
||||
|
||||
|
||||
#pragma mark - Init
|
||||
|
||||
- (instancetype)initWithDelegate:(id<RSSAXParserDelegate>)delegate {
|
||||
|
||||
self = [super init];
|
||||
if (self == nil)
|
||||
return nil;
|
||||
|
||||
_delegate = delegate;
|
||||
|
||||
if ([_delegate respondsToSelector:@selector(saxParser:internedStringForName:prefix:)]) {
|
||||
_delegateRespondsToInternedStringMethod = YES;
|
||||
}
|
||||
if ([_delegate respondsToSelector:@selector(saxParser:internedStringForValue:length:)]) {
|
||||
_delegateRespondsToInternedStringForValueMethod = YES;
|
||||
}
|
||||
if ([_delegate respondsToSelector:@selector(saxParser:XMLStartElement:prefix:uri:numberOfNamespaces:namespaces:numberOfAttributes:numberDefaulted:attributes:)]) {
|
||||
_delegateRespondsToStartElementMethod = YES;
|
||||
}
|
||||
if ([_delegate respondsToSelector:@selector(saxParser:XMLEndElement:prefix:uri:)]) {
|
||||
_delegateRespondsToEndElementMethod = YES;
|
||||
}
|
||||
if ([_delegate respondsToSelector:@selector(saxParser:XMLCharactersFound:length:)]) {
|
||||
_delegateRespondsToCharactersFoundMethod = YES;
|
||||
}
|
||||
if ([_delegate respondsToSelector:@selector(saxParserDidReachEndOfDocument:)]) {
|
||||
_delegateRespondsToEndOfDocumentMethod = YES;
|
||||
}
|
||||
|
||||
return self;
|
||||
}
|
||||
|
||||
|
||||
#pragma mark - Dealloc
|
||||
|
||||
- (void)dealloc {
|
||||
if (_context != nil) {
|
||||
xmlFreeParserCtxt(_context);
|
||||
_context = nil;
|
||||
}
|
||||
_delegate = nil;
|
||||
}
|
||||
|
||||
|
||||
#pragma mark - API
|
||||
|
||||
static xmlSAXHandler saxHandlerStruct;
|
||||
|
||||
- (void)parseData:(NSData *)data {
|
||||
|
||||
[self parseBytes:data.bytes numberOfBytes:data.length];
|
||||
}
|
||||
|
||||
|
||||
- (void)parseBytes:(const void *)bytes numberOfBytes:(NSUInteger)numberOfBytes {
|
||||
|
||||
if (self.context == nil) {
|
||||
|
||||
self.context = xmlCreatePushParserCtxt(&saxHandlerStruct, (__bridge void *)self, nil, 0, nil);
|
||||
xmlCtxtUseOptions(self.context, XML_PARSE_RECOVER | XML_PARSE_NOENT);
|
||||
}
|
||||
|
||||
@autoreleasepool {
|
||||
xmlParseChunk(self.context, (const char *)bytes, (int)numberOfBytes, 0);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
- (void)finishParsing {
|
||||
|
||||
NSAssert(self.context != nil, nil);
|
||||
if (self.context == nil)
|
||||
return;
|
||||
|
||||
@autoreleasepool {
|
||||
xmlParseChunk(self.context, nil, 0, 1);
|
||||
xmlFreeParserCtxt(self.context);
|
||||
self.context = nil;
|
||||
self.characters = nil;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
- (void)cancel {
|
||||
|
||||
@autoreleasepool {
|
||||
xmlStopParser(self.context);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
- (void)beginStoringCharacters {
|
||||
self.storingCharacters = YES;
|
||||
self.characters = [NSMutableData new];
|
||||
}
|
||||
|
||||
|
||||
- (void)endStoringCharacters {
|
||||
self.storingCharacters = NO;
|
||||
self.characters = nil;
|
||||
}
|
||||
|
||||
|
||||
- (NSData *)currentCharacters {
|
||||
|
||||
if (!self.storingCharacters) {
|
||||
return nil;
|
||||
}
|
||||
|
||||
return self.characters;
|
||||
}
|
||||
|
||||
|
||||
- (NSString *)currentString {
|
||||
|
||||
NSData *d = self.currentCharacters;
|
||||
if (RSParserObjectIsEmpty(d)) {
|
||||
return nil;
|
||||
}
|
||||
|
||||
return [[NSString alloc] initWithData:d encoding:NSUTF8StringEncoding];
|
||||
}
|
||||
|
||||
|
||||
- (NSString *)currentStringWithTrimmedWhitespace {
|
||||
|
||||
return [self.currentString stringByTrimmingCharactersInSet:[NSCharacterSet whitespaceAndNewlineCharacterSet]];
|
||||
}
|
||||
|
||||
|
||||
#pragma mark - Attributes Dictionary
|
||||
|
||||
- (NSDictionary *)attributesDictionary:(const xmlChar **)attributes numberOfAttributes:(NSInteger)numberOfAttributes {
|
||||
|
||||
if (numberOfAttributes < 1 || !attributes) {
|
||||
return nil;
|
||||
}
|
||||
|
||||
NSMutableDictionary *d = [NSMutableDictionary new];
|
||||
|
||||
@autoreleasepool {
|
||||
NSInteger i = 0, j = 0;
|
||||
for (i = 0, j = 0; i < numberOfAttributes; i++, j+=5) {
|
||||
|
||||
NSUInteger lenValue = (NSUInteger)(attributes[j + 4] - attributes[j + 3]);
|
||||
NSString *value = nil;
|
||||
|
||||
if (self.delegateRespondsToInternedStringForValueMethod) {
|
||||
value = [self.delegate saxParser:self internedStringForValue:(const void *)attributes[j + 3] length:lenValue];
|
||||
}
|
||||
if (!value) {
|
||||
value = [[NSString alloc] initWithBytes:(const void *)attributes[j + 3] length:lenValue encoding:NSUTF8StringEncoding];
|
||||
}
|
||||
|
||||
NSString *attributeName = nil;
|
||||
|
||||
if (self.delegateRespondsToInternedStringMethod) {
|
||||
attributeName = [self.delegate saxParser:self internedStringForName:(const xmlChar *)attributes[j] prefix:(const xmlChar *)attributes[j + 1]];
|
||||
}
|
||||
|
||||
if (!attributeName) {
|
||||
attributeName = [NSString stringWithUTF8String:(const char *)attributes[j]];
|
||||
if (attributes[j + 1]) {
|
||||
NSString *attributePrefix = [NSString stringWithUTF8String:(const char *)attributes[j + 1]];
|
||||
attributeName = [NSString stringWithFormat:@"%@:%@", attributePrefix, attributeName];
|
||||
}
|
||||
}
|
||||
|
||||
if (value && attributeName) {
|
||||
d[attributeName] = value;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return d;
|
||||
}
|
||||
|
||||
|
||||
#pragma mark - Equal Tags
|
||||
|
||||
BOOL RSSAXEqualTags(const xmlChar *localName, const char *tag, NSInteger tagLength) {
|
||||
|
||||
if (!localName) {
|
||||
return NO;
|
||||
}
|
||||
return !strncmp((const char *)localName, tag, (size_t)tagLength);
|
||||
}
|
||||
|
||||
|
||||
#pragma mark - Callbacks
|
||||
|
||||
- (void)xmlEndDocument {
|
||||
|
||||
@autoreleasepool {
|
||||
if (self.delegateRespondsToEndOfDocumentMethod) {
|
||||
[self.delegate saxParserDidReachEndOfDocument:self];
|
||||
}
|
||||
|
||||
[self endStoringCharacters];
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
- (void)xmlCharactersFound:(const xmlChar *)ch length:(NSUInteger)length {
|
||||
|
||||
if (length < 1) {
|
||||
return;
|
||||
}
|
||||
|
||||
@autoreleasepool {
|
||||
if (self.storingCharacters) {
|
||||
[self.characters appendBytes:(const void *)ch length:length];
|
||||
}
|
||||
|
||||
if (self.delegateRespondsToCharactersFoundMethod) {
|
||||
[self.delegate saxParser:self XMLCharactersFound:ch length:length];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
- (void)xmlStartElement:(const xmlChar *)localName prefix:(const xmlChar *)prefix uri:(const xmlChar *)uri numberOfNamespaces:(int)numberOfNamespaces namespaces:(const xmlChar **)namespaces numberOfAttributes:(int)numberOfAttributes numberDefaulted:(int)numberDefaulted attributes:(const xmlChar **)attributes {
|
||||
|
||||
@autoreleasepool {
|
||||
if (self.delegateRespondsToStartElementMethod) {
|
||||
|
||||
[self.delegate saxParser:self XMLStartElement:localName prefix:prefix uri:uri numberOfNamespaces:numberOfNamespaces namespaces:namespaces numberOfAttributes:numberOfAttributes numberDefaulted:numberDefaulted attributes:attributes];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
- (void)xmlEndElement:(const xmlChar *)localName prefix:(const xmlChar *)prefix uri:(const xmlChar *)uri {
|
||||
|
||||
@autoreleasepool {
|
||||
if (self.delegateRespondsToEndElementMethod) {
|
||||
[self.delegate saxParser:self XMLEndElement:localName prefix:prefix uri:uri];
|
||||
}
|
||||
|
||||
[self endStoringCharacters];
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@end
|
||||
|
||||
|
||||
static void startElementSAX(void *context, const xmlChar *localname, const xmlChar *prefix, const xmlChar *URI, int nb_namespaces, const xmlChar **namespaces, int nb_attributes, int nb_defaulted, const xmlChar **attributes) {
|
||||
|
||||
[(__bridge RSSAXParser *)context xmlStartElement:localname prefix:prefix uri:URI numberOfNamespaces:nb_namespaces namespaces:namespaces numberOfAttributes:nb_attributes numberDefaulted:nb_defaulted attributes:attributes];
|
||||
}
|
||||
|
||||
|
||||
static void endElementSAX(void *context, const xmlChar *localname, const xmlChar *prefix, const xmlChar *URI) {
|
||||
[(__bridge RSSAXParser *)context xmlEndElement:localname prefix:prefix uri:URI];
|
||||
}
|
||||
|
||||
|
||||
static void charactersFoundSAX(void *context, const xmlChar *ch, int len) {
|
||||
[(__bridge RSSAXParser *)context xmlCharactersFound:ch length:(NSUInteger)len];
|
||||
}
|
||||
|
||||
|
||||
static void endDocumentSAX(void *context) {
|
||||
[(__bridge RSSAXParser *)context xmlEndDocument];
|
||||
}
|
||||
|
||||
|
||||
static xmlSAXHandler saxHandlerStruct = {
|
||||
nil, /* internalSubset */
|
||||
nil, /* isStandalone */
|
||||
nil, /* hasInternalSubset */
|
||||
nil, /* hasExternalSubset */
|
||||
nil, /* resolveEntity */
|
||||
nil, /* getEntity */
|
||||
nil, /* entityDecl */
|
||||
nil, /* notationDecl */
|
||||
nil, /* attributeDecl */
|
||||
nil, /* elementDecl */
|
||||
nil, /* unparsedEntityDecl */
|
||||
nil, /* setDocumentLocator */
|
||||
nil, /* startDocument */
|
||||
endDocumentSAX, /* endDocument */
|
||||
nil, /* startElement*/
|
||||
nil, /* endElement */
|
||||
nil, /* reference */
|
||||
charactersFoundSAX, /* characters */
|
||||
nil, /* ignorableWhitespace */
|
||||
nil, /* processingInstruction */
|
||||
nil, /* comment */
|
||||
nil, /* warning */
|
||||
nil, /* error */
|
||||
nil, /* fatalError //: unused error() get all the errors */
|
||||
nil, /* getParameterEntity */
|
||||
nil, /* cdataBlock */
|
||||
nil, /* externalSubset */
|
||||
XML_SAX2_MAGIC,
|
||||
nil,
|
||||
startElementSAX, /* startElementNs */
|
||||
endElementSAX, /* endElementNs */
|
||||
nil /* serror */
|
||||
};
|
||||
|
||||
|
||||
void RSSAXInitLibXMLParser(void) {
|
||||
|
||||
static dispatch_once_t onceToken;
|
||||
dispatch_once(&onceToken, ^{
|
||||
xmlInitParser();
|
||||
});
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user