mirror of
https://github.com/Ranchero-Software/NetNewsWire
synced 2025-08-12 06:26:36 +00:00
Create separate SAX target.
This commit is contained in:
19
Modules/Parser/Sources/SAX/ParserData.swift
Normal file
19
Modules/Parser/Sources/SAX/ParserData.swift
Normal file
@@ -0,0 +1,19 @@
|
||||
//
|
||||
// ParserData.swift
|
||||
//
|
||||
//
|
||||
// Created by Brent Simmons on 8/18/24.
|
||||
//
|
||||
|
||||
import Foundation
|
||||
|
||||
/// The raw bytes of a document to be parsed, paired with the URL it is associated with.
///
/// The stored properties are `public` so that code outside this module —
/// which can already create values through the public initializer — can also read them.
public struct ParserData: Sendable {

	/// The URL string associated with `data`.
	public let url: String

	/// The raw bytes to parse.
	public let data: Data

	/// Creates a value from a URL string and the bytes to parse.
	///
	/// - Parameters:
	///   - url: The URL string associated with `data`.
	///   - data: The raw bytes to parse.
	public init(url: String, data: Data) {
		self.url = url
		self.data = data
	}
}
|
||||
54
Modules/Parser/Sources/SAX/SAXHTMLParser.swift
Normal file
54
Modules/Parser/Sources/SAX/SAXHTMLParser.swift
Normal file
@@ -0,0 +1,54 @@
|
||||
////
|
||||
//// SAXHTMLParser.swift
|
||||
////
|
||||
////
|
||||
//// Created by Brent Simmons on 8/26/24.
|
||||
////
|
||||
//
|
||||
//import Foundation
|
||||
//import libxml2
|
||||
//
|
||||
//protocol SAXHTMLParserDelegate: AnyObject {
|
||||
//
|
||||
// func saxParser(_: SAXHTMLParser, XMLStartElement localName: XMLPointer, attributes: UnsafePointer<XMLPointer?>?)
|
||||
//
|
||||
// func saxParser(_: SAXHTMLParser, XMLEndElement localName: XMLPointer?)
|
||||
//
|
||||
// // Length is guaranteed to be greater than 0.
|
||||
// func saxParser(_: SAXHTMLParser, XMLCharactersFound characters: XMLPointer?, length: Int)
|
||||
//}
|
||||
//
|
||||
//final class SAXHTMLParser {
|
||||
//
|
||||
// fileprivate let delegate: SAXHTMLParserDelegate
|
||||
// private var data: Data
|
||||
//
|
||||
// init(delegate: SAXHTMLParserDelegate, data: Data) {
|
||||
//
|
||||
// self.delegate = delegate
|
||||
// self.data = data
|
||||
// }
|
||||
//
|
||||
// func parse() {
|
||||
//
|
||||
// guard !data.isEmpty else {
|
||||
// return
|
||||
// }
|
||||
//
|
||||
// data.withUnsafeBytes { bufferPointer in
|
||||
//
|
||||
// guard let bytes = bufferPointer.bindMemory(to: xmlChar.self).baseAddress else {
|
||||
// return
|
||||
// }
|
||||
//
|
||||
// let characterEncoding = xmlDetectCharEncoding(bytes, Int32(data.count))
|
||||
// let context = htmlCreatePushParserCtxt(&saxHandlerStruct, Unmanaged.passUnretained(self).toOpaque(), nil, 0, nil, characterEncoding)
|
||||
// htmlCtxtUseOptions(context, Int32(XML_PARSE_RECOVER | XML_PARSE_NONET | HTML_PARSE_COMPACT))
|
||||
//
|
||||
// htmlParseChunk(context, bytes, Int32(data.count), 0)
|
||||
//
|
||||
// htmlParseChunk(context, nil, 0, 1)
|
||||
// htmlFreeParserCtxt(context)
|
||||
// }
|
||||
// }
|
||||
//}
|
||||
202
Modules/Parser/Sources/SAX/SAXParser.swift
Normal file
202
Modules/Parser/Sources/SAX/SAXParser.swift
Normal file
@@ -0,0 +1,202 @@
|
||||
//
|
||||
//  SAXParser.swift
|
||||
//
|
||||
//
|
||||
// Created by Brent Simmons on 8/12/24.
|
||||
//
|
||||
|
||||
import Foundation
|
||||
import libxml2
|
||||
|
||||
/// A pointer to libxml2 `xmlChar` bytes.
typealias XMLPointer = UnsafePointer<xmlChar>

/// Receives element and character events from a `SAXParser` while it parses.
protocol SAXParserDelegate {

	/// Called when the parser reports the start of an element.
	///
	/// `attributes` can be converted to a dictionary with
	/// `SAXParser.attributesDictionary(_:attributeCount:)`.
	func saxParser(
		_: SAXParser,
		xmlStartElement: XMLPointer,
		prefix: XMLPointer?,
		uri: XMLPointer?,
		namespaceCount: Int,
		namespaces: UnsafePointer<XMLPointer?>?,
		attributeCount: Int,
		attributesDefaultedCount: Int,
		attributes: UnsafePointer<XMLPointer?>?
	)

	/// Called when the parser reports the end of an element.
	func saxParser(
		_: SAXParser,
		xmlEndElement: XMLPointer,
		prefix: XMLPointer?,
		uri: XMLPointer?
	)

	/// Called when the parser reports character data; `count` is the number of bytes at the pointer.
	func saxParser(
		_: SAXParser,
		xmlCharactersFound: XMLPointer,
		count: Int
	)
}
|
||||
|
||||
/// A SAX-style XML parser built on libxml2's push-parser API.
///
/// Create a parser with a delegate and the complete document bytes, then call `parse()`.
/// The delegate receives start-element, end-element, and characters callbacks while
/// `parse()` is running.
final class SAXParser {

	fileprivate let delegate: SAXParserDelegate

	/// The bytes collected since `beginStoringCharacters()` was called (UTF-8 encoded),
	/// or `nil` when the parser is not currently storing characters.
	var currentCharacters: Data? {
		storingCharacters ? characters : nil
	}

	// Conveniences to get string versions of currentCharacters

	/// `currentCharacters` decoded as UTF-8.
	///
	/// `nil` when not storing, when nothing has been collected, or when the bytes
	/// are not valid UTF-8.
	var currentString: String? {
		guard let storedBytes = currentCharacters, !storedBytes.isEmpty else {
			return nil
		}
		return String(data: storedBytes, encoding: .utf8)
	}

	/// `currentString` with leading and trailing whitespace and newlines removed.
	var currentStringWithTrimmedWhitespace: String? {
		currentString?.trimmingCharacters(in: .whitespacesAndNewlines)
	}

	private let data: Data
	private var storingCharacters = false
	private var characters = Data()

	/// Creates a parser.
	///
	/// - Parameters:
	///   - delegate: Receives the parse events.
	///   - data: The complete document to parse.
	init(delegate: SAXParserDelegate, data: Data) {
		self.delegate = delegate
		self.data = data
	}

	/// Parses the data, delivering events to the delegate before returning.
	///
	/// Does nothing when the data is empty or when libxml2 cannot create a parser context.
	func parse() {

		guard !data.isEmpty else {
			return
		}

		// The parser is passed unretained as the callback context. That is safe here because
		// the context is created and freed inside this call, while `self` is alive.
		guard let context = xmlCreatePushParserCtxt(&saxHandlerStruct, Unmanaged.passUnretained(self).toOpaque(), nil, 0, nil) else {
			return
		}
		// Free the context on every exit path.
		defer {
			xmlFreeParserCtxt(context)
		}

		// NOTE(review): XML_PARSE_NOENT turns on entity substitution — confirm that is
		// intended for documents from untrusted sources.
		xmlCtxtUseOptions(context, Int32(XML_PARSE_RECOVER.rawValue | XML_PARSE_NOENT.rawValue))

		data.withUnsafeBytes { bufferPointer in
			if let bytes = bufferPointer.bindMemory(to: xmlChar.self).baseAddress {
				xmlParseChunk(context, bytes, Int32(data.count), 0)
			}
		}

		// A final call with the terminate flag set tells libxml2 the document is complete.
		xmlParseChunk(context, nil, 0, 1)
	}

	/// Delegate can call from xmlStartElement. Characters will be available in xmlEndElement
	/// as the `currentCharacters` property. Storing characters is stopped after each xmlEndElement.
	func beginStoringCharacters() {
		storingCharacters = true
		characters.count = 0
	}

	/// Stops storing characters and discards anything collected so far.
	func endStoringCharacters() {
		storingCharacters = false
		characters.count = 0
	}

	/// Converts the attributes array handed to the start-element callback into a dictionary.
	///
	/// The array is read as `attributeCount` groups of five pointers. Within a group this
	/// method uses slot 0 (name), slot 1 (optional prefix), and slots 3 and 4 (start and end
	/// of the value bytes). Prefixed attributes are keyed as `"prefix:name"`.
	///
	/// Attributes with a missing name or value pointer, an inverted value range, or a value
	/// that is not valid UTF-8 are skipped.
	///
	/// - Parameters:
	///   - attributes: The attributes array from the start-element callback.
	///   - attributeCount: The number of attributes in the array.
	/// - Returns: The attributes keyed by name, or `nil` when `attributeCount` is not
	///   positive or `attributes` is `nil`.
	func attributesDictionary(_ attributes: UnsafePointer<XMLPointer?>?, attributeCount: Int) -> [String: String]? {

		guard attributeCount > 0, let attributes else {
			return nil
		}

		var dictionary = [String: String]()

		let fieldCount = 5

		// Iterating by index guarantees progress: skipping a malformed entry with
		// `continue` can no longer stall the loop.
		for attributeIndex in 0..<attributeCount {

			let base = attributeIndex * fieldCount

			guard let attribute = attributes[base],
				  let valueStart = attributes[base + 3],
				  let valueEnd = attributes[base + 4],
				  valueEnd >= valueStart else {
				continue
			}

			var attributeName = String(cString: attribute)
			if let prefix = attributes[base + 1] {
				attributeName = "\(String(cString: prefix)):\(attributeName)"
			}

			let valueBytes = UnsafeRawBufferPointer(start: valueStart, count: valueEnd - valueStart)
			if let value = String(bytes: valueBytes, encoding: .utf8) {
				dictionary[attributeName] = value
			}
		}

		return dictionary
	}
}
|
||||
|
||||
// Entry points used by the file-level libxml2 callbacks to hand events to the parser.
private extension SAXParser {

	/// Appends the bytes to the stored characters when storing is on, then forwards them to the delegate.
	func charactersFound(_ bytes: XMLPointer, count byteCount: Int) {

		if storingCharacters {
			characters.append(bytes, count: byteCount)
		}

		delegate.saxParser(self, xmlCharactersFound: bytes, count: byteCount)
	}

	/// Forwards a start-element event to the delegate unchanged.
	func startElement(
		_ elementName: XMLPointer,
		prefix: XMLPointer?,
		uri: XMLPointer?,
		namespaceCount: Int,
		namespaces: UnsafePointer<XMLPointer?>?,
		attributeCount: Int,
		attributesDefaultedCount: Int,
		attributes: UnsafePointer<XMLPointer?>?
	) {
		delegate.saxParser(
			self,
			xmlStartElement: elementName,
			prefix: prefix,
			uri: uri,
			namespaceCount: namespaceCount,
			namespaces: namespaces,
			attributeCount: attributeCount,
			attributesDefaultedCount: attributesDefaultedCount,
			attributes: attributes
		)
	}

	/// Forwards an end-element event to the delegate, then stops storing characters.
	///
	/// The delegate is called first so it can still read the stored characters.
	func endElement(_ elementName: XMLPointer, prefix: XMLPointer?, uri: XMLPointer?) {

		delegate.saxParser(self, xmlEndElement: elementName, prefix: prefix, uri: uri)
		endStoringCharacters()
	}
}
|
||||
|
||||
// MARK: - libxml2 SAX2 callbacks

// These file-level functions are installed in `saxHandlerStruct`. Each one receives the
// context pointer given to libxml2 when the parser context was created, recovers the
// `SAXParser` from it, and forwards the event.

/// Start-element callback. Its parameter types match the imported SAX2 start-element
/// function pointer so it can be assigned to `startElementNs`.
private func startElement(_ context: UnsafeMutableRawPointer?, name: XMLPointer?, prefix: XMLPointer?, URI: XMLPointer?, nb_namespaces: CInt, namespaces: UnsafeMutablePointer<XMLPointer?>?, nb_attributes: CInt, nb_defaulted: CInt, attributes: UnsafeMutablePointer<XMLPointer?>?) {

	guard let context, let name else {
		return
	}

	let saxParser = parser(from: context)
	saxParser.startElement(name, prefix: prefix, uri: URI, namespaceCount: Int(nb_namespaces), namespaces: namespaces, attributeCount: Int(nb_attributes), attributesDefaultedCount: Int(nb_defaulted), attributes: attributes)
}

/// End-element callback, assigned to `endElementNs`.
private func endElement(_ context: UnsafeMutableRawPointer?, name: XMLPointer?, prefix: XMLPointer?, URI: XMLPointer?) {

	guard let context, let name else {
		return
	}

	let saxParser = parser(from: context)
	saxParser.endElement(name, prefix: prefix, uri: URI)
}

/// Characters callback. Events with a nil pointer or a non-positive length are dropped,
/// so the parser only ever sees at least one byte.
private func charactersFound(_ context: UnsafeMutableRawPointer?, ch: XMLPointer?, len: CInt) {

	guard let context, let ch, len > 0 else {
		return
	}

	let saxParser = parser(from: context)
	saxParser.charactersFound(ch, count: Int(len))
}

/// Recovers the `SAXParser` from the opaque context pointer without changing its retain count.
private func parser(from context: UnsafeMutableRawPointer) -> SAXParser {

	Unmanaged<SAXParser>.fromOpaque(context).takeUnretainedValue()
}

/// The handler table passed to libxml2 when creating a parser context.
///
/// The element callbacks above use the namespace-aware SAX2 signatures, so they go in
/// `startElementNs` / `endElementNs` (the `startElement` / `endElement` slots are the SAX1
/// callbacks, which have different signatures). libxml2 only calls the SAX2 callbacks when
/// `initialized` is `XML_SAX2_MAGIC`.
nonisolated(unsafe) private var saxHandlerStruct: xmlSAXHandler = {

	var handler = xmlSAXHandler()

	handler.characters = charactersFound
	handler.startElementNs = startElement
	handler.endElementNs = endElement
	handler.initialized = XML_SAX2_MAGIC

	return handler
}()
|
||||
|
||||
18
Modules/Parser/Sources/SAX/SAXUtilities.swift
Normal file
18
Modules/Parser/Sources/SAX/SAXUtilities.swift
Normal file
@@ -0,0 +1,18 @@
|
||||
//
|
||||
//  SAXUtilities.swift
|
||||
//
|
||||
//
|
||||
// Created by Brent Simmons on 8/26/24.
|
||||
//
|
||||
|
||||
import Foundation
|
||||
import libxml2
|
||||
|
||||
/// Returns whether two libxml2 strings compare equal.
///
/// - Parameters:
///   - s1: The first string.
///   - s2: The second string.
///   - length: When given, the comparison is made with `xmlStrncmp` using this length;
///     when `nil`, the strings are compared with `xmlStrEqual`.
/// - Returns: `true` when the comparison reports the strings as equal.
func SAXEqualStrings(_ s1: XMLPointer, _ s2: XMLPointer, length: Int? = nil) -> Bool {

	guard let length else {
		return xmlStrEqual(s1, s2) != 0
	}

	return xmlStrncmp(s1, s2, Int32(length)) == 0
}
|
||||
Reference in New Issue
Block a user