From b09250f7dc072daa5072d0099705a7ae102c5639 Mon Sep 17 00:00:00 2001
From: Brent Simmons
Date: Sat, 14 Sep 2024 10:59:55 -0700
Subject: [PATCH] Continue progress on AtomParser.

---
Review note (ignored by git am): in saxParser(_:xmlCharactersFound:count:)
the XHTML re-escaping now encodes "&" before "<" and ">" and emits
well-formed entities, so entity ampersands are not escaped a second time.

 .../FeedParser/Feeds/XML/AtomParser.swift     | 23 +++++++++++++------
 Modules/Parser/Sources/SAX/SAXUtilities.swift |  8 +++++++
 2 files changed, 24 insertions(+), 7 deletions(-)

diff --git a/Modules/Parser/Sources/FeedParser/Feeds/XML/AtomParser.swift b/Modules/Parser/Sources/FeedParser/Feeds/XML/AtomParser.swift
index 368a7588a..edc154c8d 100644
--- a/Modules/Parser/Sources/FeedParser/Feeds/XML/AtomParser.swift
+++ b/Modules/Parser/Sources/FeedParser/Feeds/XML/AtomParser.swift
@@ -81,7 +81,7 @@ private extension AtomParser {
 	func addFeedLanguage() {
 
 	}
-	
+
 	func addArticle() {
 		let article = RSSArticle(feedURL)
 		articles.append(article)
@@ -94,11 +94,7 @@ private extension AtomParser {
 			return
 		}
 
-		let name: String? = {
-			let data = Data(bytes: localName, count: strlen(localName))
-			return String(data: data, encoding: .utf8)
-		}()
-		guard let name else {
+		guard let name = String(xmlPointer: localName) else {
 			assertionFailure("Unexpected failure converting XMLPointer to String in addXHTMLTag.")
 			return
 		}
@@ -190,7 +186,20 @@ extension AtomParser: SAXParserDelegate {
 
 	public func saxParser(_ saxParser: SAXParser, xmlCharactersFound: XMLPointer, count: Int) {
 
-		// Required method.
+		guard parsingXHTML else {
+			return
+		}
+		guard var s = String(xmlPointer: xmlCharactersFound, count: count) else {
+			return
+		}
+
+		// libxml decodes all entities; we need to re-encode certain characters
+		// (&, <, and >) in XHTML text content. Do & first to avoid double-escaping.
+		s = s.replacingOccurrences(of: "&", with: "&amp;")
+		s = s.replacingOccurrences(of: "<", with: "&lt;")
+		s = s.replacingOccurrences(of: ">", with: "&gt;")
+
+		xhtmlString = s
 	}
 }
 
diff --git a/Modules/Parser/Sources/SAX/SAXUtilities.swift b/Modules/Parser/Sources/SAX/SAXUtilities.swift
index 10ba86f0a..3bb680e89 100644
--- a/Modules/Parser/Sources/SAX/SAXUtilities.swift
+++ b/Modules/Parser/Sources/SAX/SAXUtilities.swift
@@ -31,3 +31,11 @@ public func SAXEqualTags(_ localName: XMLPointer, _ tag: ContiguousArray)
 		return localName[tagCount - 1] == 0
 	}
 }
+
+public extension String {
+
+	init?(xmlPointer: XMLPointer, count: Int? = nil) {
+		let d = Data(bytes: xmlPointer, count: count ?? strlen(xmlPointer))
+		self.init(data: d, encoding: .utf8)
+	}
+}