diff --git a/Modules/Parser/Sources/FeedParser/Feeds/XML/AtomParser.swift b/Modules/Parser/Sources/FeedParser/Feeds/XML/AtomParser.swift
index 368a7588a..edc154c8d 100644
--- a/Modules/Parser/Sources/FeedParser/Feeds/XML/AtomParser.swift
+++ b/Modules/Parser/Sources/FeedParser/Feeds/XML/AtomParser.swift
@@ -81,7 +81,7 @@ private extension AtomParser {
 	func addFeedLanguage() {
 
 	}
-	
+
 	func addArticle() {
 		let article = RSSArticle(feedURL)
 		articles.append(article)
@@ -94,11 +94,7 @@ private extension AtomParser {
 			return
 		}
 
-		let name: String? = {
-			let data = Data(bytes: localName, count: strlen(localName))
-			return String(data: data, encoding: .utf8)
-		}()
-		guard let name else {
+		guard let name = String(xmlPointer: localName) else {
 			assertionFailure("Unexpected failure converting XMLPointer to String in addXHTMLTag.")
 			return
 		}
@@ -190,7 +186,24 @@ extension AtomParser: SAXParserDelegate {
 
 	public func saxParser(_ saxParser: SAXParser, xmlCharactersFound: XMLPointer, count: Int) {
 
-		// Required method.
+		// Character data is ignored unless parsingXHTML is set.
+		guard parsingXHTML else {
+			return
+		}
+		guard var s = String(xmlPointer: xmlCharactersFound, count: count) else {
+			return
+		}
+
+		// libxml decodes all entities; we need to re-encode certain characters
+		// (<, >, and &) when inside XHTML text content. The ampersand is escaped
+		// first so the entities emitted for < and > are not escaped a second time.
+		s = s.replacingOccurrences(of: "&", with: "&amp;")
+		s = s.replacingOccurrences(of: "<", with: "&lt;")
+		s = s.replacingOccurrences(of: ">", with: "&gt;")
+
+		// NOTE(review): each call replaces xhtmlString. If the parser delivers one
+		// text node across several callbacks, earlier chunks are lost; confirm whether this should append.
+		xhtmlString = s
 	}
 }
 
diff --git a/Modules/Parser/Sources/SAX/SAXUtilities.swift b/Modules/Parser/Sources/SAX/SAXUtilities.swift
index 10ba86f0a..3bb680e89 100644
--- a/Modules/Parser/Sources/SAX/SAXUtilities.swift
+++ b/Modules/Parser/Sources/SAX/SAXUtilities.swift
@@ -31,3 +31,17 @@ public func SAXEqualTags(_ localName: XMLPointer, _ tag: ContiguousArray)
 		return localName[tagCount - 1] == 0
 	}
 }
+
+public extension String {
+
+	/// Creates a string by decoding the bytes at `xmlPointer` as UTF-8.
+	///
+	/// - Parameters:
+	///   - xmlPointer: Pointer to the first byte to decode.
+	///   - count: Number of bytes to read. When `nil`, the length comes from `strlen`, so the buffer must be NUL-terminated.
+	/// - Returns: `nil` when the bytes are not valid UTF-8.
+	init?(xmlPointer: XMLPointer, count: Int? = nil) {
+		let d = Data(bytes: xmlPointer, count: count ?? strlen(xmlPointer))
+		self.init(data: d, encoding: .utf8)
+	}
+}