From 9b508e068b68536fe16de4e8ee11ba82855494f6 Mon Sep 17 00:00:00 2001 From: Stuart Breckenridge Date: Wed, 1 Jan 2025 11:48:27 +0800 Subject: [PATCH] =?UTF-8?q?=F0=9F=A9=B9=20Change=20regex=20pattern=20to=20?= =?UTF-8?q?use=20[\\s\\S]?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 📝 Code commentary updated to reflect change to `[\\s\\S]`. ✅ Tested on both default and Beehiiv feeds. --- .../Sources/RSCore/Shared/String+RSCore.swift | 21 ++++++------------- 1 file changed, 6 insertions(+), 15 deletions(-) diff --git a/RSCore/Sources/RSCore/Shared/String+RSCore.swift b/RSCore/Sources/RSCore/Shared/String+RSCore.swift index 9b4dc32cc..237a268d0 100644 --- a/RSCore/Sources/RSCore/Shared/String+RSCore.swift +++ b/RSCore/Sources/RSCore/Shared/String+RSCore.swift @@ -189,37 +189,28 @@ public extension String { /// Removes an HTML tag and everything between its start and end tags. /// - /// The regex pattern `<tag>.*?</tag>` explanation: + /// The regex pattern `<tag>[\\s\\S]*?</tag>` explanation: /// - `<` matches the literal `<` character. /// - `tag` matches the literal parameter provided to the function, e.g., `style`. /// - `>` matches the literal `>` character. - /// - `.*?` - /// - `.` matches _any_ character **except** a new line + /// - `[\\s\\S]*?` + /// - `[\\s\\S]` matches _any_ character, including new lines. /// - `*` will match zero or more of the preceeding character, in this case _any_ - /// character + /// character. /// - `?` switches the matching mode to [lazy](https://javascript.info/regexp-greedy-and-lazy) /// so it will match as few as characters as possible before satisfying the rest of the pattern. /// - `<` matches the literal `<` character. /// - `/` matches the literal `/` character. - /// - `tag` matches the literal parameter provided to the function, e.g., `style` + /// - `tag` matches the literal parameter provided to the function, e.g., `style`. /// - `>` matches the literal `>` character. 
/// - /// /// - Parameter tag: The tag to remove. /// /// - Returns: A new copy of `self` with the tag removed. /// /// - Note: Doesn't work correctly with nested tags of the same name. private func removingTagAndContents(_ tag: String) -> String { - let pattern = "<\(tag)>.*?<\\/\(tag)>" - if let regex = try? NSRegularExpression(pattern: pattern, options: [.dotMatchesLineSeparators, .caseInsensitive]) { - let range = NSRange(location: 0, length: self.utf16.count) - let modifiedString = regex.stringByReplacingMatches(in: self, options: [], range: range, withTemplate: "") - return modifiedString - } else { - // If the above regex fails, fall back to the original method. - return self.replacingOccurrences(of: "<\(tag).+?</\(tag)>", with: "", options: [.regularExpression, .caseInsensitive]) - } + return self.replacingOccurrences(of: "<\(tag)>[\\s\\S]*?</\(tag)>", with: "", options: [.regularExpression, .caseInsensitive]) } /// Strips HTML from a string.