Switch to new Parser.

This commit is contained in:
Brent Simmons
2024-11-15 22:59:51 -08:00
parent c3f063ae4a
commit 85d1a8fe7a
79 changed files with 187 additions and 219 deletions

View File

@@ -8,7 +8,7 @@
import Foundation
import Articles
import RSParser
import Parser
struct ArticleStringFormatter {
@@ -66,7 +66,7 @@ struct ArticleStringFormatter {
s = s.replacingOccurrences(of: "\t", with: "")
if !forHTML {
s = s.rsparser_stringByDecodingHTMLEntities()
s = HTMLEntityDecoder.decodedString(s)
}
s = s.trimmingWhitespace
@@ -98,7 +98,7 @@ struct ArticleStringFormatter {
if let cachedBody = summaryCache[key] {
return cachedBody
}
var s = body.rsparser_stringByDecodingHTMLEntities()
var s = HTMLEntityDecoder.decodedString(body)
s = s.strippingHTML(maxCharacters: 250)
s = s.trimmingWhitespace
s = s.collapsingWhitespace

View File

@@ -6,7 +6,7 @@
// Copyright © 2020 Ranchero Software. All rights reserved.
//
import RSParser
import Parser
#if canImport(AppKit)
import AppKit
@@ -310,6 +310,6 @@ private struct CountedSet<Element> where Element: Hashable {
private extension String {
var decodedEntity: String {
// It's possible the implementation will change, but for now it just calls this.
(self as NSString).rsparser_stringByDecodingHTMLEntities() as String
HTMLEntityDecoder.decodedString(self)
}
}

View File

@@ -8,7 +8,7 @@
import Foundation
import CoreServices
import RSParser
import Parser
import UniformTypeIdentifiers
// The favicon URLs may be specified in the head section of the home page.
@@ -20,7 +20,7 @@ struct FaviconURLFinder {
/// - homePageURL: The page to search.
/// - completion: A closure called when the links have been found.
/// - urls: An array of favicon URLs as strings.
static func findFaviconURLs(with homePageURL: String, _ completion: @escaping (_ urls: [String]?) -> Void) {
static func findFaviconURLs(with homePageURL: String, _ completion: @escaping ([String]?) -> Void) {
guard let _ = URL(string: homePageURL) else {
completion(nil)
@@ -29,7 +29,13 @@ struct FaviconURLFinder {
// If the favicon has an explicit type, check that for an ignored type; otherwise, check the file extension.
HTMLMetadataDownloader.downloadMetadata(for: homePageURL) { (htmlMetadata) in
let faviconURLs = htmlMetadata?.favicons.compactMap {
guard let favicons = htmlMetadata?.favicons else {
completion(nil)
return
}
let faviconURLs = favicons.compactMap {
shouldAllowFavicon($0) ? $0.urlString : nil
}
@@ -39,7 +45,7 @@ struct FaviconURLFinder {
private static let ignoredTypes = [UTType.svg]
private static func shouldAllowFavicon(_ favicon: RSHTMLMetadataFavicon) -> Bool {
private static func shouldAllowFavicon(_ favicon: HTMLMetadataFavicon) -> Bool {
// Check mime type.
if let mimeType = favicon.type, let utType = UTType(mimeType: mimeType) {

View File

@@ -8,13 +8,13 @@
import Foundation
import RSWeb
import RSParser
import Parser
struct HTMLMetadataDownloader {
static let serialDispatchQueue = DispatchQueue(label: "HTMLMetadataDownloader")
static func downloadMetadata(for url: String, _ completion: @escaping (RSHTMLMetadata?) -> Void) {
static func downloadMetadata(for url: String, _ completion: @escaping (HTMLMetadata?) -> Void) {
guard let actualURL = URL(string: url) else {
completion(nil)
return
@@ -32,9 +32,9 @@ struct HTMLMetadataDownloader {
}
}
private static func parseMetadata(with parserData: ParserData, _ completion: @escaping (RSHTMLMetadata?) -> Void) {
private static func parseMetadata(with parserData: ParserData, _ completion: @escaping (HTMLMetadata?) -> Void) {
serialDispatchQueue.async {
let htmlMetadata = RSHTMLMetadataParser.htmlMetadata(with: parserData)
let htmlMetadata = HTMLMetadataParser.metadata(with: parserData)
DispatchQueue.main.async {
completion(htmlMetadata)
}

View File

@@ -11,7 +11,7 @@ import Articles
import Account
import RSCore
import RSWeb
import RSParser
import Parser
extension Notification.Name {
@@ -214,7 +214,7 @@ private extension FeedIconDownloader {
}
}
func pullIconURL(from metadata: RSHTMLMetadata, homePageURL: String, feed: Feed) {
func pullIconURL(from metadata: HTMLMetadata, homePageURL: String, feed: Feed) {
if let url = metadata.bestWebsiteIconURL() {
cacheIconURL(for: homePageURL, url)

View File

@@ -7,65 +7,32 @@
//
import Foundation
import RSParser
import Parser
extension RSHTMLMetadata {
func largestOpenGraphImageURL() -> String? {
let openGraphImages = openGraphProperties.images
guard !openGraphImages.isEmpty else {
return nil
}
var bestImage: RSHTMLOpenGraphImage? = nil
for image in openGraphImages {
if image.width / image.height > 2 {
continue
}
if bestImage == nil {
bestImage = image
continue
}
if image.height > bestImage!.height && image.width > bestImage!.width {
bestImage = image
}
}
guard let url = bestImage?.secureURL ?? bestImage?.url else {
return nil
}
// Bad ones we should ignore.
let badURLs = Set(["https://s0.wp.com/i/blank.jpg"])
guard !badURLs.contains(url) else {
return nil
}
return url
}
extension HTMLMetadata {
func largestAppleTouchIcon() -> String? {
let icons = appleTouchIcons
guard !icons.isEmpty else {
guard let icons = appleTouchIcons, !icons.isEmpty else {
return nil
}
var bestImage: RSHTMLMetadataAppleTouchIcon? = nil
var bestImage: HTMLMetadataAppleTouchIcon? = nil
for image in icons {
if image.size.width / image.size.height > 2 {
continue
if let size = image.size {
if size.width / size.height > 2 {
continue
}
}
if bestImage == nil {
bestImage = image
continue
}
if image.size.height > bestImage!.size.height && image.size.width > bestImage!.size.width {
bestImage = image;
if let size = image.size, let bestImageSize = bestImage!.size {
if size.height > bestImageSize.height && size.width > bestImageSize.width {
bestImage = image;
}
}
}
@@ -80,19 +47,10 @@ extension RSHTMLMetadata {
return appleTouchIcon
}
if let openGraphImageURL = largestOpenGraphImageURL() {
if let openGraphImageURL = openGraphProperties?.image?.url {
return openGraphImageURL
}
return twitterProperties.imageURL
}
func bestFeaturedImageURL() -> String? {
if let openGraphImageURL = largestOpenGraphImageURL() {
return openGraphImageURL
}
return twitterProperties.imageURL
return twitterProperties?.imageURL
}
}

View File

@@ -9,7 +9,7 @@
import Foundation
import os.log
import RSCore
import RSParser
import Parser
import Account
final class ExtensionContainersFile {