diff --git a/Modules/Parser/Sources/HTMLParser/HTMLMetadata.swift b/Modules/Parser/Sources/HTMLParser/HTMLMetadata.swift
new file mode 100644
index 000000000..0e36b93d7
--- /dev/null
+++ b/Modules/Parser/Sources/HTMLParser/HTMLMetadata.swift
@@ -0,0 +1,392 @@
+//
+// HTMLMetadata.swift
+//
+//
+// Created by Brent Simmons on 9/22/24.
+//
+
+import Foundation
+import SAX
+
+/// Metadata extracted from the tags of an HTML page: icons, feed links, Open Graph and Twitter properties.
+public final class HTMLMetadata {
+
+    public let baseURLString: String
+    public let tags: [HTMLTag]
+    public let favicons: [HTMLMetadataFavicon]?
+    public let appleTouchIcons: [HTMLMetadataAppleTouchIcon]?
+    public let feedLinks: [HTMLMetadataFeedLink]?
+    public let openGraphProperties: HTMLOpenGraphProperties?
+    public let twitterProperties: HTMLTwitterProperties?
+
+    /// Builds every metadata collection from `tags`.
+    /// - Parameters:
+    ///   - urlString: Base URL string of the page; stored and handed to each metadata object.
+    ///   - tags: Tags found in the page.
+    init(_ urlString: String, _ tags: [HTMLTag]) {
+
+        self.baseURLString = urlString
+        self.tags = tags
+
+        self.favicons = Self.resolvedFaviconLinks(urlString, tags)
+        self.appleTouchIcons = Self.appleTouchIconTags(tags)?.map { HTMLMetadataAppleTouchIcon(urlString, $0) }
+        self.feedLinks = Self.feedLinkTags(tags)?.map { HTMLMetadataFeedLink(urlString, $0) }
+
+        self.openGraphProperties = HTMLOpenGraphProperties(urlString, tags)
+        self.twitterProperties = HTMLTwitterProperties(urlString, tags)
+    }
+
+    /// Returns one favicon per distinct URL from the `rel="icon"` link tags, keeping first occurrences.
+    /// - Parameters:
+    ///   - baseURLString: Base URL string handed to each `HTMLMetadataFavicon`.
+    ///   - tags: Tags found in the page.
+    /// - Returns: The favicons, or `nil` when there are no matching tags or none yields a URL.
+    static func resolvedFaviconLinks(_ baseURLString: String, _ tags: [HTMLTag]) -> [HTMLMetadataFavicon]? {
+
+        guard let linkTags = linkTagsWithMatchingRel("icon", tags) else {
+            return nil
+        }
+
+        // URLs already emitted; a Set keeps the duplicate check cheap.
+        var seenURLStrings = Set<String>()
+
+        let favicons = linkTags.compactMap { htmlTag -> HTMLMetadataFavicon? in
+
+            let favicon = HTMLMetadataFavicon(baseURLString, htmlTag)
+            // `insert` reports whether the URL is new, so a duplicate is rejected in the same step.
+            guard let urlString = favicon.urlString, seenURLStrings.insert(urlString).inserted else {
+                return nil
+            }
+            return favicon
+        }
+
+        return favicons.isEmpty ? nil : favicons
+    }
+
+    /// Returns the link tags whose `rel` contains `apple-touch-icon` or `apple-touch-icon-precomposed`.
+    /// - Returns: The matching tags, or `nil` when there are none.
+    static func appleTouchIconTags(_ tags: [HTMLTag]) -> [HTMLTag]? {
+
+        guard let linkTags = Self.linkTags(tags) else {
+            return nil
+        }
+
+        // Match against the link tags only, not every tag in the page.
+        return tagsMatchingRelValues(["apple-touch-icon", "apple-touch-icon-precomposed"], linkTags)
+    }
+
+    /// Returns the `rel="alternate"` link tags that have a feed `type` and a non-empty URL.
+    /// - Returns: The matching tags, or `nil` when there are none.
+    static func feedLinkTags(_ tags: [HTMLTag]) -> [HTMLTag]? {
+
+        guard let alternateLinkTags = linkTagsWithMatchingRel("alternate", tags) else {
+            return nil
+        }
+
+        let feedLinkTags = alternateLinkTags.filter { tag in
+
+            guard let attributes = tag.attributes, let type = attributes.object(forCaseInsensitiveKey: "type"), typeIsFeedType(type) else {
+                return false
+            }
+            // NOTE(review): this used to call `urlString(from:)`; switched to the helper that
+            // `linkTagsWithMatchingRel` uses so both checks agree — confirm that helper is the intended one.
+            guard let urlString = urlStringFromDictionary(attributes), !urlString.isEmpty else {
+                return false
+            }
+
+            return true
+        }
+
+        return feedLinkTags.isEmpty ? nil : feedLinkTags
+    }
+
+    /// Returns whether `type` ends in `/rss+xml`, `/atom+xml`, or `/json`, ignoring case.
+    static func typeIsFeedType(_ type: String) -> Bool {
+
+        let lowerType = type.lowercased()
+        return ["/rss+xml", "/atom+xml", "/json"].contains { lowerType.hasSuffix($0) }
+    }
+
+    /// Returns the tags whose type is `.link`, or `nil` when there are none.
+    static func linkTags(_ tags: [HTMLTag]) -> [HTMLTag]? {
+
+        let linkTags = tags.filter { $0.tagType == .link }
+        return linkTags.isEmpty ? nil : linkTags
+    }
+
+    /// Returns the link tags that have a non-empty URL and whose `rel` contains `valueToMatch`.
+    /// - Returns: The matching tags, or `nil` when there are none.
+    static func linkTagsWithMatchingRel(_ valueToMatch: String, _ tags: [HTMLTag]) -> [HTMLTag]? {
+
+        // Case-insensitive; matches a whitespace-delimited word
+
+        guard let linkTags = Self.linkTags(tags) else {
+            return nil
+        }
+
+        let tagsWithURLString = linkTags.filter { tag in
+            guard let urlString = urlStringFromDictionary(tag.attributes), !urlString.isEmpty else {
+                return false
+            }
+            return true
+        }
+        if tagsWithURLString.isEmpty {
+            return nil
+        }
+
+        // Already `nil` when nothing matches, so it can be returned as-is.
+        return tagsMatchingRelValues([valueToMatch], tagsWithURLString)
+    }
+
+    /// Returns the tags whose `rel` value contains any of `valuesToMatch` as a whitespace-delimited word.
+    ///
+    /// The comparison is case-insensitive.
+    /// - Parameters:
+    ///   - valuesToMatch: Rel values to look for.
+    ///   - tags: Tags to search.
+    /// - Returns: The matching tags, or `nil` when there are none.
+    static func tagsMatchingRelValues(_ valuesToMatch: [String], _ tags: [HTMLTag]) -> [HTMLTag]? {
+
+        let lowerValuesToMatch = Set(valuesToMatch.map { $0.lowercased() })
+
+        let matchingTags = tags.filter { tag in
+
+            guard let rel = relValue(tag.attributes) else {
+                return false
+            }
+
+            // Split `rel` into words and accept the tag as soon as one word is wanted.
+            let relWords = rel.components(separatedBy: .whitespacesAndNewlines)
+            return relWords.contains { lowerValuesToMatch.contains($0.lowercased()) }
+        }
+
+        return matchingTags.isEmpty ? nil : matchingTags
+    }
+}
+
+/// Apple touch icon metadata read from the attributes of a single HTML tag.
+public final class HTMLMetadataAppleTouchIcon {
+
+    public let rel: String? // Value of the `rel` attribute
+    public let sizes: String? // Raw value of the `sizes` attribute
+    public let size: CGSize? // Parsed from `sizes`; nil unless it has the form WIDTHxHEIGHT
+    public let urlString: String? // Absolute
+
+    /// Reads `rel`, `sizes`, and the icon URL from `tag`.
+    /// - Parameters:
+    ///   - urlString: Base URL string of the page, passed to `absoluteURLStringWithDictionary`.
+    ///   - tag: Tag to read; every property is `nil` when it has no attributes.
+    init(_ urlString: String, _ tag: HTMLTag) {
+
+        guard let attributes = tag.attributes else {
+            self.rel = nil
+            self.sizes = nil
+            self.size = nil
+            self.urlString = nil
+            return
+        }
+
+        self.rel = attributes.object(forCaseInsensitiveKey: "rel")
+        // Pass the base URL along, as the feed-link and favicon initializers do.
+        self.urlString = absoluteURLStringWithDictionary(attributes, urlString)
+
+        let sizes: String? = attributes.object(forCaseInsensitiveKey: "sizes")
+        self.sizes = sizes
+        self.size = sizes.flatMap(Self.parsedSize)
+    }
+
+    /// Parses a single "WIDTHxHEIGHT" value (the separator may be `x` or `X`); `nil` for anything else.
+    private static func parsedSize(_ sizes: String) -> CGSize? {
+        let components = sizes.components(separatedBy: CharacterSet(charactersIn: "xX"))
+        guard components.count == 2, let width = Double(components[0]), let height = Double(components[1]) else {
+            return nil
+        }
+        return CGSize(width: width, height: height)
+    }
+}
+
+/// A feed link read from the attributes of a single HTML tag.
+public final class HTMLMetadataFeedLink {
+    public let title: String? // Value of the `title` attribute, if present
+    public let type: String? // Value of the `type` attribute, if present
+    public let urlString: String? // Absolute
+
+    /// Reads the tag's attributes; `urlString` is the page's base URL. All properties are `nil` without attributes.
+    init(_ urlString: String, _ tag: HTMLTag) {
+        guard let attributes = tag.attributes else {
+            self.title = nil
+            self.type = nil
+            self.urlString = nil
+            return
+        }
+        // The base URL parameter is named `urlString`; no `baseURLString` exists in this scope.
+        self.urlString = absoluteURLStringWithDictionary(attributes, urlString)
+        self.title = attributes.object(forCaseInsensitiveKey: "title")
+        self.type = attributes.object(forCaseInsensitiveKey: "type")
+    }
+}
+
+/// Favicon metadata read from the attributes of a single HTML tag.
+public final class HTMLMetadataFavicon {
+    public let type: String? // Value of the `type` attribute, if present
+    public let urlString: String? // Result of absoluteURLStringWithDictionary; presumably absolute
+
+    /// Reads the tag's attributes; `urlString` is the page's base URL. All properties are `nil` without attributes.
+    init(_ urlString: String, _ tag: HTMLTag) {
+        guard let attributes = tag.attributes else {
+            self.type = nil
+            self.urlString = nil
+            return
+        }
+        // The base URL parameter is named `urlString`; no `baseURLString` exists in this scope.
+        self.urlString = absoluteURLStringWithDictionary(attributes, urlString)
+        self.type = attributes.object(forCaseInsensitiveKey: "type")
+    }
+}
+
+/// Open Graph metadata for a page. Only the image properties are parsed so far.
+public final class HTMLOpenGraphProperties {
+    // TODO: the rest. As of this writing (Nov. 26, 2017) only og:image is needed.
+    // See http://ogp.me/
+    /// Image built by `parse` from the page's tags; `nil` when `parse` finds nothing.
+    public let image: HTMLOpenGraphImage?
+
+    /// `urlString` (the page's base URL string) is accepted but not used here.
+    init(_ urlString: String, _ tags: [HTMLTag]) {
+        image = HTMLOpenGraphProperties.parse(tags)
+    }
+}
+
+/// Parsing support for `HTMLOpenGraphProperties`.
+private extension HTMLOpenGraphProperties {
+
+    /// Prefix shared by every property name this parser considers.
+    private static let ogPrefix = "og:"
+
+    /// Meta tag attribute names.
+    struct OGKey {
+        static let property = "property"
+        static let content = "content"
+    }
+
+    /// Property names that describe the image.
+    struct OGValue {
+        static let ogImage = "og:image"
+        static let ogImageURL = "og:image:url"
+        static let ogImageSecureURL = "og:image:secure_url"
+        static let ogImageType = "og:image:type"
+        static let ogImageAlt = "og:image:alt"
+        static let ogImageWidth = "og:image:width"
+        static let ogImageHeight = "og:image:height"
+    }
+
+    /// Collects the `og:image` properties from the meta tags in `tags`.
+    ///
+    /// Attribute lookups use the exact keys `property` and `content`. When a property
+    /// appears more than once, a later occurrence overwrites an earlier one.
+    /// The return type is a single image because exactly one `HTMLOpenGraphImage` is built below.
+    /// - Parameter tags: Tags found in the page; tags that are not `.meta` are ignored.
+    /// - Returns: A single image description, or `nil` when none of its fields could be filled in.
+    static func parse(_ tags: [HTMLTag]) -> HTMLOpenGraphImage? {
+
+        // HTMLOpenGraphImage properties to fill in.
+        var url: String?
+        var secureURL: String?
+        var mimeType: String?
+        var width: CGFloat?
+        var height: CGFloat?
+        var altText: String?
+
+        for tag in tags where tag.tagType == .meta {
+
+            // Skip meta tags that are not Open Graph properties or that carry no content.
+            guard let attributes = tag.attributes,
+                  let propertyName = attributes[OGKey.property], propertyName.hasPrefix(ogPrefix),
+                  let content = attributes[OGKey.content] else {
+                continue
+            }
+
+            switch propertyName {
+            case OGValue.ogImage, OGValue.ogImageURL:
+                url = content
+            case OGValue.ogImageSecureURL:
+                secureURL = content
+            case OGValue.ogImageType:
+                mimeType = content
+            case OGValue.ogImageAlt:
+                altText = content
+            case OGValue.ogImageWidth:
+                // CGFloat cannot be created from a String directly, so parse via Double.
+                width = Double(content).map { CGFloat($0) }
+            case OGValue.ogImageHeight:
+                height = Double(content).map { CGFloat($0) }
+            default:
+                break
+            }
+        }
+
+        // Nothing image-related was found.
+        if url == nil && secureURL == nil && mimeType == nil && width == nil && height == nil && altText == nil {
+            return nil
+        }
+
+        return HTMLOpenGraphImage(url: url, secureURL: secureURL, mimeType: mimeType, width: width, height: height, altText: altText)
+    }
+}
+
+/// An Open Graph image description; every field is optional because any of them may be missing.
+public final class HTMLOpenGraphImage {
+    public let url: String?
+    public let secureURL: String?
+    public let mimeType: String?
+    public let width: CGFloat?
+    public let height: CGFloat?
+    public let altText: String?
+
+    /// Stores each argument unchanged. `mimeType` is optional, matching the stored property.
+    init(url: String?, secureURL: String?, mimeType: String?, width: CGFloat?, height: CGFloat?, altText: String?) {
+        self.url = url
+        self.secureURL = secureURL
+        self.mimeType = mimeType
+        self.width = width
+        self.height = height
+        self.altText = altText
+    }
+}
+
+/// Twitter properties found in a page's meta tags. Only `twitter:image:src` is read.
+public final class HTMLTwitterProperties {
+
+    public let imageURL: String? // twitter:image:src
+
+    private struct TwitterKey {
+        static let name = "name"
+        static let content = "content"
+    }
+
+    private struct TwitterValue {
+        static let imageSrc = "twitter:image:src"
+    }
+
+    /// Scans `tags` for the first meta tag named `twitter:image:src` that has non-empty content.
+    /// - Parameters:
+    ///   - urlString: Base URL string of the page. Currently unused.
+    ///   - tags: Tags to scan.
+    init(_ urlString: String, _ tags: [HTMLTag]) {
+
+        // `first(where:)` keeps the "first match wins" order of the original loop.
+        let matchingTag = tags.first { tag in
+            guard tag.tagType == .meta, tag.attributes?[TwitterKey.name] == TwitterValue.imageSrc else {
+                return false
+            }
+            guard let content = tag.attributes?[TwitterKey.content] else {
+                return false
+            }
+            return !content.isEmpty
+        }
+
+        // nil when no tag matched (this was previously typed as a non-optional String yet returned nil).
+        self.imageURL = matchingTag?.attributes?[TwitterKey.content]
+    }
+}
+
diff --git a/Modules/Parser/Sources/SAX/HTMLTag.swift b/Modules/Parser/Sources/HTMLParser/HTMLTag.swift
similarity index 63%
rename from Modules/Parser/Sources/SAX/HTMLTag.swift
rename to Modules/Parser/Sources/HTMLParser/HTMLTag.swift
index 1333d9cff..e0bcfad5e 100644
--- a/Modules/Parser/Sources/SAX/HTMLTag.swift
+++ b/Modules/Parser/Sources/HTMLParser/HTMLTag.swift
@@ -7,6 +7,8 @@
import Foundation
+public typealias HTMLTagAttributes = [String: String]
+
public struct HTMLTag: Sendable {
public enum TagType: Sendable {
@@ -15,9 +17,9 @@ public struct HTMLTag: Sendable {
}
public let tagType: TagType
- public let attributes: [String: String]?
+ public let attributes: HTMLTagAttributes?
- public init(tagType: TagType, attributes: [String : String]?) {
+ public init(tagType: TagType, attributes: HTMLTagAttributes?) {
self.tagType = tagType
self.attributes = attributes
}
diff --git a/Modules/Parser/Tests/ParserTests/HTMLLinkTests.swift b/Modules/Parser/Tests/ParserTests/HTMLLinkTests.swift
index c179b8137..ac3c6f362 100644
--- a/Modules/Parser/Tests/ParserTests/HTMLLinkTests.swift
+++ b/Modules/Parser/Tests/ParserTests/HTMLLinkTests.swift
@@ -8,7 +8,6 @@
import XCTest
import HTMLParser
-import SAX
import libxml2
class HTMLLinkTests: XCTestCase {