Parse Open Graph images when parsing metadata from an HTML page.

This commit is contained in:
Brent Simmons
2017-11-26 11:38:03 -08:00
parent e08acc9837
commit ff7695c290
9 changed files with 2644 additions and 53 deletions

View File

@@ -8,11 +8,12 @@
#import <RSParser/RSHTMLMetadata.h>
#import <RSParser/RSParserInternal.h>
#import <RSParser/RSHTMLTag.h>
static NSString *urlStringFromDictionary(NSDictionary *d);
static NSString *absoluteURLStringWithRelativeURLString(NSString *relativeURLString, NSString *baseURLString);
static NSString *absoluteURLStringWithDictionary(NSDictionary *d, NSString *baseURLString);
static NSArray *objectsOfClassWithDictionaries(Class class, NSArray *dictionaries, NSString *baseURLString);
static NSArray *objectsOfClassWithTags(Class class, NSArray *tags, NSString *baseURLString);
static NSString *relValue(NSDictionary *d);
static BOOL typeIsFeedType(NSString *type);
@@ -33,24 +34,23 @@ static NSString *kTypeKey = @"type";
// Private initializer declarations for RSHTMLMetadataAppleTouchIcon.
// NOTE(review): this span appears to come from a rendered diff, so both the
// dictionary-based and the tag-based initializer are declared here. The
// implementation visible later in this file takes an RSHTMLTag and reads its
// attributes dictionary — confirm the dictionary variant is no longer needed.
@interface RSHTMLMetadataAppleTouchIcon ()
- (instancetype)initWithDictionary:(NSDictionary *)d baseURLString:(NSString *)baseURLString;
- (instancetype)initWithTag:(RSHTMLTag *)tag baseURLString:(NSString *)baseURLString;
@end
// Private initializer declarations for RSHTMLMetadataFeedLink.
// NOTE(review): this span appears to come from a rendered diff, so both the
// dictionary-based and the tag-based initializer are declared here. The
// implementation visible later in this file takes an RSHTMLTag and reads its
// attributes dictionary — confirm the dictionary variant is no longer needed.
@interface RSHTMLMetadataFeedLink ()
- (instancetype)initWithDictionary:(NSDictionary *)d baseURLString:(NSString *)baseURLString;
- (instancetype)initWithTag:(RSHTMLTag *)tag baseURLString:(NSString *)baseURLString;
@end
@implementation RSHTMLMetadata
#pragma mark - Init
- (instancetype)initWithURLString:(NSString *)urlString dictionaries:(NSArray <NSDictionary *> *)dictionaries {
- (instancetype)initWithURLString:(NSString *)urlString tags:(NSArray <RSHTMLTag *> *)tags {
self = [super init];
if (!self) {
@@ -58,17 +58,19 @@ static NSString *kTypeKey = @"type";
}
_baseURLString = urlString;
_dictionaries = dictionaries;
_faviconLink = [self resolvedLinkFromFirstDictionaryWithMatchingRel:kShortcutIconRelValue];
_tags = tags;
_faviconLink = [self resolvedLinkFromFirstLinkTagWithMatchingRel:kShortcutIconRelValue];
if (_faviconLink == nil) {
_faviconLink = [self resolvedLinkFromFirstDictionaryWithMatchingRel:kIconRelValue];
_faviconLink = [self resolvedLinkFromFirstLinkTagWithMatchingRel:kIconRelValue];
}
NSArray *appleTouchIconDictionaries = [self appleTouchIconDictionaries];
_appleTouchIcons = objectsOfClassWithDictionaries([RSHTMLMetadataAppleTouchIcon class], appleTouchIconDictionaries, urlString);
NSArray *appleTouchIconTags = [self appleTouchIconTags];
_appleTouchIcons = objectsOfClassWithTags([RSHTMLMetadataAppleTouchIcon class], appleTouchIconTags, urlString);
NSArray *feedLinkDictionaries = [self feedLinkDictionaries];
_feedLinks = objectsOfClassWithDictionaries([RSHTMLMetadataFeedLink class], feedLinkDictionaries, urlString);
NSArray *feedLinkTags = [self feedLinkTags];
_feedLinks = objectsOfClassWithTags([RSHTMLMetadataFeedLink class], feedLinkTags, urlString);
_openGraphProperties = [[RSHTMLOpenGraphProperties alloc] initWithURLString:urlString tags:tags];
return self;
}
@@ -76,15 +78,18 @@ static NSString *kTypeKey = @"type";
#pragma mark - Private
- (NSDictionary *)firstDictionaryWithMatchingRel:(NSString *)valueToMatch {
- (RSHTMLTag *)firstLinkTagWithMatchingRel:(NSString *)valueToMatch {
// Case-insensitive.
for (NSDictionary *oneDictionary in self.dictionaries) {
for (RSHTMLTag *tag in self.tags) {
NSString *oneRelValue = relValue(oneDictionary);
if (tag.type != RSHTMLTagTypeLink) {
continue;
}
NSString *oneRelValue = relValue(tag.attributes);
if (oneRelValue && [oneRelValue compare:valueToMatch options:NSCaseInsensitiveSearch] == NSOrderedSame) {
return oneDictionary;
return tag;
}
}
@@ -92,28 +97,36 @@ static NSString *kTypeKey = @"type";
}
// Collects the link tags that describe Apple touch icons.
// NOTE(review): this span appears to come from a rendered diff — each
// dictionary-based line is followed by the tag-based line that supersedes it.
// Tag-based behavior: walk self.tags, ignore anything that is not of type
// RSHTMLTagTypeLink, and keep the tags whose lowercased rel value equals
// kAppleTouchIconValue or kAppleTouchIconPrecomposedValue.
- (NSArray *)appleTouchIconDictionaries {
- (NSArray *)appleTouchIconTags {
NSMutableArray *dictionaries = [NSMutableArray new];
NSMutableArray *tags = [NSMutableArray new];
for (NSDictionary *oneDictionary in self.dictionaries) {
for (RSHTMLTag *tag in self.tags) {
NSString *oneRelValue = relValue(oneDictionary).lowercaseString;
// Only link tags are considered.
if (tag.type != RSHTMLTagTypeLink) {
continue;
}
// rel is lowercased before the exact-match comparisons below.
NSString *oneRelValue = relValue(tag.attributes).lowercaseString;
if ([oneRelValue isEqualToString:kAppleTouchIconValue] || [oneRelValue isEqualToString:kAppleTouchIconPrecomposedValue]) {
[dictionaries addObject:oneDictionary];
[tags addObject:tag];
}
}
return dictionaries;
// The tag-based version returns the mutable array it built.
return tags;
}
- (NSArray *)feedLinkDictionaries {
- (NSArray *)feedLinkTags {
NSMutableArray *dictionaries = [NSMutableArray new];
NSMutableArray *tags = [NSMutableArray new];
for (NSDictionary *oneDictionary in self.dictionaries) {
for (RSHTMLTag *tag in self.tags) {
if (tag.type != RSHTMLTagTypeLink) {
continue;
}
NSDictionary *oneDictionary = tag.attributes;
NSString *oneRelValue = relValue(oneDictionary).lowercaseString;
if (![oneRelValue isEqualToString:kAlternateKey]) {
continue;
@@ -128,20 +141,19 @@ static NSString *kTypeKey = @"type";
continue;
}
[dictionaries addObject:oneDictionary];
[tags addObject:tag];
}
return dictionaries;
return tags;
}
// Looks up the first link tag whose rel matches relValue and hands that tag's
// attributes, together with self.baseURLString, to absoluteURLStringWithDictionary.
// When no tag matches, tag is nil and so is tag.attributes; the result is then
// whatever absoluteURLStringWithDictionary returns for a nil dictionary
// (presumably nil — its body is not visible here, confirm).
// NOTE(review): this span appears to come from a rendered diff — the
// dictionary-based lines are followed by the tag-based lines that supersede them.
- (NSString *)resolvedLinkFromFirstDictionaryWithMatchingRel:(NSString *)relValue {
- (NSString *)resolvedLinkFromFirstLinkTagWithMatchingRel:(NSString *)relValue {
NSDictionary *d = [self firstDictionaryWithMatchingRel:relValue];
return absoluteURLStringWithDictionary(d, self.baseURLString);
RSHTMLTag *tag = [self firstLinkTagWithMatchingRel:relValue];
return absoluteURLStringWithDictionary(tag.attributes, self.baseURLString);
}
@end
@@ -184,19 +196,19 @@ static NSString *absoluteURLStringWithDictionary(NSDictionary *d, NSString *base
}
// Builds one instance of `class` per input element and returns them in input order.
// NOTE(review): this span appears to come from a rendered diff — each
// dictionary-based line is followed by the tag-based line that supersedes it.
// Tag-based behavior: every tag is passed to -initWithTag:baseURLString: on a
// freshly allocated `class` instance; nil results are skipped.
static NSArray *objectsOfClassWithDictionaries(Class class, NSArray *dictionaries, NSString *baseURLString) {
static NSArray *objectsOfClassWithTags(Class class, NSArray *tags, NSString *baseURLString) {
NSMutableArray *objects = [NSMutableArray new];
for (NSDictionary *oneDictionary in dictionaries) {
for (RSHTMLTag *tag in tags) {
id oneObject = [[class alloc] initWithDictionary:oneDictionary baseURLString:baseURLString];
id oneObject = [[class alloc] initWithTag:tag baseURLString:baseURLString];
// An initializer may return nil; such entries are left out of the result.
if (oneObject) {
[objects addObject:oneObject];
}
}
// Two return statements are visible: one returns an immutable copy, the other
// returns the mutable array itself.
return [objects copy];
return objects;
}
@@ -209,14 +221,14 @@ static BOOL typeIsFeedType(NSString *type) {
@implementation RSHTMLMetadataAppleTouchIcon
- (instancetype)initWithDictionary:(NSDictionary *)d baseURLString:(NSString *)baseURLString {
- (instancetype)initWithTag:(RSHTMLTag *)tag baseURLString:(NSString *)baseURLString {
self = [super init];
if (!self) {
return nil;
}
NSDictionary *d = tag.attributes;
_urlString = absoluteURLStringWithDictionary(d, baseURLString);
_sizes = [d rsparser_objectForCaseInsensitiveKey:kSizesKey];
_rel = [d rsparser_objectForCaseInsensitiveKey:kRelKey];
@@ -224,20 +236,19 @@ static BOOL typeIsFeedType(NSString *type) {
return self;
}
@end
@implementation RSHTMLMetadataFeedLink
- (instancetype)initWithDictionary:(NSDictionary *)d baseURLString:(NSString *)baseURLString {
- (instancetype)initWithTag:(RSHTMLTag *)tag baseURLString:(NSString *)baseURLString {
self = [super init];
if (!self) {
return nil;
}
NSDictionary *d = tag.attributes;
_urlString = absoluteURLStringWithDictionary(d, baseURLString);
_title = [d rsparser_objectForCaseInsensitiveKey:kTitleKey];
_type = [d rsparser_objectForCaseInsensitiveKey:kTypeKey];
@@ -245,6 +256,130 @@ static BOOL typeIsFeedType(NSString *type) {
return self;
}
@end
// Private, writable redeclaration of the RSHTMLOpenGraphImage properties so the
// Open Graph parser in this file can fill them in.
// String properties use `copy` so that a mutable string handed to a setter
// cannot change underneath the image object afterwards.
@interface RSHTMLOpenGraphImage ()

@property (nonatomic, readwrite, copy) NSString *url;        // content of og:image or og:image:url
@property (nonatomic, readwrite, copy) NSString *secureURL;  // content of og:image:secure_url
@property (nonatomic, readwrite, copy) NSString *mimeType;   // content of og:image:type
@property (nonatomic, readwrite) CGFloat width;              // numeric value of og:image:width
@property (nonatomic, readwrite) CGFloat height;             // numeric value of og:image:height
@property (nonatomic, readwrite, copy) NSString *altText;    // content of og:image:alt

@end

// Plain value holder; all behavior comes from the synthesized accessors.
@implementation RSHTMLOpenGraphImage

@end
@interface RSHTMLOpenGraphProperties ()

// Backing store for -images. Only mutated while -parseTags: runs during init.
@property (nonatomic) NSMutableArray <RSHTMLOpenGraphImage *> *ogImages;

@end

// Extracts Open Graph image metadata (og:image and its structured properties)
// from the <meta> tags of an HTML page.
@implementation RSHTMLOpenGraphProperties

// Parses `tags` immediately; the resulting images are available from -images.
// `urlString` is accepted but not used here: image URLs are stored exactly as
// they appear in the page, without being resolved against a base URL.
- (instancetype)initWithURLString:(NSString *)urlString tags:(NSArray <RSHTMLTag *> *)tags {

	self = [super init];
	if (!self) {
		return nil;
	}

	_ogImages = [NSMutableArray new];
	[self parseTags:tags];

	return self;
}

// The image currently being assembled, or nil when none has been started.
- (RSHTMLOpenGraphImage *)currentImage {

	return self.ogImages.lastObject;
}

// Starts a new, empty image and makes it the current one.
- (RSHTMLOpenGraphImage *)pushImage {

	RSHTMLOpenGraphImage *image = [RSHTMLOpenGraphImage new];
	[self.ogImages addObject:image];
	return image;
}

// Returns the current image, starting one first if there is none yet.
- (RSHTMLOpenGraphImage *)ensureImage {

	RSHTMLOpenGraphImage *image = [self currentImage];
	if (image != nil) {
		return image;
	}
	return [self pushImage];
}

// All images found, in document order. An immutable snapshot is returned so
// callers cannot modify the internal storage.
- (NSArray *)images {

	return [self.ogImages copy];
}

// Values of the meta tag's "property" attribute that this parser understands.
static NSString * const ogPrefix = @"og:";
static NSString * const ogImage = @"og:image";
static NSString * const ogImageURL = @"og:image:url";
static NSString * const ogImageSecureURL = @"og:image:secure_url";
static NSString * const ogImageType = @"og:image:type";
static NSString * const ogImageWidth = @"og:image:width";
static NSString * const ogImageHeight = @"og:image:height";
static NSString * const ogImageAlt = @"og:image:alt";

// Attribute names read from each meta tag.
static NSString * const ogPropertyKey = @"property";
static NSString * const ogContentKey = @"content";

// Walks `tags` in order and builds the list of Open Graph images.
// Tags that are not meta tags, whose property does not start with "og:", or
// that carry no content attribute are ignored. An og:image tag starts a new
// image unless the current image has no URL yet; every other og:image:*
// property is applied to the current image, creating one if none exists.
- (void)parseTags:(NSArray *)tags {

	for (RSHTMLTag *tag in tags) {

		if (tag.type != RSHTMLTagTypeMeta) {
			continue;
		}

		// Attribute names are looked up case-insensitively, matching how the
		// rest of this file reads rel/sizes/title/type attributes.
		NSDictionary *attributes = tag.attributes;

		NSString *propertyName = [attributes rsparser_objectForCaseInsensitiveKey:ogPropertyKey];
		if (!propertyName || ![propertyName hasPrefix:ogPrefix]) {
			continue;
		}

		NSString *content = [attributes rsparser_objectForCaseInsensitiveKey:ogContentKey];
		if (!content) {
			continue;
		}

		if ([propertyName isEqualToString:ogImage]) {
			RSHTMLOpenGraphImage *image = [self currentImage];
			if (!image || image.url) { // Most likely case, since og:image will probably appear before other image attributes.
				image = [self pushImage];
			}
			image.url = content;
		}
		else if ([propertyName isEqualToString:ogImageURL]) {
			[self ensureImage].url = content;
		}
		else if ([propertyName isEqualToString:ogImageSecureURL]) {
			[self ensureImage].secureURL = content;
		}
		else if ([propertyName isEqualToString:ogImageType]) {
			[self ensureImage].mimeType = content;
		}
		else if ([propertyName isEqualToString:ogImageAlt]) {
			[self ensureImage].altText = content;
		}
		else if ([propertyName isEqualToString:ogImageWidth]) {
			// doubleValue keeps full precision where CGFloat is a double.
			[self ensureImage].width = (CGFloat)content.doubleValue;
		}
		else if ([propertyName isEqualToString:ogImageHeight]) {
			[self ensureImage].height = (CGFloat)content.doubleValue;
		}
	}
}

@end