mirror of
https://github.com/Ranchero-Software/NetNewsWire
synced 2025-08-12 06:26:36 +00:00
Rewrite HTMLEntityDecoder so that it’s fast.
This commit is contained in:
@@ -16,7 +16,7 @@ final class EntityDecodingTests: XCTestCase {
|
||||
// Bug found by Manton Reece — the &#39; entity was not getting decoded by NetNewsWire in JSON Feeds from micro.blog.
|
||||
|
||||
let s = "These are the times that try men's souls."
|
||||
let decoded = HTMLEntityDecoder.decodedString(s)
|
||||
let decoded = decodedString(s)
|
||||
|
||||
XCTAssertEqual(decoded, "These are the times that try men's souls.")
|
||||
}
|
||||
@@ -24,7 +24,7 @@ final class EntityDecodingTests: XCTestCase {
|
||||
/// Checks that an entity at the very start of the input is decoded.
func testEntityAtBeginning() {

	// NOTE(review): this literal was shown with the entity already decoded (a bare apostrophe),
	// which made the assertion vacuous. `&#39;` is restored from the expected value — confirm
	// against the repository copy.
	let s = "&#39;leading single quote"

	// Only the `decodedString` helper call is kept; the view also carried the superseded
	// `HTMLEntityDecoder.decodedString(s)` line, which redeclared `decoded`.
	let decoded = decodedString(s)

	XCTAssertEqual(decoded, "'leading single quote")
}
|
||||
@@ -32,7 +32,7 @@ final class EntityDecodingTests: XCTestCase {
|
||||
/// Checks that an entity at the very end of the input is decoded.
func testEntityAtEnd() {

	// NOTE(review): this literal was shown with the entity already decoded (a bare apostrophe),
	// which made the assertion vacuous. `&#39;` is restored from the expected value — confirm
	// against the repository copy.
	let s = "trailing single quote&#39;"

	// Only the `decodedString` helper call is kept; the view also carried the superseded
	// `HTMLEntityDecoder.decodedString(s)` line, which redeclared `decoded`.
	let decoded = decodedString(s)

	XCTAssertEqual(decoded, "trailing single quote'")
}
|
||||
@@ -40,7 +40,7 @@ final class EntityDecodingTests: XCTestCase {
|
||||
/// Checks that an entity surrounded by plain text is decoded.
func testEntityInMiddle() {

	// NOTE(review): this literal was shown with the entity already decoded ("ç"), which made
	// the assertion vacuous. The named form `&ccedil;` is assumed here; the original may have
	// used a numeric spelling — confirm against the repository copy.
	let s = "entity &ccedil; in middle"

	// Only the `decodedString` helper call is kept; the view also carried the superseded
	// `HTMLEntityDecoder.decodedString(s)` line, which redeclared `decoded`.
	let decoded = decodedString(s)

	XCTAssertEqual(decoded, "entity ç in middle")
}
|
||||
@@ -48,43 +48,100 @@ final class EntityDecodingTests: XCTestCase {
|
||||
/// Checks that several adjacent entities (named, decimal and hexadecimal) are all decoded.
func testMultipleEntitiesInARow() {

	// NOTE(review): this literal was shown with every entity already decoded, including a bare
	// double quote that terminated the string early. The spellings below are reconstructed from
	// the expected value; the exact mix of named/decimal/hex forms is an assumption — confirm
	// against the repository copy.
	let s = "&ccedil;&egrave;mult&#8230;&#x2026;iple &#39;&aelig;&quot;entities&divide;&hearts;"

	// Only the `decodedString` helper call is kept; the view also carried the superseded
	// `HTMLEntityDecoder.decodedString(s)` line, which redeclared `decoded`.
	let decoded = decodedString(s)

	XCTAssertEqual(decoded, "çèmult……iple 'æ\"entities÷♥")
}
|
||||
|
||||
/// Feeds the decoder strings full of `&`, `#`, `x`/`X` and `;` that never form a real entity
/// and expects each one to come back unchanged.
func testFakeoutEntities() {

	// NOTE(review): some of these literals may have lost entity-like text when this view was
	// extracted (the second one in particular) — compare with the repository copy.
	let untouchedInputs = [
		"&&;&#;&#x;&#X;& ;&# \t;&\r&&&&&;",
		"#;&#x;&#X;& {",
		" &lsquo ",
		"&&&&&&&&&&&&&&&&&&&;;;;;;&;&;&##;#X::&;&;&;&"
	]

	for s in untouchedInputs {
		XCTAssertEqual(decodedString(s), s)
	}
}
|
||||
|
||||
/// Feeds the decoder entity-shaped tokens built around the non-entity name "squirrel"
/// (named, `&#`, `&#x` and `&#X` forms) and expects each input to come back unchanged.
func testFakeSquirrelEntities() {

	// NOTE(review): the inputs containing "'squirrel;" look like they lost an entity prefix
	// when this view was extracted — compare with the repository copy.
	let untouchedInputs = [
		"&squirrel;",
		"&squirrel;&#squirrel;",
		"&squirrel;&#squirrel;&#xsquirrel;&#Xsquirrel;",
		"'squirrel;",
		"&squirrel;&#squirrel;&#xsquirrel;&#Xsquirrel;'squirrel;",
		"&squirrel;&#squirrel;&#xsquirrel;&#Xsquirrel;'squirrel;&&;;;;&;&;&#squi#;#rrelX::&;&;&;&"
	]

	for s in untouchedInputs {
		XCTAssertEqual(decodedString(s), s)
	}
}
|
||||
|
||||
/// Feeds the decoder a very long non-entity name plus further fakeout inputs and expects
/// each one to come back unchanged.
func testLongFakeoutEntities() {

	// NOTE(review): the last five literals are identical U+FFFD characters in this view, which
	// looks like extraction damage (presumably they were distinct over-long numeric entities).
	// They are kept exactly as shown — restore them from the repository copy.
	let untouchedInputs = [
		"&thisIsALongNotRealEntityThatShouldBeHandledPerfectlyWellByTheParserBasicallyIgnored;",
		"�",
		"�",
		"�",
		"�",
		"�"
	]

	for s in untouchedInputs {
		XCTAssertEqual(decodedString(s), s)
	}
}
|
||||
|
||||
/// Checks inputs that consist of exactly one entity and nothing else.
func testOnlyEntity() {
	// NOTE(review): every input was shown already decoded in this view (so each assertion
	// compared a character with itself), and each step carried both the superseded
	// `HTMLEntityDecoder.decodedString(s)` call and the `decodedString(s)` helper call.
	// The encoded spellings below are reconstructed from the expected values; the exact
	// decimal/hex forms are an assumption — confirm against the repository copy.
	var s = "&#8230;"
	var decoded = decodedString(s)

	XCTAssertEqual(decoded, "…")

	s = "&#x2026;"
	decoded = decodedString(s)
	XCTAssertEqual(decoded, "…")

	s = "&#039;"
	decoded = decodedString(s)
	XCTAssertEqual(decoded, "'")

	s = "&#167;"
	decoded = decodedString(s)
	XCTAssertEqual(decoded, "§")

	s = "&#XA3;"
	decoded = decodedString(s)
	XCTAssertEqual(decoded, "£")
}
|
||||
|
||||
/// Measures how long it takes to decode the bundled "DaringFireball.html" fixture.
func testPerformance() {

	// 0.009 sec on my 2012 iMac.
	// 0.003 sec on my M1 Mac Studio.
	let s = stringForResource("DaringFireball", "html")

	self.measure {
		// Decode once per iteration. The view carried both the superseded
		// `HTMLEntityDecoder.decodedString(s)` call and this helper call, which would
		// double the measured work.
		_ = decodedString(s)
	}
}
|
||||
|
||||
}
|
||||
|
||||
func stringForResource(_ filename: String, _ fileExtension: String) -> String {
|
||||
@@ -93,3 +150,8 @@ func stringForResource(_ filename: String, _ fileExtension: String) -> String {
|
||||
let path = Bundle.module.path(forResource: filename, ofType: fileExtension)!
|
||||
return try! String(contentsOfFile: path)
|
||||
}
|
||||
|
||||
/// Test convenience wrapper around `HTMLEntityDecoder.decodedString(_:)`.
///
/// Force-unwraps the decoder's result, so a `nil` result traps and fails the running test.
/// - Parameter s: The text to run through the decoder.
/// - Returns: The unwrapped decoder output.
func decodedString(_ s: String) -> String {

	let result = HTMLEntityDecoder.decodedString(s)
	return result!
}
|
||||
|
||||
Reference in New Issue
Block a user