Wikipedia Save as EPUB: various encoding fixes (#3851)

* Wiki Save as EPUB: various encoding fixes

Fix display of & in article titles
Fix display of &, <, > in TOC entries and in targeted anchor (the mismatch
with the target id made these TOC entries invalid and simply not displayed).
Remove the percent-encoded URL tweaks for crengine now that crengine
correctly supports such URLs (each percent-encoded sequence is handled as a UTF-8 byte).
Bump crengine for that.
Don't include <meta name="cover"> when no cover present.

* bump base/crengine
This commit is contained in:
poire-z
2018-04-10 18:30:27 +02:00
committed by Frans de Jonge
parent 3585067796
commit 305e75c5ea
3 changed files with 43 additions and 34 deletions

View File

@@ -529,15 +529,16 @@ function util.unicodeCodepointToUtf8(c)
end
end
-- Replacement rules consumed by util.htmlEntitiesToUtf8.
-- Each entry is {lua_pattern, replacement}: the replacement is either a
-- literal string or a function receiving the pattern's capture.
-- we need to use an array of arrays to keep them ordered as written
-- (a string-keyed table would be walked by pairs() in unspecified order,
-- and "&amp;" could then be decoded before the other entities).
local HTML_ENTITIES_TO_UTF8 = {
    {"&lt;", "<"},
    {"&gt;", ">"},
    {"&quot;", '"'},
    {"&apos;", "'"},
    {"&nbsp;", "\xC2\xA0"},
    -- decimal numeric character reference, e.g. &#8211;
    {"&#(%d+);", function(x) return util.unicodeCodepointToUtf8(tonumber(x)) end},
    -- hexadecimal numeric character reference, e.g. &#x2013;
    {"&#x(%x+);", function(x) return util.unicodeCodepointToUtf8(tonumber(x,16)) end},
    {"&amp;", "&"}, -- must be last
}
--- Replace HTML entities with their UTF8 equivalent in text
--
@@ -546,8 +547,8 @@ local HTML_ENTITIES_TO_UTF8 = {
--- @string text text with HTML entities
--- @treturn string UTF8 text
function util.htmlEntitiesToUtf8(text)
    -- Apply the rules strictly in the order they are written: ipairs() walks
    -- the array part sequentially, whereas pairs() gives no ordering
    -- guarantee. "&amp;" has to be handled last so that an escaped entity
    -- such as "&amp;lt;" is decoded once (to "&lt;") and not twice (to "<").
    for _, rule in ipairs(HTML_ENTITIES_TO_UTF8) do
        -- rule[1] is the Lua pattern, rule[2] the replacement (string or
        -- function); assigning to one variable drops gsub's match count.
        text = text:gsub(rule[1], rule[2])
    end
    return text
end