mirror of
https://github.com/koreader/koreader.git
synced 2025-08-10 00:52:38 +00:00
[chore] replace utf8 bytes with Unicode escape sequence
This commit is contained in:
@@ -677,14 +677,14 @@ function ReaderDictionary:cleanSelection(text, is_sane)
|
||||
-- (example: pdf selection "qu’autrefois," will be cleaned to "autrefois")
|
||||
--
|
||||
-- Replace no-break space with regular space
|
||||
text = text:gsub("\xC2\xA0", ' ') -- U+00A0 no-break space
|
||||
text = text:gsub("\u{00A0}", ' ')
|
||||
-- Trim any space at start or end
|
||||
text = text:gsub("^%s+", "")
|
||||
text = text:gsub("%s+$", "")
|
||||
if not is_sane then
|
||||
-- Replace extended quote (included in the general punctuation range)
|
||||
-- with plain ascii quote (for french words like "aujourd’hui")
|
||||
text = text:gsub("\xE2\x80\x99", "'") -- U+2019 (right single quotation mark)
|
||||
text = text:gsub("\u{2019}", "'") -- Right single quotation mark
|
||||
-- Strip punctuation characters around selection
|
||||
text = util.stripPunctuation(text)
|
||||
-- Strip some common english grammatical construct
|
||||
|
||||
@@ -386,7 +386,7 @@ local footerTextGeneratorMap = {
|
||||
book_title = function(footer)
|
||||
local doc_info = footer.ui.document:getProps()
|
||||
if doc_info and doc_info.title then
|
||||
local title = doc_info.title:gsub(" ", "\xC2\xA0") -- replace space with no-break-space
|
||||
local title = doc_info.title:gsub(" ", "\u{00A0}") -- replace space with no-break-space
|
||||
local title_widget = TextWidget:new{
|
||||
text = title,
|
||||
max_width = footer._saved_screen_width * footer.settings.book_title_max_width_pct * (1/100),
|
||||
@@ -406,7 +406,7 @@ local footerTextGeneratorMap = {
|
||||
book_chapter = function(footer)
|
||||
local chapter_title = footer.ui.toc:getTocTitleByPage(footer.pageno)
|
||||
if chapter_title and chapter_title ~= "" then
|
||||
chapter_title = chapter_title:gsub(" ", "\xC2\xA0") -- replace space with no-break-space
|
||||
chapter_title = chapter_title:gsub(" ", "\u{00A0}") -- replace space with no-break-space
|
||||
local chapter_widget = TextWidget:new{
|
||||
text = chapter_title,
|
||||
max_width = footer._saved_screen_width * footer.settings.book_chapter_max_width_pct * (1/100),
|
||||
@@ -2037,7 +2037,7 @@ function ReaderFooter:genAllFooterText()
|
||||
if self.settings.item_prefix == "compact_items" then
|
||||
-- remove whitespace from footer items if symbol_type is compact_items
|
||||
-- use a hair-space to avoid issues with RTL display
|
||||
text = text:gsub("%s", "\xE2\x80\x8A")
|
||||
text = text:gsub("%s", "\u{200A}")
|
||||
end
|
||||
-- if generator request a merge of this item, add it directly,
|
||||
-- i.e. no separator before and after the text then.
|
||||
|
||||
@@ -1649,7 +1649,7 @@ function ReaderHighlight:onUnhighlight(bookmark_item)
|
||||
if self.ui.paging then -- We can safely use page
|
||||
-- As we may have changed spaces and hyphens handling in the extracted
|
||||
-- text over the years, check text identities with them removed
|
||||
local sel_text_cleaned = sel_text:gsub("[ -]", ""):gsub("\xC2\xAD", "")
|
||||
local sel_text_cleaned = sel_text:gsub("[ -]", ""):gsub("\u{00AD}", "")
|
||||
for index = 1, #self.view.highlight.saved[page] do
|
||||
local highlight = self.view.highlight.saved[page][index]
|
||||
-- pos0 are tables and can't be compared directly, except when from
|
||||
@@ -1657,7 +1657,7 @@ function ReaderHighlight:onUnhighlight(bookmark_item)
|
||||
-- If bookmark_item provided, just check datetime
|
||||
if ( (datetime == nil and highlight.pos0 == sel_pos0) or
|
||||
(datetime ~= nil and highlight.datetime == datetime) ) then
|
||||
if highlight.text:gsub("[ -]", ""):gsub("\xC2\xAD", "") == sel_text_cleaned then
|
||||
if highlight.text:gsub("[ -]", ""):gsub("\u{00AD}", "") == sel_text_cleaned then
|
||||
idx = index
|
||||
break
|
||||
end
|
||||
|
||||
@@ -83,7 +83,7 @@ end
|
||||
function ReaderToc:cleanUpTocTitle(title, replace_empty)
|
||||
title = title:gsub("\13", "")
|
||||
if replace_empty and title:match("^%s*$") then
|
||||
title = "\xE2\x80\x93" -- U+2013 En-Dash
|
||||
title = "\u{2013}" -- En-Dash
|
||||
end
|
||||
return title
|
||||
end
|
||||
|
||||
@@ -150,7 +150,7 @@ function datetime.secondsToHClock(seconds, withoutSeconds, hmsFormat, withDays,
|
||||
if compact then
|
||||
return T(C_("Time", "%1s"), string.format("%d", seconds))
|
||||
else
|
||||
return T(C_("Time", "%1m\xE2\x80\x89%2s"), "0", string.format("%d", seconds))
|
||||
return T(C_("Time", "%1m\u{2009}%2s"), "0", string.format("%d", seconds)) -- use a thin space
|
||||
end
|
||||
else
|
||||
if compact then
|
||||
@@ -178,13 +178,13 @@ function datetime.secondsToHClock(seconds, withoutSeconds, hmsFormat, withDays,
|
||||
|
||||
if hmsFormat then
|
||||
time_string = time_string:gsub("0(%d)", "%1") -- delete all leading "0"s
|
||||
time_string = time_string:gsub(C_("Time", "d"), C_("Time", "d") .. "\xE2\x80\x89") -- add thin space after "d"
|
||||
time_string = time_string:gsub(C_("Time", "h"), C_("Time", "h") .. "\xE2\x80\x89") -- add thin space after "h"
|
||||
time_string = time_string:gsub(C_("Time", "d"), C_("Time", "d") .. "\u{2009}") -- add thin space after "d"
|
||||
time_string = time_string:gsub(C_("Time", "h"), C_("Time", "h") .. "\u{2009}") -- add thin space after "h"
|
||||
if not withoutSeconds then
|
||||
time_string = time_string:gsub(C_("Time", "m"), C_("Time", "m") .. "\xE2\x80\x89") .. C_("Time", "s") -- add thin space after "m"
|
||||
time_string = time_string:gsub(C_("Time", "m"), C_("Time", "m") .. "\u{2009}") .. C_("Time", "s") -- add thin space after "m"
|
||||
end
|
||||
if compact then
|
||||
time_string = time_string:gsub("\xE2\x80\x89", "\xE2\x80\x8A") -- replace thin space with hair space
|
||||
time_string = time_string:gsub("\u{2009}", "\u{200A}") -- replace thin space with hair space
|
||||
end
|
||||
return time_string
|
||||
else
|
||||
|
||||
@@ -946,7 +946,7 @@ function KoptInterface:getTextFromBoxes(boxes, pos0, pos1)
|
||||
-- Previous line ended with a minus.
|
||||
-- Assume it's some hyphenation and discard it.
|
||||
line_text = line_text:sub(1, -2)
|
||||
elseif line_text:sub(-2, -1) == "\xC2\xAD" then
|
||||
elseif line_text:sub(-2, -1) == "\u{00AD}" then
|
||||
-- Previous line ended with a hyphen.
|
||||
-- Assume it's some hyphenation and discard it.
|
||||
line_text = line_text:sub(1, -3)
|
||||
|
||||
@@ -187,14 +187,14 @@ end
|
||||
-- which would be an issue and would need stripping. But as these
|
||||
-- Free fonts are only used as fallback fonts, and the invisible glyphs
|
||||
-- will have been found in the previous fonts, we don't need to.
|
||||
local LRI = "\xE2\x81\xA6" -- U+2066 LRI / LEFT-TO-RIGHT ISOLATE
|
||||
local RLI = "\xE2\x81\xA7" -- U+2067 RLI / RIGHT-TO-LEFT ISOLATE
|
||||
local FSI = "\xE2\x81\xA8" -- U+2068 FSI / FIRST STRONG ISOLATE
|
||||
local PDI = "\xE2\x81\xA9" -- U+2069 PDI / POP DIRECTIONAL ISOLATE
|
||||
local LRI = "\u{2066}" -- LRI / LEFT-TO-RIGHT ISOLATE
|
||||
local RLI = "\u{2067}" -- RLI / RIGHT-TO-LEFT ISOLATE
|
||||
local FSI = "\u{2068}" -- FSI / FIRST STRONG ISOLATE
|
||||
local PDI = "\u{2069}" -- PDI / POP DIRECTIONAL ISOLATE
|
||||
|
||||
-- Not currently needed:
|
||||
-- local LRM = "\xE2\x80\x8E" -- U+200E LRM / LEFT-TO-RIGHT MARK
|
||||
-- local RLM = "\xE2\x80\x8F" -- U+200F RLM / RIGHT-TO-LEFT MARK
|
||||
-- local LRM = "\u{200E}" -- LRM / LEFT-TO-RIGHT MARK
|
||||
-- local RLM = "\u{200F}" -- RLM / RIGHT-TO-LEFT MARK
|
||||
|
||||
function Bidi.ltr(text)
|
||||
return string.format("%s%s%s", LRI, text, PDI)
|
||||
|
||||
@@ -410,7 +410,7 @@ Note that your selected font size is not affected by this setting.]]),
|
||||
},
|
||||
name_text_hold_callback = optionsutil.showValues,
|
||||
show_true_value_func = function(val) -- add "%"
|
||||
return string.format("%d\xE2\x80\xAF%%", val) -- use Narrow No-Break space here
|
||||
return string.format("%d\u{202F}%%", val) -- use Narrow No-Break space here
|
||||
end,
|
||||
},
|
||||
}
|
||||
@@ -498,7 +498,7 @@ Note that your selected font size is not affected by this setting.]]),
|
||||
name_text_hold_callback = optionsutil.showValues,
|
||||
name_text_true_values = true,
|
||||
show_true_value_func = function(val)
|
||||
return string.format("%d\xE2\x80\xAF%%, %d\xE2\x80\xAF%%", val[1], val[2]) -- use Narrow No-Break space here
|
||||
return string.format("%d\u{202F}%%, %d\u{202F}%%", val[1], val[2]) -- use Narrow No-Break space here
|
||||
end,
|
||||
},
|
||||
{
|
||||
@@ -537,7 +537,7 @@ Note that your selected font size is not affected by this setting.]]),
|
||||
name_text_hold_callback = optionsutil.showValues,
|
||||
name_text_true_values = true,
|
||||
show_true_value_func = function(val)
|
||||
return string.format("%d\xE2\x80\xAF%%", val) -- use Narrow No-Break space here
|
||||
return string.format("%d\u{202F}%%", val) -- use Narrow No-Break space here
|
||||
end,
|
||||
},
|
||||
{
|
||||
|
||||
@@ -82,8 +82,8 @@ function ViewHtml:_viewSelectionHTML(document, selected_text, view, with_css_fil
|
||||
end
|
||||
if massage_html then
|
||||
-- Make some invisible chars visible
|
||||
replace_in_html("\xC2\xA0", "␣") -- no break space: open box
|
||||
replace_in_html("\xC2\xAD", "⋅") -- soft hyphen: dot operator (smaller than middle dot ·)
|
||||
replace_in_html("\u{00A0}", "\u{2423}") -- no break space: open box
|
||||
replace_in_html("\u{00AD}", "\u{22C5}") -- soft hyphen: dot operator (smaller than middle dot ·)
|
||||
-- Prettify inlined CSS (from <HEAD>, put in an internal
|
||||
-- <body><stylesheet> element by crengine (the opening tag may
|
||||
-- include some href=, or end with " ~X>" with some html_flags)
|
||||
|
||||
@@ -201,7 +201,7 @@ function DoubleSpinWidget:update(numberpicker_left_value, numberpicker_right_val
|
||||
if self.unit == "°" then
|
||||
unit = self.unit
|
||||
elseif self.unit ~= "" then
|
||||
unit = "\xE2\x80\xAF" .. self.unit -- use Narrow No-Break Space (NNBSP) here
|
||||
unit = "\u{202F}" .. self.unit -- use Narrow No-Break Space (NNBSP) here
|
||||
end
|
||||
end
|
||||
table.insert(buttons, {
|
||||
|
||||
@@ -226,7 +226,7 @@ function NumberPickerWidget:init()
|
||||
if self.unit == "°" then
|
||||
unit = self.unit
|
||||
elseif self.unit ~= "" then
|
||||
unit = "\xE2\x80\xAF" .. self.unit -- use Narrow No-Break Space (NNBSP) here
|
||||
unit = "\u{202F}" .. self.unit -- use Narrow No-Break Space (NNBSP) here
|
||||
end
|
||||
end
|
||||
self.text_value = Button:new{
|
||||
|
||||
@@ -129,7 +129,7 @@ function SpinWidget:update(numberpicker_value, numberpicker_value_index)
|
||||
if self.unit == "°" then
|
||||
unit = self.unit
|
||||
elseif self.unit ~= "" then
|
||||
unit = "\xE2\x80\xAF" .. self.unit -- use Narrow No-Break Space (NNBSP) here
|
||||
unit = "\u{202F}" .. self.unit -- use Narrow No-Break Space (NNBSP) here
|
||||
end
|
||||
end
|
||||
local value
|
||||
|
||||
@@ -546,17 +546,17 @@ end
|
||||
-- These chosen ones are available in most fonts (prettier symbols
|
||||
-- exist in unicode, but are available in a few fonts only) and
|
||||
-- have a quite consistent size/weight in all fonts.
|
||||
local th1_sym = "\xE2\x96\x88" -- full block (big black rectangle) (never met, only for web page title?)
|
||||
local th2_sym = "\xE2\x96\x89" -- big black square
|
||||
local th3_sym = "\xC2\xA0\xE2\x97\xA4" -- black upper left triangle (indented, nicer)
|
||||
local th4_sym = "\xE2\x97\x86" -- black diamond
|
||||
local th5_sym = "\xE2\x9C\xBF" -- black florette
|
||||
local th6_sym = "\xE2\x9D\x96" -- black diamond minus white x
|
||||
local th1_sym = "\u{2588}" -- full block (big black rectangle) (never met, only for web page title?)
|
||||
local th2_sym = "\u{2589}" -- big black square
|
||||
local th3_sym = "\u{00A0}\u{25E4}" -- black upper left triangle (indented, nicer)
|
||||
local th4_sym = "\u{25C6}" -- black diamond
|
||||
local th5_sym = "\u{273F}" -- black florette
|
||||
local th6_sym = "\u{2756}" -- black diamond minus white x
|
||||
-- Others available in most fonts
|
||||
-- local thX_sym = "\xE2\x9C\x9A" -- heavy greek cross
|
||||
-- local thX_sym = "\xE2\x97\xA2" -- black lower right triangle
|
||||
-- local thX_sym = "\xE2\x97\x89" -- fish eye
|
||||
-- local thX_sym = "\xE2\x96\x97" -- quadrant lower right
|
||||
-- local thX_sym = "\u{271A}" -- heavy greek cross
|
||||
-- local thX_sym = "\u{25E2}" -- black lower right triangle
|
||||
-- local thX_sym = "\u{25C9}" -- fish eye
|
||||
-- local thX_sym = "\u{2597}" -- quadrant lower right
|
||||
|
||||
-- For optional prettification of the plain text full page
|
||||
function Wikipedia:prettifyText(text)
|
||||
@@ -571,7 +571,7 @@ function Wikipedia:prettifyText(text)
|
||||
text = text:gsub("==$", "==\n") -- for a </hN> at end of text to be matched by next gsub
|
||||
text = text:gsub(" ===?\n+", "\n\n") -- </h2> to </h3> : empty line after
|
||||
text = text:gsub(" ====+\n+", "\n") -- </h4> to </hN> : single \n, no empty line
|
||||
text = text:gsub("\n\n+\xE2\x80\x94", "\n\xE2\x80\x94") -- em dash, used for quote author, make it stick to prev text
|
||||
text = text:gsub("\n\n+\u{2014}", "\n\u{2014}") -- em dash, used for quote author, make it stick to prev text
|
||||
text = text:gsub("\n +\n", "\n") -- trim lines full of only spaces (often seen in math formulas)
|
||||
text = text:gsub("^\n*", "") -- trim new lines at start
|
||||
text = text:gsub("\n*$", "") -- trim new lines at end
|
||||
@@ -587,17 +587,17 @@ end
|
||||
-- These chosen ones are available in most fonts (prettier symbols
|
||||
-- exist in unicode, but are available in a few fonts only) and
|
||||
-- have a quite consistent size/weight in all fonts.
|
||||
local h1_sym = "\xE2\x96\x88" -- full block (big black rectangle) (never met, only for web page title?)
|
||||
local h2_sym = "\xE2\x96\x89" -- big black square
|
||||
local h3_sym = "\xE2\x97\xA4" -- black upper left triangle
|
||||
local h4_sym = "\xE2\x97\x86" -- black diamond
|
||||
local h5_sym = "\xE2\x9C\xBF" -- black florette
|
||||
local h6_sym = "\xE2\x9D\x96" -- black diamond minus white x
|
||||
local h1_sym = "\u{2588}" -- full block (big black rectangle) (never met, only for web page title?)
|
||||
local h2_sym = "\u{2589}" -- big black square
|
||||
local h3_sym = "\u{25E4}" -- black upper left triangle
|
||||
local h4_sym = "\u{25C6}" -- black diamond
|
||||
local h5_sym = "\u{273F}" -- black florette
|
||||
local h6_sym = "\u{2756}" -- black diamond minus white x
|
||||
-- Other available ones in most fonts
|
||||
-- local hXsym = "\xE2\x9C\x9A" -- heavy greek cross
|
||||
-- local hXsym = "\xE2\x97\xA2" -- black lower right triangle
|
||||
-- local hXsym = "\xE2\x97\x89" -- fish eye
|
||||
-- local hXsym = "\xE2\x96\x97" -- quadrant lower right
|
||||
-- local hXsym = "\u{271A}" -- heavy greek cross
|
||||
-- local hXsym = "\u{25E2}" -- black lower right triangle
|
||||
-- local hXsym = "\u{25C9}" -- fish eye
|
||||
-- local hXsym = "\u{2597}" -- quadrant lower right
|
||||
|
||||
local ext_to_mimetype = {
|
||||
png = "image/png",
|
||||
|
||||
@@ -1093,7 +1093,7 @@ local HTML_ENTITIES_TO_UTF8 = {
|
||||
{">", ">"},
|
||||
{""", '"'},
|
||||
{"'", "'"},
|
||||
{" ", "\xC2\xA0"},
|
||||
{" ", "\u{00A0}"},
|
||||
{"&#(%d+);", function(x) return util.unicodeCodepointToUtf8(tonumber(x)) end},
|
||||
{"&#x(%x+);", function(x) return util.unicodeCodepointToUtf8(tonumber(x, 16)) end},
|
||||
{"&", "&"}, -- must be last
|
||||
|
||||
@@ -168,12 +168,12 @@ function FakeCover:init()
|
||||
-- But at least, make dots breakable (they wouldn't be if not
|
||||
-- followed by a space), by adding to them a zero-width-space,
|
||||
-- so the dots stay on the right of their preceding word.
|
||||
title = title:gsub("%.", ".\xE2\x80\x8B")
|
||||
title = title:gsub("%.", ".\u{200B}")
|
||||
-- Except for a last dot near end of title that might precede
|
||||
-- a file extension: we'd rather want the dot and its suffix
|
||||
-- together on a last line: so, move the zero-width-space
|
||||
-- before it.
|
||||
title = title:gsub("%.\xE2\x80\x8B(%w%w?%w?%w?%w?)$", "\xE2\x80\x8B.%1")
|
||||
title = title:gsub("%.\u{200B}(%w%w?%w?%w?%w?)$", "\u{200B}.%1")
|
||||
-- These substitutions will hopefully have no impact with the following BD wrapping
|
||||
end
|
||||
if title then
|
||||
@@ -295,10 +295,10 @@ function FakeCover:init()
|
||||
-- but not around underscores and dots without any space around.
|
||||
-- So, append a zero-width-space to allow text wrap after them.
|
||||
if title then
|
||||
title = title:gsub("_", "_\xE2\x80\x8B"):gsub("%.", ".\xE2\x80\x8B")
|
||||
title = title:gsub("_", "_\u{200B}"):gsub("%.", ".\u{200B}")
|
||||
end
|
||||
if authors then
|
||||
authors = authors:gsub("_", "_\xE2\x80\x8B"):gsub("%.", ".\xE2\x80\x8B")
|
||||
authors = authors:gsub("_", "_\u{200B}"):gsub("%.", ".\u{200B}")
|
||||
end
|
||||
else
|
||||
-- Replace underscores and hyphens with spaces, to allow text wrap there.
|
||||
|
||||
Reference in New Issue
Block a user