[chore] replace utf8 bytes with Unicode escape sequence

This commit is contained in:
poire-z
2023-08-01 10:09:29 +02:00
parent 7bff61150a
commit 626864f856
15 changed files with 54 additions and 54 deletions

View File

@@ -677,14 +677,14 @@ function ReaderDictionary:cleanSelection(text, is_sane)
-- (example: pdf selection "qu’autrefois," will be cleaned to "autrefois")
--
-- Replace no-break space with regular space
text = text:gsub("\xC2\xA0", ' ') -- U+00A0 no-break space
text = text:gsub("\u{00A0}", ' ')
-- Trim any space at start or end
text = text:gsub("^%s+", "")
text = text:gsub("%s+$", "")
if not is_sane then
-- Replace extended quote (included in the general punctuation range)
-- with plain ASCII quote (for French words like "aujourd’hui")
text = text:gsub("\xE2\x80\x99", "'") -- U+2019 (right single quotation mark)
text = text:gsub("\u{2019}", "'") -- Right single quotation mark
-- Strip punctuation characters around selection
text = util.stripPunctuation(text)
-- Strip some common english grammatical construct