[fix] util.getSafeFilename() maximum extension length (#5067)

Strip HTML and do some semi-intelligent detection of faux extensions (i.e., more than 10 characters probably isn't one).

Fixes #5049.
This commit is contained in:
Frans de Jonge
2019-06-10 17:06:13 +02:00
committed by GitHub
parent da12fc9e2a
commit 9300a59a89
3 changed files with 26 additions and 2 deletions

View File

@@ -480,13 +480,14 @@ end
---- @string path
---- @int limit
---- @treturn string
function util.getSafeFilename(str, path, limit)
function util.getSafeFilename(str, path, limit, limit_ext)
local filename, suffix = util.splitFileNameSuffix(str)
local replaceFunc = replaceAllInvalidChars
local safe_filename
-- VFAT supports a maximum of 255 UCS-2 characters, although it's probably treated as UTF-16 by Windows
-- default to a slightly lower limit just in case
limit = limit or 240
limit_ext = limit_ext or 10
if path then
local file_system = util.getFilesystemType(path)
@@ -495,6 +496,14 @@ function util.getSafeFilename(str, path, limit)
end
end
if suffix:len() > limit_ext then
-- probably not an actual file extension, or at least not one we'd be
-- dealing with, so strip the whole string
filename = str
suffix = nil
end
filename = util.htmlToPlainTextIfHtml(filename)
filename = filename:sub(1, limit)
-- the limit might result in broken UTF-8, which we don't want in the result
filename = util.fixUtf8(filename, "")

View File

@@ -300,7 +300,7 @@ end
function Wallabag:download(article)
local skip_article = false
local item_url = "/api/entries/" .. article.id .. "/export.epub"
local title = util.getSafeFilename(article.title, self.directory)
local title = util.getSafeFilename(article.title, self.directory, 230, 0)
local local_path = self.directory .. article_id_prefix .. article.id .. article_id_postfix .. title .. ".epub"
logger.dbg("Wallabag: DOWNLOAD: id: ", article.id)
logger.dbg("Wallabag: DOWNLOAD: title: ", article.title)

View File

@@ -239,6 +239,21 @@ describe("util module", function()
assert.are_same("a", util.splitFileNameSuffix("a.txt"))
end)
describe("getSafeFileName()", function()
it("should replace unsafe characters", function()
assert.is_equal("___", util.getSafeFilename("|||"))
end)
it("should truncate any characters beyond the limit", function()
assert.is_equal("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", util.getSafeFilename("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"))
end)
it("should truncate extension beyond the limit", function()
assert.is_equal("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", util.getSafeFilename("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa.aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"))
end)
it("should strip HTML from the filename", function()
assert.is_equal("lalala", util.getSafeFilename("<span>lalala</span>"))
end)
end)
describe("fixUtf8()", function()
it("should replace invalid UTF-8 characters with an underscore", function()
assert.is_equal("\127 _ _\127 ", util.fixUtf8("\127 \128 \194\127 ", "_"))