mirror of
https://github.com/koreader/koreader.git
synced 2025-08-10 00:52:38 +00:00
Calibre: Metadata parser improvements (#11922)
* Added a safe pure-Lua SAX JSON parser (via LunaJSON). * Updated RapidJSON. * Also implemented a calibre-specific SAX parser in Lua-RapidJSON, and use it by default instead of the full RapidJSON one. * Raised the file-size threshold to switch between the fast & safe parsers to 50MB. * Added a UI option to switch between the three parsers.
This commit is contained in:
2
base
2
base
Submodule base updated: 22a6e4f616...4518f06447
@@ -118,6 +118,11 @@ function Calibre:addToMainMenu(menu_items)
|
||||
keep_menu_open = true,
|
||||
sub_item_table = self:getWirelessMenuTable(),
|
||||
},
|
||||
{
|
||||
text = _("JSON parser"),
|
||||
keep_menu_open = true,
|
||||
sub_item_table = self:getParserMenuTable(),
|
||||
},
|
||||
}
|
||||
}
|
||||
-- insert the metadata search
|
||||
@@ -414,4 +419,50 @@ function Calibre:getWirelessMenuTable()
|
||||
return t
|
||||
end
|
||||
|
||||
--- Build the sub-menu that lets the user pick which JSON parser is used.
-- The choice is persisted under the "calibre_json_parser" setting; the
-- "Automatic" entry corresponds to that setting being absent.
-- @treturn table list of menu items (text, help_text, checked_func, callback)
function Calibre:getParserMenuTable()
    local setting_key = "calibre_json_parser"

    -- Build one entry. A nil `value` means "no stored preference".
    local function makeEntry(label, description, value)
        local item = {}
        item.text = label
        item.help_text = description
        if value == nil then
            item.checked_func = function()
                return G_reader_settings:hasNot(setting_key)
            end
            item.callback = function()
                G_reader_settings:delSetting(setting_key)
            end
        else
            item.checked_func = function()
                return G_reader_settings:readSetting(setting_key) == value
            end
            item.callback = function()
                G_reader_settings:saveSetting(setting_key, value)
            end
        end
        return item
    end

    return {
        makeEntry(
            _("Automatic"),
            _("The program will decide based on the size of the JSON file. Recommended"),
            nil
        ),
        makeEntry(
            _("Fast"),
            _("Faster parsing, but may not take too kindly to malformed input files"),
            "fast"
        ),
        makeEntry(
            _("Safe"),
            _("Slower, but safer. Useful if you're experiencing problems with the other modes"),
            "safe"
        ),
        makeEntry(
            _("Legacy"),
            _("Fast, but requires more RAM, only recommended on modest library sizes (or beefier devices)"),
            "legacy"
        ),
    }
end
|
||||
|
||||
|
||||
return Calibre
|
||||
|
||||
@@ -52,9 +52,9 @@ local function slim(book, is_search)
|
||||
return slim_book
|
||||
end
|
||||
|
||||
-- this is the max file size we attempt to decode using json. For larger
|
||||
-- files we want to attempt to manually parse the file to avoid OOM errors
|
||||
local MAX_JSON_FILESIZE = 30 * 1000 * 1000
|
||||
-- This is the max file size we attempt to decode using rapidjson.
|
||||
-- For larger files we use a sax parser to avoid OOM errors
|
||||
local MAX_JSON_FILESIZE = 50 * 1024 * 1024
|
||||
|
||||
--- find calibre files for a given dir
|
||||
local function findCalibreFiles(dir)
|
||||
@@ -121,8 +121,11 @@ function CalibreMetadata:loadBookList()
|
||||
return {}
|
||||
end
|
||||
local books, err
|
||||
if attr.size > MAX_JSON_FILESIZE then
|
||||
books, err = parser.parseFile(self.metadata)
|
||||
local impl = G_reader_settings:readSetting("calibre_json_parser") or attr.size > MAX_JSON_FILESIZE and "safe" or "fast"
|
||||
if impl == "fast" then
|
||||
books, err = rapidjson.load_calibre(self.metadata)
|
||||
elseif impl == "safe" then
|
||||
books, err = parser.parseFile(self.metadata)
|
||||
else
|
||||
books, err = rapidjson.load(self.metadata)
|
||||
end
|
||||
|
||||
@@ -1,90 +1,96 @@
|
||||
-- A parser for metadata.calibre
|
||||
local util = require("util")
|
||||
-- parse "metadata.calibre" files
|
||||
local lj = require("lunajson")
|
||||
|
||||
-- removes leading and closing characters and converts hex-unicodes
|
||||
local function replaceHexChars(s, n, j)
|
||||
local l = string.len(s)
|
||||
if string.sub(s, l, l) == "\"" then
|
||||
s = string.sub(s, n, string.len(s)-1)
|
||||
-- Keys for which the `startarray` callback pre-creates a table, so that the
-- values that follow are appended to it instead of being stored as a scalar.
local array_fields = {
|
||||
authors = true,
|
||||
tags = true,
|
||||
series = true,
|
||||
}
|
||||
|
||||
-- Keys that the `key` callback selects as the current field; only these
-- names are ever used as keys in the per-object table that gets collected.
local required_fields = {
|
||||
authors = true,
|
||||
last_modified = true,
|
||||
lpath = true,
|
||||
series = true,
|
||||
series_index = true,
|
||||
size = true,
|
||||
tags = true,
|
||||
title = true,
|
||||
uuid = true,
|
||||
}
|
||||
|
||||
-- Name of the required key whose value is currently being read. It is set by
-- the `key` callback and reset to nil after a scalar is stored or an array ends.
local field
|
||||
-- Fields gathered so far for the object currently being parsed; it is pushed
-- onto `result` and replaced by a fresh table when a depth-1 object closes.
local t = {}
|
||||
local function append(v)
|
||||
-- Some fields *may* be arrays, so check whether we ran through startarray first or not
|
||||
if t[field] then
|
||||
table.insert(t[field], v)
|
||||
else
|
||||
s = string.sub(s, n, string.len(s)-j)
|
||||
end
|
||||
s = string.gsub(s, "\\u([a-f0-9][a-f0-9][a-f0-9][a-f0-9])", function(w)
|
||||
return util.unicodeCodepointToUtf8(tonumber(w, 16))
|
||||
end)
|
||||
return s
|
||||
end
|
||||
|
||||
-- a couple of string helper functions for dealing with raw json strings
|
||||
local function isEqual(str, key)
|
||||
if str:sub(1, key:len() + 6) == string.format(" \"%s\"", key) then
|
||||
return true
|
||||
end
|
||||
return false
|
||||
end
|
||||
|
||||
local function getValue(str, key)
|
||||
if str == string.format(" \"%s\": null, ", key) then
|
||||
return nil
|
||||
else
|
||||
return replaceHexChars(str, key:len() + 10, key == "series_index" and 2 or 3)
|
||||
t[field] = v
|
||||
field = nil
|
||||
end
|
||||
end
|
||||
|
||||
local jsonStr = getmetatable("")
|
||||
jsonStr.__index["equals"] = isEqual
|
||||
jsonStr.__index["value"] = getValue
|
||||
-- Current nesting level of JSON objects. Objects closing at depth 1 are
-- treated as complete entries and pushed onto `result`.
local depth = 0
-- Completed entries, in the order they were encountered.
local result = {}

--- SAX callbacks handed to lunajson.
-- Only keys listed in `required_fields`, and only those that belong directly
-- to a depth-1 object, select a target field. Everything else is skipped.
-- `field` is cleared whenever the value for the current key cannot (or must
-- not) be stored, so a later unrelated value is never filed under a stale key.
local sax = {
    startobject = function()
        depth = depth + 1
    end,
    endobject = function()
        if depth == 1 then
            table.insert(result, t)
            t = {}
            -- Never carry a pending key over into the next entry.
            field = nil
        end
        depth = depth - 1
    end,
    startarray = function()
        -- Reading a table with a nil key is fine in Lua, so no guard is needed.
        if array_fields[field] then
            t[field] = {}
        end
    end,
    endarray = function()
        field = nil
    end,
    key = function(s)
        -- Keys of nested objects (depth > 1) must not hijack the current
        -- field, and a non-required key must drop any field still pending
        -- (e.g. after a required key whose value was null or an object).
        if depth == 1 and required_fields[s] then
            field = s
        else
            field = nil
        end
    end,
    string = function(s)
        if field then
            append(s)
        end
    end,
    number = function(n)
        if field then
            append(n)
        end
    end,
    boolean = function(b)
        if field then
            append(b)
        end
    end,
    null = function()
        -- A null value stores nothing; just forget the pending key.
        field = nil
    end,
}
|
||||
|
||||
--- Run the lunajson SAX parser over the file at `path`.
-- Parsed entries are accumulated by the `sax` callbacks. Any error raised
-- while parsing propagates to the caller, which invokes this through pcall.
-- @tparam string path file to parse
local function parse_unsafe(path)
    -- Start from a clean slate. A previous run that aborted mid-object would
    -- otherwise leave `depth` above 0 (so no object would ever close at
    -- depth 1 again) and a half-filled `t` that would leak into the first
    -- entry of this run.
    depth = 0
    t = {}
    field = nil

    local p = lj.newfileparser(path, sax)
    p.run()
end
|
||||
|
||||
local parser = {}
|
||||
|
||||
-- read metadata from file, line by line, and keep just the data we need
|
||||
function parser.parseFile(file)
|
||||
assert(type(file) == "string", "wrong type (expected a string")
|
||||
local f, err = io.open(file, "rb")
|
||||
if not f then
|
||||
return nil, string.format("error parsing %s: %s", file, err)
|
||||
result = {}
|
||||
local ok, err = pcall(parse_unsafe, file)
|
||||
field = nil
|
||||
if not ok then
|
||||
return nil, err
|
||||
end
|
||||
f:close()
|
||||
local add = function(t, line)
|
||||
if type(t) ~= "table" or type(line) ~= "string" then
|
||||
return {}
|
||||
end
|
||||
line = replaceHexChars(line, 8, 3)
|
||||
table.insert(t, #t + 1, line)
|
||||
return t
|
||||
end
|
||||
local books, book = {}, {}
|
||||
local is_author, is_tag = false, false
|
||||
for line in io.lines(file) do
|
||||
if line == " }, " or line == " }" then
|
||||
if type(book) == "table" then
|
||||
table.insert(books, #books + 1, book)
|
||||
end
|
||||
book = {}
|
||||
elseif line == " \"authors\": [" then
|
||||
is_author = true
|
||||
elseif line == " \"tags\": [" then
|
||||
is_tag = true
|
||||
elseif line == " ], " or line == " ]" then
|
||||
is_author, is_tag = false, false
|
||||
else
|
||||
for _, key in ipairs({"title", "uuid", "lpath", "size",
|
||||
"last_modified", "series", "series_index"})
|
||||
do
|
||||
if line:equals(key) then
|
||||
book[key] = line:value(key)
|
||||
break
|
||||
end
|
||||
end
|
||||
end
|
||||
if is_author then
|
||||
book.authors = add(book.authors, line)
|
||||
elseif is_tag then
|
||||
book.tags = add(book.tags, line)
|
||||
end
|
||||
end
|
||||
return books
|
||||
return result
|
||||
end
|
||||
|
||||
return parser
|
||||
|
||||
Reference in New Issue
Block a user