mirror of
https://github.com/koreader/koreader.git
synced 2025-08-10 00:52:38 +00:00
md5: centralize and deduplicate (#11003)
The document's partial MD5 hash is calculated by util.partialMD5() and stored in doc_settings as "partial_md5_checksum" the first time the document is opened.
This commit is contained in:
@@ -464,23 +464,23 @@ function ReaderUI:init()
|
||||
end
|
||||
self.postInitCallback = nil
|
||||
|
||||
-- Now that document is loaded, store book metadata in settings
|
||||
-- (so that filemanager can use it from sideCar file to display
|
||||
-- Book information).
|
||||
-- Now that document is loaded, store book metadata in settings.
|
||||
local props = self.document:getProps()
|
||||
self.doc_settings:saveSetting("doc_props", props)
|
||||
-- And have an extended and customized copy in memory for quick access.
|
||||
self.doc_props = FileManagerBookInfo.extendProps(props, self.document.file)
|
||||
|
||||
-- Set "reading" status if there is no status.
|
||||
local summary = self.doc_settings:readSetting("summary")
|
||||
if not (summary and summary.status) then
|
||||
if not summary then
|
||||
summary = {}
|
||||
end
|
||||
local summary = self.doc_settings:readSetting("summary", {})
|
||||
if summary.status == nil then
|
||||
summary.status = "reading"
|
||||
summary.modified = os.date("%Y-%m-%d", os.time())
|
||||
self.doc_settings:saveSetting("summary", summary)
|
||||
end
|
||||
|
||||
local md5 = self.doc_settings:readSetting("partial_md5_checksum")
|
||||
if md5 == nil then
|
||||
md5 = util.partialMD5(self.document.file)
|
||||
self.doc_settings:saveSetting("partial_md5_checksum", md5)
|
||||
end
|
||||
|
||||
require("readhistory"):addItem(self.document.file) -- (will update "lastfile")
|
||||
|
||||
@@ -20,7 +20,7 @@ local DOCSETTINGS_HASH_DIR = DataStorage:getDocSettingsHashDir()
|
||||
local custom_metadata_filename = "custom_metadata.lua"
|
||||
|
||||
local is_hash_location_enabled
|
||||
local hash_path_cache = {}
|
||||
local doc_hash_cache = {}
|
||||
|
||||
function DocSettings.isHashLocationEnabled()
|
||||
if is_hash_location_enabled == nil then
|
||||
@@ -93,13 +93,13 @@ function DocSettings:getSidecarDir(doc_path, force_location)
|
||||
if location == "dir" then
|
||||
path = DOCSETTINGS_DIR .. path
|
||||
elseif location == "hash" then
|
||||
local hsh = hash_path_cache[doc_path]
|
||||
local hsh = doc_hash_cache[doc_path]
|
||||
if not hsh then
|
||||
local file = io.open(doc_path, 'rb')
|
||||
if not file then return path .. ".sdr" end
|
||||
hsh = util.partialMD5(file)
|
||||
file:close()
|
||||
hash_path_cache[doc_path] = hsh
|
||||
hsh = util.partialMD5(doc_path)
|
||||
if not hsh then -- fallback to "doc"
|
||||
return path .. ".sdr"
|
||||
end
|
||||
doc_hash_cache[doc_path] = hsh
|
||||
logger.dbg("DocSettings: Caching new partial MD5 hash for", doc_path, "as", hsh)
|
||||
else
|
||||
logger.dbg("DocSettings: Using cached partial MD5 hash for", doc_path, "as", hsh)
|
||||
|
||||
@@ -9,7 +9,6 @@ local Math = require("optmath")
|
||||
local TileCacheItem = require("document/tilecacheitem")
|
||||
local lfs = require("libs/libkoreader-lfs")
|
||||
local logger = require("logger")
|
||||
local util = require("util")
|
||||
|
||||
--[[
|
||||
This is an abstract interface to a document
|
||||
@@ -144,31 +143,6 @@ function Document:discardChange()
|
||||
self.is_edited = false
|
||||
end
|
||||
|
||||
-- Calculate a partial digest of the document and store it in its docsettings, to prevent
|
||||
-- document saving features from changing its checksum.
|
||||
function Document:fastDigest(docsettings)
|
||||
if not self.file then return end
|
||||
local file = io.open(self.file, 'rb')
|
||||
if file then
|
||||
local tmp_docsettings = false
|
||||
if not docsettings then -- if not provided, open/create it
|
||||
docsettings = require("docsettings"):open(self.file)
|
||||
tmp_docsettings = true
|
||||
end
|
||||
local result = docsettings:readSetting("partial_md5_checksum")
|
||||
if not result then
|
||||
logger.dbg("computing and storing partial_md5_checksum")
|
||||
result = util.partialMD5(file)
|
||||
docsettings:saveSetting("partial_md5_checksum", result)
|
||||
end
|
||||
if tmp_docsettings then
|
||||
docsettings:close()
|
||||
end
|
||||
file:close()
|
||||
return result
|
||||
end
|
||||
end
|
||||
|
||||
-- this might be overridden by a document implementation
|
||||
function Document:getNativePageDimensions(pageno)
|
||||
local hash = "pgdim|"..self.file.."|"..pageno
|
||||
|
||||
@@ -5,6 +5,7 @@ This module contains miscellaneous helper functions for the KOReader frontend.
|
||||
local BaseUtil = require("ffi/util")
|
||||
local Utf8Proc = require("ffi/utf8proc")
|
||||
local lfs = require("libs/libkoreader-lfs")
|
||||
local md5 = require("ffi/sha2").md5
|
||||
local _ = require("gettext")
|
||||
local C_ = _.pgettext
|
||||
local T = BaseUtil.template
|
||||
@@ -1014,14 +1015,14 @@ end
|
||||
-- Note that if PDF file size is around 1024, 4096, 16384, 65536, 262144
|
||||
-- 1048576, 4194304, 16777216, 67108864, 268435456 or 1073741824, appending data
|
||||
-- by highlighting in KOReader may change the digest value.
|
||||
function util.partialMD5(file)
|
||||
local bit = require("bit")
|
||||
local md5 = require("ffi/sha2").md5
|
||||
local leftshift = bit.lshift
|
||||
function util.partialMD5(filepath)
|
||||
if not filepath then return end
|
||||
local file = io.open(filepath, "rb")
|
||||
if not file then return end
|
||||
local step, size = 1024, 1024
|
||||
local update = md5()
|
||||
for i = -1, 10 do
|
||||
file:seek("set", leftshift(step, 2*i))
|
||||
file:seek("set", lshift(step, 2*i))
|
||||
local sample = file:read(size)
|
||||
if sample then
|
||||
update(sample)
|
||||
@@ -1029,6 +1030,7 @@ function util.partialMD5(file)
|
||||
break
|
||||
end
|
||||
end
|
||||
file:close()
|
||||
return update()
|
||||
end
|
||||
|
||||
|
||||
@@ -169,11 +169,6 @@ function KOSync:onDispatcherRegisterActions()
|
||||
end
|
||||
|
||||
function KOSync:onReaderReady()
|
||||
-- Make sure checksum has been calculated before we ever query it,
|
||||
-- to prevent document saving features from affecting the checksum,
|
||||
-- and eventually affecting the document identity for the progress sync feature.
|
||||
self.view.document:fastDigest(self.ui.doc_settings)
|
||||
|
||||
if self.settings.auto_sync then
|
||||
UIManager:nextTick(function()
|
||||
self:getProgress(true, false)
|
||||
|
||||
@@ -76,6 +76,7 @@ local ReaderStatistics = Widget:extend{
|
||||
avg_time = nil,
|
||||
page_stat = nil, -- Dictionary, indexed by page (hash), contains a list (array) of { timestamp, duration } tuples.
|
||||
data = nil, -- table
|
||||
doc_md5 = nil,
|
||||
}
|
||||
|
||||
-- NOTE: This is used in a migration script by ui/data/onetime_migration,
|
||||
@@ -118,7 +119,6 @@ function ReaderStatistics:init()
|
||||
highlights = 0,
|
||||
notes = 0,
|
||||
pages = 0,
|
||||
md5 = nil,
|
||||
}
|
||||
|
||||
self.start_current_period = os.time()
|
||||
@@ -186,9 +186,6 @@ function ReaderStatistics:initData()
|
||||
self.data.series = series or "N/A"
|
||||
|
||||
self.data.pages = self.document:getPageCount()
|
||||
if not self.data.md5 then
|
||||
self.data.md5 = self:partialMd5(self.document.file)
|
||||
end
|
||||
-- Update these numbers to what's actually stored in the settings
|
||||
self.data.highlights, self.data.notes = self.ui.bookmark:getNumberOfHighlightsAndNotes()
|
||||
self.id_curr_book = self:getIdBookDB()
|
||||
@@ -430,29 +427,6 @@ Please wait…
|
||||
conn:close()
|
||||
end
|
||||
|
||||
function ReaderStatistics:partialMd5(file)
|
||||
if file == nil then
|
||||
return nil
|
||||
end
|
||||
local bit = require("bit")
|
||||
local md5 = require("ffi/sha2").md5
|
||||
local lshift = bit.lshift
|
||||
local step, size = 1024, 1024
|
||||
local update = md5()
|
||||
local file_handle = io.open(file, 'rb')
|
||||
for i = -1, 10 do
|
||||
file_handle:seek("set", lshift(step, 2*i))
|
||||
local sample = file_handle:read(size)
|
||||
if sample then
|
||||
update(sample)
|
||||
else
|
||||
break
|
||||
end
|
||||
end
|
||||
file_handle:close()
|
||||
return update()
|
||||
end
|
||||
|
||||
-- Mainly so we don't duplicate the schema twice between the creation/upgrade codepaths
|
||||
local STATISTICS_DB_PAGE_STAT_DATA_SCHEMA = [[
|
||||
CREATE TABLE IF NOT EXISTS page_stat_data
|
||||
@@ -642,13 +616,14 @@ function ReaderStatistics:addBookStatToDB(book_stats, conn)
|
||||
AND md5 = ?;
|
||||
]]
|
||||
local stmt = conn:prepare(sql_stmt)
|
||||
local result = stmt:reset():bind(self.data.title, self.data.authors, self.data.md5):step()
|
||||
local result = stmt:reset():bind(self.data.title, self.data.authors, self.doc_md5):step()
|
||||
local nr_id = tonumber(result[1])
|
||||
if nr_id == 0 then
|
||||
local partial_md5 = util.partialMD5(book_stats.file)
|
||||
stmt = conn:prepare("INSERT INTO book VALUES(NULL, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?);")
|
||||
stmt:reset():bind(book_stats.title, book_stats.authors, book_stats.notes,
|
||||
last_open_book, book_stats.highlights, book_stats.pages,
|
||||
book_stats.series, book_stats.language, self:partialMd5(book_stats.file), total_read_time, total_read_pages) :step()
|
||||
book_stats.series, book_stats.language, partial_md5, total_read_time, total_read_pages) :step()
|
||||
sql_stmt = [[
|
||||
SELECT last_insert_rowid() AS num;
|
||||
]]
|
||||
@@ -662,7 +637,7 @@ function ReaderStatistics:addBookStatToDB(book_stats, conn)
|
||||
AND md5 = ?;
|
||||
]]
|
||||
stmt = conn:prepare(sql_stmt)
|
||||
result = stmt:reset():bind(self.data.title, self.data.authors, self.data.md5):step()
|
||||
result = stmt:reset():bind(self.data.title, self.data.authors, self.doc_md5):step()
|
||||
id_book = result[1]
|
||||
|
||||
end
|
||||
@@ -781,14 +756,14 @@ function ReaderStatistics:getIdBookDB()
|
||||
AND md5 = ?;
|
||||
]]
|
||||
local stmt = conn:prepare(sql_stmt)
|
||||
local result = stmt:reset():bind(self.data.title, self.data.authors, self.data.md5):step()
|
||||
local result = stmt:reset():bind(self.data.title, self.data.authors, self.doc_md5):step()
|
||||
local nr_id = tonumber(result[1])
|
||||
if nr_id == 0 then
|
||||
-- Not in the DB yet, initialize it
|
||||
stmt = conn:prepare("INSERT INTO book VALUES(NULL, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?);")
|
||||
stmt:reset():bind(self.data.title, self.data.authors, self.data.notes,
|
||||
os.time(), self.data.highlights, self.data.pages,
|
||||
self.data.series, self.data.language, self.data.md5, 0, 0):step()
|
||||
self.data.series, self.data.language, self.doc_md5, 0, 0):step()
|
||||
sql_stmt = [[
|
||||
SELECT last_insert_rowid() AS num;
|
||||
]]
|
||||
@@ -802,7 +777,7 @@ function ReaderStatistics:getIdBookDB()
|
||||
AND md5 = ?;
|
||||
]]
|
||||
stmt = conn:prepare(sql_stmt)
|
||||
result = stmt:reset():bind(self.data.title, self.data.authors, self.data.md5):step()
|
||||
result = stmt:reset():bind(self.data.title, self.data.authors, self.doc_md5):step()
|
||||
id_book = result[1]
|
||||
end
|
||||
stmt:close()
|
||||
@@ -857,17 +832,13 @@ function ReaderStatistics:onBookMetadataChanged(prop_updated)
|
||||
-- Not the current document: we have to find its id in the db, from the (old) title/authors/md5
|
||||
local db_md5, db_title, db_authors, db_authors_legacy
|
||||
if DocSettings:hasSidecarFile(filepath) then
|
||||
local doc_settings = DocSettings:open(filepath)
|
||||
local stats = doc_settings:readSetting("stats")
|
||||
if stats then
|
||||
db_md5 = stats.md5
|
||||
-- Note: stats.title and stats.authors may be obsolete, if the metadata
|
||||
-- has previously been updated and the document never re-opened since.
|
||||
logger.dbg(log_prefix, "got md5 from docsettings:", db_md5)
|
||||
end
|
||||
db_md5 = DocSettings:open(filepath):readSetting("partial_md5_checksum")
|
||||
-- Note: stats.title and stats.authors may be obsolete, if the metadata
|
||||
-- has previously been updated and the document never re-opened since.
|
||||
logger.dbg(log_prefix, "got md5 from docsettings:", db_md5)
|
||||
end
|
||||
if not db_md5 then
|
||||
db_md5 = self:partialMd5(filepath)
|
||||
db_md5 = util.partialMD5(filepath)
|
||||
logger.dbg(log_prefix, "computed md5:", db_md5)
|
||||
end
|
||||
|
||||
@@ -2819,11 +2790,9 @@ function ReaderStatistics:onReadingResumed()
|
||||
self._reading_paused_ts = nil
|
||||
end
|
||||
|
||||
function ReaderStatistics:onReadSettings(config)
|
||||
function ReaderStatistics:onReaderReady(config)
|
||||
self.data = config:readSetting("stats", { performance_in_pages = {} })
|
||||
end
|
||||
|
||||
function ReaderStatistics:onReaderReady()
|
||||
self.doc_md5 = config:readSetting("partial_md5_checksum")
|
||||
-- we have correct page count now, do the actual initialization work
|
||||
self:initData()
|
||||
self.view.footer:onUpdateFooter()
|
||||
|
||||
@@ -39,9 +39,6 @@ describe("PDF document module", function()
|
||||
local clip1 = doc:clipPagePNGString(pos0, pos1, pboxes, "lighten")
|
||||
assert.truthy(clip1)
|
||||
end)
|
||||
it("should calculate fast digest", function()
|
||||
assert.is_equal(doc:fastDigest(), "41cce710f34e5ec21315e19c99821415")
|
||||
end)
|
||||
it("should close document", function()
|
||||
doc:close()
|
||||
end)
|
||||
@@ -68,9 +65,6 @@ describe("EPUB document module", function()
|
||||
assert.are.same(image:getWidth(), 442)
|
||||
assert.are.same(image:getHeight(), 616)
|
||||
end)
|
||||
it("should calculate fast digest", function()
|
||||
assert.is_equal(doc:fastDigest(), "59d481d168cca6267322f150c5f6a2a3")
|
||||
end)
|
||||
it("should register droid sans fallback", function()
|
||||
local face_list = cre.getFontFaces()
|
||||
assert.is_equal(face_list[1], "Droid Sans Mono")
|
||||
|
||||
@@ -335,6 +335,15 @@ describe("util module", function()
|
||||
end)
|
||||
end)
|
||||
|
||||
describe("partialMD5()", function()
|
||||
it("should calculate partial md5 hash of pdf file", function()
|
||||
assert.is_equal(util.partialMD5("spec/front/unit/data/tall.pdf"), "41cce710f34e5ec21315e19c99821415")
|
||||
end)
|
||||
it("should calculate partial md5 hash of epub file", function()
|
||||
assert.is_equal(util.partialMD5("spec/front/unit/data/leaves.epub"), "59d481d168cca6267322f150c5f6a2a3")
|
||||
end)
|
||||
end)
|
||||
|
||||
describe("fixUtf8()", function()
|
||||
it("should replace invalid UTF-8 characters with an underscore", function()
|
||||
assert.is_equal("\127 _ _\127 ", util.fixUtf8("\127 \128 \194\127 ", "_"))
|
||||
|
||||
Reference in New Issue
Block a user