Cache: Rewrite based on lua-lru

Ought to be faster than our naive array-based approach.
Especially for the glyph cache, which has a solid amount of elements,
and is mostly cache hits.
(There are few things worse for performance in Lua than
table.remove @ !tail and table.insert @ !tail, which this was full of :/).

DocCache: New module that's now an actual Cache instance instead of a
weird hack. Replaces "Cache" (the instance) as used across Document &
co.
Only Cache instance with on-disk persistence.

ImageCache: Update to new Cache.

GlyphCache: Update to new Cache.
Also, actually free glyph bbs on eviction.
This commit is contained in:
NiLuJe
2021-05-04 23:13:24 +02:00
parent ce624be8b8
commit 21b067792d
15 changed files with 243 additions and 229 deletions

View File

@@ -2,11 +2,11 @@
Interface to k2pdfoptlib backend.
--]]
local Cache = require("cache")
local CacheItem = require("cacheitem")
local CanvasContext = require("document/canvascontext")
local DataStorage = require("datastorage")
local DEBUG = require("dbg")
local DocCache = require("document/doccache")
local Document = require("document/document")
local Geom = require("ui/geometry")
local KOPTContext = require("ffi/koptcontext")
@@ -27,11 +27,9 @@ local KoptInterface = {
local ContextCacheItem = CacheItem:new{}
function ContextCacheItem:onFree()
if self.kctx.free then
KoptInterface:waitForContext(self.kctx)
logger.dbg("free koptcontext", self.kctx)
self.kctx:free()
end
KoptInterface:waitForContext(self.kctx)
logger.dbg("ContextCacheItem: free KOPTContext", self.kctx)
self.kctx:free()
end
function ContextCacheItem:dump(filename)
@@ -186,7 +184,7 @@ function KoptInterface:getAutoBBox(doc, pageno)
}
local context_hash = self:getContextHash(doc, pageno, bbox)
local hash = "autobbox|"..context_hash
local cached = Cache:check(hash)
local cached = DocCache:check(hash)
if not cached then
local page = doc._document:openPage(pageno)
local kc = self:createContext(doc, pageno, bbox)
@@ -198,7 +196,7 @@ function KoptInterface:getAutoBBox(doc, pageno)
else
bbox = Document.getPageBBox(doc, pageno)
end
Cache:insert(hash, CacheItem:new{ autobbox = bbox, size = 160 })
DocCache:insert(hash, CacheItem:new{ autobbox = bbox, size = 160 })
page:close()
kc:free()
return bbox
@@ -215,7 +213,7 @@ function KoptInterface:getSemiAutoBBox(doc, pageno)
local bbox = Document.getPageBBox(doc, pageno)
local context_hash = self:getContextHash(doc, pageno, bbox)
local hash = "semiautobbox|"..context_hash
local cached = Cache:check(hash)
local cached = DocCache:check(hash)
if not cached then
local page = doc._document:openPage(pageno)
local kc = self:createContext(doc, pageno, bbox)
@@ -233,7 +231,7 @@ function KoptInterface:getSemiAutoBBox(doc, pageno)
auto_bbox = bbox
end
page:close()
Cache:insert(hash, CacheItem:new{ semiautobbox = auto_bbox, size = 160 })
DocCache:insert(hash, CacheItem:new{ semiautobbox = auto_bbox, size = 160 })
kc:free()
return auto_bbox
else
@@ -251,7 +249,7 @@ function KoptInterface:getCachedContext(doc, pageno)
local bbox = doc:getPageBBox(pageno)
local context_hash = self:getContextHash(doc, pageno, bbox)
local kctx_hash = "kctx|"..context_hash
local cached = Cache:check(kctx_hash, ContextCacheItem)
local cached = DocCache:check(kctx_hash, ContextCacheItem)
if not cached then
-- If kctx is not cached, create one and get reflowed bmp in foreground.
local kc = self:createContext(doc, pageno, bbox)
@@ -267,7 +265,7 @@ function KoptInterface:getCachedContext(doc, pageno)
local fullwidth, fullheight = kc:getPageDim()
logger.dbg("reflowed page", pageno, "fullwidth:", fullwidth, "fullheight:", fullheight)
self.last_context_size = fullwidth * fullheight + 3072 -- estimation
Cache:insert(kctx_hash, ContextCacheItem:new{
DocCache:insert(kctx_hash, ContextCacheItem:new{
persistent = true,
size = self.last_context_size,
kctx = kc
@@ -336,12 +334,12 @@ function KoptInterface:renderReflowedPage(doc, pageno, rect, zoom, rotation, ren
local context_hash = self:getContextHash(doc, pageno, bbox)
local renderpg_hash = "renderpg|"..context_hash
local cached = Cache:check(renderpg_hash)
local cached = DocCache:check(renderpg_hash)
if not cached then
-- do the real reflowing if kctx has not been cached yet
local kc = self:getCachedContext(doc, pageno)
local fullwidth, fullheight = kc:getPageDim()
if not Cache:willAccept(fullwidth * fullheight) then
if not DocCache:willAccept(fullwidth * fullheight) then
-- whole page won't fit into cache
error("aborting, since we don't have enough cache for this page")
end
@@ -352,7 +350,7 @@ function KoptInterface:renderReflowedPage(doc, pageno, rect, zoom, rotation, ren
}
tile.bb = kc:dstToBlitBuffer()
tile.size = tonumber(tile.bb.stride) * tile.bb.h + 512 -- estimation
Cache:insert(renderpg_hash, tile)
DocCache:insert(renderpg_hash, tile)
return tile
else
return cached
@@ -370,7 +368,7 @@ function KoptInterface:renderOptimizedPage(doc, pageno, rect, zoom, rotation, re
local context_hash = self:getContextHash(doc, pageno, bbox)
local renderpg_hash = "renderoptpg|"..context_hash..zoom
local cached = Cache:check(renderpg_hash, TileCacheItem)
local cached = DocCache:check(renderpg_hash, TileCacheItem)
if not cached then
local page_size = Document.getNativePageDimensions(doc, pageno)
local full_page_bbox = {
@@ -399,7 +397,7 @@ function KoptInterface:renderOptimizedPage(doc, pageno, rect, zoom, rotation, re
tile.bb = kc:dstToBlitBuffer()
tile.size = tonumber(tile.bb.stride) * tile.bb.h + 512 -- estimation
kc:free()
Cache:insert(renderpg_hash, tile)
DocCache:insert(renderpg_hash, tile)
return tile
else
return cached
@@ -429,7 +427,7 @@ function KoptInterface:hintReflowedPage(doc, pageno, zoom, rotation, gamma, rend
local bbox = doc:getPageBBox(pageno)
local context_hash = self:getContextHash(doc, pageno, bbox)
local kctx_hash = "kctx|"..context_hash
local cached = Cache:check(kctx_hash)
local cached = DocCache:check(kctx_hash)
if not cached then
local kc = self:createContext(doc, pageno, bbox)
local page = doc._document:openPage(pageno)
@@ -438,7 +436,7 @@ function KoptInterface:hintReflowedPage(doc, pageno, zoom, rotation, gamma, rend
kc:setPreCache()
page:reflow(kc, 0)
page:close()
Cache:insert(kctx_hash, ContextCacheItem:new{
DocCache:insert(kctx_hash, ContextCacheItem:new{
size = self.last_context_size or self.default_context_size,
kctx = kc,
})
@@ -496,16 +494,16 @@ function KoptInterface:getReflowedTextBoxes(doc, pageno)
local bbox = doc:getPageBBox(pageno)
local context_hash = self:getContextHash(doc, pageno, bbox)
local hash = "rfpgboxes|"..context_hash
local cached = Cache:check(hash)
local cached = DocCache:check(hash)
if not cached then
local kctx_hash = "kctx|"..context_hash
cached = Cache:check(kctx_hash)
cached = DocCache:check(kctx_hash)
if cached then
local kc = self:waitForContext(cached.kctx)
--kc:setDebug()
local fullwidth, fullheight = kc:getPageDim()
local boxes, nr_word = kc:getReflowedWordBoxes("dst", 0, 0, fullwidth, fullheight)
Cache:insert(hash, CacheItem:new{ rfpgboxes = boxes, size = 192 * nr_word }) -- estimation
DocCache:insert(hash, CacheItem:new{ rfpgboxes = boxes, size = 192 * nr_word }) -- estimation
return boxes
end
else
@@ -520,16 +518,16 @@ function KoptInterface:getNativeTextBoxes(doc, pageno)
local bbox = doc:getPageBBox(pageno)
local context_hash = self:getContextHash(doc, pageno, bbox)
local hash = "nativepgboxes|"..context_hash
local cached = Cache:check(hash)
local cached = DocCache:check(hash)
if not cached then
local kctx_hash = "kctx|"..context_hash
cached = Cache:check(kctx_hash)
cached = DocCache:check(kctx_hash)
if cached then
local kc = self:waitForContext(cached.kctx)
--kc:setDebug()
local fullwidth, fullheight = kc:getPageDim()
local boxes, nr_word = kc:getNativeWordBoxes("dst", 0, 0, fullwidth, fullheight)
Cache:insert(hash, CacheItem:new{ nativepgboxes = boxes, size = 192 * nr_word }) -- estimation
DocCache:insert(hash, CacheItem:new{ nativepgboxes = boxes, size = 192 * nr_word }) -- estimation
return boxes
end
else
@@ -546,17 +544,17 @@ function KoptInterface:getReflowedTextBoxesFromScratch(doc, pageno)
local bbox = doc:getPageBBox(pageno)
local context_hash = self:getContextHash(doc, pageno, bbox)
local hash = "scratchrfpgboxes|"..context_hash
local cached = Cache:check(hash)
local cached = DocCache:check(hash)
if not cached then
local kctx_hash = "kctx|"..context_hash
cached = Cache:check(kctx_hash)
cached = DocCache:check(kctx_hash)
if cached then
local reflowed_kc = self:waitForContext(cached.kctx)
local fullwidth, fullheight = reflowed_kc:getPageDim()
local kc = self:createContext(doc, pageno)
kc:copyDestBMP(reflowed_kc)
local boxes, nr_word = kc:getNativeWordBoxes("dst", 0, 0, fullwidth, fullheight)
Cache:insert(hash, CacheItem:new{ scratchrfpgboxes = boxes, size = 192 * nr_word }) -- estimation
DocCache:insert(hash, CacheItem:new{ scratchrfpgboxes = boxes, size = 192 * nr_word }) -- estimation
kc:free()
return boxes
end
@@ -589,7 +587,7 @@ Done by OCR pre-processing in Tesseract and Leptonica.
--]]
function KoptInterface:getNativeTextBoxesFromScratch(doc, pageno)
local hash = "scratchnativepgboxes|"..doc.file.."|"..pageno
local cached = Cache:check(hash)
local cached = DocCache:check(hash)
if not cached then
local page_size = Document.getNativePageDimensions(doc, pageno)
local bbox = {
@@ -602,7 +600,7 @@ function KoptInterface:getNativeTextBoxesFromScratch(doc, pageno)
local page = doc._document:openPage(pageno)
page:getPagePix(kc)
local boxes, nr_word = kc:getNativeWordBoxes("src", 0, 0, page_size.w, page_size.h)
Cache:insert(hash, CacheItem:new{ scratchnativepgboxes = boxes, size = 192 * nr_word }) -- estimation
DocCache:insert(hash, CacheItem:new{ scratchnativepgboxes = boxes, size = 192 * nr_word }) -- estimation
page:close()
kc:free()
return boxes
@@ -619,7 +617,7 @@ function KoptInterface:getPageBlock(doc, pageno, x, y)
local bbox = doc:getPageBBox(pageno)
local context_hash = self:getContextHash(doc, pageno, bbox)
local hash = "pageblocks|"..context_hash
local cached = Cache:check(hash)
local cached = DocCache:check(hash)
if not cached then
local page_size = Document.getNativePageDimensions(doc, pageno)
local full_page_bbox = {
@@ -633,7 +631,7 @@ function KoptInterface:getPageBlock(doc, pageno, x, y)
local page = doc._document:openPage(pageno)
page:getPagePix(kc)
kc:findPageBlocks()
Cache:insert(hash, CacheItem:new{ kctx = kc, size = 3072 }) -- estimation
DocCache:insert(hash, CacheItem:new{ kctx = kc, size = 3072 }) -- estimation
page:close()
kctx = kc
else
@@ -646,8 +644,8 @@ end
Get word from OCR providing selected word box.
--]]
function KoptInterface:getOCRWord(doc, pageno, wbox)
if not Cache:check(self.ocrengine) then
Cache:insert(self.ocrengine, OCREngine:new{ ocrengine = KOPTContext.new(), size = 3072 }) -- estimation
if not DocCache:check(self.ocrengine) then
DocCache:insert(self.ocrengine, OCREngine:new{ ocrengine = KOPTContext.new(), size = 3072 }) -- estimation
end
if doc.configurable.text_wrap == 1 then
return self:getReflewOCRWord(doc, pageno, wbox.sbox)
@@ -664,17 +662,17 @@ function KoptInterface:getReflewOCRWord(doc, pageno, rect)
local bbox = doc:getPageBBox(pageno)
local context_hash = self:getContextHash(doc, pageno, bbox)
local hash = "rfocrword|"..context_hash..rect.x..rect.y..rect.w..rect.h
local cached = Cache:check(hash)
local cached = DocCache:check(hash)
if not cached then
local kctx_hash = "kctx|"..context_hash
cached = Cache:check(kctx_hash)
cached = DocCache:check(kctx_hash)
if cached then
local kc = self:waitForContext(cached.kctx)
local _, word = pcall(
kc.getTOCRWord, kc, "dst",
rect.x, rect.y, rect.w, rect.h,
self.tessocr_data, self.ocr_lang, self.ocr_type, 0, 1)
Cache:insert(hash, CacheItem:new{ rfocrword = word, size = #word + 64 }) -- estimation
DocCache:insert(hash, CacheItem:new{ rfocrword = word, size = #word + 64 }) -- estimation
return word
end
else
@@ -689,7 +687,7 @@ function KoptInterface:getNativeOCRWord(doc, pageno, rect)
self.ocr_lang = doc.configurable.doc_language
local hash = "ocrword|"..doc.file.."|"..pageno..rect.x..rect.y..rect.w..rect.h
logger.dbg("hash", hash)
local cached = Cache:check(hash)
local cached = DocCache:check(hash)
if not cached then
local bbox = {
x0 = rect.x - math.floor(rect.h * 0.3),
@@ -707,7 +705,7 @@ function KoptInterface:getNativeOCRWord(doc, pageno, rect)
kc.getTOCRWord, kc, "src",
0, 0, word_w, word_h,
self.tessocr_data, self.ocr_lang, self.ocr_type, 0, 1)
Cache:insert(hash, CacheItem:new{ ocrword = word, size = #word + 64 }) -- estimation
DocCache:insert(hash, CacheItem:new{ ocrword = word, size = #word + 64 }) -- estimation
logger.dbg("word", word)
page:close()
kc:free()
@@ -721,8 +719,8 @@ end
Get text from OCR providing selected text boxes.
--]]
function KoptInterface:getOCRText(doc, pageno, tboxes)
if not Cache:check(self.ocrengine) then
Cache:insert(self.ocrengine, OCREngine:new{ ocrengine = KOPTContext.new(), size = 3072 }) -- estimation
if not DocCache:check(self.ocrengine) then
DocCache:insert(self.ocrengine, OCREngine:new{ ocrengine = KOPTContext.new(), size = 3072 }) -- estimation
end
logger.info("Not implemented yet")
end