mirror of
https://github.com/koreader/koreader.git
synced 2025-08-10 00:52:38 +00:00
HTML dictionary support (#3573)
* Adds a generic HTML widget modeled after the text widget, and HTML dictionary support. HTML dictionaries can have their own CSS (for X.ifo it must be X.css). The base CSS just resets the margin and sets the font. Note that the widget doesn't handle links, that wasn't needed for the dictionary. Closes <https://github.com/koreader/koreader/issues/1776>. * Show tag stripped HTML if the dictionary entry isn't valid HTML * Simulate the normal <br/> behavior * Bump base
This commit is contained in:
2
base
2
base
Submodule base updated: 81e789e724...feca07cc6f
@@ -67,6 +67,17 @@ local ReaderDictionary = InputContainer:new{
|
||||
lookup_msg = _("Searching dictionary for:\n%1"),
|
||||
}
|
||||
|
||||
--- Read the whole content of a dictionary stylesheet.
-- @string path path of the CSS file to read
-- @treturn string|nil the file content, or nil when the file cannot be opened
local function readDictionaryCss(path)
    local handle = io.open(path, "r")
    if handle then
        local stylesheet = handle:read("*all")
        handle:close()
        return stylesheet
    end

    -- A missing stylesheet is not an error: the caller simply gets no CSS.
    return nil
end
|
||||
|
||||
function ReaderDictionary:init()
|
||||
self.ui.menu:registerToMainMenu(self)
|
||||
self.data_dir = os.getenv("STARDICT_DATA_DIR") or
|
||||
@@ -90,11 +101,14 @@ function ReaderDictionary:init()
|
||||
local content = f:read("*all")
|
||||
f:close()
|
||||
local dictname = content:match("\nbookname=(.-)\n")
|
||||
local is_html = content:find("sametypesequence=h", 1, true) ~= nil
|
||||
-- sdcv won't use dict that don't have a bookname=
|
||||
if dictname then
|
||||
table.insert(available_ifos, {
|
||||
file = ifo_file,
|
||||
name = dictname,
|
||||
is_html = is_html,
|
||||
css = readDictionaryCss(ifo_file:gsub("%.ifo$", ".css"))
|
||||
})
|
||||
end
|
||||
end
|
||||
@@ -331,26 +345,42 @@ local function dictDirsEmpty(dict_dirs)
|
||||
return true
|
||||
end
|
||||
|
||||
--- Look up a dictionary description in available_ifos by its name.
-- @string dictionary_name name to compare against each entry's `name` field
-- @treturn table|nil the first matching entry, or nil when none matches
local function getAvailableIfoByName(dictionary_name)
    local match = nil
    for _, candidate in ipairs(available_ifos) do
        if candidate.name == dictionary_name then
            match = candidate
            break
        end
    end
    return match
end
|
||||
|
||||
--- Prepare dictionary results for display.
-- Results coming from an HTML dictionary (as reported by
-- getAvailableIfoByName) keep their definition untouched and are tagged with
-- `is_html` and `css`. All other results get their markup reduced to plain
-- text: <br> becomes a newline, CDATA sections are unwrapped, format escapes
-- are removed, remaining tags are dropped and leading whitespace is stripped.
-- @tparam table results array of results with `dict` and `definition` fields
-- @treturn table the same `results` table, modified in place
local function tidyMarkup(results)
    local cdata_tag = "<!%[CDATA%[(.-)%]%]>"
    local format_escape = "&[29Ib%+]{(.-)}"
    for _, result in ipairs(results) do
        local ifo = getAvailableIfoByName(result.dict)
        if ifo and ifo.is_html then
            -- HTML definitions are rendered as-is by the HTML widget.
            result.is_html = ifo.is_html
            result.css = ifo.css
        else
            local def = result.definition
            -- preserve the <br> tag for line break
            def = def:gsub("<[bB][rR] ?/?>", "\n")
            -- parse CDATA text in XML
            if def:find(cdata_tag) then
                def = def:gsub(cdata_tag, "%1")
                -- ignore format strings (loop because escapes can be nested)
                while def:find(format_escape) do
                    def = def:gsub(format_escape, "%1")
                end
            end
            -- ignore all markup tags
            def = def:gsub("%b<>", "")
            -- strip all leading empty lines/spaces
            def = def:gsub("^%s+", "")
            result.definition = def
        end
    end
    return results
end
|
||||
|
||||
@@ -14,6 +14,7 @@ local InputDialog = require("ui/widget/inputdialog")
|
||||
local LeftContainer = require("ui/widget/container/leftcontainer")
|
||||
local LineWidget = require("ui/widget/linewidget")
|
||||
local OverlapGroup = require("ui/widget/overlapgroup")
|
||||
local ScrollHtmlWidget = require("ui/widget/scrollhtmlwidget")
|
||||
local ScrollTextWidget = require("ui/widget/scrolltextwidget")
|
||||
local Size = require("ui/size")
|
||||
local TextWidget = require("ui/widget/textwidget")
|
||||
@@ -37,6 +38,7 @@ local DictQuickLookup = InputContainer:new{
|
||||
displayword = nil,
|
||||
is_wiki = false,
|
||||
is_fullpage = false,
|
||||
is_html = false,
|
||||
dict_index = 1,
|
||||
title_face = Font:getFace("x_smalltfont"),
|
||||
content_face = Font:getFace("cfont", DDICT_FONT_SIZE),
|
||||
@@ -156,6 +158,25 @@ function DictQuickLookup:isDocless()
|
||||
return self.ui == nil or self.ui.highlight == nil
|
||||
end
|
||||
|
||||
--- Build the stylesheet used when rendering an HTML dictionary result.
-- The base sheet resets the page and body margins and selects the font; the
-- CSS stored in self.css, when set, is appended after it.
-- @treturn string CSS text
function DictQuickLookup:getHtmlDictionaryCss()
    -- Using Noto Sans because Nimbus doesn't contain the IPA symbols.
    local base_css = [[
        @page {
            margin: 0;
            font-family: 'Noto Sans';
        }

        body {
            margin: 0;
        }
    ]]

    -- The concatenation is always a (truthy) string, so and/or is safe here.
    return self.css and (base_css .. self.css) or base_css
end
|
||||
|
||||
function DictQuickLookup:update()
|
||||
local orig_dimen = self.dict_frame and self.dict_frame.dimen or Geom:new{}
|
||||
-- calculate window dimension
|
||||
@@ -236,12 +257,20 @@ function DictQuickLookup:update()
|
||||
text_font_size = lookup_word_font_size,
|
||||
hold_callback = function() self:lookupInputWord(self.lookupword) end,
|
||||
}
|
||||
-- word definition
|
||||
local definition = FrameContainer:new{
|
||||
padding = self.definition_padding,
|
||||
margin = self.definition_margin,
|
||||
bordersize = 0,
|
||||
ScrollTextWidget:new{
|
||||
|
||||
local text_widget
|
||||
|
||||
if self.is_html then
|
||||
text_widget = ScrollHtmlWidget:new{
|
||||
html_body = self.definition,
|
||||
css = self:getHtmlDictionaryCss(),
|
||||
default_font_size = DDICT_FONT_SIZE,
|
||||
width = self.width,
|
||||
height = self.is_fullpage and self.height*0.75 or self.height*0.7,
|
||||
dialog = self,
|
||||
}
|
||||
else
|
||||
text_widget = ScrollTextWidget:new{
|
||||
text = self.definition,
|
||||
face = self.content_face,
|
||||
width = self.width,
|
||||
@@ -250,7 +279,15 @@ function DictQuickLookup:update()
|
||||
dialog = self,
|
||||
-- allow for disabling justification
|
||||
justified = G_reader_settings:nilOrTrue("dict_justify"),
|
||||
},
|
||||
}
|
||||
end
|
||||
|
||||
-- word definition
|
||||
local definition = FrameContainer:new{
|
||||
padding = self.definition_padding,
|
||||
margin = self.definition_margin,
|
||||
bordersize = 0,
|
||||
text_widget,
|
||||
}
|
||||
-- Different sets of buttons if fullpage or not
|
||||
local buttons
|
||||
@@ -538,6 +575,8 @@ function DictQuickLookup:changeDictionary(index)
|
||||
self.lookupword = self.results[index].word
|
||||
self.definition = self.results[index].definition
|
||||
self.is_fullpage = self.results[index].is_fullpage
|
||||
self.is_html = self.results[index].is_html
|
||||
self.css = self.results[index].css
|
||||
self.lang = self.results[index].lang
|
||||
if self.is_fullpage then
|
||||
self.displayword = self.lookupword
|
||||
|
||||
189
frontend/ui/widget/htmlboxwidget.lua
Normal file
189
frontend/ui/widget/htmlboxwidget.lua
Normal file
@@ -0,0 +1,189 @@
|
||||
--[[--
|
||||
HTML widget (without scroll bars).
|
||||
--]]
|
||||
|
||||
local DrawContext = require("ffi/drawcontext")
|
||||
local Geom = require("ui/geometry")
|
||||
local InputContainer = require("ui/widget/container/inputcontainer")
|
||||
local logger = require("logger")
|
||||
local Mupdf = require("ffi/mupdf")
|
||||
local util = require("util")
|
||||
local TimeVal = require("ui/timeval")
|
||||
|
||||
-- Prototype of the scrollbar-less HTML widget, with its default field values.
local HtmlBoxWidget = InputContainer:new({
    -- buffer holding the rendered page; filled by _render(), dropped by freeBb()
    bb = nil,
    -- geometry of the widget; w/h are used for layout and drawing
    dimen = nil,
    -- document created by setContent()
    document = nil,
    -- number of pages reported by the document after layout
    page_count = 0,
    -- page currently displayed
    page_number = 1,
    -- widget-relative position recorded by onHoldStartText()
    hold_start_pos = nil,
    -- time recorded by onHoldStartText()
    hold_start_tv = nil,
})
|
||||
|
||||
--- Load an HTML fragment into a document and lay it out to the widget size.
-- If the HTML cannot be opened, the body is converted to plain text, escaped
-- and opened again; an error is raised only when that second attempt fails.
-- self.document is assigned only once a document was opened successfully.
-- @string body HTML placed inside the <body> element
-- @string css optional stylesheet, embedded in a <style> element
-- @param default_font_size font size forwarded to layoutDocument
function HtmlBoxWidget:setContent(body, css, default_font_size)
    -- fz_set_user_css is tied to the context instead of the document so to easily support multiple
    -- HTML dictionaries with different CSS, we embed the stylesheet into the HTML instead of using
    -- that function.
    local head = ""
    if css then
        head = string.format("<head><style>%s</style></head>", css)
    end
    local html = string.format("<html>%s<body>%s</body></html>", head, body)

    -- For some reason in MuPDF <br/> always creates both a line break and an empty line, so we have to
    -- simulate the normal <br/> behavior.
    -- https://bugs.ghostscript.com/show_bug.cgi?id=698351
    -- A non-breaking space is used because a plain space would be collapsed,
    -- and consecutive <br/> would then no longer produce an empty line.
    local line_break = "&nbsp;<div></div>"
    html = html:gsub("%<br ?/?%>", line_break)

    local ok, document = pcall(Mupdf.openDocumentFromText, html, "html")
    if not ok then
        -- on failure, the second pcall result is the error
        logger.warn("HTML loading error:", document)

        -- Fall back to showing the tag-stripped text.
        body = util.htmlToPlainText(body)
        body = util.htmlEscape(body)
        -- Normally \n would be replaced with <br/>. See the previous comment regarding the bug in MuPDF.
        body = body:gsub("\n", line_break)
        html = string.format("<html>%s<body>%s</body></html>", head, body)

        ok, document = pcall(Mupdf.openDocumentFromText, html, "html")
        if not ok then
            error(document)
        end
    end

    self.document = document
    self.document:layoutDocument(self.dimen.w, self.dimen.h, default_font_size)

    self.page_count = self.document:getPages()
end
|
||||
|
||||
--- Render the current page into self.bb, unless a buffer already exists.
-- Callers that want a fresh rendering must call freeBb() first.
function HtmlBoxWidget:_render()
    if not self.bb then
        local page = self.document:openPage(self.page_number)
        self.bb = page:draw_new(DrawContext.new(), self.dimen.w, self.dimen.h, 0, 0)
        page:close()
    end
end
|
||||
|
||||
--- Return the widget geometry.
-- @return self.dimen itself (not a copy)
function HtmlBoxWidget:getSize()
    local dimen = self.dimen
    return dimen
end
|
||||
|
||||
--- Draw the widget onto a target buffer.
-- Records the paint position in self.dimen, renders the current page if
-- needed, then blits the rendered buffer at (x, y).
-- @param bb target buffer
-- @param x horizontal position in the target
-- @param y vertical position in the target
function HtmlBoxWidget:paintTo(bb, x, y)
    local dimen = self.dimen
    dimen.x, dimen.y = x, y

    self:_render()

    local size = self:getSize()
    bb:blitFrom(self.bb, x, y, 0, 0, size.w, size.h)
end
|
||||
|
||||
--- Release the rendered page buffer, if any, and forget it.
function HtmlBoxWidget:freeBb()
    local buffer = self.bb
    if buffer and buffer.free then
        buffer:free()
    end

    self.bb = nil
end
|
||||
|
||||
-- This will normally be called by our WidgetContainer:free()
-- But it SHOULD explicitly be called if we are getting replaced
-- (ie: in some other widget's update()), to not leak memory with
-- BlitBuffer zombies
-- Safe to call more than once (e.g. explicitly and then again through
-- onCloseWidget()), and when no document was ever loaded.
function HtmlBoxWidget:free()
    self:freeBb()

    if self.document then
        self.document:close()
        self.document = nil
    end
end
|
||||
|
||||
--- Handler run when the widget is closed: release the rendered buffer and
-- the document by delegating to free().
function HtmlBoxWidget:onCloseWidget()
    -- free when UIManager:close() was called
    self:free()
end
|
||||
|
||||
--- Remember where and when a hold gesture started.
-- The position is stored relative to the widget's top-left corner.
-- @param _ unused
-- @param ges gesture; its `pos` field provides the x/y coordinates
-- @treturn boolean always true
function HtmlBoxWidget:onHoldStartText(_, ges)
    local origin = self.dimen
    self.hold_start_pos = Geom:new{
        x = ges.pos.x - origin.x,
        y = ges.pos.y - origin.y,
    }
    self.hold_start_tv = TimeVal.now()

    return true
end
|
||||
|
||||
-- Tells whether `pos` lies inside the box of `word` (x0/y0 inclusive,
-- x1/y1 exclusive).
local function wordContains(word, pos)
    return pos.x >= word.x0 and pos.x < word.x1
        and pos.y >= word.y0 and pos.y < word.y1
end

--- Collect the words between two positions of a page.
-- Words are gathered from the one containing start_pos up to and including
-- the one containing end_pos. If no word contains end_pos, everything from
-- the start word to the end of the page is returned; if no word contains
-- start_pos, the result is empty.
-- Lines and words are walked with ipairs so that they are visited in
-- sequence order (pairs gives no ordering guarantee).
-- NOTE(review): assumes `lines` and each line are sequences, with any extra
-- non-numeric fields on a line not being words — confirm against getPageText.
-- @tparam table lines array of lines, each an array of word tables with
--   `word`, `x0`, `y0`, `x1` and `y1` fields
-- @param start_pos position (x, y) where the selection starts
-- @param end_pos position (x, y) where the selection ends
-- @treturn table array of word strings
function HtmlBoxWidget:getSelectedText(lines, start_pos, end_pos)
    local found_start = false
    local words = {}

    for _, line in ipairs(lines) do
        for _, w in ipairs(line) do
            if type(w) == 'table' then
                if not found_start and wordContains(w, start_pos) then
                    found_start = true
                end

                if found_start then
                    table.insert(words, w.word)

                    -- Found the end.
                    if wordContains(w, end_pos) then
                        return words
                    end
                end
            end
        end
    end

    return words
end
|
||||
|
||||
--- Finish a hold gesture and hand the selected text to a callback.
-- Returns false without calling the callback when there is no callback, when
-- no hold start was recorded, or when the start or end position falls
-- outside the widget. The recorded start position is consumed in all cases
-- where both a callback and a start position were present.
-- @func callback called as callback(selected_text, hold_duration), with
--   hold_duration in seconds
-- @param ges gesture; its `pos` field provides the release coordinates
-- @treturn boolean true when the callback was invoked
function HtmlBoxWidget:onHoldReleaseText(callback, ges)
    -- we need both a callback and a previously seen HoldStart event
    if not (callback and self.hold_start_pos) then
        return false
    end

    local dimen = self.dimen
    local start_pos = self.hold_start_pos
    local end_pos = Geom:new{
        x = ges.pos.x - dimen.x,
        y = ges.pos.y - dimen.y,
    }
    self.hold_start_pos = nil

    -- check start and end coordinates are actually inside our area
    local function isOutside(pos)
        return pos.x < 0 or pos.x >= dimen.w or pos.y < 0 or pos.y >= dimen.h
    end
    if isOutside(start_pos) or isOutside(end_pos) then
        return false
    end

    local elapsed = TimeVal.now() - self.hold_start_tv
    local hold_duration = elapsed.sec + (elapsed.usec/1000000)

    local page = self.document:openPage(self.page_number)
    local lines = page:getPageText()
    page:close()

    local selected_text = table.concat(self:getSelectedText(lines, start_pos, end_pos), " ")
    callback(selected_text, hold_duration)

    return true
end
|
||||
|
||||
return HtmlBoxWidget
|
||||
150
frontend/ui/widget/scrollhtmlwidget.lua
Normal file
150
frontend/ui/widget/scrollhtmlwidget.lua
Normal file
@@ -0,0 +1,150 @@
|
||||
--[[--
|
||||
HTML widget with vertical scroll bar.
|
||||
--]]
|
||||
|
||||
local Device = require("device")
|
||||
local HtmlBoxWidget = require("ui/widget/htmlboxwidget")
|
||||
local Geom = require("ui/geometry")
|
||||
local GestureRange = require("ui/gesturerange")
|
||||
local HorizontalGroup = require("ui/widget/horizontalgroup")
|
||||
local HorizontalSpan = require("ui/widget/horizontalspan")
|
||||
local InputContainer = require("ui/widget/container/inputcontainer")
|
||||
local UIManager = require("ui/uimanager")
|
||||
local VerticalScrollBar = require("ui/widget/verticalscrollbar")
|
||||
|
||||
local Input = Device.input
|
||||
local Screen = Device.screen
|
||||
|
||||
-- Prototype of the scrollable HTML widget, with its default field values.
local ScrollHtmlWidget = InputContainer:new({
    -- content handed to the inner HtmlBoxWidget by init()
    html_body = nil,
    css = nil,
    default_font_size = 18,
    -- child widgets created by init()
    htmlbox_widget = nil,
    v_scroll_bar = nil,
    -- widget marked dirty after scrolling
    dialog = nil,
    dimen = nil,
    -- overall size; the HTML box gets the width left after the bar and span
    width = 0,
    height = 0,
    scroll_bar_width = Screen:scaleBySize(6),
    text_scroll_span = Screen:scaleBySize(12),
})
|
||||
|
||||
--- Build the widget: an HtmlBoxWidget, a spacer and a vertical scroll bar
-- laid out horizontally, plus the gesture and key bindings used to scroll.
function ScrollHtmlWidget:init()
    -- The HTML box takes the width left after the scroll bar and the spacer.
    self.htmlbox_widget = HtmlBoxWidget:new{
        dimen = Geom:new{
            w = self.width - self.scroll_bar_width - self.text_scroll_span,
            h = self.height,
        },
    }

    self.htmlbox_widget:setContent(self.html_body, self.css, self.default_font_size)

    self.v_scroll_bar = VerticalScrollBar:new{
        enable = self.htmlbox_widget.page_count > 1,
        width = self.scroll_bar_width,
        height = self.height,
    }

    self.v_scroll_bar:set((self.htmlbox_widget.page_number-1) / self.htmlbox_widget.page_count, self.htmlbox_widget.page_number / self.htmlbox_widget.page_count)

    local horizontal_group = HorizontalGroup:new{}
    table.insert(horizontal_group, self.htmlbox_widget)
    table.insert(horizontal_group, HorizontalSpan:new{width=self.text_scroll_span})
    table.insert(horizontal_group, self.v_scroll_bar)
    self[1] = horizontal_group

    self.dimen = Geom:new(self[1]:getSize())

    if Device:isTouchDevice() then
        self.ges_events = {
            -- The event names must match the handlers defined on this widget
            -- (onScrollText / onTapScrollText); a swipe registered under any
            -- other name would find no handler and never scroll.
            ScrollText = {
                GestureRange:new{
                    ges = "swipe",
                    range = function() return self.dimen end,
                },
            },
            TapScrollText = { -- allow scrolling with tap
                GestureRange:new{
                    ges = "tap",
                    range = function() return self.dimen end,
                },
            },
        }
    end

    if Device:hasKeyboard() or Device:hasKeys() then
        self.key_events = {
            ScrollDown = {{Input.group.PgFwd}, doc = "scroll down"},
            ScrollUp = {{Input.group.PgBack}, doc = "scroll up"},
        }
    end
end
|
||||
|
||||
--- Move one page forward or backward and redraw.
-- Does nothing when direction is 0, or when already on the last page (going
-- forward) or the first page (going backward).
-- @number direction positive to go to the next page, negative for the previous
function ScrollHtmlWidget:scrollText(direction)
    if direction == 0 then
        return
    end

    local box = self.htmlbox_widget
    if direction > 0 then
        if box.page_number >= box.page_count then
            return
        end
        box.page_number = box.page_number + 1
    elseif direction < 0 then
        if box.page_number <= 1 then
            return
        end
        box.page_number = box.page_number - 1
    end

    -- Reflect the new page in the scroll bar.
    local first = (box.page_number-1) / box.page_count
    local last = box.page_number / box.page_count
    self.v_scroll_bar:set(first, last)

    -- Drop the old rendering so that _render() draws the new page.
    box:freeBb()
    box:_render()

    UIManager:setDirty(self.dialog, function()
        return "partial", self.dimen
    end)
end
|
||||
|
||||
--- Swipe handler: a swipe to the north scrolls forward, to the south backward.
-- @param arg unused
-- @param ges gesture; its `direction` field selects the scroll direction
-- @return true when the swipe was vertical, nothing otherwise
function ScrollHtmlWidget:onScrollText(arg, ges)
    local steps = { north = 1, south = -1 }
    local step = steps[ges.direction]
    if step then
        self:scrollText(step)
        return true
    end
    -- if swipe west/east, let it propagate up (e.g. for quickdictlookup to
    -- go to next/prev result)
end
|
||||
|
||||
--- Tap handler: a tap on the left half of the screen scrolls back one page,
-- a tap on the right half scrolls forward one page.
-- @param arg unused
-- @param ges gesture; its `pos.x` field selects the direction
-- @return true when a scroll happened, nothing otherwise
function ScrollHtmlWidget:onTapScrollText(arg, ges)
    local box = self.htmlbox_widget
    if ges.pos.x < Screen:getWidth()/2 then
        if box.page_number > 1 then
            self:scrollText(-1)
            return true
        end
    else
        -- Strictly less than: on the last page there is nothing to scroll
        -- to, so the tap must not be consumed.
        if box.page_number < box.page_count then
            self:scrollText(1)
            return true
        end
    end
    -- if we couldn't scroll (because we're already at top or bottom),
    -- let it propagate up (e.g. for quickdictlookup to go to next/prev result)
end
|
||||
|
||||
--- Handler for the ScrollDown key event: go to the next page.
-- @treturn boolean always true
function ScrollHtmlWidget:onScrollDown()
    local forward = 1
    self:scrollText(forward)
    return true
end
|
||||
|
||||
--- Handler for the ScrollUp key event: go to the previous page.
-- @treturn boolean always true
function ScrollHtmlWidget:onScrollUp()
    local backward = -1
    self:scrollText(backward)
    return true
end
|
||||
|
||||
return ScrollHtmlWidget
|
||||
@@ -564,4 +564,18 @@ function util.htmlToPlainTextIfHtml(text)
|
||||
return text
|
||||
end
|
||||
|
||||
--- Encode the HTML entities in a string
-- Replaces &, <, >, ", ' and / with their HTML entities. Curly braces are
-- matched by the pattern but have no entry in the table, so they are kept
-- unchanged.
-- @string text the string to escape
-- @treturn string the escaped string (followed by the gsub substitution count)
-- Taken from https://github.com/kernelsauce/turbo/blob/e4a35c2e3fb63f07464f8f8e17252bea3a029685/turbo/escape.lua#L58-L70
function util.htmlEscape(text)
    return text:gsub("[}{\">/<'&]", {
        ["&"] = "&amp;",
        ["<"] = "&lt;",
        [">"] = "&gt;",
        ['"'] = "&quot;",
        ["'"] = "&#39;",
        ["/"] = "&#47;",
    })
end
|
||||
|
||||
return util
|
||||
|
||||
Reference in New Issue
Block a user