mirror of
https://github.com/gedoor/legado.git
synced 2025-08-10 00:52:30 +00:00
jsoup自带的xpath有些规则无法解析,先恢复
比如org.jsoup.select.Selector$SelectorParseException: Could not evaluate XPath query [//*[@class*="all-book-list" or @id="rank-view-list" or @id="limit-list"]//*[@data-rid]]: javax.xml.transform.TransformerException: A location path was expected, but the following token was encountered: =
This commit is contained in:
@@ -191,6 +191,7 @@ dependencies {
|
||||
//规则相关
|
||||
implementation('org.jsoup:jsoup:1.15.4')
|
||||
implementation('com.jayway.jsonpath:json-path:2.7.0')
|
||||
implementation('cn.wanghaomiao:JsoupXpath:2.5.2')
|
||||
implementation(project(path: ':epublib'))
|
||||
|
||||
//JS rhino
|
||||
|
||||
@@ -7,6 +7,7 @@ import org.jsoup.parser.Parser
|
||||
import org.jsoup.select.Collector
|
||||
import org.jsoup.select.Elements
|
||||
import org.jsoup.select.Evaluator
|
||||
import org.seimicrawler.xpath.JXNode
|
||||
|
||||
/**
|
||||
* Created by GKF on 2018/1/25.
|
||||
@@ -21,6 +22,9 @@ class AnalyzeByJSoup(doc: Any) {
|
||||
if (doc is Element) {
|
||||
return doc
|
||||
}
|
||||
if (doc is JXNode) {
|
||||
return if (doc.isElement) doc.asElement() else Jsoup.parse(doc.toString())
|
||||
}
|
||||
if (doc.toString().startsWith("<?xml", true)) {
|
||||
return Jsoup.parse(doc.toString(), Parser.xmlParser())
|
||||
}
|
||||
|
||||
@@ -3,38 +3,62 @@ package io.legado.app.model.analyzeRule
|
||||
import android.text.TextUtils
|
||||
import androidx.annotation.Keep
|
||||
import org.jsoup.Jsoup
|
||||
import org.jsoup.nodes.Document
|
||||
import org.jsoup.nodes.Element
|
||||
import org.jsoup.nodes.TextNode
|
||||
import org.jsoup.parser.Parser
|
||||
import org.jsoup.select.Elements
|
||||
import org.seimicrawler.xpath.JXDocument
|
||||
import org.seimicrawler.xpath.JXNode
|
||||
|
||||
@Keep
|
||||
class AnalyzeByXPath(doc: Any) {
|
||||
private var jxNode: Any = parse(doc)
|
||||
|
||||
private var element: Element = parse(doc)
|
||||
|
||||
private fun parse(doc: Any): Element {
|
||||
if (doc is Element) {
|
||||
return doc
|
||||
private fun parse(doc: Any): Any {
|
||||
return when (doc) {
|
||||
is JXNode -> if (doc.isElement) doc else strToJXDocument(doc.toString())
|
||||
is Document -> JXDocument.create(doc)
|
||||
is Element -> JXDocument.create(Elements(doc))
|
||||
is Elements -> JXDocument.create(doc)
|
||||
else -> strToJXDocument(doc.toString())
|
||||
}
|
||||
if (doc.toString().startsWith("<?xml", true)) {
|
||||
return Jsoup.parse(doc.toString(), Parser.xmlParser())
|
||||
}
|
||||
return Jsoup.parse(doc.toString())
|
||||
}
|
||||
|
||||
internal fun getElements(xPath: String): Elements? {
|
||||
/**
 * Builds a [JXDocument] from an HTML or XML string.
 *
 * A fragment ending in `</td>` is wrapped in `<table><tr>…</tr></table>`, and one ending in
 * `</tr>` or `</tbody>` is wrapped in `<table>…</table>`, before it is parsed. Trailing
 * whitespace is ignored when looking for the closing tag, so a fragment followed by a newline
 * is wrapped the same way as one without it.
 *
 * If the (possibly wrapped) markup, trimmed, starts with `<?xml` (case-insensitive) it is
 * parsed with jsoup's XML parser; otherwise it is handed to [JXDocument.create] as-is.
 *
 * @param html the markup to parse
 * @return the document built from [html]
 */
private fun strToJXDocument(html: String): JXDocument {
    // Look at the text without trailing whitespace so "…</td>\n" is still recognised.
    val tail = html.trimEnd()
    // NOTE(review): the wrapping presumably exists because an HTML parser drops bare
    // table cells/rows that appear outside a table context — confirm against jsoup.
    val markup = when {
        tail.endsWith("</td>") -> "<table><tr>$html</tr></table>"
        tail.endsWith("</tr>") || tail.endsWith("</tbody>") -> "<table>$html</table>"
        else -> html
    }
    if (markup.trim().startsWith("<?xml", ignoreCase = true)) {
        return JXDocument.create(Jsoup.parse(markup, Parser.xmlParser()))
    }
    return JXDocument.create(markup)
}
|
||||
|
||||
/**
 * Evaluates [xPath] against the parsed input held in `jxNode`.
 *
 * When the held value is a [JXNode] the query is run with `sel`; otherwise the value is
 * cast to [JXDocument] and the query is run with `selN`.
 *
 * @param xPath the XPath expression to evaluate
 * @return the nodes produced by the query, as returned by the underlying call
 */
private fun getResult(xPath: String): List<JXNode>? {
    // Copy the mutable property into a local so the smart cast below is allowed.
    val target = jxNode
    if (target is JXNode) {
        return target.sel(xPath)
    }
    return (target as JXDocument).selN(xPath)
}
|
||||
|
||||
internal fun getElements(xPath: String): List<JXNode>? {
|
||||
|
||||
if (xPath.isEmpty()) return null
|
||||
|
||||
val jxNodes = Elements()
|
||||
val jxNodes = ArrayList<JXNode>()
|
||||
val ruleAnalyzes = RuleAnalyzer(xPath)
|
||||
val rules = ruleAnalyzes.splitRule("&&", "||", "%%")
|
||||
|
||||
if (rules.size == 1) {
|
||||
return element.selectXpath(rules[0])
|
||||
return getResult(rules[0])
|
||||
} else {
|
||||
val results = ArrayList<Elements>()
|
||||
val results = ArrayList<List<JXNode>>()
|
||||
for (rl in rules) {
|
||||
val temp = getElements(rl)
|
||||
if (temp != null && temp.isNotEmpty()) {
|
||||
@@ -70,8 +94,8 @@ class AnalyzeByXPath(doc: Any) {
|
||||
val rules = ruleAnalyzes.splitRule("&&", "||", "%%")
|
||||
|
||||
if (rules.size == 1) {
|
||||
element.selectXpath(xPath).forEach {
|
||||
result.add(it.toString())
|
||||
getResult(xPath)?.map {
|
||||
result.add(it.asString())
|
||||
}
|
||||
return result
|
||||
} else {
|
||||
@@ -108,22 +132,10 @@ class AnalyzeByXPath(doc: Any) {
|
||||
val ruleAnalyzes = RuleAnalyzer(rule)
|
||||
val rules = ruleAnalyzes.splitRule("&&", "||")
|
||||
if (rules.size == 1) {
|
||||
val xpath = when {
|
||||
rule.startsWith("///") -> ".${rule.substring(1)}"
|
||||
rule.startsWith("/") -> ".$rule"
|
||||
else -> rule
|
||||
}
|
||||
val x = xpath.substringAfterLast("/")
|
||||
return if (x.startsWith("@")) {
|
||||
element.selectXpath(xpath.substringBeforeLast("/"))
|
||||
.eachAttr(x.substring(1)).let {
|
||||
TextUtils.join("\n", it)
|
||||
}
|
||||
} else {
|
||||
element.selectXpath(xpath, TextNode::class.java).let {
|
||||
TextUtils.join("\n", it)
|
||||
}
|
||||
getResult(rule)?.let {
|
||||
return TextUtils.join("\n", it)
|
||||
}
|
||||
return null
|
||||
} else {
|
||||
val textList = arrayListOf<String>()
|
||||
for (rl in rules) {
|
||||
|
||||
@@ -458,8 +458,7 @@ class AnalyzeRule(
|
||||
mode = Mode.Json
|
||||
ruleStr
|
||||
}
|
||||
ruleStr.startsWith("/") || ruleStr.startsWith("./") -> {
|
||||
//XPath特征很明显,无需配置单独的识别标头
|
||||
ruleStr.startsWith("/") -> {//XPath特征很明显,无需配置单独的识别标头
|
||||
mode = Mode.XPath
|
||||
ruleStr
|
||||
}
|
||||
@@ -604,7 +603,6 @@ class AnalyzeRule(
|
||||
|| ruleStr.startsWith("$.")
|
||||
|| ruleStr.startsWith("$[")
|
||||
|| ruleStr.startsWith("//")
|
||||
|| ruleStr.startsWith("./")
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user