From d8a165c8a5a8ad0addddec9eb2ab76b716d075b8 Mon Sep 17 00:00:00 2001 From: kunfei Date: Wed, 22 Feb 2023 17:52:45 +0800 Subject: [PATCH] =?UTF-8?q?jsoup=E8=87=AA=E5=B8=A6=E7=9A=84xpath=E6=9C=89?= =?UTF-8?q?=E4=BA=9B=E8=A7=84=E5=88=99=E6=97=A0=E6=B3=95=E8=A7=A3=E6=9E=90?= =?UTF-8?q?,=E5=85=88=E6=81=A2=E5=A4=8D=20=E6=AF=94=E5=A6=82org.jsoup.sele?= =?UTF-8?q?ct.Selector$SelectorParseException:=20Could=20not=20evaluate=20?= =?UTF-8?q?XPath=20query=20[//*[@class*=3D"all-book-list"=20or=20@id=3D"ra?= =?UTF-8?q?nk-view-list"=20or=20@id=3D"limit-list"]//*[@data-rid]]:=20=20?= =?UTF-8?q?=20javax.xml.transform.TransformerException:=20A=20location=20p?= =?UTF-8?q?ath=20was=20expected,=20but=20the=20following=20token=20was=20e?= =?UTF-8?q?ncountered:=20=20=3D?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- app/build.gradle | 1 + .../app/model/analyzeRule/AnalyzeByJSoup.kt | 4 + .../app/model/analyzeRule/AnalyzeByXPath.kt | 74 +++++++++++-------- .../app/model/analyzeRule/AnalyzeRule.kt | 4 +- 4 files changed, 49 insertions(+), 34 deletions(-) diff --git a/app/build.gradle b/app/build.gradle index 9fffd1501..b380d8bff 100644 --- a/app/build.gradle +++ b/app/build.gradle @@ -191,6 +191,7 @@ dependencies { //规则相关 implementation('org.jsoup:jsoup:1.15.4') implementation('com.jayway.jsonpath:json-path:2.7.0') + implementation('cn.wanghaomiao:JsoupXpath:2.5.2') implementation(project(path: ':epublib')) //JS rhino diff --git a/app/src/main/java/io/legado/app/model/analyzeRule/AnalyzeByJSoup.kt b/app/src/main/java/io/legado/app/model/analyzeRule/AnalyzeByJSoup.kt index 4e58e1536..28bddac73 100644 --- a/app/src/main/java/io/legado/app/model/analyzeRule/AnalyzeByJSoup.kt +++ b/app/src/main/java/io/legado/app/model/analyzeRule/AnalyzeByJSoup.kt @@ -7,6 +7,7 @@ import org.jsoup.parser.Parser import org.jsoup.select.Collector import org.jsoup.select.Elements import org.jsoup.select.Evaluator +import org.seimicrawler.xpath.JXNode /** * Created by GKF on 2018/1/25. @@ -21,6 +22,9 @@ class AnalyzeByJSoup(doc: Any) { if (doc is Element) { return doc } + if (doc is JXNode) { + return if (doc.isElement) doc.asElement() else Jsoup.parse(doc.toString()) + } if (doc.toString().startsWith(" if (doc.isElement) doc else strToJXDocument(doc.toString()) + is Document -> JXDocument.create(doc) + is Element -> JXDocument.create(Elements(doc)) + is Elements -> JXDocument.create(doc) + else -> strToJXDocument(doc.toString()) } - if (doc.toString().startsWith("")) { + html1 = "${html1}" + } + if (html1.endsWith("") || html1.endsWith("")) { + html1 = "${html1}
" + } + if (html1.trim().startsWith("? { + val node = jxNode + return if (node is JXNode) { + node.sel(xPath) + } else { + (node as JXDocument).selN(xPath) + } + } + + internal fun getElements(xPath: String): List? { if (xPath.isEmpty()) return null - val jxNodes = Elements() + val jxNodes = ArrayList() val ruleAnalyzes = RuleAnalyzer(xPath) val rules = ruleAnalyzes.splitRule("&&", "||", "%%") if (rules.size == 1) { - return element.selectXpath(rules[0]) + return getResult(rules[0]) } else { - val results = ArrayList() + val results = ArrayList>() for (rl in rules) { val temp = getElements(rl) if (temp != null && temp.isNotEmpty()) { @@ -70,8 +94,8 @@ class AnalyzeByXPath(doc: Any) { val rules = ruleAnalyzes.splitRule("&&", "||", "%%") if (rules.size == 1) { - element.selectXpath(xPath).forEach { - result.add(it.toString()) + getResult(xPath)?.map { + result.add(it.asString()) } return result } else { @@ -108,22 +132,10 @@ class AnalyzeByXPath(doc: Any) { val ruleAnalyzes = RuleAnalyzer(rule) val rules = ruleAnalyzes.splitRule("&&", "||") if (rules.size == 1) { - val xpath = when { - rule.startsWith("///") -> ".${rule.substring(1)}" - rule.startsWith("/") -> ".$rule" - else -> rule - } - val x = xpath.substringAfterLast("/") - return if (x.startsWith("@")) { - element.selectXpath(xpath.substringBeforeLast("/")) - .eachAttr(x.substring(1)).let { - TextUtils.join("\n", it) - } - } else { - element.selectXpath(xpath, TextNode::class.java).let { - TextUtils.join("\n", it) - } + getResult(rule)?.let { + return TextUtils.join("\n", it) } + return null } else { val textList = arrayListOf() for (rl in rules) { diff --git a/app/src/main/java/io/legado/app/model/analyzeRule/AnalyzeRule.kt b/app/src/main/java/io/legado/app/model/analyzeRule/AnalyzeRule.kt index 873ab621c..7096c1b01 100644 --- a/app/src/main/java/io/legado/app/model/analyzeRule/AnalyzeRule.kt +++ b/app/src/main/java/io/legado/app/model/analyzeRule/AnalyzeRule.kt @@ -458,8 +458,7 @@ class AnalyzeRule( mode = Mode.Json ruleStr } - ruleStr.startsWith("/") || ruleStr.startsWith("./") -> { - //XPath特征很明显,无需配置单独的识别标头 + ruleStr.startsWith("/") -> {//XPath特征很明显,无需配置单独的识别标头 mode = Mode.XPath ruleStr } @@ -604,7 +603,6 @@ class AnalyzeRule( || ruleStr.startsWith("$.") || ruleStr.startsWith("$[") || ruleStr.startsWith("//") - || ruleStr.startsWith("./") } }