From d8a165c8a5a8ad0addddec9eb2ab76b716d075b8 Mon Sep 17 00:00:00 2001
From: kunfei <gekunfei@live.com>
Date: Wed, 22 Feb 2023 17:52:45 +0800
Subject: [PATCH] =?UTF-8?q?jsoup=E8=87=AA=E5=B8=A6=E7=9A=84xpath=E6=9C=89?=
 =?UTF-8?q?=E4=BA=9B=E8=A7=84=E5=88=99=E6=97=A0=E6=B3=95=E8=A7=A3=E6=9E=90?=
 =?UTF-8?q?,=E5=85=88=E6=81=A2=E5=A4=8D=20=E6=AF=94=E5=A6=82org.jsoup.sele?=
 =?UTF-8?q?ct.Selector$SelectorParseException:=20Could=20not=20evaluate=20?=
 =?UTF-8?q?XPath=20query=20[//*[@class*=3D"all-book-list"=20or=20@id=3D"ra?=
 =?UTF-8?q?nk-view-list"=20or=20@id=3D"limit-list"]//*[@data-rid]]:=20=20?=
 =?UTF-8?q?=20javax.xml.transform.TransformerException:=20A=20location=20p?=
 =?UTF-8?q?ath=20was=20expected,=20but=20the=20following=20token=20was=20e?=
 =?UTF-8?q?ncountered:=20=20=3D?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 app/build.gradle                              |  1 +
 .../app/model/analyzeRule/AnalyzeByJSoup.kt   |  4 +
 .../app/model/analyzeRule/AnalyzeByXPath.kt   | 74 +++++++++++--------
 .../app/model/analyzeRule/AnalyzeRule.kt      |  4 +-
 4 files changed, 49 insertions(+), 34 deletions(-)

diff --git a/app/build.gradle b/app/build.gradle
index 9fffd1501..b380d8bff 100644
--- a/app/build.gradle
+++ b/app/build.gradle
@@ -191,6 +191,7 @@ dependencies {
     //规则相关
     implementation('org.jsoup:jsoup:1.15.4')
     implementation('com.jayway.jsonpath:json-path:2.7.0')
+    implementation('cn.wanghaomiao:JsoupXpath:2.5.2')
     implementation(project(path: ':epublib'))
 
     //JS rhino
diff --git a/app/src/main/java/io/legado/app/model/analyzeRule/AnalyzeByJSoup.kt b/app/src/main/java/io/legado/app/model/analyzeRule/AnalyzeByJSoup.kt
index 4e58e1536..28bddac73 100644
--- a/app/src/main/java/io/legado/app/model/analyzeRule/AnalyzeByJSoup.kt
+++ b/app/src/main/java/io/legado/app/model/analyzeRule/AnalyzeByJSoup.kt
@@ -7,6 +7,7 @@ import org.jsoup.parser.Parser
 import org.jsoup.select.Collector
 import org.jsoup.select.Elements
 import org.jsoup.select.Evaluator
+import org.seimicrawler.xpath.JXNode
 
 /**
  * Created by GKF on 2018/1/25.
@@ -21,6 +22,9 @@ class AnalyzeByJSoup(doc: Any) {
         if (doc is Element) {
             return doc
         }
+        if (doc is JXNode) {
+            return if (doc.isElement) doc.asElement() else Jsoup.parse(doc.toString())
+        }
         if (doc.toString().startsWith("<?xml", true)) {
             return Jsoup.parse(doc.toString(), Parser.xmlParser())
         }
diff --git a/app/src/main/java/io/legado/app/model/analyzeRule/AnalyzeByXPath.kt b/app/src/main/java/io/legado/app/model/analyzeRule/AnalyzeByXPath.kt
index dbc50b486..5b9937324 100644
--- a/app/src/main/java/io/legado/app/model/analyzeRule/AnalyzeByXPath.kt
+++ b/app/src/main/java/io/legado/app/model/analyzeRule/AnalyzeByXPath.kt
@@ -3,38 +3,62 @@ package io.legado.app.model.analyzeRule
 import android.text.TextUtils
 import androidx.annotation.Keep
 import org.jsoup.Jsoup
+import org.jsoup.nodes.Document
 import org.jsoup.nodes.Element
-import org.jsoup.nodes.TextNode
 import org.jsoup.parser.Parser
 import org.jsoup.select.Elements
+import org.seimicrawler.xpath.JXDocument
+import org.seimicrawler.xpath.JXNode
 
 @Keep
 class AnalyzeByXPath(doc: Any) {
+    private var jxNode: Any = parse(doc)
 
-    private var element: Element = parse(doc)
-
-    private fun parse(doc: Any): Element {
-        if (doc is Element) {
-            return doc
+    private fun parse(doc: Any): Any { // normalizes the input; every branch yields a JXNode or a JXDocument
+        return when (doc) { // branch order matters: the first matching type wins
+            is JXNode -> if (doc.isElement) doc else strToJXDocument(doc.toString()) // element nodes are kept as-is; other nodes are re-parsed from their string form
+            is Document -> JXDocument.create(doc) // jsoup's Document is an Element subclass, so it is matched before the Element branch
+            is Element -> JXDocument.create(Elements(doc)) // a lone element is wrapped in an Elements collection first
+            is Elements -> JXDocument.create(doc)
+            else -> strToJXDocument(doc.toString()) // anything else is parsed from its string representation
         }
-        if (doc.toString().startsWith("<?xml", true)) {
-            return Jsoup.parse(doc.toString(), Parser.xmlParser())
-        }
-        return Jsoup.parse(doc.toString())
     }
 
-    internal fun getElements(xPath: String): Elements? {
+    /** Parses [html] into a JXDocument, first wrapping bare td/tr/tbody fragments in table markup. */
+    private fun strToJXDocument(html: String): JXDocument {
+        // Trailing whitespace is ignored when looking for a closing table tag (the xml check trims too).
+        fun endsWithTag(text: String, tag: String) = text.trimEnd().endsWith(tag)
+        var markup = html
+        if (endsWithTag(markup, "</td>")) markup = "<tr>${markup}</tr>"
+        if (endsWithTag(markup, "</tr>") || endsWithTag(markup, "</tbody>")) markup = "<table>${markup}</table>"
+        // A leading XML declaration selects jsoup's XML parser; everything else goes to JXDocument.create directly.
+        return when {
+            markup.trim().startsWith("<?xml", true) -> JXDocument.create(Jsoup.parse(markup, Parser.xmlParser()))
+            else -> JXDocument.create(markup)
+        }
+    }
+
+    /** Runs [xPath] against the parsed root: `sel` on a JXNode, `selN` on a JXDocument. */
+    private fun getResult(xPath: String): List<JXNode>? {
+        return when (val root = jxNode) {
+            is JXNode -> root.sel(xPath)
+            // Any root that is not a JXNode is cast to JXDocument, exactly as before.
+            else -> (root as JXDocument).selN(xPath)
+        }
+    }
+
+    internal fun getElements(xPath: String): List<JXNode>? {
 
         if (xPath.isEmpty()) return null
 
-        val jxNodes = Elements()
+        val jxNodes = ArrayList<JXNode>()
         val ruleAnalyzes = RuleAnalyzer(xPath)
         val rules = ruleAnalyzes.splitRule("&&", "||", "%%")
 
         if (rules.size == 1) {
-            return element.selectXpath(rules[0])
+            return getResult(rules[0])
         } else {
-            val results = ArrayList<Elements>()
+            val results = ArrayList<List<JXNode>>()
             for (rl in rules) {
                 val temp = getElements(rl)
                 if (temp != null && temp.isNotEmpty()) {
@@ -70,8 +94,8 @@ class AnalyzeByXPath(doc: Any) {
         val rules = ruleAnalyzes.splitRule("&&", "||", "%%")
 
         if (rules.size == 1) {
-            element.selectXpath(xPath).forEach {
-                result.add(it.toString())
+            getResult(xPath)?.forEach {
+                result.add(it.asString()) // collect each matched node in its string form
             }
             return result
         } else {
@@ -108,22 +132,10 @@ class AnalyzeByXPath(doc: Any) {
         val ruleAnalyzes = RuleAnalyzer(rule)
         val rules = ruleAnalyzes.splitRule("&&", "||")
         if (rules.size == 1) {
-            val xpath = when {
-                rule.startsWith("///") -> ".${rule.substring(1)}"
-                rule.startsWith("/") -> ".$rule"
-                else -> rule
-            }
-            val x = xpath.substringAfterLast("/")
-            return if (x.startsWith("@")) {
-                element.selectXpath(xpath.substringBeforeLast("/"))
-                    .eachAttr(x.substring(1)).let {
-                        TextUtils.join("\n", it)
-                    }
-            } else {
-                element.selectXpath(xpath, TextNode::class.java).let {
-                    TextUtils.join("\n", it)
-                }
+            getResult(rule)?.let {
+                return TextUtils.join("\n", it)
             }
+            return null
         } else {
             val textList = arrayListOf<String>()
             for (rl in rules) {
diff --git a/app/src/main/java/io/legado/app/model/analyzeRule/AnalyzeRule.kt b/app/src/main/java/io/legado/app/model/analyzeRule/AnalyzeRule.kt
index 873ab621c..7096c1b01 100644
--- a/app/src/main/java/io/legado/app/model/analyzeRule/AnalyzeRule.kt
+++ b/app/src/main/java/io/legado/app/model/analyzeRule/AnalyzeRule.kt
@@ -458,8 +458,7 @@ class AnalyzeRule(
                     mode = Mode.Json
                     ruleStr
                 }
-                ruleStr.startsWith("/") || ruleStr.startsWith("./") -> {
-                    //XPath特征很明显,无需配置单独的识别标头
+                ruleStr.startsWith("/") -> {//XPath特征很明显,无需配置单独的识别标头
                     mode = Mode.XPath
                     ruleStr
                 }
@@ -604,7 +603,6 @@ class AnalyzeRule(
                     || ruleStr.startsWith("$.")
                     || ruleStr.startsWith("$[")
                     || ruleStr.startsWith("//")
-                    || ruleStr.startsWith("./")
         }
     }