diff --git a/app/build.gradle b/app/build.gradle index 9fffd1501..b380d8bff 100644 --- a/app/build.gradle +++ b/app/build.gradle @@ -191,6 +191,7 @@ dependencies { //规则相关 implementation('org.jsoup:jsoup:1.15.4') implementation('com.jayway.jsonpath:json-path:2.7.0') + implementation('cn.wanghaomiao:JsoupXpath:2.5.2') implementation(project(path: ':epublib')) //JS rhino diff --git a/app/src/main/java/io/legado/app/model/analyzeRule/AnalyzeByJSoup.kt b/app/src/main/java/io/legado/app/model/analyzeRule/AnalyzeByJSoup.kt index 4e58e1536..28bddac73 100644 --- a/app/src/main/java/io/legado/app/model/analyzeRule/AnalyzeByJSoup.kt +++ b/app/src/main/java/io/legado/app/model/analyzeRule/AnalyzeByJSoup.kt @@ -7,6 +7,7 @@ import org.jsoup.parser.Parser import org.jsoup.select.Collector import org.jsoup.select.Elements import org.jsoup.select.Evaluator +import org.seimicrawler.xpath.JXNode /** * Created by GKF on 2018/1/25. @@ -21,6 +22,9 @@ class AnalyzeByJSoup(doc: Any) { if (doc is Element) { return doc } + if (doc is JXNode) { + return if (doc.isElement) doc.asElement() else Jsoup.parse(doc.toString()) + } if (doc.toString().startsWith(" if (doc.isElement) doc else strToJXDocument(doc.toString()) + is Document -> JXDocument.create(doc) + is Element -> JXDocument.create(Elements(doc)) + is Elements -> JXDocument.create(doc) + else -> strToJXDocument(doc.toString()) } - if (doc.toString().startsWith("")) { + html1 = "${html1}" + } + if (html1.endsWith("") || html1.endsWith("")) { + html1 = "${html1}
" + } + if (html1.trim().startsWith("? { + val node = jxNode + return if (node is JXNode) { + node.sel(xPath) + } else { + (node as JXDocument).selN(xPath) + } + } + + internal fun getElements(xPath: String): List? { if (xPath.isEmpty()) return null - val jxNodes = Elements() + val jxNodes = ArrayList() val ruleAnalyzes = RuleAnalyzer(xPath) val rules = ruleAnalyzes.splitRule("&&", "||", "%%") if (rules.size == 1) { - return element.selectXpath(rules[0]) + return getResult(rules[0]) } else { - val results = ArrayList() + val results = ArrayList>() for (rl in rules) { val temp = getElements(rl) if (temp != null && temp.isNotEmpty()) { @@ -70,8 +94,8 @@ class AnalyzeByXPath(doc: Any) { val rules = ruleAnalyzes.splitRule("&&", "||", "%%") if (rules.size == 1) { - element.selectXpath(xPath).forEach { - result.add(it.toString()) + getResult(xPath)?.map { + result.add(it.asString()) } return result } else { @@ -108,22 +132,10 @@ class AnalyzeByXPath(doc: Any) { val ruleAnalyzes = RuleAnalyzer(rule) val rules = ruleAnalyzes.splitRule("&&", "||") if (rules.size == 1) { - val xpath = when { - rule.startsWith("///") -> ".${rule.substring(1)}" - rule.startsWith("/") -> ".$rule" - else -> rule - } - val x = xpath.substringAfterLast("/") - return if (x.startsWith("@")) { - element.selectXpath(xpath.substringBeforeLast("/")) - .eachAttr(x.substring(1)).let { - TextUtils.join("\n", it) - } - } else { - element.selectXpath(xpath, TextNode::class.java).let { - TextUtils.join("\n", it) - } + getResult(rule)?.let { + return TextUtils.join("\n", it) } + return null } else { val textList = arrayListOf() for (rl in rules) { diff --git a/app/src/main/java/io/legado/app/model/analyzeRule/AnalyzeRule.kt b/app/src/main/java/io/legado/app/model/analyzeRule/AnalyzeRule.kt index 873ab621c..7096c1b01 100644 --- a/app/src/main/java/io/legado/app/model/analyzeRule/AnalyzeRule.kt +++ b/app/src/main/java/io/legado/app/model/analyzeRule/AnalyzeRule.kt @@ -458,8 +458,7 @@ class AnalyzeRule( mode = Mode.Json ruleStr } - ruleStr.startsWith("/") || ruleStr.startsWith("./") -> { - //XPath特征很明显,无需配置单独的识别标头 + ruleStr.startsWith("/") -> {//XPath特征很明显,无需配置单独的识别标头 mode = Mode.XPath ruleStr } @@ -604,7 +603,6 @@ class AnalyzeRule( || ruleStr.startsWith("$.") || ruleStr.startsWith("$[") || ruleStr.startsWith("//") - || ruleStr.startsWith("./") } }