diff --git a/app/build.gradle b/app/build.gradle index b380d8bff..9fffd1501 100644 --- a/app/build.gradle +++ b/app/build.gradle @@ -191,7 +191,6 @@ dependencies { //规则相关 implementation('org.jsoup:jsoup:1.15.4') implementation('com.jayway.jsonpath:json-path:2.7.0') - implementation('cn.wanghaomiao:JsoupXpath:2.5.2') implementation(project(path: ':epublib')) //JS rhino diff --git a/app/src/main/assets/defaultData/dictRules.json b/app/src/main/assets/defaultData/dictRules.json index 3495469ba..b4ac3c6ff 100644 --- a/app/src/main/assets/defaultData/dictRules.json +++ b/app/src/main/assets/defaultData/dictRules.json @@ -2,7 +2,7 @@ { "name": "百度汉语", "urlRule": "https://dict.baidu.com/s?wd={{key}}", - "showRule": "@js:var jsoup = org.jsoup.Jsoup.parse(result)\njsoup.select(\"script\").remove()\njsoup.select(\"#word-header\").remove()\njsoup.select(\"#term-header\").remove()\njsoup.select(\".more-button\").remove()\njsoup.select(\".disactive\").remove()\njsoup.select(\"#download-wrapper\").remove()\njsoup.select(\"#right-panel\").remove()\njsoup.select(\"#content-panel\").html()", + "showRule": "@js:var jsoup = org.jsoup.Jsoup.parse(result)\njsoup.select(\"script\").remove()\njsoup.select(\"#word-header\").remove()\njsoup.select(\"#term-header\").remove()\njsoup.select(\".more-button\").remove()\njsoup.select(\".disactive\").remove()\njsoup.select(\"#download-wrapper\").remove()\njsoup.select(\"#upload-dialog\").remove()\njsoup.select(\"#right-panel\").remove()\njsoup.select(\"#content-panel\").html()", "enabled": true, "sortNumber": 0 }, diff --git a/app/src/main/java/io/legado/app/model/analyzeRule/AnalyzeByJSoup.kt b/app/src/main/java/io/legado/app/model/analyzeRule/AnalyzeByJSoup.kt index 45ea15c8f..92e24a26b 100644 --- a/app/src/main/java/io/legado/app/model/analyzeRule/AnalyzeByJSoup.kt +++ b/app/src/main/java/io/legado/app/model/analyzeRule/AnalyzeByJSoup.kt @@ -6,7 +6,6 @@ import org.jsoup.nodes.Element import org.jsoup.select.Collector import org.jsoup.select.Elements import org.jsoup.select.Evaluator -import org.seimicrawler.xpath.JXNode /** * Created by GKF on 2018/1/25. @@ -14,20 +13,16 @@ import org.seimicrawler.xpath.JXNode */ @Keep class AnalyzeByJSoup(doc: Any) { - companion object { - - fun parse(doc: Any): Element { - return when (doc) { - is Element -> doc - is JXNode -> if (doc.isElement) doc.asElement() else Jsoup.parse(doc.toString()) - else -> Jsoup.parse(doc.toString()) - } - } - - } private var element: Element = parse(doc) + private fun parse(doc: Any): Element { + return when (doc) { + is Element -> doc + else -> Jsoup.parse(doc.toString()) + } + } + /** * 获取列表 */ @@ -470,7 +465,6 @@ class AnalyzeByJSoup(doc: Any) { l = "" //清空 curMinus = false //重置 } - } split = ' ' diff --git a/app/src/main/java/io/legado/app/model/analyzeRule/AnalyzeByXPath.kt b/app/src/main/java/io/legado/app/model/analyzeRule/AnalyzeByXPath.kt index 5b9937324..1cb01826c 100644 --- a/app/src/main/java/io/legado/app/model/analyzeRule/AnalyzeByXPath.kt +++ b/app/src/main/java/io/legado/app/model/analyzeRule/AnalyzeByXPath.kt @@ -3,62 +3,34 @@ package io.legado.app.model.analyzeRule import android.text.TextUtils import androidx.annotation.Keep import org.jsoup.Jsoup -import org.jsoup.nodes.Document import org.jsoup.nodes.Element -import org.jsoup.parser.Parser +import org.jsoup.nodes.TextNode import org.jsoup.select.Elements -import org.seimicrawler.xpath.JXDocument -import org.seimicrawler.xpath.JXNode @Keep class AnalyzeByXPath(doc: Any) { - private var jxNode: Any = parse(doc) - private fun parse(doc: Any): Any { + private var element: Element = parse(doc) + + private fun parse(doc: Any): Element { return when (doc) { - is JXNode -> if (doc.isElement) doc else strToJXDocument(doc.toString()) - is Document -> JXDocument.create(doc) - is Element -> JXDocument.create(Elements(doc)) - is Elements -> JXDocument.create(doc) - else -> strToJXDocument(doc.toString()) + is Element -> doc + else -> Jsoup.parse(doc.toString()) } } - private fun strToJXDocument(html: String): JXDocument { - var html1 = html - if (html1.endsWith("")) { - html1 = "${html1}" - } - if (html1.endsWith("") || html1.endsWith("")) { - html1 = "${html1}
" - } - if (html1.trim().startsWith("? { - val node = jxNode - return if (node is JXNode) { - node.sel(xPath) - } else { - (node as JXDocument).selN(xPath) - } - } - - internal fun getElements(xPath: String): List? { + internal fun getElements(xPath: String): Elements? { if (xPath.isEmpty()) return null - val jxNodes = ArrayList() + val jxNodes = Elements() val ruleAnalyzes = RuleAnalyzer(xPath) val rules = ruleAnalyzes.splitRule("&&", "||", "%%") if (rules.size == 1) { - return getResult(rules[0]) + return element.selectXpath(rules[0]) } else { - val results = ArrayList>() + val results = ArrayList() for (rl in rules) { val temp = getElements(rl) if (temp != null && temp.isNotEmpty()) { @@ -94,8 +66,8 @@ class AnalyzeByXPath(doc: Any) { val rules = ruleAnalyzes.splitRule("&&", "||", "%%") if (rules.size == 1) { - getResult(xPath)?.map { - result.add(it.asString()) + element.selectXpath(xPath).forEach { + result.add(it.toString()) } return result } else { @@ -132,10 +104,22 @@ class AnalyzeByXPath(doc: Any) { val ruleAnalyzes = RuleAnalyzer(rule) val rules = ruleAnalyzes.splitRule("&&", "||") if (rules.size == 1) { - getResult(rule)?.let { - return TextUtils.join("\n", it) + val xpath = when { + rule.startsWith("///") -> ".${rule.substring(1)}" + rule.startsWith("/") -> ".$rule" + else -> rule + } + val x = xpath.substringAfterLast("/") + return if (x.startsWith("@")) { + element.selectXpath(xpath.substringBeforeLast("/")) + .eachAttr(x.substring(1)).let { + TextUtils.join("\n", it) + } + } else { + element.selectXpath(xpath, TextNode::class.java).let { + TextUtils.join("\n", it) + } } - return null } else { val textList = arrayListOf() for (rl in rules) { diff --git a/app/src/main/java/io/legado/app/model/analyzeRule/AnalyzeRule.kt b/app/src/main/java/io/legado/app/model/analyzeRule/AnalyzeRule.kt index 7096c1b01..873ab621c 100644 --- a/app/src/main/java/io/legado/app/model/analyzeRule/AnalyzeRule.kt +++ b/app/src/main/java/io/legado/app/model/analyzeRule/AnalyzeRule.kt @@ -458,7 +458,8 @@ class AnalyzeRule( mode = Mode.Json ruleStr } - ruleStr.startsWith("/") -> {//XPath特征很明显,无需配置单独的识别标头 + ruleStr.startsWith("/") || ruleStr.startsWith("./") -> { + //XPath特征很明显,无需配置单独的识别标头 mode = Mode.XPath ruleStr } @@ -603,6 +604,7 @@ class AnalyzeRule( || ruleStr.startsWith("$.") || ruleStr.startsWith("$[") || ruleStr.startsWith("//") + || ruleStr.startsWith("./") } }