1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
|
package utils
import org.jsoup.nodes.Element
import org.jsoup.nodes.Node
import org.jsoup.nodes.TextNode
/**
 * Checks the trailing child nodes of this element against [matchers], pairwise and in order.
 *
 * Text nodes whose text is blank are skipped. When more non-blank children remain than there are
 * matchers, only the last `matchers.size` of them are compared; the leading ones are ignored.
 *
 * @param matchers expected children: a [String] for a text node or a [Tag] for an element.
 * Any other value is rejected by the per-node check with an [IllegalArgumentException].
 * @param ignoreSpanWithTokenStyle when `true`, `<span>` children whose `class` attribute starts
 * with `"token "` are made transparent before matching: those without child nodes are dropped,
 * those with exactly one child node are replaced by that child, and adjacent text nodes are then
 * merged. The flag is also forwarded to nested [Tag] matchers.
 * @throws IllegalArgumentException if there are fewer non-blank child nodes than matchers.
 */
fun Element.match(vararg matchers: Any, ignoreSpanWithTokenStyle: Boolean = false) {
    // A "token span" is a <span> element whose class attribute starts with "token ".
    fun isTokenSpan(node: Node): Boolean =
        node is Element && node.tagName() == "span" && node.attr("class").startsWith("token ")

    val children: List<Node> = childNodes()
    val candidates: List<Node> =
        if (ignoreSpanWithTokenStyle) {
            children
                .filterNot { isTokenSpan(it) && it.childNodeSize() == 0 }
                .map { if (isTokenSpan(it) && it.childNodeSize() == 1) it.childNode(0) else it }
                .uniteConsecutiveTextNodes()
        } else {
            children
        }
    val significant = candidates.filter { it !is TextNode || it.text().isNotBlank() }

    // Fail with a readable message instead of the generic "Requested element count -N is less
    // than zero" that a negative drop count would otherwise produce (same exception type).
    require(significant.size >= matchers.size) {
        "Expected at least ${matchers.size} non-blank child nodes but found ${significant.size}: $significant"
    }

    significant
        .takeLast(matchers.size)
        .zip(matchers)
        .forEach { (node, matcher) -> matcher.accepts(node, ignoreSpan = ignoreSpanWithTokenStyle) }
}
/**
 * Matcher describing an expected HTML element.
 *
 * @property name tag name the matched element must have.
 * @property matchers matchers for the element's children; when empty, children are not checked.
 * @property expectedClasses CSS class names the matched element must carry.
 */
open class Tag(
    val name: String,
    vararg val matchers: Any,
    val expectedClasses: List<String> = emptyList()
)
/** [Tag] matcher for a `div` element; [matchers] describe its expected children. */
class Div(vararg matchers: Any) : Tag("div", *matchers)
/** [Tag] matcher for a `p` element; [matchers] describe its expected children. */
class P(vararg matchers: Any) : Tag("p", *matchers)
/** [Tag] matcher for a `span` element; [matchers] describe its expected children. */
class Span(vararg matchers: Any) : Tag("span", *matchers)
/** [Tag] matcher for an `a` element; [matchers] describe its expected children. */
class A(vararg matchers: Any) : Tag("a", *matchers)
/** [Tag] matcher for a `b` element; [matchers] describe its expected children. */
class B(vararg matchers: Any) : Tag("b", *matchers)
/** [Tag] matcher for an `i` element; [matchers] describe its expected children. */
class I(vararg matchers: Any) : Tag("i", *matchers)
/** [Tag] matcher for a `strike` element; [matchers] describe its expected children. */
class STRIKE(vararg matchers: Any) : Tag("strike", *matchers)
/** [Tag] matcher for a `dl` element; [matchers] describe its expected children. */
class Dl(vararg matchers: Any) : Tag("dl", *matchers)
/** [Tag] matcher for a `dt` element; [matchers] describe its expected children. */
class Dt(vararg matchers: Any) : Tag("dt", *matchers)
/** [Tag] matcher for a `dd` element; [matchers] describe its expected children. */
class Dd(vararg matchers: Any) : Tag("dd", *matchers)
/** Singleton [Tag] matcher for a `wbr` element, declared without child matchers. */
object Wbr : Tag("wbr")
/** Singleton [Tag] matcher for a `br` element, declared without child matchers. */
object Br : Tag("br")
/**
 * Builds a new [Tag] with the receiver's name and child matchers that expects the given [classes].
 *
 * The result is always a plain [Tag], and the receiver's own `expectedClasses` are replaced by
 * [classes] rather than extended.
 *
 * @param classes CSS class names the matched element must carry.
 * @return a fresh [Tag]; the receiver is left unchanged.
 */
fun Tag.withClasses(vararg classes: String): Tag {
    val required = classes.toList()
    return Tag(name, *matchers, expectedClasses = required)
}
/**
 * Verifies that node [n] satisfies this matcher.
 *
 * - A [String] matcher expects [n] to be a [TextNode] whose trimmed text equals the trimmed string.
 * - A [Tag] matcher expects [n] to be an [Element] with the same tag name and every expected
 *   class; if the tag declares child matchers, the element's children are matched recursively.
 *
 * Text, tag-name and class mismatches are reported through `assert`, which on the JVM only throws
 * when runtime assertions are enabled (`-ea`); the element-type check uses `check` and always throws.
 *
 * @param n node to verify.
 * @param ignoreSpan forwarded as `ignoreSpanWithTokenStyle` when matching a tag's children.
 * @throws IllegalArgumentException if the receiver is neither a [String] nor a [Tag].
 * @throws IllegalStateException if a [Tag] matcher is applied to a node that is not an [Element].
 */
private fun Any.accepts(n: Node, ignoreSpan: Boolean = true) {
    val matcher = this

    // Plain strings match text nodes, ignoring surrounding whitespace on both sides.
    if (matcher is String) {
        assert(n is TextNode && n.text().trim() == matcher.trim()) { "\"$matcher\" expected but found: $n" }
        return
    }

    if (matcher !is Tag) {
        throw IllegalArgumentException("$matcher is not proper matcher")
    }

    check(n is Element) { "Expected node to be Element: $n" }
    assert(n.tagName() == matcher.name) { "Tag \"${matcher.name}\" expected but found: \"$n\"" }
    for (cssClass in matcher.expectedClasses) {
        assert(n.hasClass(cssClass)) {
            "Expected to find class \"$cssClass\" for tag \"${matcher.name}\", found: ${n.classNames()}"
        }
    }

    // A tag without child matchers accepts any content.
    val childMatchers = matcher.matchers
    if (childMatchers.isNotEmpty()) {
        n.match(*childMatchers, ignoreSpanWithTokenStyle = ignoreSpan)
    }
}
/**
 * Returns a new list in which every run of adjacent [TextNode]s is collapsed into one freshly
 * created [TextNode] holding the concatenation of their `text()` values.
 *
 * Non-text nodes are kept as they are, in their original order. A text node with no text
 * neighbours is likewise replaced by a new [TextNode] carrying its `text()`.
 */
private fun List<Node>.uniteConsecutiveTextNodes(): MutableList<Node> {
    val merged = mutableListOf<Node>()
    val buffer = StringBuilder()
    var runOpen = false

    // Emits the text collected so far (possibly empty) as one node and resets the buffer.
    fun flush() {
        if (runOpen) {
            merged.add(TextNode(buffer.toString()))
            buffer.setLength(0)
            runOpen = false
        }
    }

    for (node in this) {
        if (node is TextNode) {
            buffer.append(node.text())
            runOpen = true
        } else {
            flush()
            merged.add(node)
        }
    }
    flush()
    return merged
}
|