blob: 7bda9d0bbaf0e22f3c4a8028375499225462db74 (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
|
package org.jetbrains.dokka.base.translators
import org.intellij.markdown.lexer.Compat.codePointToString
import org.intellij.markdown.lexer.Compat.forEachCodePoint
import org.jetbrains.dokka.model.doc.DocTag
import org.jetbrains.dokka.model.doc.DocTag.Companion.contentTypeParam
import org.jetbrains.dokka.model.doc.Text
import org.jsoup.Jsoup
import org.jsoup.internal.StringUtil
import org.jsoup.nodes.Entities
/**
 * Walks the receiver code point by code point and splits it into [Text] doc tags.
 *
 * - When [renderWhiteCharactersAsSpaces] is `true`, every run of code points accepted by
 *   [StringUtil.isWhitespace] is collapsed into a single `' '`.
 * - A code point whose string form is changed by [Entities.escape] is emitted as its own [Text]
 *   carrying the `contentTypeParam("html")` params; plain text buffered before it is emitted first.
 * - Any other code point is buffered as plain text unless [StringUtil.isInvisibleChar] rejects it.
 *
 * Buffered plain text that is blank is dropped instead of being emitted.
 *
 * @param renderWhiteCharactersAsSpaces whether whitespace runs are normalised to one space
 * @return the produced tags, in the order they occur in the receiver
 */
internal fun String.parseHtmlEncodedWithNormalisedSpaces(
    renderWhiteCharactersAsSpaces: Boolean
): List<DocTag> {
    val result = mutableListOf<DocTag>()
    val pending = StringBuilder()
    var previousWasWhitespace = false

    // Emits the buffered plain text (only if it is not blank) and empties the buffer either way.
    fun flushPending() {
        val plain = pending.toString()
        if (plain.isNotBlank()) {
            result.add(Text(plain))
        }
        pending.setLength(0)
    }

    forEachCodePoint { codePoint ->
        when {
            renderWhiteCharactersAsSpaces && StringUtil.isWhitespace(codePoint) -> {
                // Only the first code point of a whitespace run contributes a space.
                if (!previousWasWhitespace) {
                    pending.append(' ')
                    previousWasWhitespace = true
                }
            }

            codePointToString(codePoint).let { raw -> raw != Entities.escape(raw) } -> {
                flushPending()
                val entityText = StringBuilder().appendCodePoint(codePoint).toString()
                result.add(Text(entityText, params = contentTypeParam("html")))
                // NOTE(review): the whitespace flag is deliberately left as it was here, so whitespace
                // that directly follows an entity which itself followed whitespace adds no new space.
                // Confirm this is the intended behaviour before changing it.
            }

            !StringUtil.isInvisibleChar(codePoint) -> {
                pending.appendCodePoint(codePoint)
                previousWasWhitespace = false
            }
        }
    }

    flushPending()
    return result
}
/**
 * Parses the receiver into [Text] doc tags. Each tag holds either a plain piece of the string or a
 * character with an html-encoded form, in which case it carries the content-type=html parameter.
 * The content type is added when dealing with html entities like `&nbsp;`.
 *
 * @param renderWhiteCharactersAsSpaces forwarded unchanged to [parseHtmlEncodedWithNormalisedSpaces]
 * @return the tags produced by [parseHtmlEncodedWithNormalisedSpaces]
 */
internal fun String.parseWithNormalisedSpaces(
    renderWhiteCharactersAsSpaces: Boolean
): List<DocTag> {
    val textToParse = if (requiresHtmlEncoding()) {
        // Running the text through jsoup is required to obtain code points; otherwise the pieces of
        // an entity would be interpreted separately, as individual chars.
        // This is not needed for Java sources, as those are already parsed with jsoup.
        Jsoup.parseBodyFragment(this).body().wholeText()
    } else {
        // No '&' in the text, so it can be parsed as it is.
        this
    }
    return textToParse.parseHtmlEncodedWithNormalisedSpaces(renderWhiteCharactersAsSpaces)
}
/** Returns `true` when the receiver contains at least one `&` character. */
private fun String.requiresHtmlEncoding(): Boolean = contains('&')
|