diff options
author | Marcin Aman <marcin.aman@gmail.com> | 2021-07-12 09:58:38 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2021-07-12 09:58:38 +0200 |
commit | cc6b2991df60f43607c8271d9657be89b3463a69 (patch) | |
tree | c705aa2808224a6b80de270ac65d61936e93f253 /plugins/base/src/main/kotlin/translators | |
parent | 4548d1d929950c794b81cdad648bd2e0fd13a4e1 (diff) | |
download | dokka-cc6b2991df60f43607c8271d9657be89b3463a69.tar.gz dokka-cc6b2991df60f43607c8271d9657be89b3463a69.tar.bz2 dokka-cc6b2991df60f43607c8271d9657be89b3463a69.zip |
Handle NBSP and other html entries (#2005)
Diffstat (limited to 'plugins/base/src/main/kotlin/translators')
-rw-r--r-- | plugins/base/src/main/kotlin/translators/parseWithNormalisedSpaces.kt | 50 | ||||
-rw-r--r-- | plugins/base/src/main/kotlin/translators/psi/parsers/JavadocParser.kt | 13 |
2 files changed, 60 insertions, 3 deletions
diff --git a/plugins/base/src/main/kotlin/translators/parseWithNormalisedSpaces.kt b/plugins/base/src/main/kotlin/translators/parseWithNormalisedSpaces.kt new file mode 100644 index 00000000..4bb60f1a --- /dev/null +++ b/plugins/base/src/main/kotlin/translators/parseWithNormalisedSpaces.kt @@ -0,0 +1,50 @@ +package org.jetbrains.dokka.base.translators + +import org.intellij.markdown.lexer.Compat.codePointToString +import org.intellij.markdown.lexer.Compat.forEachCodePoint +import org.jetbrains.dokka.model.doc.DocTag +import org.jetbrains.dokka.model.doc.DocTag.Companion.contentTypeParam +import org.jetbrains.dokka.model.doc.Text +import org.jsoup.Jsoup +import org.jsoup.internal.StringUtil +import org.jsoup.nodes.Entities + +internal fun String.parseHtmlEncodedWithNormalisedSpaces( + renderWhiteCharactersAsSpaces: Boolean +): List<DocTag> { + val accum = StringBuilder() + val tags = mutableListOf<DocTag>() + var lastWasWhite = false + + forEachCodePoint { c -> + if (renderWhiteCharactersAsSpaces && StringUtil.isWhitespace(c)) { + if (!lastWasWhite) { + accum.append(' ') + lastWasWhite = true + } + } else if (codePointToString(c).let { it != Entities.escape(it) }) { + accum.toString().takeIf { it.isNotBlank() }?.let { tags.add(Text(it)) } + accum.delete(0, accum.length) + + accum.appendCodePoint(c) + tags.add(Text(accum.toString(), params = contentTypeParam("html"))) + accum.delete(0, accum.length) + } else if (!StringUtil.isInvisibleChar(c)) { + accum.appendCodePoint(c) + lastWasWhite = false + } + } + accum.toString().takeIf { it.isNotBlank() }?.let { tags.add(Text(it)) } + return tags +} + +/** + * Parses string into [Text] doc tags that can have either value of the string or html-encoded value with content-type=html parameter. 
+ * Content type is added when dealing with html entries like `&nbsp;` + */ +internal fun String.parseWithNormalisedSpaces( + renderWhiteCharactersAsSpaces: Boolean +): List<DocTag> = + //parsing it using jsoup is required to get codePoints, otherwise they are interpreted separately, as chars + //But we dont need to do it for java as it is already parsed with jsoup + Jsoup.parseBodyFragment(this).body().wholeText().parseHtmlEncodedWithNormalisedSpaces(renderWhiteCharactersAsSpaces)
\ No newline at end of file diff --git a/plugins/base/src/main/kotlin/translators/psi/parsers/JavadocParser.kt b/plugins/base/src/main/kotlin/translators/psi/parsers/JavadocParser.kt index 53424ef9..ce022dd7 100644 --- a/plugins/base/src/main/kotlin/translators/psi/parsers/JavadocParser.kt +++ b/plugins/base/src/main/kotlin/translators/psi/parsers/JavadocParser.kt @@ -8,12 +8,14 @@ import com.intellij.psi.impl.source.tree.LazyParseablePsiElement import com.intellij.psi.impl.source.tree.LeafPsiElement import com.intellij.psi.javadoc.* import org.intellij.markdown.MarkdownElementTypes +import org.intellij.markdown.lexer.Compat.forEachCodePoint import org.jetbrains.dokka.analysis.DokkaResolutionFacade import org.jetbrains.dokka.analysis.from import org.jetbrains.dokka.base.parsers.MarkdownParser +import org.jetbrains.dokka.base.translators.parseHtmlEncodedWithNormalisedSpaces +import org.jetbrains.dokka.base.translators.parseWithNormalisedSpaces import org.jetbrains.dokka.links.DRI import org.jetbrains.dokka.model.doc.* -import org.jetbrains.dokka.model.doc.Deprecated import org.jetbrains.dokka.utilities.DokkaLogger import org.jetbrains.dokka.utilities.enumValueOrNull import org.jetbrains.kotlin.idea.kdoc.resolveKDocLink @@ -22,7 +24,9 @@ import org.jetbrains.kotlin.idea.util.CommentSaver.Companion.tokenType import org.jetbrains.kotlin.psi.psiUtil.getNextSiblingIgnoringWhitespace import org.jetbrains.kotlin.psi.psiUtil.siblings import org.jsoup.Jsoup +import org.jsoup.internal.StringUtil import org.jsoup.nodes.Element +import org.jsoup.nodes.Entities import org.jsoup.nodes.Node import org.jsoup.nodes.TextNode import java.util.* @@ -402,8 +406,11 @@ class JavadocParser( } private fun convertHtmlNode(node: Node, insidePre: Boolean = false): List<DocTag> = when (node) { - is TextNode -> (if (insidePre) node.wholeText else node.text() - .takeIf { it.isNotBlank() })?.let { listOf(Text(body = it)) }.orEmpty() + is TextNode -> (if (insidePre) { + node.wholeText.takeIf { 
it.isNotBlank() }?.let { listOf(Text(body = it)) } + } else { + node.wholeText.parseHtmlEncodedWithNormalisedSpaces(renderWhiteCharactersAsSpaces = true) + }).orEmpty() is Element -> createBlock(node) else -> emptyList() } |