From cc6b2991df60f43607c8271d9657be89b3463a69 Mon Sep 17 00:00:00 2001 From: Marcin Aman Date: Mon, 12 Jul 2021 09:58:38 +0200 Subject: Handle NBSP and other html entries (#2005) --- plugins/base/api/base.api | 4 +- .../base/src/main/kotlin/parsers/MarkdownParser.kt | 91 ++++++++--------- .../factories/DocTagsFromIElementFactory.kt | 16 ++- .../translators/parseWithNormalisedSpaces.kt | 50 +++++++++ .../translators/psi/parsers/JavadocParser.kt | 13 ++- .../kotlin/content/ContentInDescriptionTest.kt | 113 +++++++++++++++++++++ 6 files changed, 230 insertions(+), 57 deletions(-) create mode 100644 plugins/base/src/main/kotlin/translators/parseWithNormalisedSpaces.kt create mode 100644 plugins/base/src/test/kotlin/content/ContentInDescriptionTest.kt (limited to 'plugins/base') diff --git a/plugins/base/api/base.api b/plugins/base/api/base.api index a89f38c0..51c44862 100644 --- a/plugins/base/api/base.api +++ b/plugins/base/api/base.api @@ -129,8 +129,8 @@ public abstract class org/jetbrains/dokka/base/parsers/Parser { public final class org/jetbrains/dokka/base/parsers/factories/DocTagsFromIElementFactory { public static final field INSTANCE Lorg/jetbrains/dokka/base/parsers/factories/DocTagsFromIElementFactory; - public final fun getInstance (Lorg/intellij/markdown/IElementType;Ljava/util/List;Ljava/util/Map;Ljava/lang/String;Lorg/jetbrains/dokka/links/DRI;)Lorg/jetbrains/dokka/model/doc/DocTag; - public static synthetic fun getInstance$default (Lorg/jetbrains/dokka/base/parsers/factories/DocTagsFromIElementFactory;Lorg/intellij/markdown/IElementType;Ljava/util/List;Ljava/util/Map;Ljava/lang/String;Lorg/jetbrains/dokka/links/DRI;ILjava/lang/Object;)Lorg/jetbrains/dokka/model/doc/DocTag; + public final fun getInstance (Lorg/intellij/markdown/IElementType;Ljava/util/List;Ljava/util/Map;Ljava/lang/String;Lorg/jetbrains/dokka/links/DRI;Z)Ljava/util/List; + public static synthetic fun getInstance$default (Lorg/jetbrains/dokka/base/parsers/factories/DocTagsFromIElementFactory;Lorg/intellij/markdown/IElementType;Ljava/util/List;Ljava/util/Map;Ljava/lang/String;Lorg/jetbrains/dokka/links/DRI;ZILjava/lang/Object;)Ljava/util/List; } public final class org/jetbrains/dokka/base/parsers/factories/DocTagsFromStringFactory { diff --git a/plugins/base/src/main/kotlin/parsers/MarkdownParser.kt b/plugins/base/src/main/kotlin/parsers/MarkdownParser.kt index 34dceeea..4787c091 100644 --- a/plugins/base/src/main/kotlin/parsers/MarkdownParser.kt +++ b/plugins/base/src/main/kotlin/parsers/MarkdownParser.kt @@ -12,24 +12,8 @@ import org.intellij.markdown.flavours.gfm.GFMFlavourDescriptor import org.intellij.markdown.flavours.gfm.GFMTokenTypes import org.jetbrains.dokka.base.parsers.factories.DocTagsFromIElementFactory import org.jetbrains.dokka.links.DRI -import org.jetbrains.dokka.model.doc.Author -import org.jetbrains.dokka.model.doc.Constructor -import org.jetbrains.dokka.model.doc.CustomTagWrapper -import org.jetbrains.dokka.model.doc.Description -import org.jetbrains.dokka.model.doc.DocTag -import org.jetbrains.dokka.model.doc.DocumentationLink -import org.jetbrains.dokka.model.doc.DocumentationNode -import org.jetbrains.dokka.model.doc.Param -import org.jetbrains.dokka.model.doc.Property -import org.jetbrains.dokka.model.doc.Receiver -import org.jetbrains.dokka.model.doc.Return -import org.jetbrains.dokka.model.doc.Sample -import org.jetbrains.dokka.model.doc.See -import org.jetbrains.dokka.model.doc.Since +import org.jetbrains.dokka.model.doc.* import org.jetbrains.dokka.model.doc.Suppress -import org.jetbrains.dokka.model.doc.TagWrapper -import org.jetbrains.dokka.model.doc.Text -import org.jetbrains.dokka.model.doc.Throws import org.jetbrains.kotlin.kdoc.parser.KDocKnownTag import org.jetbrains.kotlin.kdoc.psi.impl.KDocSection import org.jetbrains.kotlin.kdoc.psi.impl.KDocTag @@ -50,7 +34,12 @@ open class MarkdownParser( val markdownAstRoot = IntellijMarkdownParser(gfmFlavourDescriptor).buildMarkdownTreeFromString(extractedString) destinationLinksMap = getAllDestinationLinks(extractedString, markdownAstRoot).toMap() text = extractedString - return visitNode(markdownAstRoot) + + val parsed = visitNode(markdownAstRoot) + if (parsed.size == 1) { + return parsed.first() + } + return CustomDocTag(children = parsed, params = emptyMap(), name = "") } override fun preparse(text: String) = text.replace("\r\n", "\n").replace("\r", "\n") @@ -82,7 +71,7 @@ open class MarkdownParser( node.type, visitNode(node.children.find { it.type == MarkdownTokenTypes.ATX_CONTENT } ?: throw detailedException("Wrong AST Tree. Header does not contain expected content", node) - ).children + ).flatMap { it.children } ) private fun horizontalRulesHandler(node: ASTNode) = @@ -110,7 +99,7 @@ open class MarkdownParser( .evaluateChildren() ) - private fun listsHandler(node: ASTNode): DocTag { + private fun listsHandler(node: ASTNode): List { val children = node.children.filterIsInstance().flatMap { if (it.children.last().type in listOf( @@ -129,7 +118,7 @@ open class MarkdownParser( node.type, children = children - .map { + .flatMap { if (it.type == MarkdownElementTypes.LIST_ITEM) DocTagsFromIElementFactory.getInstance( it.type, @@ -178,7 +167,7 @@ open class MarkdownParser( node.children.filterIsInstance().flatMap { getAllDestinationLinks(text, it) } - private fun referenceLinksHandler(node: ASTNode): DocTag { + private fun referenceLinksHandler(node: ASTNode): List { val linkLabel = node.children.find { it.type == MarkdownElementTypes.LINK_LABEL } ?: throw detailedException("Wrong AST Tree. Reference link does not contain link label", node) val linkText = node.children.findLast { it.type == MarkdownElementTypes.LINK_TEXT } ?: linkLabel @@ -190,7 +179,7 @@ open class MarkdownParser( return linksHandler(linkText, link) } - private fun inlineLinksHandler(node: ASTNode): DocTag { + private fun inlineLinksHandler(node: ASTNode): List { val linkText = node.children.find { it.type == MarkdownElementTypes.LINK_TEXT } ?: throw detailedException("Wrong AST Tree. Inline link does not contain link text", node) val linkDestination = node.children.find { it.type == MarkdownElementTypes.LINK_DESTINATION } @@ -208,13 +197,13 @@ open class MarkdownParser( children = node.children.evaluateChildren() ) - private fun autoLinksHandler(node: ASTNode): DocTag { + private fun autoLinksHandler(node: ASTNode): List { val link = text.substring(node.startOffset + 1, node.endOffset - 1) return linksHandler(node, link) } - private fun linksHandler(linkText: ASTNode, link: String?, linkTitle: ASTNode? = null): DocTag { + private fun linksHandler(linkText: ASTNode, link: String?, linkTitle: ASTNode? = null): List { val dri: DRI? = link?.let { resolveDRI(it) } val linkOrEmpty = link ?: "" val linkTextString = @@ -247,9 +236,10 @@ open class MarkdownParser( body = text.substring(node.startOffset, node.endOffset) ) - private fun textHandler(node: ASTNode) = DocTagsFromIElementFactory.getInstance( + private fun textHandler(node: ASTNode, keepAllFormatting: Boolean) = DocTagsFromIElementFactory.getInstance( MarkdownTokenTypes.TEXT, - body = text.substring(node.startOffset, node.endOffset).transform() + body = text.substring(node.startOffset, node.endOffset).transform(), + keepFormatting = keepAllFormatting ) private fun strikeThroughHandler(node: ASTNode) = DocTagsFromIElementFactory.getInstance( @@ -284,7 +274,7 @@ open class MarkdownParser( false } - private fun imagesHandler(node: ASTNode): DocTag = + private fun imagesHandler(node: ASTNode): List = with(node.children.last().children) { val destination = find { it.type == MarkdownElementTypes.LINK_DESTINATION } val description = find { it.type == MarkdownElementTypes.LINK_TEXT } @@ -304,7 +294,7 @@ open class MarkdownParser( } - private fun rawHtmlHandler(node: ASTNode): DocTag = + private fun rawHtmlHandler(node: ASTNode): List = DocTagsFromIElementFactory.getInstance( node.type, body = text.substring(node.startOffset, node.endOffset) @@ -313,12 +303,9 @@ open class MarkdownParser( private fun codeSpansHandler(node: ASTNode) = DocTagsFromIElementFactory.getInstance( node.type, - children = listOf( - DocTagsFromIElementFactory.getInstance( - MarkdownTokenTypes.TEXT, - body = text.substring(node.startOffset + 1, node.endOffset - 1).replace('\n', ' ').trimIndent() - ) - + children = DocTagsFromIElementFactory.getInstance( + MarkdownTokenTypes.TEXT, + body = text.substring(node.startOffset + 1, node.endOffset - 1).replace('\n', ' ').trimIndent() ) ) @@ -334,7 +321,7 @@ open class MarkdownParser( LeafASTNode(MarkdownTokenTypes.HARD_LINE_BREAK, 0, 0) else it - }.evaluateChildren(), + }.evaluateChildren(keepAllFormatting = true), params = node .children .find { it.type == MarkdownTokenTypes.FENCE_LANG } @@ -343,7 +330,7 @@ open class MarkdownParser( ) private fun codeBlocksHandler(node: ASTNode) = - DocTagsFromIElementFactory.getInstance(node.type, children = node.children.mergeLeafASTNodes().map { + DocTagsFromIElementFactory.getInstance(node.type, children = node.children.mergeLeafASTNodes().flatMap { DocTagsFromIElementFactory.getInstance( MarkdownTokenTypes.TEXT, body = text.substring(it.startOffset, it.endOffset) @@ -356,7 +343,7 @@ open class MarkdownParser( children = node.children.evaluateChildren() ) - private fun visitNode(node: ASTNode): DocTag = + private fun visitNode(node: ASTNode, keepAllFormatting: Boolean = false): List = when (node.type) { MarkdownElementTypes.ATX_1, MarkdownElementTypes.ATX_2, @@ -389,7 +376,7 @@ open class MarkdownParser( MarkdownTokenTypes.CODE_FENCE_CONTENT, MarkdownTokenTypes.CODE_LINE, -> codeLineHandler(node) - MarkdownTokenTypes.TEXT -> textHandler(node) + MarkdownTokenTypes.TEXT -> textHandler(node, keepAllFormatting) MarkdownElementTypes.MARKDOWN_FILE -> markdownFileHandler(node) GFMElementTypes.STRIKETHROUGH -> strikeThroughHandler(node) GFMElementTypes.TABLE -> tableHandler(node) @@ -402,18 +389,22 @@ open class MarkdownParser( private fun List.filterTabSeparators() = this.filterNot { it.type == GFMTokenTypes.TABLE_SEPARATOR } - private fun List.evaluateChildren(): List = - this.removeUselessTokens().swapImagesThatShouldBeLinks().mergeLeafASTNodes().map { visitNode(it) } + private fun List.evaluateChildren(keepAllFormatting: Boolean = false): List = + this.removeUselessTokens().swapImagesThatShouldBeLinks(keepAllFormatting).mergeLeafASTNodes().flatMap { visitNode(it, keepAllFormatting) } - private fun List.swapImagesThatShouldBeLinks(): List = - flatMap { node -> - if (node.type == MarkdownElementTypes.IMAGE - && node.children.firstOrNull()?.let { it is LeafASTNode && it.type.name == "!" } == true - && node.children.lastOrNull()?.type == MarkdownElementTypes.SHORT_REFERENCE_LINK - ) { - node.children - } else { - listOf(node) + private fun List.swapImagesThatShouldBeLinks(keepAllFormatting: Boolean): List = + if (keepAllFormatting) { + this + } else { + flatMap { node -> + if (node.type == MarkdownElementTypes.IMAGE + && node.children.firstOrNull()?.let { it is LeafASTNode && it.type.name == "!" } == true + && node.children.lastOrNull()?.type == MarkdownElementTypes.SHORT_REFERENCE_LINK + ) { + node.children + } else { + listOf(node) + } } } diff --git a/plugins/base/src/main/kotlin/parsers/factories/DocTagsFromIElementFactory.kt b/plugins/base/src/main/kotlin/parsers/factories/DocTagsFromIElementFactory.kt index 9ee11732..a3cbcc2e 100644 --- a/plugins/base/src/main/kotlin/parsers/factories/DocTagsFromIElementFactory.kt +++ b/plugins/base/src/main/kotlin/parsers/factories/DocTagsFromIElementFactory.kt @@ -6,12 +6,15 @@ import org.intellij.markdown.MarkdownElementTypes import org.intellij.markdown.MarkdownTokenTypes import org.intellij.markdown.flavours.gfm.GFMElementTypes import org.intellij.markdown.flavours.gfm.GFMTokenTypes +import org.jetbrains.dokka.base.translators.parseWithNormalisedSpaces import org.jetbrains.dokka.links.DRI import org.jetbrains.dokka.model.doc.DocTag.Companion.contentTypeParam import java.lang.NullPointerException object DocTagsFromIElementFactory { - fun getInstance(type: IElementType, children: List = emptyList(), params: Map = emptyMap(), body: String? = null, dri: DRI? = null) = + + @Suppress("IMPLICIT_CAST_TO_ANY") + fun getInstance(type: IElementType, children: List = emptyList(), params: Map = emptyMap(), body: String? = null, dri: DRI? = null, keepFormatting: Boolean = false) = when(type) { MarkdownElementTypes.SHORT_REFERENCE_LINK, MarkdownElementTypes.FULL_REFERENCE_LINK, @@ -33,7 +36,11 @@ object DocTagsFromIElementFactory { MarkdownElementTypes.ORDERED_LIST -> Ol(children, params) MarkdownElementTypes.UNORDERED_LIST -> Ul(children, params) MarkdownElementTypes.PARAGRAPH -> P(children, params) - MarkdownTokenTypes.TEXT -> Text(body ?: throw NullPointerException("Text body should be at least empty string passed to DocNodes factory!"), children, params ) + MarkdownTokenTypes.TEXT -> if (keepFormatting) Text( + body.orEmpty(), + children, + params + ) else body?.parseWithNormalisedSpaces(renderWhiteCharactersAsSpaces = false).orEmpty() MarkdownTokenTypes.HORIZONTAL_RULE -> HorizontalRule MarkdownTokenTypes.HARD_LINE_BREAK -> Br GFMElementTypes.STRIKETHROUGH -> Strikethrough(children, params) @@ -46,5 +53,10 @@ object DocTagsFromIElementFactory { MarkdownTokenTypes.HTML_TAG, MarkdownTokenTypes.HTML_BLOCK_CONTENT -> Text(body.orEmpty(), params = params + contentTypeParam("html")) else -> CustomDocTag(children, params, type.name) + }.let { + when (it) { + is List<*> -> it as List + else -> listOf(it as DocTag) + } } } diff --git a/plugins/base/src/main/kotlin/translators/parseWithNormalisedSpaces.kt b/plugins/base/src/main/kotlin/translators/parseWithNormalisedSpaces.kt new file mode 100644 index 00000000..4bb60f1a --- /dev/null +++ b/plugins/base/src/main/kotlin/translators/parseWithNormalisedSpaces.kt @@ -0,0 +1,50 @@ +package org.jetbrains.dokka.base.translators + +import org.intellij.markdown.lexer.Compat.codePointToString +import org.intellij.markdown.lexer.Compat.forEachCodePoint +import org.jetbrains.dokka.model.doc.DocTag +import org.jetbrains.dokka.model.doc.DocTag.Companion.contentTypeParam +import org.jetbrains.dokka.model.doc.Text +import org.jsoup.Jsoup +import org.jsoup.internal.StringUtil +import org.jsoup.nodes.Entities + +internal fun String.parseHtmlEncodedWithNormalisedSpaces( + renderWhiteCharactersAsSpaces: Boolean +): List { + val accum = StringBuilder() + val tags = mutableListOf() + var lastWasWhite = false + + forEachCodePoint { c -> + if (renderWhiteCharactersAsSpaces && StringUtil.isWhitespace(c)) { + if (!lastWasWhite) { + accum.append(' ') + lastWasWhite = true + } + } else if (codePointToString(c).let { it != Entities.escape(it) }) { + accum.toString().takeIf { it.isNotBlank() }?.let { tags.add(Text(it)) } + accum.delete(0, accum.length) + + accum.appendCodePoint(c) + tags.add(Text(accum.toString(), params = contentTypeParam("html"))) + accum.delete(0, accum.length) + } else if (!StringUtil.isInvisibleChar(c)) { + accum.appendCodePoint(c) + lastWasWhite = false + } + } + accum.toString().takeIf { it.isNotBlank() }?.let { tags.add(Text(it)) } + return tags +} + +/** + * Parses string into [Text] doc tags that can have either value of the string or html-encoded value with content-type=html parameter. + * Content type is added when dealing with html entries like ` ` + */ +internal fun String.parseWithNormalisedSpaces( + renderWhiteCharactersAsSpaces: Boolean +): List = + //parsing it using jsoup is required to get codePoints, otherwise they are interpreted separately, as chars + //But we dont need to do it for java as it is already parsed with jsoup + Jsoup.parseBodyFragment(this).body().wholeText().parseHtmlEncodedWithNormalisedSpaces(renderWhiteCharactersAsSpaces) \ No newline at end of file diff --git a/plugins/base/src/main/kotlin/translators/psi/parsers/JavadocParser.kt b/plugins/base/src/main/kotlin/translators/psi/parsers/JavadocParser.kt index 53424ef9..ce022dd7 100644 --- a/plugins/base/src/main/kotlin/translators/psi/parsers/JavadocParser.kt +++ b/plugins/base/src/main/kotlin/translators/psi/parsers/JavadocParser.kt @@ -8,12 +8,14 @@ import com.intellij.psi.impl.source.tree.LazyParseablePsiElement import com.intellij.psi.impl.source.tree.LeafPsiElement import com.intellij.psi.javadoc.* import org.intellij.markdown.MarkdownElementTypes +import org.intellij.markdown.lexer.Compat.forEachCodePoint import org.jetbrains.dokka.analysis.DokkaResolutionFacade import org.jetbrains.dokka.analysis.from import org.jetbrains.dokka.base.parsers.MarkdownParser +import org.jetbrains.dokka.base.translators.parseHtmlEncodedWithNormalisedSpaces +import org.jetbrains.dokka.base.translators.parseWithNormalisedSpaces import org.jetbrains.dokka.links.DRI import org.jetbrains.dokka.model.doc.* -import org.jetbrains.dokka.model.doc.Deprecated import org.jetbrains.dokka.utilities.DokkaLogger import org.jetbrains.dokka.utilities.enumValueOrNull import org.jetbrains.kotlin.idea.kdoc.resolveKDocLink @@ -22,7 +24,9 @@ import org.jetbrains.kotlin.idea.util.CommentSaver.Companion.tokenType import org.jetbrains.kotlin.psi.psiUtil.getNextSiblingIgnoringWhitespace import org.jetbrains.kotlin.psi.psiUtil.siblings import org.jsoup.Jsoup +import org.jsoup.internal.StringUtil import org.jsoup.nodes.Element +import org.jsoup.nodes.Entities import org.jsoup.nodes.Node import org.jsoup.nodes.TextNode import java.util.* @@ -402,8 +406,11 @@ class JavadocParser( } private fun convertHtmlNode(node: Node, insidePre: Boolean = false): List = when (node) { - is TextNode -> (if (insidePre) node.wholeText else node.text() - .takeIf { it.isNotBlank() })?.let { listOf(Text(body = it)) }.orEmpty() + is TextNode -> (if (insidePre) { + node.wholeText.takeIf { it.isNotBlank() }?.let { listOf(Text(body = it)) } + } else { + node.wholeText.parseHtmlEncodedWithNormalisedSpaces(renderWhiteCharactersAsSpaces = true) + }).orEmpty() is Element -> createBlock(node) else -> emptyList() } diff --git a/plugins/base/src/test/kotlin/content/ContentInDescriptionTest.kt b/plugins/base/src/test/kotlin/content/ContentInDescriptionTest.kt new file mode 100644 index 00000000..1a3bda5e --- /dev/null +++ b/plugins/base/src/test/kotlin/content/ContentInDescriptionTest.kt @@ -0,0 +1,113 @@ +package content + +import junit.framework.Assert.assertEquals +import org.jetbrains.dokka.base.testApi.testRunner.BaseAbstractTest +import org.jetbrains.dokka.model.doc.CustomDocTag +import org.jetbrains.dokka.model.doc.Description +import org.jetbrains.dokka.model.doc.P +import org.jetbrains.dokka.model.doc.Text +import org.junit.jupiter.api.Test +import kotlin.test.assertTrue + +class ContentInDescriptionTest : BaseAbstractTest() { + private val configuration = dokkaConfiguration { + sourceSets { + sourceSet { + sourceRoots = listOf("src/") + analysisPlatform = "jvm" + classpath += jvmStdlibPath!! + } + } + } + + val expectedDescription = Description( + CustomDocTag( + listOf( + P( + listOf( + Text("Hello World! Docs with period issue, e.g."), + Text(String(Character.toChars(160)), params = mapOf("content-type" to "html")), + Text("this.") + ) + ) + ), + params = emptyMap(), + name = "MARKDOWN_FILE" + ) + ) + + @Test + fun `nbsp is handled as code in kotlin`() { + testInline( + """ + |/src/main/kotlin/sample/ParentKt.kt + |package sample; + |/** + | * Hello World! Docs with period issue, e.g. this. + | */ + |public class ParentKt { + |} + """.trimIndent(), configuration + ) { + documentablesMergingStage = { + val classlike = it.packages.flatMap { it.classlikes }.find { it.name == "ParentKt" } + + assertTrue(classlike != null) + assertEquals(expectedDescription, classlike.documentation.values.first().children.first()) + } + } + } + + @Test + fun `nbsp is handled as code in java`() { + testInline( + """ + |/src/main/kotlin/sample/Parent.java + |package sample; + |/** + | * Hello World! Docs with period issue, e.g. this. + | */ + |public class Parent { + |} + """.trimIndent(), configuration + ) { + documentablesMergingStage = { + val classlike = it.packages.flatMap { it.classlikes }.find { it.name == "Parent" } + + assertTrue(classlike != null) + assertEquals(expectedDescription, classlike.documentation.values.first().children.first()) + } + } + } + + @Test + fun `same documentation in java and kotlin when nbsp is present`() { + testInline( + """ + |/src/main/kotlin/sample/Parent.java + |package sample; + |/** + | * Hello World! Docs with period issue, e.g. this. + | */ + |public class Parent { + |} + | + |/src/main/kotlin/sample/ParentKt.kt + |package sample; + |/** + | * Hello World! Docs with period issue, e.g. this. + | */ + |public class ParentKt { + |} + """.trimIndent(), + configuration + ) { + documentablesMergingStage = { module -> + val java = module.packages.flatMap { it.classlikes }.first { it.name == "Parent" } + val kotlin = module.packages.flatMap { it.classlikes }.first { it.name == "ParentKt" } + + assertEquals(java.documentation.values.first(), kotlin.documentation.values.first()) + } + } + } +} \ No newline at end of file -- cgit