diff options
3 files changed, 94 insertions, 2 deletions
diff --git a/gradle/libs.versions.toml b/gradle/libs.versions.toml
index c7202839..e3f190fb 100644
--- a/gradle/libs.versions.toml
+++ b/gradle/libs.versions.toml
@@ -26,7 +26,7 @@ korlibs-template = "4.0.10"
 kotlinx-html = "0.9.1"
 
 ## Markdown
-jetbrains-markdown = "0.3.1"
+jetbrains-markdown = "0.5.2"
 
 ## JSON
 jackson = "2.12.7" # jackson 2.13.X does not support kotlin language version 1.4, check before updating
diff --git a/plugins/base/src/test/kotlin/markdown/ParserTest.kt b/plugins/base/src/test/kotlin/markdown/ParserTest.kt
index 8e2d71eb..bcca27c4 100644
--- a/plugins/base/src/test/kotlin/markdown/ParserTest.kt
+++ b/plugins/base/src/test/kotlin/markdown/ParserTest.kt
@@ -1573,7 +1573,60 @@ class ParserTest : KDocTest() {
             P(listOf(Text(" sdsdsds sdd"))),
             P(listOf(Text(" eweww ")))
         )
-        print(expectedDocumentationNode)
+        assertEquals(actualDocumentationNode, expectedDocumentationNode)
+    }
+
+    @Test // exists due to #3231
+    fun `should ignore the leading whitespace in header in-between the hash symbol and header text`() {
+        val markdown = """
+            | # first header
+            | ## second header
+            | ### third header
+        """.trimMargin()
+        val actualDocumentationNode = parseMarkdownToDocNode(markdown).children
+        val expectedDocumentationNode = listOf(
+            H1(listOf(Text("first header"))),
+            H2(listOf(Text("second header"))),
+            H3(listOf(Text("third header"))),
+        )
+        assertEquals(actualDocumentationNode, expectedDocumentationNode)
+    }
+
+    @Test // exists due to #3231
+    fun `should ignore trailing whitespace in header`() {
+        val markdown = """
+            | # first header
+            | ## second header
+            | ### third header
+        """.trimMargin()
+        val actualDocumentationNode = parseMarkdownToDocNode(markdown).children
+        val expectedDocumentationNode = listOf(
+            H1(listOf(Text("first header"))),
+            H2(listOf(Text("second header"))),
+            H3(listOf(Text("third header"))),
+        )
+        assertEquals(actualDocumentationNode, expectedDocumentationNode)
+    }
+
+    @Test // exists due to #3231
+    fun `should ignore leading and trailing whitespace in header, but not whitespace in the middle`() {
+        val markdown = """
+            | # first header
+            | ## second ~~header~~ in a **long** sentence ending with whitespaces
+            | ### third header
+        """.trimMargin()
+        val actualDocumentationNode = parseMarkdownToDocNode(markdown).children
+        val expectedDocumentationNode = listOf(
+            H1(listOf(Text("first header"))),
+            H2(listOf(
+                Text("second "),
+                Strikethrough(listOf(Text("header"))),
+                Text(" in a "),
+                B(listOf(Text("long"))),
+                Text(" sentence ending with whitespaces")
+            )),
+            H3(listOf(Text("third header"))),
+        )
         assertEquals(actualDocumentationNode, expectedDocumentationNode)
     }
 }
diff --git a/subprojects/analysis-markdown-jb/src/main/kotlin/org/jetbrains/dokka/analysis/markdown/jb/MarkdownParser.kt b/subprojects/analysis-markdown-jb/src/main/kotlin/org/jetbrains/dokka/analysis/markdown/jb/MarkdownParser.kt
index 256c7b98..130c6def 100644
--- a/subprojects/analysis-markdown-jb/src/main/kotlin/org/jetbrains/dokka/analysis/markdown/jb/MarkdownParser.kt
+++ b/subprojects/analysis-markdown-jb/src/main/kotlin/org/jetbrains/dokka/analysis/markdown/jb/MarkdownParser.kt
@@ -77,6 +77,44 @@ public open class MarkdownParser(
             ).flatMap { it.children }
         )
 
+    /**
+     * Handler for [MarkdownTokenTypes.ATX_CONTENT], which is the content of the header
+     * elements like [MarkdownElementTypes.ATX_1], [MarkdownElementTypes.ATX_2] and so on.
+     *
+     * For example, a header line like `# Header text` is expected to be parsed into:
+     * - One [MarkdownTokenTypes.ATX_HEADER] with startOffset = 0, endOffset = 1 (only the `#` symbol)
+     * - Composite [MarkdownTokenTypes.ATX_CONTENT] with four children: WHITE_SPACE, TEXT, WHITE_SPACE, TEXT.
+     */
+    private fun headerContentHandler(node: ASTNode): List<DocTag> {
+        // ATX_CONTENT contains everything after the `#` symbol, so if there's a space
+        // in-between the `#` symbol and the text (like `# header`), it will be present here too.
+        // However, we don't need the leading space between the `#` symbol and the text, nor do we need trailing spaces,
+        // so we just skip them (otherwise the header text will be parsed as `<whitespace>header` instead of `header`).
+        // If there's more space between `#` and text, like `#   header`, it will still be a single WHITE_SPACE
+        // element, but it will be wider, so the solution below should still hold. The same applies to trailing spaces.
+        val trimmedChildren = node.children.trimWhitespaceToken()
+
+        val children = trimmedChildren.evaluateChildren()
+        return DocTagsFromIElementFactory.getInstance(
+            MarkdownElementTypes.PARAGRAPH, // PARAGRAPH instead of TEXT to preserve compatibility with prev. versions
+            children = children
+        )
+    }
+
+    /**
+     * @return a sublist of [this] list that does not contain leading and trailing
+     * [MarkdownTokenTypes.WHITE_SPACE] elements; an empty list if [this] holds nothing but whitespace
+     */
+    private fun List<ASTNode>.trimWhitespaceToken(): List<ASTNode> {
+        val firstNonWhitespaceIndex = this.indexOfFirst { it.type != MarkdownTokenTypes.WHITE_SPACE }
+        if (firstNonWhitespaceIndex == -1) {
+            return emptyList()
+        }
+        val lastNonWhitespaceIndex = this.indexOfLast { it.type != MarkdownTokenTypes.WHITE_SPACE }
+
+        return this.subList(firstNonWhitespaceIndex, lastNonWhitespaceIndex + 1)
+    }
+
     private fun horizontalRulesHandler() =
         DocTagsFromIElementFactory.getInstance(MarkdownTokenTypes.HORIZONTAL_RULE)
 
@@ -365,6 +403,7 @@ public open class MarkdownParser(
             MarkdownElementTypes.ATX_5,
             MarkdownElementTypes.ATX_6,
             -> headersHandler(node)
+            MarkdownTokenTypes.ATX_CONTENT -> headerContentHandler(node)
             MarkdownTokenTypes.HORIZONTAL_RULE -> horizontalRulesHandler()
             MarkdownElementTypes.STRONG -> strongHandler(node)
             MarkdownElementTypes.EMPH -> emphasisHandler(node)