aboutsummaryrefslogtreecommitdiff
path: root/subprojects/analysis-markdown-jb/src
diff options
context:
space:
mode:
Diffstat (limited to 'subprojects/analysis-markdown-jb/src')
-rw-r--r--subprojects/analysis-markdown-jb/src/main/kotlin/org/jetbrains/dokka/analysis/markdown/jb/MarkdownApi.kt8
-rw-r--r--subprojects/analysis-markdown-jb/src/main/kotlin/org/jetbrains/dokka/analysis/markdown/jb/MarkdownParser.kt511
-rw-r--r--subprojects/analysis-markdown-jb/src/main/kotlin/org/jetbrains/dokka/analysis/markdown/jb/ParseUtils.kt39
-rw-r--r--subprojects/analysis-markdown-jb/src/main/kotlin/org/jetbrains/dokka/analysis/markdown/jb/Parser.kt131
-rw-r--r--subprojects/analysis-markdown-jb/src/main/kotlin/org/jetbrains/dokka/analysis/markdown/jb/factories/DocTagsFromIElementFactory.kt86
5 files changed, 775 insertions, 0 deletions
diff --git a/subprojects/analysis-markdown-jb/src/main/kotlin/org/jetbrains/dokka/analysis/markdown/jb/MarkdownApi.kt b/subprojects/analysis-markdown-jb/src/main/kotlin/org/jetbrains/dokka/analysis/markdown/jb/MarkdownApi.kt
new file mode 100644
index 00000000..e8738190
--- /dev/null
+++ b/subprojects/analysis-markdown-jb/src/main/kotlin/org/jetbrains/dokka/analysis/markdown/jb/MarkdownApi.kt
@@ -0,0 +1,8 @@
+package org.jetbrains.dokka.analysis.markdown.jb
+
+import org.intellij.markdown.MarkdownElementTypes
+import org.jetbrains.dokka.InternalDokkaApi
+
+// TODO [beresnev] move/rename if it's only used for CustomDocTag. for now left as is for compatibility
+@InternalDokkaApi
+val MARKDOWN_ELEMENT_FILE_NAME = MarkdownElementTypes.MARKDOWN_FILE.name
diff --git a/subprojects/analysis-markdown-jb/src/main/kotlin/org/jetbrains/dokka/analysis/markdown/jb/MarkdownParser.kt b/subprojects/analysis-markdown-jb/src/main/kotlin/org/jetbrains/dokka/analysis/markdown/jb/MarkdownParser.kt
new file mode 100644
index 00000000..165ca4c6
--- /dev/null
+++ b/subprojects/analysis-markdown-jb/src/main/kotlin/org/jetbrains/dokka/analysis/markdown/jb/MarkdownParser.kt
@@ -0,0 +1,511 @@
+package org.jetbrains.dokka.analysis.markdown.jb
+
+import org.intellij.markdown.MarkdownElementTypes
+import org.intellij.markdown.MarkdownTokenTypes
+import org.intellij.markdown.ast.ASTNode
+import org.intellij.markdown.ast.CompositeASTNode
+import org.intellij.markdown.ast.LeafASTNode
+import org.intellij.markdown.ast.impl.ListItemCompositeNode
+import org.intellij.markdown.flavours.gfm.GFMElementTypes
+import org.intellij.markdown.flavours.gfm.GFMFlavourDescriptor
+import org.intellij.markdown.flavours.gfm.GFMTokenTypes
+import org.intellij.markdown.html.HtmlGenerator
+import org.jetbrains.dokka.InternalDokkaApi
+import org.jetbrains.dokka.analysis.markdown.jb.factories.DocTagsFromIElementFactory
+import org.jetbrains.dokka.links.DRI
+import org.jetbrains.dokka.links.PointingToDeclaration
+import org.jetbrains.dokka.model.doc.*
+import java.net.MalformedURLException
+import java.net.URL
+import org.intellij.markdown.parser.MarkdownParser as IntellijMarkdownParser
+
+@InternalDokkaApi
+open class MarkdownParser(
+ private val externalDri: (String) -> DRI?,
+ private val kdocLocation: String?,
+) : Parser() {
+
+ private lateinit var destinationLinksMap: Map<String, String>
+ private lateinit var text: String
+
+ override fun parseStringToDocNode(extractedString: String): DocTag {
+ val gfmFlavourDescriptor = GFMFlavourDescriptor()
+ val markdownAstRoot = IntellijMarkdownParser(gfmFlavourDescriptor).buildMarkdownTreeFromString(extractedString)
+ destinationLinksMap = getAllDestinationLinks(extractedString, markdownAstRoot).toMap()
+ text = extractedString
+
+ val parsed = visitNode(markdownAstRoot)
+ if (parsed.size == 1) {
+ return parsed.first()
+ }
+ return CustomDocTag(children = parsed, params = emptyMap(), name = "")
+ }
+
+ override fun preparse(text: String) = text.replace("\r\n", "\n").replace("\r", "\n")
+
+ override fun parseTagWithBody(tagName: String, content: String): TagWrapper =
+ when (tagName) {
+ "see" -> {
+ val referencedName = content.substringBefore(' ')
+ val dri = externalDri(referencedName)
+ See(
+ parseStringToDocNode(content.substringAfter(' ')),
+ dri?.fqDeclarationName() ?: referencedName,
+ dri
+ )
+ }
+ "throws", "exception" -> {
+ val dri = externalDri(content.substringBefore(' '))
+ Throws(
+ parseStringToDocNode(content.substringAfter(' ')),
+ dri?.fqDeclarationName() ?: content.substringBefore(' '),
+ dri
+ )
+ }
+ else -> super.parseTagWithBody(tagName, content)
+ }
+
+ private fun headersHandler(node: ASTNode) =
+ DocTagsFromIElementFactory.getInstance(
+ node.type,
+ visitNode(node.children.find { it.type == MarkdownTokenTypes.ATX_CONTENT }
+ ?: throw detailedException("Wrong AST Tree. Header does not contain expected content", node)
+ ).flatMap { it.children }
+ )
+
+ private fun horizontalRulesHandler() =
+ DocTagsFromIElementFactory.getInstance(MarkdownTokenTypes.HORIZONTAL_RULE)
+
+ private fun emphasisHandler(node: ASTNode) =
+ DocTagsFromIElementFactory.getInstance(
+ node.type,
+ children = node.children.evaluateChildrenWithDroppedEnclosingTokens(1)
+ )
+
+ private fun strongHandler(node: ASTNode) =
+ DocTagsFromIElementFactory.getInstance(
+ node.type,
+ children = node.children.evaluateChildrenWithDroppedEnclosingTokens(2)
+ )
+
+ private fun List<ASTNode>.evaluateChildrenWithDroppedEnclosingTokens(count: Int) =
+ drop(count).dropLast(count).evaluateChildren()
+
+ private fun blockquotesHandler(node: ASTNode) =
+ DocTagsFromIElementFactory.getInstance(
+ node.type, children = node.children
+ .filterIsInstance<CompositeASTNode>()
+ .evaluateChildren()
+ )
+
+ private fun listsHandler(node: ASTNode): List<DocTag> {
+
+ val children = node.children.filterIsInstance<ListItemCompositeNode>().flatMap {
+ if (it.children.last().type in listOf(
+ MarkdownElementTypes.ORDERED_LIST,
+ MarkdownElementTypes.UNORDERED_LIST
+ )
+ ) {
+ val nestedList = it.children.last()
+ (it.children as MutableList).removeAt(it.children.lastIndex)
+ listOf(it, nestedList)
+ } else
+ listOf(it)
+ }
+
+ return DocTagsFromIElementFactory.getInstance(
+ node.type,
+ children =
+ children
+ .flatMap {
+ if (it.type == MarkdownElementTypes.LIST_ITEM)
+ DocTagsFromIElementFactory.getInstance(
+ it.type,
+ children = it
+ .children
+ .filterIsInstance<CompositeASTNode>()
+ .evaluateChildren()
+ )
+ else
+ visitNode(it)
+ },
+ params =
+ if (node.type == MarkdownElementTypes.ORDERED_LIST) {
+ val listNumberNode = node.children.first().children.first()
+ mapOf(
+ "start" to text.substring(
+ listNumberNode.startOffset,
+ listNumberNode.endOffset
+ ).trim().dropLast(1)
+ )
+ } else
+ emptyMap()
+ )
+ }
+
+ private fun resolveDRI(mdLink: String): DRI? =
+ mdLink
+ .removePrefix("[")
+ .removeSuffix("]")
+ .let { link ->
+ try {
+ URL(link)
+ null
+ } catch (e: MalformedURLException) {
+ externalDri(link)
+ }
+ }
+
+ private fun getAllDestinationLinks(text: String, node: ASTNode): List<Pair<String, String>> =
+ node.children
+ .filter { it.type == MarkdownElementTypes.LINK_DEFINITION }
+ .map {
+ text.substring(it.children[0].startOffset, it.children[0].endOffset).toLowerCase() to
+ text.substring(it.children[2].startOffset, it.children[2].endOffset)
+ } +
+ node.children.filterIsInstance<CompositeASTNode>().flatMap { getAllDestinationLinks(text, it) }
+
+
+ private fun referenceLinksHandler(node: ASTNode): List<DocTag> {
+ val linkLabel = node.children.find { it.type == MarkdownElementTypes.LINK_LABEL }
+ ?: throw detailedException("Wrong AST Tree. Reference link does not contain link label", node)
+ val linkText = node.children.findLast { it.type == MarkdownElementTypes.LINK_TEXT } ?: linkLabel
+
+ val linkKey = text.substring(linkLabel.startOffset, linkLabel.endOffset)
+
+ val link = destinationLinksMap[linkKey.toLowerCase()] ?: linkKey
+
+ return linksHandler(linkText, link)
+ }
+
+ private fun inlineLinksHandler(node: ASTNode): List<DocTag> {
+ val linkText = node.children.find { it.type == MarkdownElementTypes.LINK_TEXT }
+ ?: throw detailedException("Wrong AST Tree. Inline link does not contain link text", node)
+ val linkDestination = node.children.find { it.type == MarkdownElementTypes.LINK_DESTINATION }
+ val linkTitle = node.children.find { it.type == MarkdownElementTypes.LINK_TITLE }
+
+ // Link destination may be ommited: https://github.github.com/gfm/#example-495
+ val link = linkDestination?.let { text.substring(it.startOffset, it.endOffset) }
+
+ return linksHandler(linkText, link, linkTitle)
+ }
+
+ private fun markdownFileHandler(node: ASTNode) =
+ DocTagsFromIElementFactory.getInstance(
+ node.type,
+ children = node.children
+ .filterSpacesAndEOL()
+ .evaluateChildren()
+ )
+
+ private fun autoLinksHandler(node: ASTNode): List<DocTag> {
+ val link = text.substring(node.startOffset + 1, node.endOffset - 1)
+
+ return linksHandler(node, link)
+ }
+
+ private fun linksHandler(linkText: ASTNode, link: String?, linkTitle: ASTNode? = null): List<DocTag> {
+ val dri: DRI? = link?.let { resolveDRI(it) }
+ val linkOrEmpty = link ?: ""
+ val linkTextString =
+ if (linkTitle == null) linkOrEmpty else text.substring(linkTitle.startOffset + 1, linkTitle.endOffset - 1)
+
+ val params = if (linkTitle == null)
+ mapOf("href" to linkOrEmpty)
+ else
+ mapOf("href" to linkOrEmpty, "title" to linkTextString)
+
+ return if (link != null && dri == null && !linkOrEmpty.isRemoteLink()) {
+ DocTagsFromIElementFactory.getInstance(
+ MarkdownTokenTypes.TEXT,
+ params = params,
+ children = linkText.children.drop(1).dropLast(1).evaluateChildren(),
+ body = linkTextString.removeSurrounding("[", "]")
+ )
+ } else {
+ DocTagsFromIElementFactory.getInstance(
+ MarkdownElementTypes.INLINE_LINK,
+ params = params,
+ children = linkText.children.drop(1).dropLast(1).evaluateChildren(),
+ dri = dri
+ )
+ }
+ }
+
+ private fun codeLineHandler(node: ASTNode) = DocTagsFromIElementFactory.getInstance(
+ MarkdownElementTypes.CODE_BLOCK,
+ body = text.substring(node.startOffset, node.endOffset)
+ )
+
+ private fun textHandler(node: ASTNode, keepAllFormatting: Boolean) = DocTagsFromIElementFactory.getInstance(
+ MarkdownTokenTypes.TEXT,
+ body = text.substring(node.startOffset, node.endOffset).transform(),
+ keepFormatting = keepAllFormatting
+ )
+
+ private fun strikeThroughHandler(node: ASTNode) = DocTagsFromIElementFactory.getInstance(
+ node.type,
+ children = node.children.evaluateChildrenWithDroppedEnclosingTokens(2)
+ )
+
+ private fun tableHandler(node: ASTNode) = DocTagsFromIElementFactory.getInstance(
+ GFMElementTypes.TABLE,
+ children = node.children
+ .filter { it.type == GFMElementTypes.ROW || it.type == GFMElementTypes.HEADER }
+ .evaluateChildren()
+ )
+
+ private fun headerHandler(node: ASTNode) = DocTagsFromIElementFactory.getInstance(
+ GFMElementTypes.HEADER,
+ children = node.children
+ .filter { it.type == GFMTokenTypes.CELL }
+ .evaluateChildren()
+ )
+
+ private fun rowHandler(node: ASTNode) = DocTagsFromIElementFactory.getInstance(
+ GFMElementTypes.ROW,
+ children = node.children
+ .filter { it.type == GFMTokenTypes.CELL }
+ .evaluateChildren()
+ )
+
+ private fun cellHandler(node: ASTNode) = DocTagsFromIElementFactory.getInstance(
+ GFMTokenTypes.CELL,
+ children = node.children.filterTabSeparators().evaluateChildren().trimSurroundingTokensIfText()
+ )
+
+ private fun String.isRemoteLink() = try {
+ URL(this)
+ true
+ } catch (e: MalformedURLException) {
+ false
+ }
+
+ private fun imagesHandler(node: ASTNode): List<DocTag> =
+ with(node.children.last().children) {
+ val destination = find { it.type == MarkdownElementTypes.LINK_DESTINATION }
+ val description = find { it.type == MarkdownElementTypes.LINK_TEXT }
+
+ val src = destination?.let {
+ mapOf("href" to text.substring(it.startOffset, it.endOffset))
+ } ?: emptyMap()
+
+ val alt = description?.let {
+ mapOf("alt" to text.substring(it.startOffset + 1, it.endOffset - 1))
+ } ?: emptyMap()
+
+ return DocTagsFromIElementFactory.getInstance(
+ node.type,
+ params = src + alt
+ )
+ }
+
+
+ private fun rawHtmlHandler(node: ASTNode): List<DocTag> =
+ DocTagsFromIElementFactory.getInstance(
+ node.type,
+ body = text.substring(node.startOffset, node.endOffset)
+ )
+
+ private fun codeSpansHandler(node: ASTNode) =
+ DocTagsFromIElementFactory.getInstance(
+ node.type,
+ children = DocTagsFromIElementFactory.getInstance(
+ MarkdownTokenTypes.TEXT,
+ body = text.substring(node.startOffset + 1, node.endOffset - 1).replace('\n', ' ').trimIndent(),
+ keepFormatting = true
+ )
+ )
+
+ private fun codeFencesHandler(node: ASTNode) =
+ DocTagsFromIElementFactory.getInstance(
+ node.type,
+ children = node
+ .children
+ .dropWhile { it.type != MarkdownTokenTypes.CODE_FENCE_CONTENT }
+ .dropLastWhile { it.type != MarkdownTokenTypes.CODE_FENCE_CONTENT }
+ .filter { it.type != MarkdownTokenTypes.WHITE_SPACE }
+ .map {
+ if (it.type == MarkdownTokenTypes.EOL)
+ LeafASTNode(MarkdownTokenTypes.HARD_LINE_BREAK, 0, 0)
+ else
+ it
+ }.evaluateChildren(keepAllFormatting = true),
+ params = node
+ .children
+ .find { it.type == MarkdownTokenTypes.FENCE_LANG }
+ ?.let { mapOf("lang" to text.substring(it.startOffset, it.endOffset)) }
+ ?: emptyMap()
+ )
+
+ private fun codeBlocksHandler(node: ASTNode) =
+ DocTagsFromIElementFactory.getInstance(node.type, children = node.children.mergeLeafASTNodes().flatMap {
+ DocTagsFromIElementFactory.getInstance(
+ MarkdownTokenTypes.TEXT,
+ body = HtmlGenerator.trimIndents(text.substring(it.startOffset, it.endOffset), 4).toString()
+ )
+ })
+
+ private fun defaultHandler(node: ASTNode) =
+ DocTagsFromIElementFactory.getInstance(
+ MarkdownElementTypes.PARAGRAPH,
+ children = node.children.evaluateChildren()
+ )
+
+ private fun visitNode(node: ASTNode, keepAllFormatting: Boolean = false): List<DocTag> =
+ when (node.type) {
+ MarkdownElementTypes.ATX_1,
+ MarkdownElementTypes.ATX_2,
+ MarkdownElementTypes.ATX_3,
+ MarkdownElementTypes.ATX_4,
+ MarkdownElementTypes.ATX_5,
+ MarkdownElementTypes.ATX_6,
+ -> headersHandler(node)
+ MarkdownTokenTypes.HORIZONTAL_RULE -> horizontalRulesHandler()
+ MarkdownElementTypes.STRONG -> strongHandler(node)
+ MarkdownElementTypes.EMPH -> emphasisHandler(node)
+ MarkdownElementTypes.FULL_REFERENCE_LINK,
+ MarkdownElementTypes.SHORT_REFERENCE_LINK,
+ -> referenceLinksHandler(node)
+ MarkdownElementTypes.INLINE_LINK -> inlineLinksHandler(node)
+ MarkdownElementTypes.AUTOLINK -> autoLinksHandler(node)
+ MarkdownElementTypes.BLOCK_QUOTE -> blockquotesHandler(node)
+ MarkdownElementTypes.UNORDERED_LIST,
+ MarkdownElementTypes.ORDERED_LIST,
+ -> listsHandler(node)
+ MarkdownElementTypes.CODE_BLOCK -> codeBlocksHandler(node)
+ MarkdownElementTypes.CODE_FENCE -> codeFencesHandler(node)
+ MarkdownElementTypes.CODE_SPAN -> codeSpansHandler(node)
+ MarkdownElementTypes.IMAGE -> imagesHandler(node)
+ MarkdownElementTypes.HTML_BLOCK,
+ MarkdownTokenTypes.HTML_TAG,
+ MarkdownTokenTypes.HTML_BLOCK_CONTENT,
+ -> rawHtmlHandler(node)
+ MarkdownTokenTypes.HARD_LINE_BREAK -> DocTagsFromIElementFactory.getInstance(node.type)
+ MarkdownTokenTypes.CODE_FENCE_CONTENT,
+ MarkdownTokenTypes.CODE_LINE,
+ -> codeLineHandler(node)
+ MarkdownTokenTypes.TEXT -> textHandler(node, keepAllFormatting)
+ MarkdownElementTypes.MARKDOWN_FILE -> markdownFileHandler(node)
+ GFMElementTypes.STRIKETHROUGH -> strikeThroughHandler(node)
+ GFMElementTypes.TABLE -> tableHandler(node)
+ GFMElementTypes.HEADER -> headerHandler(node)
+ GFMElementTypes.ROW -> rowHandler(node)
+ GFMTokenTypes.CELL -> cellHandler(node)
+ else -> defaultHandler(node)
+ }
+
+ private fun List<ASTNode>.filterTabSeparators() =
+ this.filterNot { it.type == GFMTokenTypes.TABLE_SEPARATOR }
+
+ private fun List<ASTNode>.filterSpacesAndEOL() =
+ this.filterNot { it.type == MarkdownTokenTypes.WHITE_SPACE || it.type == MarkdownTokenTypes.EOL }
+
+ private fun List<ASTNode>.evaluateChildren(keepAllFormatting: Boolean = false): List<DocTag> =
+ this.removeUselessTokens().swapImagesThatShouldBeLinks(keepAllFormatting).mergeLeafASTNodes().flatMap { visitNode(it, keepAllFormatting) }
+
+ private fun List<ASTNode>.swapImagesThatShouldBeLinks(keepAllFormatting: Boolean): List<ASTNode> =
+ if (keepAllFormatting) {
+ this
+ } else {
+ flatMap { node ->
+ if (node.type == MarkdownElementTypes.IMAGE
+ && node.children.firstOrNull()?.let { it is LeafASTNode && it.type.name == "!" } == true
+ && node.children.lastOrNull()?.type == MarkdownElementTypes.SHORT_REFERENCE_LINK
+ ) {
+ node.children
+ } else {
+ listOf(node)
+ }
+ }
+ }
+
+ private fun List<ASTNode>.removeUselessTokens(): List<ASTNode> =
+ this.filterIndexed { index, node ->
+ !(node.type == MarkdownElementTypes.LINK_DEFINITION || (
+ node.type == MarkdownTokenTypes.EOL &&
+ this.getOrNull(index - 1)?.type == MarkdownTokenTypes.HARD_LINE_BREAK
+ ))
+ }
+
+ private fun List<DocTag>.trimSurroundingTokensIfText() = mapIndexed { index, elem ->
+ val elemTransformed = if (index == 0 && elem is Text) elem.copy(elem.body.trimStart()) else elem
+ if (index == lastIndex && elemTransformed is Text) elemTransformed.copy(elemTransformed.body.trimEnd()) else elemTransformed
+ }
+
+ private val notLeafNodes = listOf(
+ MarkdownTokenTypes.HORIZONTAL_RULE,
+ MarkdownTokenTypes.HARD_LINE_BREAK,
+ MarkdownTokenTypes.HTML_TAG,
+ MarkdownTokenTypes.HTML_BLOCK_CONTENT
+ )
+
+ private fun ASTNode.isNotLeaf() = this is CompositeASTNode || this.type in notLeafNodes
+
+ private fun List<ASTNode>.isNotLeaf(index: Int): Boolean =
+ if (index in 0..this.lastIndex)
+ this[index].isNotLeaf()
+ else
+ false
+
+ private fun List<ASTNode>.mergeLeafASTNodes(): List<ASTNode> {
+ val children: MutableList<ASTNode> = mutableListOf()
+ var index = 0
+ while (index <= this.lastIndex) {
+ if (this.isNotLeaf(index)) {
+ children += this[index]
+ } else {
+ val startOffset = this[index].startOffset
+ val sIndex = index
+ while (index < this.lastIndex) {
+ if (this.isNotLeaf(index + 1) || this[index + 1].startOffset != this[index].endOffset) {
+ children += mergedLeafNode(this, index, startOffset, sIndex)
+ break
+ }
+ index++
+ }
+ if (index == this.lastIndex) {
+ children += mergedLeafNode(this, index, startOffset, sIndex)
+ }
+ }
+ index++
+ }
+ return children
+ }
+
+ private fun mergedLeafNode(nodes: List<ASTNode>, index: Int, startOffset: Int, sIndex: Int): LeafASTNode {
+ val endOffset = nodes[index].endOffset
+ val type = if (nodes.subList(sIndex, index)
+ .any { it.type == MarkdownTokenTypes.CODE_LINE }
+ ) MarkdownTokenTypes.CODE_LINE else MarkdownTokenTypes.TEXT
+ return LeafASTNode(type, startOffset, endOffset)
+ }
+
+ private fun String.transform() = this
+ .replace(Regex("\n\n+"), "") // Squashing new lines between paragraphs
+ .replace(Regex("\n"), " ")
+ .replace(Regex(" >+ +"), " ") // Replacement used in blockquotes, get rid of garbage
+
+ private fun detailedException(baseMessage: String, node: ASTNode) =
+ IllegalStateException(
+ baseMessage + " in ${kdocLocation ?: "unspecified location"}, element starts from offset ${node.startOffset} and ends ${node.endOffset}: ${
+ text.substring(
+ node.startOffset,
+ node.endOffset
+ )
+ }"
+ )
+
+
+ companion object {
+ fun DRI.fqDeclarationName(): String? {
+ if (this.target !is PointingToDeclaration) {
+ return null
+ }
+ return listOfNotNull(this.packageName, this.classNames, this.callable?.name)
+ .joinToString(separator = ".")
+ .takeIf { it.isNotBlank() }
+ }
+ }
+}
+
diff --git a/subprojects/analysis-markdown-jb/src/main/kotlin/org/jetbrains/dokka/analysis/markdown/jb/ParseUtils.kt b/subprojects/analysis-markdown-jb/src/main/kotlin/org/jetbrains/dokka/analysis/markdown/jb/ParseUtils.kt
new file mode 100644
index 00000000..7f520591
--- /dev/null
+++ b/subprojects/analysis-markdown-jb/src/main/kotlin/org/jetbrains/dokka/analysis/markdown/jb/ParseUtils.kt
@@ -0,0 +1,39 @@
+package org.jetbrains.dokka.analysis.markdown.jb
+
+import org.intellij.markdown.lexer.Compat
+import org.intellij.markdown.lexer.Compat.forEachCodePoint
+import org.jetbrains.dokka.InternalDokkaApi
+import org.jetbrains.dokka.model.doc.DocTag
+import org.jetbrains.dokka.model.doc.Text
+import org.jsoup.internal.StringUtil
+import org.jsoup.nodes.Entities
+
+@InternalDokkaApi
+fun String.parseHtmlEncodedWithNormalisedSpaces(
+ renderWhiteCharactersAsSpaces: Boolean
+): List<DocTag> {
+ val accum = StringBuilder()
+ val tags = mutableListOf<DocTag>()
+ var lastWasWhite = false
+
+ forEachCodePoint { c ->
+ if (renderWhiteCharactersAsSpaces && StringUtil.isWhitespace(c)) {
+ if (!lastWasWhite) {
+ accum.append(' ')
+ lastWasWhite = true
+ }
+ } else if (Compat.codePointToString(c).let { it != Entities.escape(it) }) {
+ accum.toString().takeIf { it.isNotBlank() }?.let { tags.add(Text(it)) }
+ accum.delete(0, accum.length)
+
+ accum.appendCodePoint(c)
+ tags.add(Text(accum.toString(), params = DocTag.contentTypeParam("html")))
+ accum.delete(0, accum.length)
+ } else if (!StringUtil.isInvisibleChar(c)) {
+ accum.appendCodePoint(c)
+ lastWasWhite = false
+ }
+ }
+ accum.toString().takeIf { it.isNotBlank() }?.let { tags.add(Text(it)) }
+ return tags
+}
diff --git a/subprojects/analysis-markdown-jb/src/main/kotlin/org/jetbrains/dokka/analysis/markdown/jb/Parser.kt b/subprojects/analysis-markdown-jb/src/main/kotlin/org/jetbrains/dokka/analysis/markdown/jb/Parser.kt
new file mode 100644
index 00000000..09650b32
--- /dev/null
+++ b/subprojects/analysis-markdown-jb/src/main/kotlin/org/jetbrains/dokka/analysis/markdown/jb/Parser.kt
@@ -0,0 +1,131 @@
+package org.jetbrains.dokka.analysis.markdown.jb
+
+import org.jetbrains.dokka.InternalDokkaApi
+import org.jetbrains.dokka.model.doc.*
+
+@InternalDokkaApi
+abstract class Parser {
+
+ abstract fun parseStringToDocNode(extractedString: String): DocTag
+
+ protected abstract fun preparse(text: String): String
+
+ open fun parse(text: String): DocumentationNode =
+ DocumentationNode(extractTagsToListOfPairs(preparse(text)).map { (tag, content) -> parseTagWithBody(tag, content) })
+
+ protected open fun parseTagWithBody(tagName: String, content: String): TagWrapper =
+ when (tagName) {
+ "description" -> Description(parseStringToDocNode(content))
+ "author" -> Author(parseStringToDocNode(content))
+ "version" -> Version(parseStringToDocNode(content))
+ "since" -> Since(parseStringToDocNode(content))
+ "see" -> See(
+ parseStringToDocNode(content.substringAfter(' ')),
+ content.substringBefore(' '),
+ null
+ )
+ "param" -> Param(
+ parseStringToDocNode(content.substringAfter(' ')),
+ content.substringBefore(' ')
+ )
+ "property" -> Property(
+ parseStringToDocNode(content.substringAfter(' ')),
+ content.substringBefore(' ')
+ )
+ "return" -> Return(parseStringToDocNode(content))
+ "constructor" -> Constructor(parseStringToDocNode(content))
+ "receiver" -> Receiver(parseStringToDocNode(content))
+ "throws", "exception" -> Throws(
+ parseStringToDocNode(content.substringAfter(' ')),
+ content.substringBefore(' '),
+ null
+ )
+ "deprecated" -> Deprecated(parseStringToDocNode(content))
+ "sample" -> Sample(
+ parseStringToDocNode(content.substringAfter(' ')),
+ content.substringBefore(' ')
+ )
+ "suppress" -> Suppress(parseStringToDocNode(content))
+ else -> CustomTagWrapper(parseStringToDocNode(content), tagName)
+ }
+
+ /**
+ * KDoc parser from Kotlin compiler relies on a comment asterisk
+ * So there is a mini parser here
+ * TODO: at least to adapt [org.jetbrains.kotlin.kdoc.lexer.KDocLexer] to analyze KDoc without the asterisks and use it here
+ */
+ private fun extractTagsToListOfPairs(text: String): List<Pair<String, String>> =
+ "description $text"
+ .extractKDocSections()
+ .map { content ->
+ val contentWithEscapedAts = content.replace("\\@", "@")
+ val (tag, body) = contentWithEscapedAts.split(" ", limit = 2)
+ tag to body
+ }
+
+ /**
+ * Ignore a doc tag inside code spans and blocks
+ * @see org.jetbrains.kotlin.kdoc.psi.impl.KDocSection
+ */
+ private fun CharSequence.extractKDocSections(delimiter: String = "\n@"): List<String> {
+ var countOfBackticks = 0
+ var countOfTildes = 0
+ var countOfBackticksInOpeningFence = 0
+ var countOfTildesInOpeningFence = 0
+
+ var isInCode = false
+ val result = mutableListOf<String>()
+ var rangeStart = 0
+ var rangeEnd = 0
+ var currentOffset = 0
+ while (currentOffset < length) {
+
+ when (get(currentOffset)) {
+ '`' -> {
+ countOfBackticks++
+ countOfTildes = 0
+ }
+ '~' -> {
+ countOfTildes++
+ countOfBackticks = 0
+ }
+ else -> {
+ if (isInCode) {
+ // The closing code fence must be at least as long as the opening fence
+ if(countOfBackticks >= countOfBackticksInOpeningFence
+ || countOfTildes >= countOfTildesInOpeningFence)
+ isInCode = false
+ } else {
+ // as per CommonMark spec, there can be any number of backticks for a code span, not only one or three
+ if (countOfBackticks > 0) {
+ isInCode = true
+ countOfBackticksInOpeningFence = countOfBackticks
+ countOfTildesInOpeningFence = Int.MAX_VALUE
+ }
+ // tildes are only for a code block, not code span
+ if (countOfTildes >= 3) {
+ isInCode = true
+ countOfTildesInOpeningFence = countOfTildes
+ countOfBackticksInOpeningFence = Int.MAX_VALUE
+ }
+ }
+ countOfTildes = 0
+ countOfBackticks = 0
+ }
+ }
+ if (!isInCode && startsWith(delimiter, currentOffset)) {
+ result.add(substring(rangeStart, rangeEnd))
+ currentOffset += delimiter.length
+ rangeStart = currentOffset
+ rangeEnd = currentOffset
+ continue
+ }
+
+ ++rangeEnd
+ ++currentOffset
+ }
+ result.add(substring(rangeStart, rangeEnd))
+ return result
+ }
+
+}
diff --git a/subprojects/analysis-markdown-jb/src/main/kotlin/org/jetbrains/dokka/analysis/markdown/jb/factories/DocTagsFromIElementFactory.kt b/subprojects/analysis-markdown-jb/src/main/kotlin/org/jetbrains/dokka/analysis/markdown/jb/factories/DocTagsFromIElementFactory.kt
new file mode 100644
index 00000000..86de8000
--- /dev/null
+++ b/subprojects/analysis-markdown-jb/src/main/kotlin/org/jetbrains/dokka/analysis/markdown/jb/factories/DocTagsFromIElementFactory.kt
@@ -0,0 +1,86 @@
+package org.jetbrains.dokka.analysis.markdown.jb.factories
+
+import org.intellij.markdown.IElementType
+import org.intellij.markdown.MarkdownElementTypes
+import org.intellij.markdown.MarkdownTokenTypes
+import org.intellij.markdown.flavours.gfm.GFMElementTypes
+import org.intellij.markdown.flavours.gfm.GFMTokenTypes
+import org.jetbrains.dokka.analysis.markdown.jb.MARKDOWN_ELEMENT_FILE_NAME
+import org.jetbrains.dokka.analysis.markdown.jb.parseHtmlEncodedWithNormalisedSpaces
+import org.jetbrains.dokka.links.DRI
+import org.jetbrains.dokka.model.doc.*
+import org.jetbrains.dokka.model.doc.DocTag.Companion.contentTypeParam
+import org.jsoup.Jsoup
+
+internal object DocTagsFromIElementFactory {
+
+ @Suppress("IMPLICIT_CAST_TO_ANY")
+ fun getInstance(type: IElementType, children: List<DocTag> = emptyList(), params: Map<String, String> = emptyMap(), body: String? = null, dri: DRI? = null, keepFormatting: Boolean = false) =
+ when(type) {
+ MarkdownElementTypes.SHORT_REFERENCE_LINK,
+ MarkdownElementTypes.FULL_REFERENCE_LINK,
+ MarkdownElementTypes.INLINE_LINK -> if(dri == null) A(children, params) else DocumentationLink(dri, children, params)
+ MarkdownElementTypes.STRONG -> B(children, params)
+ MarkdownElementTypes.BLOCK_QUOTE -> BlockQuote(children, params)
+ MarkdownElementTypes.CODE_SPAN -> CodeInline(children, params)
+ MarkdownElementTypes.CODE_BLOCK,
+ MarkdownElementTypes.CODE_FENCE -> CodeBlock(children, params)
+ MarkdownElementTypes.ATX_1 -> H1(children, params)
+ MarkdownElementTypes.ATX_2 -> H2(children, params)
+ MarkdownElementTypes.ATX_3 -> H3(children, params)
+ MarkdownElementTypes.ATX_4 -> H4(children, params)
+ MarkdownElementTypes.ATX_5 -> H5(children, params)
+ MarkdownElementTypes.ATX_6 -> H6(children, params)
+ MarkdownElementTypes.EMPH -> I(children, params)
+ MarkdownElementTypes.IMAGE -> Img(children, params)
+ MarkdownElementTypes.LIST_ITEM -> Li(children, params)
+ MarkdownElementTypes.ORDERED_LIST -> Ol(children, params)
+ MarkdownElementTypes.UNORDERED_LIST -> Ul(children, params)
+ MarkdownElementTypes.PARAGRAPH -> P(children, params)
+ MarkdownTokenTypes.TEXT -> if (keepFormatting) Text(
+ body.orEmpty(),
+ children,
+ params
+ ) else {
+ // corner case: there are only spaces between two Markdown nodes
+ val containsOnlySpaces = body?.isNotEmpty() == true && body.all { it.isWhitespace() }
+ if (containsOnlySpaces) Text(" ", children, params)
+ else body?.parseWithNormalisedSpaces(renderWhiteCharactersAsSpaces = false).orEmpty()
+ }
+ MarkdownTokenTypes.HORIZONTAL_RULE -> HorizontalRule
+ MarkdownTokenTypes.HARD_LINE_BREAK -> Br
+ GFMElementTypes.STRIKETHROUGH -> Strikethrough(children, params)
+ GFMElementTypes.TABLE -> Table(children, params)
+ GFMElementTypes.HEADER -> Th(children, params)
+ GFMElementTypes.ROW -> Tr(children, params)
+ GFMTokenTypes.CELL -> Td(children, params)
+ MarkdownElementTypes.MARKDOWN_FILE -> CustomDocTag(children, params, MARKDOWN_ELEMENT_FILE_NAME)
+ MarkdownElementTypes.HTML_BLOCK,
+ MarkdownTokenTypes.HTML_TAG,
+ MarkdownTokenTypes.HTML_BLOCK_CONTENT -> Text(body.orEmpty(), params = params + contentTypeParam("html"))
+ else -> CustomDocTag(children, params, type.name)
+ }.let {
+ @Suppress("UNCHECKED_CAST")
+ when (it) {
+ is List<*> -> it as List<DocTag>
+ else -> listOf(it as DocTag)
+ }
+ }
+
+ /**
+ * Parses string into [Text] doc tags that can have either value of the string or html-encoded value with content-type=html parameter.
+ * Content type is added when dealing with html entries like `&nbsp;`
+ */
+ private fun String.parseWithNormalisedSpaces(
+ renderWhiteCharactersAsSpaces: Boolean
+ ): List<DocTag> {
+ if (!requiresHtmlEncoding()) {
+ return parseHtmlEncodedWithNormalisedSpaces(renderWhiteCharactersAsSpaces)
+ }
+ // parsing it using jsoup is required to get codePoints, otherwise they are interpreted separately, as chars
+ // But we dont need to do it for java as it is already parsed with jsoup
+ return Jsoup.parseBodyFragment(this).body().wholeText().parseHtmlEncodedWithNormalisedSpaces(renderWhiteCharactersAsSpaces)
+ }
+
+ private fun String.requiresHtmlEncoding(): Boolean = indexOf('&') != -1
+}