aboutsummaryrefslogtreecommitdiff
path: root/plugins/base/src/main/kotlin/parsers/MarkdownParser.kt
diff options
context:
space:
mode:
Diffstat (limited to 'plugins/base/src/main/kotlin/parsers/MarkdownParser.kt')
-rw-r--r--plugins/base/src/main/kotlin/parsers/MarkdownParser.kt420
1 files changed, 420 insertions, 0 deletions
diff --git a/plugins/base/src/main/kotlin/parsers/MarkdownParser.kt b/plugins/base/src/main/kotlin/parsers/MarkdownParser.kt
new file mode 100644
index 00000000..05eb71ab
--- /dev/null
+++ b/plugins/base/src/main/kotlin/parsers/MarkdownParser.kt
@@ -0,0 +1,420 @@
+package org.jetbrains.dokka.base.parsers
+
+import com.intellij.psi.PsiElement
+import org.jetbrains.dokka.model.doc.*
+import org.intellij.markdown.MarkdownElementTypes
+import org.intellij.markdown.MarkdownTokenTypes
+import org.intellij.markdown.ast.ASTNode
+import org.intellij.markdown.ast.CompositeASTNode
+import org.intellij.markdown.ast.LeafASTNode
+import org.intellij.markdown.ast.impl.ListItemCompositeNode
+import org.intellij.markdown.flavours.gfm.GFMElementTypes
+import org.intellij.markdown.flavours.gfm.GFMFlavourDescriptor
+import org.intellij.markdown.flavours.gfm.GFMTokenTypes
+import org.jetbrains.dokka.analysis.DokkaResolutionFacade
+import org.jetbrains.dokka.analysis.from
+import org.jetbrains.dokka.links.DRI
+import org.jetbrains.dokka.base.parsers.factories.DocTagsFromIElementFactory
+import org.jetbrains.dokka.utilities.DokkaLogger
+import org.jetbrains.kotlin.descriptors.ClassDescriptor
+import org.jetbrains.kotlin.descriptors.DeclarationDescriptor
+import org.jetbrains.kotlin.idea.kdoc.resolveKDocLink
+import org.jetbrains.kotlin.kdoc.parser.KDocKnownTag
+import org.jetbrains.kotlin.kdoc.psi.impl.KDocSection
+import org.jetbrains.kotlin.kdoc.psi.impl.KDocTag
+import java.net.MalformedURLException
+import org.intellij.markdown.parser.MarkdownParser as IntellijMarkdownParser
+
+class MarkdownParser(
+ private val resolutionFacade: DokkaResolutionFacade? = null,
+ private val declarationDescriptor: DeclarationDescriptor? = null,
+ private val logger: DokkaLogger
+) : Parser() {
+
+ inner class MarkdownVisitor(val text: String, val destinationLinksMap: Map<String, String>) {
+
+ private fun headersHandler(node: ASTNode): DocTag =
+ DocTagsFromIElementFactory.getInstance(
+ node.type,
+ visitNode(node.children.find { it.type == MarkdownTokenTypes.ATX_CONTENT }
+ ?: throw IllegalStateException("Wrong AST Tree. ATX Header does not contain expected content")).children
+ )
+
+ private fun horizontalRulesHandler(node: ASTNode): DocTag =
+ DocTagsFromIElementFactory.getInstance(MarkdownTokenTypes.HORIZONTAL_RULE)
+
+ private fun emphasisHandler(node: ASTNode): DocTag =
+ DocTagsFromIElementFactory.getInstance(
+ node.type,
+ children = listOf(visitNode(node.children[node.children.size / 2]))
+ )
+
+ private fun blockquotesHandler(node: ASTNode): DocTag =
+ DocTagsFromIElementFactory.getInstance(
+ node.type, children = node.children
+ .filterIsInstance<CompositeASTNode>()
+ .evaluateChildren()
+ )
+
+ private fun listsHandler(node: ASTNode): DocTag {
+
+ val children = node.children.filterIsInstance<ListItemCompositeNode>().flatMap {
+ if (it.children.last().type in listOf(
+ MarkdownElementTypes.ORDERED_LIST,
+ MarkdownElementTypes.UNORDERED_LIST
+ )
+ ) {
+ val nestedList = it.children.last()
+ (it.children as MutableList).removeAt(it.children.lastIndex)
+ listOf(it, nestedList)
+ } else
+ listOf(it)
+ }
+
+ return DocTagsFromIElementFactory.getInstance(
+ node.type,
+ children =
+ children
+ .map {
+ if (it.type == MarkdownElementTypes.LIST_ITEM)
+ DocTagsFromIElementFactory.getInstance(
+ it.type,
+ children = it
+ .children
+ .filterIsInstance<CompositeASTNode>()
+ .evaluateChildren()
+ )
+ else
+ visitNode(it)
+ },
+ params =
+ if (node.type == MarkdownElementTypes.ORDERED_LIST) {
+ val listNumberNode = node.children.first().children.first()
+ mapOf(
+ "start" to text.substring(
+ listNumberNode.startOffset,
+ listNumberNode.endOffset
+ ).trim().dropLast(1)
+ )
+ } else
+ emptyMap()
+ )
+ }
+
+ private fun resolveDRI(mdLink: String): DRI? =
+ mdLink
+ .removePrefix("[")
+ .removeSuffix("]")
+ .let { link ->
+ try {
+ java.net.URL(link)
+ null
+ } catch (e: MalformedURLException) {
+ try {
+ if (resolutionFacade != null && declarationDescriptor != null) {
+ resolveKDocLink(
+ resolutionFacade.resolveSession.bindingContext,
+ resolutionFacade,
+ declarationDescriptor,
+ null,
+ link.split('.')
+ ).minByOrNull { it is ClassDescriptor }?.let { DRI.from(it) }
+ } else null
+ } catch (e1: IllegalArgumentException) {
+ logger.warn("Couldn't resolve link for $mdLink")
+ null
+ }
+ }
+ }
+
+ private fun referenceLinksHandler(node: ASTNode): DocTag {
+ val linkLabel = node.children.find { it.type == MarkdownElementTypes.LINK_LABEL }
+ ?: throw IllegalStateException("Wrong AST Tree. Reference link does not contain expected content")
+ val linkText = node.children.findLast { it.type == MarkdownElementTypes.LINK_TEXT } ?: linkLabel
+
+ val linkKey = text.substring(linkLabel.startOffset, linkLabel.endOffset)
+
+ val link = destinationLinksMap[linkKey.toLowerCase()] ?: linkKey
+
+ return linksHandler(linkText, link)
+ }
+
+ private fun inlineLinksHandler(node: ASTNode): DocTag {
+ val linkText = node.children.find { it.type == MarkdownElementTypes.LINK_TEXT }
+ ?: throw IllegalStateException("Wrong AST Tree. Inline link does not contain expected content")
+ val linkDestination = node.children.find { it.type == MarkdownElementTypes.LINK_DESTINATION }
+ ?: throw IllegalStateException("Wrong AST Tree. Inline link does not contain expected content")
+ val linkTitle = node.children.find { it.type == MarkdownElementTypes.LINK_TITLE }
+
+ val link = text.substring(linkDestination.startOffset, linkDestination.endOffset)
+
+ return linksHandler(linkText, link, linkTitle)
+ }
+
+ private fun autoLinksHandler(node: ASTNode): DocTag {
+ val link = text.substring(node.startOffset + 1, node.endOffset - 1)
+
+ return linksHandler(node, link)
+ }
+
+ private fun linksHandler(linkText: ASTNode, link: String, linkTitle: ASTNode? = null): DocTag {
+ val dri: DRI? = resolveDRI(link)
+ val params = if (linkTitle == null)
+ mapOf("href" to link)
+ else
+ mapOf("href" to link, "title" to text.substring(linkTitle.startOffset + 1, linkTitle.endOffset - 1))
+
+ return DocTagsFromIElementFactory.getInstance(
+ MarkdownElementTypes.INLINE_LINK,
+ params = params,
+ children = linkText.children.drop(1).dropLast(1).evaluateChildren(),
+ dri = dri
+ )
+ }
+
+ private fun imagesHandler(node: ASTNode): DocTag {
+ val linkNode =
+ node.children.last().children.find { it.type == MarkdownElementTypes.LINK_LABEL }!!.children[1]
+ val link = text.substring(linkNode.startOffset, linkNode.endOffset)
+ val src = mapOf("src" to link)
+ return DocTagsFromIElementFactory.getInstance(
+ node.type,
+ params = src,
+ children = listOf(visitNode(node.children.last().children.find { it.type == MarkdownElementTypes.LINK_TEXT }!!))
+ )
+ }
+
+ private fun codeSpansHandler(node: ASTNode): DocTag =
+ DocTagsFromIElementFactory.getInstance(
+ node.type,
+ children = listOf(
+ DocTagsFromIElementFactory.getInstance(
+ MarkdownTokenTypes.TEXT,
+ body = text.substring(node.startOffset + 1, node.endOffset - 1).replace('\n', ' ').trimIndent()
+ )
+
+ )
+ )
+
+ private fun codeFencesHandler(node: ASTNode): DocTag =
+ DocTagsFromIElementFactory.getInstance(
+ node.type,
+ children = node
+ .children
+ .dropWhile { it.type != MarkdownTokenTypes.CODE_FENCE_CONTENT }
+ .dropLastWhile { it.type != MarkdownTokenTypes.CODE_FENCE_CONTENT }
+ .map {
+ if (it.type == MarkdownTokenTypes.EOL)
+ LeafASTNode(MarkdownTokenTypes.HARD_LINE_BREAK, 0, 0)
+ else
+ it
+ }.evaluateChildren(),
+ params = node
+ .children
+ .find { it.type == MarkdownTokenTypes.FENCE_LANG }
+ ?.let { mapOf("lang" to text.substring(it.startOffset, it.endOffset)) }
+ ?: emptyMap()
+ )
+
+ private fun codeBlocksHandler(node: ASTNode): DocTag =
+ DocTagsFromIElementFactory.getInstance(node.type, children = node.children.evaluateChildren())
+
+ private fun defaultHandler(node: ASTNode): DocTag =
+ DocTagsFromIElementFactory.getInstance(
+ MarkdownElementTypes.PARAGRAPH,
+ children = node.children.evaluateChildren()
+ )
+
+ fun visitNode(node: ASTNode): DocTag =
+ when (node.type) {
+ MarkdownElementTypes.ATX_1,
+ MarkdownElementTypes.ATX_2,
+ MarkdownElementTypes.ATX_3,
+ MarkdownElementTypes.ATX_4,
+ MarkdownElementTypes.ATX_5,
+ MarkdownElementTypes.ATX_6 -> headersHandler(node)
+ MarkdownTokenTypes.HORIZONTAL_RULE -> horizontalRulesHandler(node)
+ MarkdownElementTypes.STRONG,
+ MarkdownElementTypes.EMPH -> emphasisHandler(node)
+ MarkdownElementTypes.FULL_REFERENCE_LINK,
+ MarkdownElementTypes.SHORT_REFERENCE_LINK -> referenceLinksHandler(node)
+ MarkdownElementTypes.INLINE_LINK -> inlineLinksHandler(node)
+ MarkdownElementTypes.AUTOLINK -> autoLinksHandler(node)
+ MarkdownElementTypes.BLOCK_QUOTE -> blockquotesHandler(node)
+ MarkdownElementTypes.UNORDERED_LIST,
+ MarkdownElementTypes.ORDERED_LIST -> listsHandler(node)
+ MarkdownElementTypes.CODE_BLOCK -> codeBlocksHandler(node)
+ MarkdownElementTypes.CODE_FENCE -> codeFencesHandler(node)
+ MarkdownElementTypes.CODE_SPAN -> codeSpansHandler(node)
+ MarkdownElementTypes.IMAGE -> imagesHandler(node)
+ MarkdownTokenTypes.HARD_LINE_BREAK -> DocTagsFromIElementFactory.getInstance(node.type)
+ MarkdownTokenTypes.CODE_FENCE_CONTENT,
+ MarkdownTokenTypes.CODE_LINE,
+ MarkdownTokenTypes.TEXT -> DocTagsFromIElementFactory.getInstance(
+ MarkdownTokenTypes.TEXT,
+ body = text
+ .substring(node.startOffset, node.endOffset).transform()
+ )
+ MarkdownElementTypes.MARKDOWN_FILE -> if (node.children.size == 1) visitNode(node.children.first()) else defaultHandler(
+ node
+ )
+ GFMElementTypes.STRIKETHROUGH -> DocTagsFromIElementFactory.getInstance(
+ GFMElementTypes.STRIKETHROUGH,
+ body = text
+ .substring(node.startOffset, node.endOffset).transform()
+ )
+ GFMElementTypes.TABLE -> DocTagsFromIElementFactory.getInstance(
+ GFMElementTypes.TABLE,
+ children = node.children.filterTabSeparators().evaluateChildren()
+ )
+ GFMElementTypes.HEADER -> DocTagsFromIElementFactory.getInstance(
+ GFMElementTypes.HEADER,
+ children = node.children.filterTabSeparators().evaluateChildren()
+ )
+ GFMElementTypes.ROW -> DocTagsFromIElementFactory.getInstance(
+ GFMElementTypes.ROW,
+ children = node.children.filterTabSeparators().evaluateChildren()
+ )
+ else -> defaultHandler(node)
+ }
+
+ private fun List<ASTNode>.filterTabSeparators() =
+ this.filterNot { it.type == GFMTokenTypes.TABLE_SEPARATOR }
+
+ private fun List<ASTNode>.evaluateChildren(): List<DocTag> =
+ this.removeUselessTokens().mergeLeafASTNodes().map { visitNode(it) }
+
+ private fun List<ASTNode>.removeUselessTokens(): List<ASTNode> =
+ this.filterIndexed { index, node ->
+ !(node.type == MarkdownElementTypes.LINK_DEFINITION || (
+ node.type == MarkdownTokenTypes.EOL &&
+ this.getOrNull(index - 1)?.type == MarkdownTokenTypes.HARD_LINE_BREAK
+ ))
+ }
+
+ private val notLeafNodes = listOf(MarkdownTokenTypes.HORIZONTAL_RULE, MarkdownTokenTypes.HARD_LINE_BREAK)
+
+ private fun List<ASTNode>.isNotLeaf(index: Int): Boolean =
+ if (index in 0..this.lastIndex)
+ (this[index] is CompositeASTNode) || this[index].type in notLeafNodes
+ else
+ false
+
+ private fun List<ASTNode>.mergeLeafASTNodes(): List<ASTNode> {
+ val children: MutableList<ASTNode> = mutableListOf()
+ var index = 0
+ while (index <= this.lastIndex) {
+ if (this.isNotLeaf(index)) {
+ children += this[index]
+ } else {
+ val startOffset = this[index].startOffset
+ while (index < this.lastIndex) {
+ if (this.isNotLeaf(index + 1) || this[index + 1].startOffset != this[index].endOffset) {
+ val endOffset = this[index].endOffset
+ if (text.substring(startOffset, endOffset).transform().trim().isNotEmpty())
+ children += LeafASTNode(MarkdownTokenTypes.TEXT, startOffset, endOffset)
+ break
+ }
+ index++
+ }
+ if (index == this.lastIndex) {
+ val endOffset = this[index].endOffset
+ if (text.substring(startOffset, endOffset).transform().trim().isNotEmpty())
+ children += LeafASTNode(MarkdownTokenTypes.TEXT, startOffset, endOffset)
+ }
+ }
+ index++
+ }
+ return children
+ }
+
+ private fun String.transform() = this
+ .replace(Regex("\n\n+"), "") // Squashing new lines between paragraphs
+ .replace(Regex("\n"), " ")
+ .replace(Regex(" >+ +"), " ") // Replacement used in blockquotes, get rid of garbage
+ }
+
+
+ private fun getAllDestinationLinks(text: String, node: ASTNode): List<Pair<String, String>> =
+ node.children
+ .filter { it.type == MarkdownElementTypes.LINK_DEFINITION }
+ .map {
+ text.substring(it.children[0].startOffset, it.children[0].endOffset).toLowerCase() to
+ text.substring(it.children[2].startOffset, it.children[2].endOffset)
+ } +
+ node.children.filterIsInstance<CompositeASTNode>().flatMap { getAllDestinationLinks(text, it) }
+
+
+ private fun markdownToDocNode(text: String): DocTag {
+
+ val flavourDescriptor = GFMFlavourDescriptor()
+ val markdownAstRoot: ASTNode = IntellijMarkdownParser(flavourDescriptor).buildMarkdownTreeFromString(text)
+
+ return MarkdownVisitor(text, getAllDestinationLinks(text, markdownAstRoot).toMap()).visitNode(markdownAstRoot)
+ }
+
+ override fun parseStringToDocNode(extractedString: String) = markdownToDocNode(extractedString)
+ override fun preparse(text: String) = text
+
+ private fun findParent(kDoc: PsiElement): PsiElement =
+ if (kDoc is KDocSection) findParent(kDoc.parent) else kDoc
+
+ private fun getAllKDocTags(kDocImpl: PsiElement): List<KDocTag> =
+ kDocImpl.children.filterIsInstance<KDocTag>().filterNot { it is KDocSection } + kDocImpl.children.flatMap {
+ getAllKDocTags(
+ it
+ )
+ }
+
+ fun parseFromKDocTag(kDocTag: KDocTag?): DocumentationNode {
+ return if (kDocTag == null)
+ DocumentationNode(emptyList())
+ else
+ DocumentationNode(
+ (listOf(kDocTag) + getAllKDocTags(findParent(kDocTag))).map {
+ when (it.knownTag) {
+ null -> if (it.name == null) Description(parseStringToDocNode(it.getContent())) else CustomTagWrapper(
+ parseStringToDocNode(it.getContent()),
+ it.name!!
+ )
+ KDocKnownTag.AUTHOR -> Author(parseStringToDocNode(it.getContent()))
+ KDocKnownTag.THROWS -> Throws(
+ parseStringToDocNode(it.getContent()),
+ it.getSubjectName().orEmpty()
+ )
+ KDocKnownTag.EXCEPTION -> Throws(
+ parseStringToDocNode(it.getContent()),
+ it.getSubjectName().orEmpty()
+ )
+ KDocKnownTag.PARAM -> Param(
+ parseStringToDocNode(it.getContent()),
+ it.getSubjectName().orEmpty()
+ )
+ KDocKnownTag.RECEIVER -> Receiver(parseStringToDocNode(it.getContent()))
+ KDocKnownTag.RETURN -> Return(parseStringToDocNode(it.getContent()))
+ KDocKnownTag.SEE -> See(
+ parseStringToDocNode(it.getContent()),
+ it.getSubjectName().orEmpty(),
+ parseStringToDocNode("[${it.getSubjectName()}]")
+ .let {
+ val link = it.children[0]
+ if (link is DocumentationLink) link.dri
+ else null
+ }
+ )
+ KDocKnownTag.SINCE -> Since(parseStringToDocNode(it.getContent()))
+ KDocKnownTag.CONSTRUCTOR -> Constructor(parseStringToDocNode(it.getContent()))
+ KDocKnownTag.PROPERTY -> Property(
+ parseStringToDocNode(it.getContent()),
+ it.getSubjectName().orEmpty()
+ )
+ KDocKnownTag.SAMPLE -> Sample(
+ parseStringToDocNode(it.getContent()),
+ it.getSubjectName().orEmpty()
+ )
+ KDocKnownTag.SUPPRESS -> Suppress(parseStringToDocNode(it.getContent()))
+ }
+ }
+ )
+ }
+} \ No newline at end of file