aboutsummaryrefslogtreecommitdiff
path: root/subprojects/analysis-markdown-jb/src
diff options
context:
space:
mode:
Diffstat (limited to 'subprojects/analysis-markdown-jb/src')
-rw-r--r--subprojects/analysis-markdown-jb/src/main/kotlin/org/jetbrains/dokka/analysis/markdown/jb/MarkdownApi.kt12
-rw-r--r--subprojects/analysis-markdown-jb/src/main/kotlin/org/jetbrains/dokka/analysis/markdown/jb/MarkdownParser.kt554
-rw-r--r--subprojects/analysis-markdown-jb/src/main/kotlin/org/jetbrains/dokka/analysis/markdown/jb/ParseUtils.kt43
-rw-r--r--subprojects/analysis-markdown-jb/src/main/kotlin/org/jetbrains/dokka/analysis/markdown/jb/Parser.kt135
-rw-r--r--subprojects/analysis-markdown-jb/src/main/kotlin/org/jetbrains/dokka/analysis/markdown/jb/factories/DocTagsFromIElementFactory.kt90
5 files changed, 0 insertions, 834 deletions
diff --git a/subprojects/analysis-markdown-jb/src/main/kotlin/org/jetbrains/dokka/analysis/markdown/jb/MarkdownApi.kt b/subprojects/analysis-markdown-jb/src/main/kotlin/org/jetbrains/dokka/analysis/markdown/jb/MarkdownApi.kt
deleted file mode 100644
index bc56b596..00000000
--- a/subprojects/analysis-markdown-jb/src/main/kotlin/org/jetbrains/dokka/analysis/markdown/jb/MarkdownApi.kt
+++ /dev/null
@@ -1,12 +0,0 @@
-/*
- * Copyright 2014-2023 JetBrains s.r.o. Use of this source code is governed by the Apache 2.0 license.
- */
-
-package org.jetbrains.dokka.analysis.markdown.jb
-
-import org.intellij.markdown.MarkdownElementTypes
-import org.jetbrains.dokka.InternalDokkaApi
-
-// TODO [beresnev] move/rename if it's only used for CustomDocTag. for now left as is for compatibility
-@InternalDokkaApi
-public val MARKDOWN_ELEMENT_FILE_NAME: String = MarkdownElementTypes.MARKDOWN_FILE.name
diff --git a/subprojects/analysis-markdown-jb/src/main/kotlin/org/jetbrains/dokka/analysis/markdown/jb/MarkdownParser.kt b/subprojects/analysis-markdown-jb/src/main/kotlin/org/jetbrains/dokka/analysis/markdown/jb/MarkdownParser.kt
deleted file mode 100644
index 130c6def..00000000
--- a/subprojects/analysis-markdown-jb/src/main/kotlin/org/jetbrains/dokka/analysis/markdown/jb/MarkdownParser.kt
+++ /dev/null
@@ -1,554 +0,0 @@
-/*
- * Copyright 2014-2023 JetBrains s.r.o. Use of this source code is governed by the Apache 2.0 license.
- */
-
-package org.jetbrains.dokka.analysis.markdown.jb
-
-import org.intellij.markdown.MarkdownElementTypes
-import org.intellij.markdown.MarkdownTokenTypes
-import org.intellij.markdown.ast.ASTNode
-import org.intellij.markdown.ast.CompositeASTNode
-import org.intellij.markdown.ast.LeafASTNode
-import org.intellij.markdown.ast.impl.ListItemCompositeNode
-import org.intellij.markdown.flavours.gfm.GFMElementTypes
-import org.intellij.markdown.flavours.gfm.GFMFlavourDescriptor
-import org.intellij.markdown.flavours.gfm.GFMTokenTypes
-import org.intellij.markdown.html.HtmlGenerator
-import org.jetbrains.dokka.InternalDokkaApi
-import org.jetbrains.dokka.analysis.markdown.jb.factories.DocTagsFromIElementFactory
-import org.jetbrains.dokka.links.DRI
-import org.jetbrains.dokka.links.PointingToDeclaration
-import org.jetbrains.dokka.model.doc.*
-import java.net.MalformedURLException
-import java.net.URL
-import org.intellij.markdown.parser.MarkdownParser as IntellijMarkdownParser
-
-@InternalDokkaApi
-public open class MarkdownParser(
- private val externalDri: (String) -> DRI?,
- private val kdocLocation: String?,
-) : Parser() {
-
- private lateinit var destinationLinksMap: Map<String, String>
- private lateinit var text: String
-
- override fun parseStringToDocNode(extractedString: String): DocTag {
- val gfmFlavourDescriptor = GFMFlavourDescriptor()
- val markdownAstRoot = IntellijMarkdownParser(gfmFlavourDescriptor).buildMarkdownTreeFromString(extractedString)
- destinationLinksMap = getAllDestinationLinks(extractedString, markdownAstRoot).toMap()
- text = extractedString
-
- val parsed = visitNode(markdownAstRoot)
- if (parsed.size == 1) {
- return parsed.first()
- }
- return CustomDocTag(children = parsed, params = emptyMap(), name = "")
- }
-
- override fun preparse(text: String): String = text.replace("\r\n", "\n").replace("\r", "\n")
-
- override fun parseTagWithBody(tagName: String, content: String): TagWrapper =
- when (tagName) {
- "see" -> {
- val referencedName = content.substringBefore(' ')
- val dri = externalDri(referencedName)
- See(
- parseStringToDocNode(content.substringAfter(' ')),
- dri?.fqDeclarationName() ?: referencedName,
- dri
- )
- }
- "throws", "exception" -> {
- val dri = externalDri(content.substringBefore(' '))
- Throws(
- parseStringToDocNode(content.substringAfter(' ')),
- dri?.fqDeclarationName() ?: content.substringBefore(' '),
- dri
- )
- }
- else -> super.parseTagWithBody(tagName, content)
- }
-
- private fun headersHandler(node: ASTNode) =
- DocTagsFromIElementFactory.getInstance(
- node.type,
- visitNode(node.children.find { it.type == MarkdownTokenTypes.ATX_CONTENT }
- ?: throw detailedException("Wrong AST Tree. Header does not contain expected content", node)
- ).flatMap { it.children }
- )
-
- /**
- * Handler for [MarkdownTokenTypes.ATX_CONTENT], which is the content of the header
- * elements like [MarkdownElementTypes.ATX_1], [MarkdownElementTypes.ATX_2] and so on.
- *
- * For example, a header line like `# Header text` is expected to be parsed into:
- * - One [MarkdownTokenTypes.ATX_HEADER] with startOffset = 0, endOffset = 1 (only the `#` symbol)
- * - Composite [MarkdownTokenTypes.ATX_CONTENT] with four children: WHITE_SPACE, TEXT, WHITE_SPACE, TEXT.
- */
- private fun headerContentHandler(node: ASTNode): List<DocTag> {
- // ATX_CONTENT contains everything after the `#` symbol, so if there's a space
- // in-between the `#` symbol and the text (like `# header`), it will be present here too.
- // However, we don't need the leading space between the `#` symbol and the text, nor do we need trailing spaces,
- // so we just skip it (otherwise the header text will be parsed as `<whitespace>header` instead of `header`).
- // If there's more space between `#` and text, like `# header`, it will still be a single WHITE_SPACE
- // element, but it will be wider, so the solution below should still hold. The same applies to trailing spaces.
- val trimmedChildren = node.children.trimWhitespaceToken()
-
- val children = trimmedChildren.evaluateChildren()
- return DocTagsFromIElementFactory.getInstance(
- MarkdownElementTypes.PARAGRAPH, // PARAGRAPH instead of TEXT to preserve compatibility with prev. versions
- children = children
- )
- }
-
- /**
- * @return a sublist of [this] list that does not contain
- * leading and trailing [MarkdownTokenTypes.WHITE_SPACE] elements
- */
- private fun List<ASTNode>.trimWhitespaceToken(): List<ASTNode> {
- val firstNonWhitespaceIndex = this.indexOfFirst { it.type != MarkdownTokenTypes.WHITE_SPACE }
- if (firstNonWhitespaceIndex == -1) {
- return this
- }
- val lastNonWhitespaceIndex = this.indexOfLast { it.type != MarkdownTokenTypes.WHITE_SPACE }
-
- return this.subList(firstNonWhitespaceIndex, lastNonWhitespaceIndex + 1)
- }
-
- private fun horizontalRulesHandler() =
- DocTagsFromIElementFactory.getInstance(MarkdownTokenTypes.HORIZONTAL_RULE)
-
- private fun emphasisHandler(node: ASTNode) =
- DocTagsFromIElementFactory.getInstance(
- node.type,
- children = node.children.evaluateChildrenWithDroppedEnclosingTokens(1)
- )
-
- private fun strongHandler(node: ASTNode) =
- DocTagsFromIElementFactory.getInstance(
- node.type,
- children = node.children.evaluateChildrenWithDroppedEnclosingTokens(2)
- )
-
- private fun List<ASTNode>.evaluateChildrenWithDroppedEnclosingTokens(count: Int) =
- drop(count).dropLast(count).evaluateChildren()
-
- private fun blockquotesHandler(node: ASTNode) =
- DocTagsFromIElementFactory.getInstance(
- node.type, children = node.children
- .filterIsInstance<CompositeASTNode>()
- .evaluateChildren()
- )
-
- private fun listsHandler(node: ASTNode): List<DocTag> {
-
- val children = node.children.filterIsInstance<ListItemCompositeNode>().flatMap {
- if (it.children.last().type in listOf(
- MarkdownElementTypes.ORDERED_LIST,
- MarkdownElementTypes.UNORDERED_LIST
- )
- ) {
- val nestedList = it.children.last()
- (it.children as MutableList).removeAt(it.children.lastIndex)
- listOf(it, nestedList)
- } else
- listOf(it)
- }
-
- return DocTagsFromIElementFactory.getInstance(
- node.type,
- children =
- children
- .flatMap {
- if (it.type == MarkdownElementTypes.LIST_ITEM)
- DocTagsFromIElementFactory.getInstance(
- it.type,
- children = it
- .children
- .filterIsInstance<CompositeASTNode>()
- .evaluateChildren()
- )
- else
- visitNode(it)
- },
- params =
- if (node.type == MarkdownElementTypes.ORDERED_LIST) {
- val listNumberNode = node.children.first().children.first()
- mapOf(
- "start" to text.substring(
- listNumberNode.startOffset,
- listNumberNode.endOffset
- ).trim().dropLast(1)
- )
- } else
- emptyMap()
- )
- }
-
- private fun resolveDRI(mdLink: String): DRI? =
- mdLink
- .removePrefix("[")
- .removeSuffix("]")
- .let { link ->
- try {
- URL(link)
- null
- } catch (e: MalformedURLException) {
- externalDri(link)
- }
- }
-
- private fun getAllDestinationLinks(text: String, node: ASTNode): List<Pair<String, String>> =
- node.children
- .filter { it.type == MarkdownElementTypes.LINK_DEFINITION }
- .map {
- text.substring(it.children[0].startOffset, it.children[0].endOffset).toLowerCase() to
- text.substring(it.children[2].startOffset, it.children[2].endOffset)
- } +
- node.children.filterIsInstance<CompositeASTNode>().flatMap { getAllDestinationLinks(text, it) }
-
-
- private fun referenceLinksHandler(node: ASTNode): List<DocTag> {
- val linkLabel = node.children.find { it.type == MarkdownElementTypes.LINK_LABEL }
- ?: throw detailedException("Wrong AST Tree. Reference link does not contain link label", node)
- val linkText = node.children.findLast { it.type == MarkdownElementTypes.LINK_TEXT } ?: linkLabel
-
- val linkKey = text.substring(linkLabel.startOffset, linkLabel.endOffset)
-
- val link = destinationLinksMap[linkKey.toLowerCase()] ?: linkKey
-
- return linksHandler(linkText, link)
- }
-
- private fun inlineLinksHandler(node: ASTNode): List<DocTag> {
- val linkText = node.children.find { it.type == MarkdownElementTypes.LINK_TEXT }
- ?: throw detailedException("Wrong AST Tree. Inline link does not contain link text", node)
- val linkDestination = node.children.find { it.type == MarkdownElementTypes.LINK_DESTINATION }
- val linkTitle = node.children.find { it.type == MarkdownElementTypes.LINK_TITLE }
-
- // Link destination may be ommited: https://github.github.com/gfm/#example-495
- val link = linkDestination?.let { text.substring(it.startOffset, it.endOffset) }
-
- return linksHandler(linkText, link, linkTitle)
- }
-
- private fun markdownFileHandler(node: ASTNode) =
- DocTagsFromIElementFactory.getInstance(
- node.type,
- children = node.children
- .filterSpacesAndEOL()
- .evaluateChildren()
- )
-
- private fun autoLinksHandler(node: ASTNode): List<DocTag> {
- val link = text.substring(node.startOffset + 1, node.endOffset - 1)
-
- return linksHandler(node, link)
- }
-
- private fun linksHandler(linkText: ASTNode, link: String?, linkTitle: ASTNode? = null): List<DocTag> {
- val dri: DRI? = link?.let { resolveDRI(it) }
- val linkOrEmpty = link ?: ""
- val linkTextString =
- if (linkTitle == null) linkOrEmpty else text.substring(linkTitle.startOffset + 1, linkTitle.endOffset - 1)
-
- val params = if (linkTitle == null)
- mapOf("href" to linkOrEmpty)
- else
- mapOf("href" to linkOrEmpty, "title" to linkTextString)
-
- return if (link != null && dri == null && !linkOrEmpty.isRemoteLink()) {
- DocTagsFromIElementFactory.getInstance(
- MarkdownTokenTypes.TEXT,
- params = params,
- children = linkText.children.drop(1).dropLast(1).evaluateChildren(),
- body = linkTextString.removeSurrounding("[", "]")
- )
- } else {
- DocTagsFromIElementFactory.getInstance(
- MarkdownElementTypes.INLINE_LINK,
- params = params,
- children = linkText.children.drop(1).dropLast(1).evaluateChildren(),
- dri = dri
- )
- }
- }
-
- private fun codeLineHandler(node: ASTNode) = DocTagsFromIElementFactory.getInstance(
- MarkdownElementTypes.CODE_BLOCK,
- body = text.substring(node.startOffset, node.endOffset)
- )
-
- private fun textHandler(node: ASTNode, keepAllFormatting: Boolean) = DocTagsFromIElementFactory.getInstance(
- MarkdownTokenTypes.TEXT,
- body = text.substring(node.startOffset, node.endOffset).transform(),
- keepFormatting = keepAllFormatting
- )
-
- private fun strikeThroughHandler(node: ASTNode) = DocTagsFromIElementFactory.getInstance(
- node.type,
- children = node.children.evaluateChildrenWithDroppedEnclosingTokens(2)
- )
-
- private fun tableHandler(node: ASTNode) = DocTagsFromIElementFactory.getInstance(
- GFMElementTypes.TABLE,
- children = node.children
- .filter { it.type == GFMElementTypes.ROW || it.type == GFMElementTypes.HEADER }
- .evaluateChildren()
- )
-
- private fun headerHandler(node: ASTNode) = DocTagsFromIElementFactory.getInstance(
- GFMElementTypes.HEADER,
- children = node.children
- .filter { it.type == GFMTokenTypes.CELL }
- .evaluateChildren()
- )
-
- private fun rowHandler(node: ASTNode) = DocTagsFromIElementFactory.getInstance(
- GFMElementTypes.ROW,
- children = node.children
- .filter { it.type == GFMTokenTypes.CELL }
- .evaluateChildren()
- )
-
- private fun cellHandler(node: ASTNode) = DocTagsFromIElementFactory.getInstance(
- GFMTokenTypes.CELL,
- children = node.children.filterTabSeparators().evaluateChildren().trimSurroundingTokensIfText()
- )
-
- private fun String.isRemoteLink() = try {
- URL(this)
- true
- } catch (e: MalformedURLException) {
- false
- }
-
- private fun imagesHandler(node: ASTNode): List<DocTag> =
- with(node.children.last().children) {
- val destination = find { it.type == MarkdownElementTypes.LINK_DESTINATION }
- val description = find { it.type == MarkdownElementTypes.LINK_TEXT }
-
- val src = destination?.let {
- mapOf("href" to text.substring(it.startOffset, it.endOffset))
- } ?: emptyMap()
-
- val alt = description?.let {
- mapOf("alt" to text.substring(it.startOffset + 1, it.endOffset - 1))
- } ?: emptyMap()
-
- return DocTagsFromIElementFactory.getInstance(
- node.type,
- params = src + alt
- )
- }
-
-
- private fun rawHtmlHandler(node: ASTNode): List<DocTag> =
- DocTagsFromIElementFactory.getInstance(
- node.type,
- body = text.substring(node.startOffset, node.endOffset)
- )
-
- private fun codeSpansHandler(node: ASTNode) =
- DocTagsFromIElementFactory.getInstance(
- node.type,
- children = DocTagsFromIElementFactory.getInstance(
- MarkdownTokenTypes.TEXT,
- body = text.substring(node.startOffset + 1, node.endOffset - 1).replace('\n', ' ').trimIndent(),
- keepFormatting = true
- )
- )
-
- private fun codeFencesHandler(node: ASTNode) =
- DocTagsFromIElementFactory.getInstance(
- node.type,
- children = node
- .children
- .dropWhile { it.type != MarkdownTokenTypes.CODE_FENCE_CONTENT }
- .dropLastWhile { it.type != MarkdownTokenTypes.CODE_FENCE_CONTENT }
- .filter { it.type != MarkdownTokenTypes.WHITE_SPACE }
- .map {
- if (it.type == MarkdownTokenTypes.EOL)
- LeafASTNode(MarkdownTokenTypes.HARD_LINE_BREAK, 0, 0)
- else
- it
- }.evaluateChildren(keepAllFormatting = true),
- params = node
- .children
- .find { it.type == MarkdownTokenTypes.FENCE_LANG }
- ?.let { mapOf("lang" to text.substring(it.startOffset, it.endOffset)) }
- ?: emptyMap()
- )
-
- private fun codeBlocksHandler(node: ASTNode) =
- DocTagsFromIElementFactory.getInstance(node.type, children = node.children.mergeLeafASTNodes().flatMap {
- DocTagsFromIElementFactory.getInstance(
- MarkdownTokenTypes.TEXT,
- body = HtmlGenerator.trimIndents(text.substring(it.startOffset, it.endOffset), 4).toString()
- )
- })
-
- private fun defaultHandler(node: ASTNode) =
- DocTagsFromIElementFactory.getInstance(
- MarkdownElementTypes.PARAGRAPH,
- children = node.children.evaluateChildren()
- )
-
- private fun visitNode(node: ASTNode, keepAllFormatting: Boolean = false): List<DocTag> =
- when (node.type) {
- MarkdownElementTypes.ATX_1,
- MarkdownElementTypes.ATX_2,
- MarkdownElementTypes.ATX_3,
- MarkdownElementTypes.ATX_4,
- MarkdownElementTypes.ATX_5,
- MarkdownElementTypes.ATX_6,
- -> headersHandler(node)
- MarkdownTokenTypes.ATX_CONTENT -> headerContentHandler(node)
- MarkdownTokenTypes.HORIZONTAL_RULE -> horizontalRulesHandler()
- MarkdownElementTypes.STRONG -> strongHandler(node)
- MarkdownElementTypes.EMPH -> emphasisHandler(node)
- MarkdownElementTypes.FULL_REFERENCE_LINK,
- MarkdownElementTypes.SHORT_REFERENCE_LINK,
- -> referenceLinksHandler(node)
- MarkdownElementTypes.INLINE_LINK -> inlineLinksHandler(node)
- MarkdownElementTypes.AUTOLINK -> autoLinksHandler(node)
- MarkdownElementTypes.BLOCK_QUOTE -> blockquotesHandler(node)
- MarkdownElementTypes.UNORDERED_LIST,
- MarkdownElementTypes.ORDERED_LIST,
- -> listsHandler(node)
- MarkdownElementTypes.CODE_BLOCK -> codeBlocksHandler(node)
- MarkdownElementTypes.CODE_FENCE -> codeFencesHandler(node)
- MarkdownElementTypes.CODE_SPAN -> codeSpansHandler(node)
- MarkdownElementTypes.IMAGE -> imagesHandler(node)
- MarkdownElementTypes.HTML_BLOCK,
- MarkdownTokenTypes.HTML_TAG,
- MarkdownTokenTypes.HTML_BLOCK_CONTENT,
- -> rawHtmlHandler(node)
- MarkdownTokenTypes.HARD_LINE_BREAK -> DocTagsFromIElementFactory.getInstance(node.type)
- MarkdownTokenTypes.CODE_FENCE_CONTENT,
- MarkdownTokenTypes.CODE_LINE,
- -> codeLineHandler(node)
- MarkdownTokenTypes.TEXT -> textHandler(node, keepAllFormatting)
- MarkdownElementTypes.MARKDOWN_FILE -> markdownFileHandler(node)
- GFMElementTypes.STRIKETHROUGH -> strikeThroughHandler(node)
- GFMElementTypes.TABLE -> tableHandler(node)
- GFMElementTypes.HEADER -> headerHandler(node)
- GFMElementTypes.ROW -> rowHandler(node)
- GFMTokenTypes.CELL -> cellHandler(node)
- else -> defaultHandler(node)
- }
-
- private fun List<ASTNode>.filterTabSeparators() =
- this.filterNot { it.type == GFMTokenTypes.TABLE_SEPARATOR }
-
- private fun List<ASTNode>.filterSpacesAndEOL() =
- this.filterNot { it.type == MarkdownTokenTypes.WHITE_SPACE || it.type == MarkdownTokenTypes.EOL }
-
- private fun List<ASTNode>.evaluateChildren(keepAllFormatting: Boolean = false): List<DocTag> =
- this.removeUselessTokens().swapImagesThatShouldBeLinks(keepAllFormatting).mergeLeafASTNodes().flatMap { visitNode(it, keepAllFormatting) }
-
- private fun List<ASTNode>.swapImagesThatShouldBeLinks(keepAllFormatting: Boolean): List<ASTNode> =
- if (keepAllFormatting) {
- this
- } else {
- flatMap { node ->
- if (node.type == MarkdownElementTypes.IMAGE
- && node.children.firstOrNull()?.let { it is LeafASTNode && it.type.name == "!" } == true
- && node.children.lastOrNull()?.type == MarkdownElementTypes.SHORT_REFERENCE_LINK
- ) {
- node.children
- } else {
- listOf(node)
- }
- }
- }
-
- private fun List<ASTNode>.removeUselessTokens(): List<ASTNode> =
- this.filterIndexed { index, node ->
- !(node.type == MarkdownElementTypes.LINK_DEFINITION || (
- node.type == MarkdownTokenTypes.EOL &&
- this.getOrNull(index - 1)?.type == MarkdownTokenTypes.HARD_LINE_BREAK
- ))
- }
-
- private fun List<DocTag>.trimSurroundingTokensIfText() = mapIndexed { index, elem ->
- val elemTransformed = if (index == 0 && elem is Text) elem.copy(elem.body.trimStart()) else elem
- if (index == lastIndex && elemTransformed is Text) elemTransformed.copy(elemTransformed.body.trimEnd()) else elemTransformed
- }
-
- private val notLeafNodes = listOf(
- MarkdownTokenTypes.HORIZONTAL_RULE,
- MarkdownTokenTypes.HARD_LINE_BREAK,
- MarkdownTokenTypes.HTML_TAG,
- MarkdownTokenTypes.HTML_BLOCK_CONTENT
- )
-
- private fun ASTNode.isNotLeaf() = this is CompositeASTNode || this.type in notLeafNodes
-
- private fun List<ASTNode>.isNotLeaf(index: Int): Boolean =
- if (index in 0..this.lastIndex)
- this[index].isNotLeaf()
- else
- false
-
- private fun List<ASTNode>.mergeLeafASTNodes(): List<ASTNode> {
- val children: MutableList<ASTNode> = mutableListOf()
- var index = 0
- while (index <= this.lastIndex) {
- if (this.isNotLeaf(index)) {
- children += this[index]
- } else {
- val startOffset = this[index].startOffset
- val sIndex = index
- while (index < this.lastIndex) {
- if (this.isNotLeaf(index + 1) || this[index + 1].startOffset != this[index].endOffset) {
- children += mergedLeafNode(this, index, startOffset, sIndex)
- break
- }
- index++
- }
- if (index == this.lastIndex) {
- children += mergedLeafNode(this, index, startOffset, sIndex)
- }
- }
- index++
- }
- return children
- }
-
- private fun mergedLeafNode(nodes: List<ASTNode>, index: Int, startOffset: Int, sIndex: Int): LeafASTNode {
- val endOffset = nodes[index].endOffset
- val type = if (nodes.subList(sIndex, index)
- .any { it.type == MarkdownTokenTypes.CODE_LINE }
- ) MarkdownTokenTypes.CODE_LINE else MarkdownTokenTypes.TEXT
- return LeafASTNode(type, startOffset, endOffset)
- }
-
- private fun String.transform() = this
- .replace(Regex("\n\n+"), "") // Squashing new lines between paragraphs
- .replace(Regex("\n"), " ")
- .replace(Regex(" >+ +"), " ") // Replacement used in blockquotes, get rid of garbage
-
- private fun detailedException(baseMessage: String, node: ASTNode) =
- IllegalStateException(
- baseMessage + " in ${kdocLocation ?: "unspecified location"}, element starts from offset ${node.startOffset} and ends ${node.endOffset}: ${
- text.substring(
- node.startOffset,
- node.endOffset
- )
- }"
- )
-
-
- public companion object {
- public fun DRI.fqDeclarationName(): String? {
- if (this.target !is PointingToDeclaration) {
- return null
- }
- return listOfNotNull(this.packageName, this.classNames, this.callable?.name)
- .joinToString(separator = ".")
- .takeIf { it.isNotBlank() }
- }
- }
-}
-
diff --git a/subprojects/analysis-markdown-jb/src/main/kotlin/org/jetbrains/dokka/analysis/markdown/jb/ParseUtils.kt b/subprojects/analysis-markdown-jb/src/main/kotlin/org/jetbrains/dokka/analysis/markdown/jb/ParseUtils.kt
deleted file mode 100644
index 0293d470..00000000
--- a/subprojects/analysis-markdown-jb/src/main/kotlin/org/jetbrains/dokka/analysis/markdown/jb/ParseUtils.kt
+++ /dev/null
@@ -1,43 +0,0 @@
-/*
- * Copyright 2014-2023 JetBrains s.r.o. Use of this source code is governed by the Apache 2.0 license.
- */
-
-package org.jetbrains.dokka.analysis.markdown.jb
-
-import org.intellij.markdown.lexer.Compat
-import org.intellij.markdown.lexer.Compat.forEachCodePoint
-import org.jetbrains.dokka.InternalDokkaApi
-import org.jetbrains.dokka.model.doc.DocTag
-import org.jetbrains.dokka.model.doc.Text
-import org.jsoup.internal.StringUtil
-import org.jsoup.nodes.Entities
-
-@InternalDokkaApi
-public fun String.parseHtmlEncodedWithNormalisedSpaces(
- renderWhiteCharactersAsSpaces: Boolean
-): List<DocTag> {
- val accum = StringBuilder()
- val tags = mutableListOf<DocTag>()
- var lastWasWhite = false
-
- forEachCodePoint { c ->
- if (renderWhiteCharactersAsSpaces && StringUtil.isWhitespace(c)) {
- if (!lastWasWhite) {
- accum.append(' ')
- lastWasWhite = true
- }
- } else if (Compat.codePointToString(c).let { it != Entities.escape(it) }) {
- accum.toString().takeIf { it.isNotBlank() }?.let { tags.add(Text(it)) }
- accum.delete(0, accum.length)
-
- accum.appendCodePoint(c)
- tags.add(Text(accum.toString(), params = DocTag.contentTypeParam("html")))
- accum.delete(0, accum.length)
- } else if (!StringUtil.isInvisibleChar(c)) {
- accum.appendCodePoint(c)
- lastWasWhite = false
- }
- }
- accum.toString().takeIf { it.isNotBlank() }?.let { tags.add(Text(it)) }
- return tags
-}
diff --git a/subprojects/analysis-markdown-jb/src/main/kotlin/org/jetbrains/dokka/analysis/markdown/jb/Parser.kt b/subprojects/analysis-markdown-jb/src/main/kotlin/org/jetbrains/dokka/analysis/markdown/jb/Parser.kt
deleted file mode 100644
index 28afa0c4..00000000
--- a/subprojects/analysis-markdown-jb/src/main/kotlin/org/jetbrains/dokka/analysis/markdown/jb/Parser.kt
+++ /dev/null
@@ -1,135 +0,0 @@
-/*
- * Copyright 2014-2023 JetBrains s.r.o. Use of this source code is governed by the Apache 2.0 license.
- */
-
-package org.jetbrains.dokka.analysis.markdown.jb
-
-import org.jetbrains.dokka.InternalDokkaApi
-import org.jetbrains.dokka.model.doc.*
-
-@InternalDokkaApi
-public abstract class Parser {
-
- public abstract fun parseStringToDocNode(extractedString: String): DocTag
-
- protected abstract fun preparse(text: String): String
-
- public open fun parse(text: String): DocumentationNode =
- DocumentationNode(extractTagsToListOfPairs(preparse(text)).map { (tag, content) -> parseTagWithBody(tag, content) })
-
- protected open fun parseTagWithBody(tagName: String, content: String): TagWrapper =
- when (tagName) {
- "description" -> Description(parseStringToDocNode(content))
- "author" -> Author(parseStringToDocNode(content))
- "version" -> Version(parseStringToDocNode(content))
- "since" -> Since(parseStringToDocNode(content))
- "see" -> See(
- parseStringToDocNode(content.substringAfter(' ')),
- content.substringBefore(' '),
- null
- )
- "param" -> Param(
- parseStringToDocNode(content.substringAfter(' ')),
- content.substringBefore(' ')
- )
- "property" -> Property(
- parseStringToDocNode(content.substringAfter(' ')),
- content.substringBefore(' ')
- )
- "return" -> Return(parseStringToDocNode(content))
- "constructor" -> Constructor(parseStringToDocNode(content))
- "receiver" -> Receiver(parseStringToDocNode(content))
- "throws", "exception" -> Throws(
- parseStringToDocNode(content.substringAfter(' ')),
- content.substringBefore(' '),
- null
- )
- "deprecated" -> Deprecated(parseStringToDocNode(content))
- "sample" -> Sample(
- parseStringToDocNode(content.substringAfter(' ')),
- content.substringBefore(' ')
- )
- "suppress" -> Suppress(parseStringToDocNode(content))
- else -> CustomTagWrapper(parseStringToDocNode(content), tagName)
- }
-
- /**
- * KDoc parser from Kotlin compiler relies on a comment asterisk
- * So there is a mini parser here
- * TODO: at least to adapt [org.jetbrains.kotlin.kdoc.lexer.KDocLexer] to analyze KDoc without the asterisks and use it here
- */
- private fun extractTagsToListOfPairs(text: String): List<Pair<String, String>> =
- "description $text"
- .extractKDocSections()
- .map { content ->
- val contentWithEscapedAts = content.replace("\\@", "@")
- val (tag, body) = contentWithEscapedAts.split(" ", limit = 2)
- tag to body
- }
-
- /**
- * Ignore a doc tag inside code spans and blocks
- * @see org.jetbrains.kotlin.kdoc.psi.impl.KDocSection
- */
- private fun CharSequence.extractKDocSections(delimiter: String = "\n@"): List<String> {
- var countOfBackticks = 0
- var countOfTildes = 0
- var countOfBackticksInOpeningFence = 0
- var countOfTildesInOpeningFence = 0
-
- var isInCode = false
- val result = mutableListOf<String>()
- var rangeStart = 0
- var rangeEnd = 0
- var currentOffset = 0
- while (currentOffset < length) {
-
- when (get(currentOffset)) {
- '`' -> {
- countOfBackticks++
- countOfTildes = 0
- }
- '~' -> {
- countOfTildes++
- countOfBackticks = 0
- }
- else -> {
- if (isInCode) {
- // The closing code fence must be at least as long as the opening fence
- if(countOfBackticks >= countOfBackticksInOpeningFence
- || countOfTildes >= countOfTildesInOpeningFence)
- isInCode = false
- } else {
- // as per CommonMark spec, there can be any number of backticks for a code span, not only one or three
- if (countOfBackticks > 0) {
- isInCode = true
- countOfBackticksInOpeningFence = countOfBackticks
- countOfTildesInOpeningFence = Int.MAX_VALUE
- }
- // tildes are only for a code block, not code span
- if (countOfTildes >= 3) {
- isInCode = true
- countOfTildesInOpeningFence = countOfTildes
- countOfBackticksInOpeningFence = Int.MAX_VALUE
- }
- }
- countOfTildes = 0
- countOfBackticks = 0
- }
- }
- if (!isInCode && startsWith(delimiter, currentOffset)) {
- result.add(substring(rangeStart, rangeEnd))
- currentOffset += delimiter.length
- rangeStart = currentOffset
- rangeEnd = currentOffset
- continue
- }
-
- ++rangeEnd
- ++currentOffset
- }
- result.add(substring(rangeStart, rangeEnd))
- return result
- }
-
-}
diff --git a/subprojects/analysis-markdown-jb/src/main/kotlin/org/jetbrains/dokka/analysis/markdown/jb/factories/DocTagsFromIElementFactory.kt b/subprojects/analysis-markdown-jb/src/main/kotlin/org/jetbrains/dokka/analysis/markdown/jb/factories/DocTagsFromIElementFactory.kt
deleted file mode 100644
index 77ca92d5..00000000
--- a/subprojects/analysis-markdown-jb/src/main/kotlin/org/jetbrains/dokka/analysis/markdown/jb/factories/DocTagsFromIElementFactory.kt
+++ /dev/null
@@ -1,90 +0,0 @@
-/*
- * Copyright 2014-2023 JetBrains s.r.o. Use of this source code is governed by the Apache 2.0 license.
- */
-
-package org.jetbrains.dokka.analysis.markdown.jb.factories
-
-import org.intellij.markdown.IElementType
-import org.intellij.markdown.MarkdownElementTypes
-import org.intellij.markdown.MarkdownTokenTypes
-import org.intellij.markdown.flavours.gfm.GFMElementTypes
-import org.intellij.markdown.flavours.gfm.GFMTokenTypes
-import org.jetbrains.dokka.analysis.markdown.jb.MARKDOWN_ELEMENT_FILE_NAME
-import org.jetbrains.dokka.analysis.markdown.jb.parseHtmlEncodedWithNormalisedSpaces
-import org.jetbrains.dokka.links.DRI
-import org.jetbrains.dokka.model.doc.*
-import org.jetbrains.dokka.model.doc.DocTag.Companion.contentTypeParam
-import org.jsoup.Jsoup
-
-internal object DocTagsFromIElementFactory {
-
- @Suppress("IMPLICIT_CAST_TO_ANY")
- fun getInstance(type: IElementType, children: List<DocTag> = emptyList(), params: Map<String, String> = emptyMap(), body: String? = null, dri: DRI? = null, keepFormatting: Boolean = false) =
- when(type) {
- MarkdownElementTypes.SHORT_REFERENCE_LINK,
- MarkdownElementTypes.FULL_REFERENCE_LINK,
- MarkdownElementTypes.INLINE_LINK -> if(dri == null) A(children, params) else DocumentationLink(dri, children, params)
- MarkdownElementTypes.STRONG -> B(children, params)
- MarkdownElementTypes.BLOCK_QUOTE -> BlockQuote(children, params)
- MarkdownElementTypes.CODE_SPAN -> CodeInline(children, params)
- MarkdownElementTypes.CODE_BLOCK,
- MarkdownElementTypes.CODE_FENCE -> CodeBlock(children, params)
- MarkdownElementTypes.ATX_1 -> H1(children, params)
- MarkdownElementTypes.ATX_2 -> H2(children, params)
- MarkdownElementTypes.ATX_3 -> H3(children, params)
- MarkdownElementTypes.ATX_4 -> H4(children, params)
- MarkdownElementTypes.ATX_5 -> H5(children, params)
- MarkdownElementTypes.ATX_6 -> H6(children, params)
- MarkdownElementTypes.EMPH -> I(children, params)
- MarkdownElementTypes.IMAGE -> Img(children, params)
- MarkdownElementTypes.LIST_ITEM -> Li(children, params)
- MarkdownElementTypes.ORDERED_LIST -> Ol(children, params)
- MarkdownElementTypes.UNORDERED_LIST -> Ul(children, params)
- MarkdownElementTypes.PARAGRAPH -> P(children, params)
- MarkdownTokenTypes.TEXT -> if (keepFormatting) Text(
- body.orEmpty(),
- children,
- params
- ) else {
- // corner case: there are only spaces between two Markdown nodes
- val containsOnlySpaces = body?.isNotEmpty() == true && body.all { it.isWhitespace() }
- if (containsOnlySpaces) Text(" ", children, params)
- else body?.parseWithNormalisedSpaces(renderWhiteCharactersAsSpaces = false).orEmpty()
- }
- MarkdownTokenTypes.HORIZONTAL_RULE -> HorizontalRule
- MarkdownTokenTypes.HARD_LINE_BREAK -> Br
- GFMElementTypes.STRIKETHROUGH -> Strikethrough(children, params)
- GFMElementTypes.TABLE -> Table(children, params)
- GFMElementTypes.HEADER -> Th(children, params)
- GFMElementTypes.ROW -> Tr(children, params)
- GFMTokenTypes.CELL -> Td(children, params)
- MarkdownElementTypes.MARKDOWN_FILE -> CustomDocTag(children, params, MARKDOWN_ELEMENT_FILE_NAME)
- MarkdownElementTypes.HTML_BLOCK,
- MarkdownTokenTypes.HTML_TAG,
- MarkdownTokenTypes.HTML_BLOCK_CONTENT -> Text(body.orEmpty(), params = params + contentTypeParam("html"))
- else -> CustomDocTag(children, params, type.name)
- }.let {
- @Suppress("UNCHECKED_CAST")
- when (it) {
- is List<*> -> it as List<DocTag>
- else -> listOf(it as DocTag)
- }
- }
-
- /**
- * Parses string into [Text] doc tags that can have either value of the string or html-encoded value with content-type=html parameter.
- * Content type is added when dealing with html entries like `&nbsp;`
- */
- private fun String.parseWithNormalisedSpaces(
- renderWhiteCharactersAsSpaces: Boolean
- ): List<DocTag> {
- if (!requiresHtmlEncoding()) {
- return parseHtmlEncodedWithNormalisedSpaces(renderWhiteCharactersAsSpaces)
- }
- // parsing it using jsoup is required to get codePoints, otherwise they are interpreted separately, as chars
- // But we dont need to do it for java as it is already parsed with jsoup
- return Jsoup.parseBodyFragment(this).body().wholeText().parseHtmlEncodedWithNormalisedSpaces(renderWhiteCharactersAsSpaces)
- }
-
- private fun String.requiresHtmlEncoding(): Boolean = indexOf('&') != -1
-}