aboutsummaryrefslogtreecommitdiff
path: root/core/src/main/kotlin/parsers
diff options
context:
space:
mode:
authorBarkingBad <32793002+BarkingBad@users.noreply.github.com>2019-12-13 14:01:25 +0100
committerKamil Doległo <kamilok1965@interia.pl>2019-12-13 14:02:13 +0100
commitdd017a44ed7baae83f4f09a92d9691231f424eaa (patch)
treead9a7b6634ff4e4ead43122b13b0fb6dcdfcea85 /core/src/main/kotlin/parsers
parent0900b0f1c3a593301a6229ce93a23b8228771d24 (diff)
downloaddokka-dd017a44ed7baae83f4f09a92d9691231f424eaa.tar.gz
dokka-dd017a44ed7baae83f4f09a92d9691231f424eaa.tar.bz2
dokka-dd017a44ed7baae83f4f09a92d9691231f424eaa.zip
Add abstract structure for MD/HTML comments and MD parser
Diffstat (limited to 'core/src/main/kotlin/parsers')
-rw-r--r--core/src/main/kotlin/parsers/HtmlParser.kt89
-rw-r--r--core/src/main/kotlin/parsers/MarkdownParser.kt206
-rw-r--r--core/src/main/kotlin/parsers/Parser.kt44
-rw-r--r--core/src/main/kotlin/parsers/factories/DocNodesFromIElementFactory.kt36
-rw-r--r--core/src/main/kotlin/parsers/factories/DocNodesFromStringFactory.kt76
5 files changed, 451 insertions, 0 deletions
diff --git a/core/src/main/kotlin/parsers/HtmlParser.kt b/core/src/main/kotlin/parsers/HtmlParser.kt
new file mode 100644
index 00000000..30f882b1
--- /dev/null
+++ b/core/src/main/kotlin/parsers/HtmlParser.kt
@@ -0,0 +1,89 @@
+package parsers
+
+import model.doc.*
+import org.jetbrains.dokka.parsers.factories.DocNodesFromStringFactory
+import org.jsoup.Jsoup
+import org.jsoup.nodes.Node
+import org.jsoup.select.NodeFilter
+import org.jsoup.select.NodeTraversor
+
+class HtmlParser : Parser() {
+
+ inner class NodeFilterImpl : NodeFilter {
+
+ private val nodesCache: MutableMap<Int, MutableList<DocNode>> = mutableMapOf()
+ private var currentDepth = 0
+
+ fun collect(): DocNode = nodesCache[currentDepth]!![0]
+
+ override fun tail(node: Node?, depth: Int): NodeFilter.FilterResult {
+ val nodeName = node!!.nodeName()
+ val nodeAttributes = node.attributes()
+
+ if(nodeName in listOf("#document", "html", "head"))
+ return NodeFilter.FilterResult.CONTINUE
+
+ val body: String
+ val params: Map<String, String>
+
+
+ if(nodeName != "#text") {
+ body = ""
+ params = nodeAttributes.map { it.key to it.value }.toMap()
+ } else {
+ body = nodeAttributes["#text"]
+ params = emptyMap()
+ }
+
+ val docNode = if(depth < currentDepth) {
+ DocNodesFromStringFactory.getInstance(nodeName, nodesCache.getOrDefault(currentDepth, mutableListOf()).toList(), params, body).also {
+ nodesCache[currentDepth] = mutableListOf()
+ currentDepth = depth
+ }
+ } else {
+ DocNodesFromStringFactory.getInstance(nodeName, emptyList(), params, body)
+ }
+
+ nodesCache.getOrDefault(depth, mutableListOf()) += docNode
+ return NodeFilter.FilterResult.CONTINUE
+ }
+
+ override fun head(node: Node?, depth: Int): NodeFilter.FilterResult {
+
+ val nodeName = node!!.nodeName()
+
+ if(currentDepth < depth) {
+ currentDepth = depth
+ nodesCache[currentDepth] = mutableListOf()
+ }
+
+ if(nodeName in listOf("#document", "html", "head"))
+ return NodeFilter.FilterResult.CONTINUE
+
+ return NodeFilter.FilterResult.CONTINUE
+ }
+ }
+
+
+ private fun htmlToDocNode(string: String): DocNode {
+ val document = Jsoup.parse(string)
+ val nodeFilterImpl = NodeFilterImpl()
+ NodeTraversor.filter(nodeFilterImpl, document.root())
+ return nodeFilterImpl.collect()
+ }
+
+ private fun replaceLinksWithHrefs(javadoc: String): String = Regex("\\{@link .*?}").replace(javadoc) {
+ val split = it.value.dropLast(1).split(" ")
+ if(split.size !in listOf(2, 3))
+ return@replace it.value
+ if(split.size == 3)
+ return@replace "<documentationlink href=\"${split[1]}\">${split[2]}</documentationlink>"
+ else
+ return@replace "<documentationlink href=\"${split[1]}\">${split[1]}</documentationlink>"
+ }
+
+ override fun parseStringToDocNode(extractedString: String) = htmlToDocNode(extractedString)
+ override fun preparse(text: String) = replaceLinksWithHrefs(text)
+}
+
+
diff --git a/core/src/main/kotlin/parsers/MarkdownParser.kt b/core/src/main/kotlin/parsers/MarkdownParser.kt
new file mode 100644
index 00000000..44c917e3
--- /dev/null
+++ b/core/src/main/kotlin/parsers/MarkdownParser.kt
@@ -0,0 +1,206 @@
+package parsers
+
+import model.doc.*
+import org.intellij.markdown.MarkdownElementTypes
+import org.intellij.markdown.MarkdownTokenTypes
+import org.intellij.markdown.ast.ASTNode
+import org.intellij.markdown.ast.impl.ListItemCompositeNode
+import org.intellij.markdown.flavours.commonmark.CommonMarkFlavourDescriptor
+import org.jetbrains.dokka.analysis.DokkaResolutionFacade
+import org.jetbrains.dokka.links.DRI
+import org.jetbrains.dokka.parsers.factories.DocNodesFromIElementFactory
+import org.jetbrains.kotlin.descriptors.DeclarationDescriptor
+import org.jetbrains.kotlin.idea.kdoc.resolveKDocLink
+import org.jetbrains.kotlin.kdoc.parser.KDocKnownTag
+import org.jetbrains.kotlin.kdoc.psi.impl.KDocTag
+import org.intellij.markdown.parser.MarkdownParser as IntellijMarkdownParser
+
+class MarkdownParser (
+ private val resolutionFacade: DokkaResolutionFacade,
+ private val declarationDescriptor: DeclarationDescriptor
+ ) : Parser() {
+
+ inner class MarkdownVisitor(val text: String) {
+
+ private fun headersHandler(node: ASTNode): DocNode =
+ DocNodesFromIElementFactory.getInstance(node.type, visitNode(node.children.find { it.type == MarkdownTokenTypes.ATX_CONTENT }!!).children.drop(1))
+
+ private fun horizontalRulesHandler(node: ASTNode): DocNode =
+ DocNodesFromIElementFactory.getInstance(MarkdownTokenTypes.HORIZONTAL_RULE)
+
+ private fun emphasisHandler(node: ASTNode): DocNode =
+ DocNodesFromIElementFactory.getInstance(node.type, children = listOf(visitNode(node.children[node.children.size/2])))
+
+ private fun blockquotesHandler(node: ASTNode): DocNode =
+ DocNodesFromIElementFactory.getInstance(node.type, children = node.children.drop(1).map { visitNode(it) })
+
+ private fun listsHandler(node: ASTNode): DocNode {
+
+ val children = node.children.filterIsInstance<ListItemCompositeNode>().flatMap {
+ if( it.children.last().type in listOf(MarkdownElementTypes.ORDERED_LIST, MarkdownElementTypes.UNORDERED_LIST) ) {
+ val nestedList = it.children.last()
+ (it.children as MutableList).removeAt(it.children.lastIndex)
+ listOf(it, nestedList)
+ }
+ else
+ listOf(it)
+ }
+
+ return DocNodesFromIElementFactory.getInstance(
+ node.type,
+ children =
+ children
+ .map {
+ if(it.type == MarkdownElementTypes.LIST_ITEM)
+ DocNodesFromIElementFactory.getInstance(
+ it.type,
+ children = it
+ .children
+ .drop(1)
+ .filter { it.type !in listOf(MarkdownTokenTypes.WHITE_SPACE, MarkdownTokenTypes.EOL) }
+ .map { visitNode(it) }
+ )
+ else
+ visitNode(it)
+ },
+ params =
+ if (node.type == MarkdownElementTypes.ORDERED_LIST) {
+ val listNumberNode = node.children.first().children.first()
+ mapOf("start" to text.substring(listNumberNode.startOffset, listNumberNode.endOffset).dropLast(2))
+ } else
+ emptyMap()
+ )
+ }
+
+ private fun linksHandler(node: ASTNode): DocNode {
+ val linkNode = node.children.find { it.type == MarkdownElementTypes.LINK_LABEL }!!
+ val link = text.substring(linkNode.startOffset+1, linkNode.endOffset-1)
+
+ val dri: DRI? = if (link.startsWith("http") || link.startsWith("www")) {
+ null
+ } else {
+ DRI.from(
+ resolveKDocLink(
+ resolutionFacade.resolveSession.bindingContext,
+ resolutionFacade,
+ declarationDescriptor,
+ null,
+ link.split('.')
+ ).single()
+ )
+ }
+ val href = mapOf("href" to link)
+ return when (node.type) {
+ MarkdownElementTypes.FULL_REFERENCE_LINK -> DocNodesFromIElementFactory.getInstance(node.type, params = href, children = node.children.find { it.type == MarkdownElementTypes.LINK_TEXT }!!.children.drop(1).dropLast(1).map { visitNode(it) }, dri = dri)
+ else -> DocNodesFromIElementFactory.getInstance(node.type, params = href, children = listOf(visitNode(linkNode)), dri = dri)
+ }
+ }
+
+ private fun imagesHandler(node: ASTNode): DocNode {
+ val linkNode = node.children.last().children.find { it.type == MarkdownElementTypes.LINK_LABEL }!!.children[1]
+ val link = text.substring(linkNode.startOffset, linkNode.endOffset)
+ val src = mapOf("src" to link)
+ return DocNodesFromIElementFactory.getInstance(node.type, params = src, children = listOf(visitNode(node.children.last().children.find { it.type == MarkdownElementTypes.LINK_TEXT }!!)))
+ }
+
+ private fun codeSpansHandler(node: ASTNode): DocNode =
+ DocNodesFromIElementFactory.getInstance(
+ node.type,
+ children = listOf(
+ DocNodesFromIElementFactory.getInstance(
+ MarkdownTokenTypes.TEXT,
+ body = text.substring(node.startOffset+1, node.endOffset-1).replace('\n', ' ').trimIndent()
+ )
+
+ )
+ )
+
+ private fun codeFencesHandler(node: ASTNode): DocNode =
+ DocNodesFromIElementFactory.getInstance(
+ node.type,
+ children = node
+ .children
+ .filter { it.type == MarkdownTokenTypes.CODE_FENCE_CONTENT }
+ .map { visitNode(it) },
+ params = node
+ .children
+ .find { it.type == MarkdownTokenTypes.FENCE_LANG }
+ ?.let { mapOf("lang" to text.substring(it.startOffset, it.endOffset)) }
+ ?: emptyMap()
+ )
+
+ private fun codeBlocksHandler(node: ASTNode): DocNode =
+ DocNodesFromIElementFactory.getInstance(node.type, children = node.children.map { visitNode(it) })
+
+ private fun defaultHandler(node: ASTNode): DocNode =
+ DocNodesFromIElementFactory.getInstance(MarkdownElementTypes.PARAGRAPH, children = node.children.map { visitNode(it) })
+
+ fun visitNode(node: ASTNode): DocNode =
+ when (node.type) {
+ MarkdownElementTypes.ATX_1,
+ MarkdownElementTypes.ATX_2,
+ MarkdownElementTypes.ATX_3,
+ MarkdownElementTypes.ATX_4,
+ MarkdownElementTypes.ATX_5,
+ MarkdownElementTypes.ATX_6 -> headersHandler(node)
+ MarkdownTokenTypes.HORIZONTAL_RULE -> horizontalRulesHandler(node)
+ MarkdownElementTypes.STRONG,
+ MarkdownElementTypes.EMPH -> emphasisHandler(node)
+ MarkdownElementTypes.FULL_REFERENCE_LINK,
+ MarkdownElementTypes.SHORT_REFERENCE_LINK -> linksHandler(node)
+ MarkdownElementTypes.BLOCK_QUOTE -> blockquotesHandler(node)
+ MarkdownElementTypes.UNORDERED_LIST,
+ MarkdownElementTypes.ORDERED_LIST -> listsHandler(node)
+ MarkdownElementTypes.CODE_BLOCK -> codeBlocksHandler(node)
+ MarkdownElementTypes.CODE_FENCE -> codeFencesHandler(node)
+ MarkdownElementTypes.CODE_SPAN -> codeSpansHandler(node)
+ MarkdownElementTypes.IMAGE -> imagesHandler(node)
+ MarkdownTokenTypes.EOL -> DocNodesFromIElementFactory.getInstance(MarkdownTokenTypes.TEXT, body = "\n")
+ MarkdownTokenTypes.WHITE_SPACE -> DocNodesFromIElementFactory.getInstance(MarkdownTokenTypes.TEXT, body = " ")
+ MarkdownTokenTypes.CODE_FENCE_CONTENT,
+ MarkdownTokenTypes.CODE_LINE,
+ MarkdownTokenTypes.TEXT -> DocNodesFromIElementFactory.getInstance(MarkdownTokenTypes.TEXT, body = text.substring(node.startOffset, node.endOffset))
+ else -> defaultHandler(node)
+ }
+ }
+
+ private fun markdownToDocNode(text: String): DocNode {
+
+ val flavourDescriptor = CommonMarkFlavourDescriptor()
+ val markdownAstRoot: ASTNode = IntellijMarkdownParser(flavourDescriptor).buildMarkdownTreeFromString(text)
+
+ return MarkdownVisitor(text).visitNode(markdownAstRoot)
+ }
+
+ override fun parseStringToDocNode(extractedString: String) = markdownToDocNode(extractedString)
+ override fun preparse(text: String) = text
+
+ fun parseFromKDocTag(kDocTag: KDocTag?): DocumentationNode {
+ return if(kDocTag == null)
+ DocumentationNode(emptyList())
+ else
+ DocumentationNode(
+ (listOf(kDocTag) + kDocTag.children).filterIsInstance<KDocTag>().map {
+ when( it.knownTag ) {
+ null -> Description(parseStringToDocNode(it.getContent()))
+ KDocKnownTag.AUTHOR -> Author(parseStringToDocNode(it.getContent()))
+ KDocKnownTag.THROWS -> Throws(parseStringToDocNode(it.getContent()), it.getSubjectName()!!)
+ KDocKnownTag.EXCEPTION -> Throws(parseStringToDocNode(it.getContent()), it.getSubjectName()!!)
+ KDocKnownTag.PARAM -> Param(parseStringToDocNode(it.getContent()), it.getSubjectName()!!)
+ KDocKnownTag.RECEIVER -> Receiver(parseStringToDocNode(it.getContent()))
+ KDocKnownTag.RETURN -> Return(parseStringToDocNode(it.getContent()))
+ KDocKnownTag.SEE -> See(parseStringToDocNode(it.getContent()), it.getSubjectName()!!)
+ KDocKnownTag.SINCE -> Since(parseStringToDocNode(it.getContent()))
+ KDocKnownTag.CONSTRUCTOR -> Constructor(parseStringToDocNode(it.getContent()))
+ KDocKnownTag.PROPERTY -> Property(parseStringToDocNode(it.getContent()), it.getSubjectName()!!)
+ KDocKnownTag.SAMPLE -> Sample(parseStringToDocNode(it.getContent()), it.getSubjectName()!!)
+ KDocKnownTag.SUPPRESS -> Suppress(parseStringToDocNode(it.getContent()))
+ }
+ }
+ )
+ }
+
+
+
+
+} \ No newline at end of file
diff --git a/core/src/main/kotlin/parsers/Parser.kt b/core/src/main/kotlin/parsers/Parser.kt
new file mode 100644
index 00000000..a2a90dcd
--- /dev/null
+++ b/core/src/main/kotlin/parsers/Parser.kt
@@ -0,0 +1,44 @@
+package parsers
+
+import model.doc.*
+import model.doc.Deprecated
+
+
+abstract class Parser {
+
+ abstract fun parseStringToDocNode(extractedString: String): DocNode
+ abstract fun preparse(text: String): String
+
+ fun parse(text: String): DocumentationNode {
+
+ val list = jkdocToListOfPairs(preparse(text))
+
+ val mappedList: List<DocType> = list.map {
+ when(it.first) {
+ "description" -> Description(parseStringToDocNode(it.second))
+ "author" -> Author(parseStringToDocNode(it.second))
+ "version" -> Version(parseStringToDocNode(it.second))
+ "since" -> Since(parseStringToDocNode(it.second))
+ "see" -> See(parseStringToDocNode(it.second.substringAfter(' ')), it.second.substringBefore(' '))
+ "param" -> Param(parseStringToDocNode(it.second.substringAfter(' ')), it.second.substringBefore(' '))
+ "property" -> Property(parseStringToDocNode(it.second.substringAfter(' ')), it.second.substringBefore(' '))
+ "return" -> Return(parseStringToDocNode(it.second))
+ "constructor" -> Constructor(parseStringToDocNode(it.second))
+ "receiver" -> Receiver(parseStringToDocNode(it.second))
+ "throws", "exception" -> Throws(parseStringToDocNode(it.second.substringAfter(' ')), it.second.substringBefore(' '))
+ "deprecated" -> Deprecated(parseStringToDocNode(it.second))
+ "sample" -> Sample(parseStringToDocNode(it.second.substringAfter(' ')), it.second.substringBefore(' '))
+ "suppress" -> Suppress(parseStringToDocNode(it.second))
+ else -> CustomTag(parseStringToDocNode(it.second), it.first)
+ }
+ }
+ return DocumentationNode(mappedList)
+ }
+
+ private fun jkdocToListOfPairs(javadoc: String): List<Pair<String, String>> =
+ "description $javadoc"
+ .split("\n@")
+ .map {
+ it.substringBefore(' ') to it.substringAfter(' ')
+ }
+} \ No newline at end of file
diff --git a/core/src/main/kotlin/parsers/factories/DocNodesFromIElementFactory.kt b/core/src/main/kotlin/parsers/factories/DocNodesFromIElementFactory.kt
new file mode 100644
index 00000000..a93be0d3
--- /dev/null
+++ b/core/src/main/kotlin/parsers/factories/DocNodesFromIElementFactory.kt
@@ -0,0 +1,36 @@
+package org.jetbrains.dokka.parsers.factories
+
+import model.doc.*
+import org.intellij.markdown.IElementType
+import org.intellij.markdown.MarkdownElementTypes
+import org.intellij.markdown.MarkdownTokenTypes
+import org.jetbrains.dokka.links.DRI
+import java.lang.NullPointerException
+
+object DocNodesFromIElementFactory {
+ fun getInstance(type: IElementType, children: List<DocNode> = emptyList(), params: Map<String, String> = emptyMap(), body: String? = null, dri: DRI? = null) =
+ when(type) {
+ MarkdownElementTypes.SHORT_REFERENCE_LINK,
+ MarkdownElementTypes.FULL_REFERENCE_LINK -> if(dri == null) A(children, params) else DocumentationLink(children, params, dri)
+ MarkdownElementTypes.STRONG -> B(children, params)
+ MarkdownElementTypes.BLOCK_QUOTE -> BlockQuote(children, params)
+ MarkdownElementTypes.CODE_SPAN,
+ MarkdownElementTypes.CODE_BLOCK,
+ MarkdownElementTypes.CODE_FENCE -> Code(children, params)
+ MarkdownElementTypes.ATX_1 -> H1(children, params)
+ MarkdownElementTypes.ATX_2 -> H2(children, params)
+ MarkdownElementTypes.ATX_3 -> H3(children, params)
+ MarkdownElementTypes.ATX_4 -> H4(children, params)
+ MarkdownElementTypes.ATX_5 -> H5(children, params)
+ MarkdownElementTypes.ATX_6 -> H6(children, params)
+ MarkdownElementTypes.EMPH -> I(children, params)
+ MarkdownElementTypes.IMAGE -> Img(children, params)
+ MarkdownElementTypes.LIST_ITEM -> Li(children, params)
+ MarkdownElementTypes.ORDERED_LIST -> Ol(children, params)
+ MarkdownElementTypes.UNORDERED_LIST -> Ul(children, params)
+ MarkdownElementTypes.PARAGRAPH -> P(children, params)
+ MarkdownTokenTypes.TEXT -> Text(children, params, body ?: throw NullPointerException("Text body should be at least empty string passed to DocNodes factory!"))
+ MarkdownTokenTypes.HORIZONTAL_RULE -> HorizontalRule()
+ else -> CustomNode(children, params)
+ }
+} \ No newline at end of file
diff --git a/core/src/main/kotlin/parsers/factories/DocNodesFromStringFactory.kt b/core/src/main/kotlin/parsers/factories/DocNodesFromStringFactory.kt
new file mode 100644
index 00000000..49102ed0
--- /dev/null
+++ b/core/src/main/kotlin/parsers/factories/DocNodesFromStringFactory.kt
@@ -0,0 +1,76 @@
+package org.jetbrains.dokka.parsers.factories
+
+import model.doc.*
+import org.jetbrains.dokka.links.DRI
+import java.lang.NullPointerException
+
+object DocNodesFromStringFactory {
+ fun getInstance(name: String, children: List<DocNode> = emptyList(), params: Map<String, String> = emptyMap(), body: String? = null, dri: DRI? = null) =
+ when(name) {
+ "a" -> A(children, params)
+ "big" -> Big(children, params)
+ "b" -> B(children, params)
+ "blockquote" -> BlockQuote(children, params)
+ "bite" -> Cite(children, params)
+ "bode" -> Code(children, params)
+ "dd" -> Dd(children, params)
+ "dfn" -> Dfn(children, params)
+ "dir" -> Dir(children, params)
+ "div" -> Div(children, params)
+ "dl" -> Dl(children, params)
+ "dt" -> Dt(children, params)
+ "Em" -> Em(children, params)
+ "font" -> Font(children, params)
+ "footer" -> Footer(children, params)
+ "frame" -> Frame(children, params)
+ "frameset" -> FrameSet(children, params)
+ "h1" -> H1(children, params)
+ "h2" -> H2(children, params)
+ "h3" -> H3(children, params)
+ "h4" -> H4(children, params)
+ "h5" -> H5(children, params)
+ "h6" -> H6(children, params)
+ "head" -> Head(children, params)
+ "header" -> Header(children, params)
+ "html" -> Html(children, params)
+ "i" -> I(children, params)
+ "iframe" -> IFrame(children, params)
+ "img" -> Img(children, params)
+ "input" -> Input(children, params)
+ "li" -> Li(children, params)
+ "link" -> Link(children, params)
+ "listing" -> Listing(children, params)
+ "main" -> Main(children, params)
+ "menu" -> Menu(children, params)
+ "meta" -> Meta(children, params)
+ "nav" -> Nav(children, params)
+ "noframes" -> NoFrames(children, params)
+ "noscript" -> NoScript(children, params)
+ "ol" -> Ol(children, params)
+ "p" -> P(children, params)
+ "pre" -> Pre(children, params)
+ "script" -> Script(children, params)
+ "section" -> Section(children, params)
+ "small" -> Small(children, params)
+ "span" -> Span(children, params)
+ "strong" -> Strong(children, params)
+ "sub" -> Sub(children, params)
+ "sup" -> Sup(children, params)
+ "table" -> Table(children, params)
+ "#text" -> Text(children, params, body ?: throw NullPointerException("Text body should be at least empty string passed to DocNodes factory!"))
+ "tBody" -> TBody(children, params)
+ "td" -> Td(children, params)
+ "tFoot" -> TFoot(children, params)
+ "th" -> Th(children, params)
+ "tHead" -> THead(children, params)
+ "title" -> Title(children, params)
+ "tr" -> Tr(children, params)
+ "tt" -> Tt(children, params)
+ "u" -> U(children, params)
+ "ul" -> Ul(children, params)
+ "var" -> Var(children, params)
+ "documentationlink" -> DocumentationLink(children, params, dri ?: throw NullPointerException("DRI cannot be passed null while constructing documentation link!"))
+ "hr" -> HorizontalRule()
+ else -> CustomNode(children, params)
+ }
+} \ No newline at end of file