diff options
author | BarkingBad <32793002+BarkingBad@users.noreply.github.com> | 2019-12-13 14:01:25 +0100 |
---|---|---|
committer | Kamil Doległo <kamilok1965@interia.pl> | 2019-12-13 14:02:13 +0100 |
commit | dd017a44ed7baae83f4f09a92d9691231f424eaa (patch) | |
tree | ad9a7b6634ff4e4ead43122b13b0fb6dcdfcea85 /core/src/main/kotlin/parsers | |
parent | 0900b0f1c3a593301a6229ce93a23b8228771d24 (diff) | |
download | dokka-dd017a44ed7baae83f4f09a92d9691231f424eaa.tar.gz dokka-dd017a44ed7baae83f4f09a92d9691231f424eaa.tar.bz2 dokka-dd017a44ed7baae83f4f09a92d9691231f424eaa.zip |
Add abstract structure for MD/HTML comments and MD parser
Diffstat (limited to 'core/src/main/kotlin/parsers')
5 files changed, 451 insertions, 0 deletions
// File: core/src/main/kotlin/parsers/HtmlParser.kt (new file in this commit)
package parsers

import model.doc.*
import org.jetbrains.dokka.parsers.factories.DocNodesFromStringFactory
import org.jsoup.Jsoup
import org.jsoup.nodes.Node
import org.jsoup.select.NodeFilter
import org.jsoup.select.NodeTraversor

/**
 * [Parser] for HTML comments: the text is parsed with jsoup and the resulting
 * node tree is converted into [DocNode]s through [DocNodesFromStringFactory].
 */
class HtmlParser : Parser() {

    /**
     * jsoup [NodeFilter] that builds the [DocNode] tree bottom-up.
     *
     * Finished nodes are collected per depth in [nodesCache]; when the traversal
     * leaves a node that is shallower than [currentDepth], everything collected
     * one level below becomes that node's children.
     */
    inner class NodeFilterImpl : NodeFilter {

        // Wrapper nodes that are traversed but never converted into DocNodes.
        private val ignoredNodeNames = listOf("#document", "html", "head")

        private val nodesCache: MutableMap<Int, MutableList<DocNode>> = mutableMapOf()
        private var currentDepth = 0

        /**
         * Returns the first node stored for the depth the traversal ended on.
         * Must only be called after a traversal; it fails if nothing was collected.
         */
        fun collect(): DocNode = nodesCache[currentDepth]!![0]

        /**
         * Called when the traversal leaves [node]: converts it into a [DocNode]
         * and stores it in the list for [depth].
         */
        override fun tail(node: Node?, depth: Int): NodeFilter.FilterResult {
            val nodeName = node!!.nodeName()
            val nodeAttributes = node.attributes()

            if (nodeName in ignoredNodeNames)
                return NodeFilter.FilterResult.CONTINUE

            val body: String
            val params: Map<String, String>

            if (nodeName != "#text") {
                body = ""
                params = nodeAttributes.associate { it.key to it.value }
            } else {
                // Text nodes carry their content in the "#text" attribute.
                body = nodeAttributes["#text"]
                params = emptyMap()
            }

            val docNode = if (depth < currentDepth) {
                // Leaving a parent: adopt everything collected one level below, then reset that level.
                DocNodesFromStringFactory.getInstance(
                    nodeName,
                    nodesCache[currentDepth].orEmpty().toList(),
                    params,
                    body
                ).also {
                    nodesCache[currentDepth] = mutableListOf()
                    currentDepth = depth
                }
            } else {
                DocNodesFromStringFactory.getInstance(nodeName, emptyList(), params, body)
            }

            // getOrPut (not getOrDefault): the node must land in the cached list even
            // when no list has been registered for this depth yet.
            nodesCache.getOrPut(depth) { mutableListOf() }.add(docNode)
            return NodeFilter.FilterResult.CONTINUE
        }

        /**
         * Called when the traversal enters a node: opens a fresh child list
         * whenever the traversal goes deeper than [currentDepth].
         */
        override fun head(node: Node?, depth: Int): NodeFilter.FilterResult {
            if (currentDepth < depth) {
                currentDepth = depth
                nodesCache[currentDepth] = mutableListOf()
            }
            return NodeFilter.FilterResult.CONTINUE
        }
    }

    // Matches inline "{@link target}" / "{@link target label}" tags (non-greedy up to the first '}').
    private val inlineLinkRegex = Regex("\\{@link .*?}")

    /** Parses [string] as an HTML document and converts it into a [DocNode] tree. */
    private fun htmlToDocNode(string: String): DocNode {
        val document = Jsoup.parse(string)
        val nodeFilterImpl = NodeFilterImpl()
        NodeTraversor.filter(nodeFilterImpl, document.root())
        return nodeFilterImpl.collect()
    }

    /**
     * Rewrites inline `{@link ...}` tags into `<documentationlink>` elements.
     *
     * A tag with only a target uses the target as its label; a tag with a target
     * and a single-word label uses that label. Tags that split into any other
     * number of space-separated parts are left untouched.
     */
    private fun replaceLinksWithHrefs(javadoc: String): String = inlineLinkRegex.replace(javadoc) { match ->
        // Drop the closing '}' and split into "{@link", target and optional label.
        val split = match.value.dropLast(1).split(" ")
        when (split.size) {
            2 -> "<documentationlink href=\"${split[1]}\">${split[1]}</documentationlink>"
            3 -> "<documentationlink href=\"${split[1]}\">${split[2]}</documentationlink>"
            else -> match.value
        }
    }

    override fun parseStringToDocNode(extractedString: String) = htmlToDocNode(extractedString)
    override fun preparse(text: String) = replaceLinksWithHrefs(text)
}


// File: core/src/main/kotlin/parsers/MarkdownParser.kt (new file in this commit)
package parsers

import model.doc.*
import org.intellij.markdown.MarkdownElementTypes
import org.intellij.markdown.MarkdownTokenTypes
import org.intellij.markdown.ast.ASTNode
import org.intellij.markdown.ast.impl.ListItemCompositeNode
import org.intellij.markdown.flavours.commonmark.CommonMarkFlavourDescriptor
import org.jetbrains.dokka.analysis.DokkaResolutionFacade
import org.jetbrains.dokka.links.DRI
import org.jetbrains.dokka.parsers.factories.DocNodesFromIElementFactory
import org.jetbrains.kotlin.descriptors.DeclarationDescriptor
import org.jetbrains.kotlin.idea.kdoc.resolveKDocLink
import org.jetbrains.kotlin.kdoc.parser.KDocKnownTag
import org.jetbrains.kotlin.kdoc.psi.impl.KDocTag
import org.intellij.markdown.parser.MarkdownParser as IntellijMarkdownParser

/**
 * [Parser] for Markdown (KDoc) comments: the text is parsed with the IntelliJ
 * Markdown parser (CommonMark flavour) and the AST is converted into [DocNode]s
 * through [DocNodesFromIElementFactory].
 *
 * @param resolutionFacade used to resolve KDoc links to declarations
 * @param declarationDescriptor declaration the comment belongs to; links are resolved relative to it
 */
class MarkdownParser(
    private val resolutionFacade: DokkaResolutionFacade,
    private val declarationDescriptor: DeclarationDescriptor
) : Parser() {

    /**
     * Converts a Markdown AST into [DocNode]s.
     *
     * @param text the source the AST was built from; node offsets index into it
     */
    inner class MarkdownVisitor(val text: String) {

        /**
         * Header: the children of the ATX content, minus its first child
         * (presumably the whitespace after the '#' markers — confirm against the AST).
         * A header without content yields a header node with no children.
         */
        private fun headersHandler(node: ASTNode): DocNode =
            DocNodesFromIElementFactory.getInstance(
                node.type,
                node.children
                    .find { it.type == MarkdownTokenTypes.ATX_CONTENT }
                    ?.let { visitNode(it).children.drop(1) }
                    .orEmpty()
            )

        private fun horizontalRulesHandler(node: ASTNode): DocNode =
            DocNodesFromIElementFactory.getInstance(MarkdownTokenTypes.HORIZONTAL_RULE)

        /**
         * Emphasis / strong: keeps everything between the delimiter tokens.
         *
         * NOTE(review): assumes the AST wraps the content in one delimiter token per
         * side for EMPH and two per side for STRONG — confirm against intellij-markdown.
         * Taking only the middle child (the previous behaviour) lost content whenever
         * the emphasised span consisted of more than one token.
         */
        private fun emphasisHandler(node: ASTNode): DocNode {
            val delimiterCount = if (node.type == MarkdownElementTypes.STRONG) 2 else 1
            return DocNodesFromIElementFactory.getInstance(
                node.type,
                children = node.children
                    .drop(delimiterCount)
                    .dropLast(delimiterCount)
                    .map { visitNode(it) }
            )
        }

        /** Block quote: every child except the first one is converted. */
        private fun blockquotesHandler(node: ASTNode): DocNode =
            DocNodesFromIElementFactory.getInstance(node.type, children = node.children.drop(1).map { visitNode(it) })

        /**
         * Ordered / unordered list.
         *
         * Each list item becomes an item node built from its children (first child,
         * whitespace and EOL tokens skipped). When the last child of an item is itself
         * a list, that nested list is emitted as a sibling right after the item instead
         * of as a child of it. For ordered lists the "start" param is taken from the
         * first token of the first item with its last two characters removed.
         */
        private fun listsHandler(node: ASTNode): DocNode {
            val listTypes = listOf(MarkdownElementTypes.ORDERED_LIST, MarkdownElementTypes.UNORDERED_LIST)
            val skippedTokens = listOf(MarkdownTokenTypes.WHITE_SPACE, MarkdownTokenTypes.EOL)

            // Built without touching the AST: the nested list is split off on a copy.
            val children = node.children.filterIsInstance<ListItemCompositeNode>().flatMap { item ->
                val nestedList = item.children.last().takeIf { it.type in listTypes }
                val ownChildren = if (nestedList == null) item.children else item.children.dropLast(1)

                val itemNode = if (item.type == MarkdownElementTypes.LIST_ITEM)
                    DocNodesFromIElementFactory.getInstance(
                        item.type,
                        children = ownChildren
                            .drop(1)
                            .filter { it.type !in skippedTokens }
                            .map { visitNode(it) }
                    )
                else
                    visitNode(item)

                listOfNotNull(itemNode, nestedList?.let { visitNode(it) })
            }

            val params =
                if (node.type == MarkdownElementTypes.ORDERED_LIST) {
                    val listNumberNode = node.children.first().children.first()
                    mapOf("start" to text.substring(listNumberNode.startOffset, listNumberNode.endOffset).dropLast(2))
                } else
                    emptyMap()

            return DocNodesFromIElementFactory.getInstance(node.type, children = children, params = params)
        }

        /**
         * Reference link. The label text (without its enclosing brackets) is used as
         * "href". Labels starting with "http" or "www" are treated as external; any
         * other label is resolved as a KDoc link. When the link resolves to several
         * declarations the first one is used, and when it does not resolve at all the
         * node is created without a [DRI] instead of failing the whole parse.
         */
        private fun linksHandler(node: ASTNode): DocNode {
            val linkNode = node.children.find { it.type == MarkdownElementTypes.LINK_LABEL }!!
            val link = text.substring(linkNode.startOffset + 1, linkNode.endOffset - 1)

            val dri: DRI? = if (link.startsWith("http") || link.startsWith("www")) {
                null
            } else {
                resolveKDocLink(
                    resolutionFacade.resolveSession.bindingContext,
                    resolutionFacade,
                    declarationDescriptor,
                    null,
                    link.split('.')
                ).firstOrNull()?.let { DRI.from(it) }
            }

            val href = mapOf("href" to link)
            return when (node.type) {
                MarkdownElementTypes.FULL_REFERENCE_LINK -> DocNodesFromIElementFactory.getInstance(
                    node.type,
                    params = href,
                    // Link text without its first and last child (the enclosing brackets, presumably).
                    children = node.children
                        .find { it.type == MarkdownElementTypes.LINK_TEXT }!!
                        .children
                        .drop(1)
                        .dropLast(1)
                        .map { visitNode(it) },
                    dri = dri
                )
                else -> DocNodesFromIElementFactory.getInstance(
                    node.type,
                    params = href,
                    children = listOf(visitNode(linkNode)),
                    dri = dri
                )
            }
        }

        /** Image: "src" is the text of the second child of the link label; the link text becomes the child. */
        private fun imagesHandler(node: ASTNode): DocNode {
            val linkNode = node.children.last().children.find { it.type == MarkdownElementTypes.LINK_LABEL }!!.children[1]
            val link = text.substring(linkNode.startOffset, linkNode.endOffset)
            val src = mapOf("src" to link)
            return DocNodesFromIElementFactory.getInstance(
                node.type,
                params = src,
                children = listOf(
                    visitNode(node.children.last().children.find { it.type == MarkdownElementTypes.LINK_TEXT }!!)
                )
            )
        }

        /** Code span: a single text child holding the span without its first and last character, newlines replaced by spaces. */
        private fun codeSpansHandler(node: ASTNode): DocNode =
            DocNodesFromIElementFactory.getInstance(
                node.type,
                children = listOf(
                    DocNodesFromIElementFactory.getInstance(
                        MarkdownTokenTypes.TEXT,
                        body = text.substring(node.startOffset + 1, node.endOffset - 1).replace('\n', ' ').trimIndent()
                    )
                )
            )

        /** Code fence: the content tokens become children; the fence language, if present, becomes the "lang" param. */
        private fun codeFencesHandler(node: ASTNode): DocNode =
            DocNodesFromIElementFactory.getInstance(
                node.type,
                children = node
                    .children
                    .filter { it.type == MarkdownTokenTypes.CODE_FENCE_CONTENT }
                    .map { visitNode(it) },
                params = node
                    .children
                    .find { it.type == MarkdownTokenTypes.FENCE_LANG }
                    ?.let { mapOf("lang" to text.substring(it.startOffset, it.endOffset)) }
                    ?: emptyMap()
            )

        private fun codeBlocksHandler(node: ASTNode): DocNode =
            DocNodesFromIElementFactory.getInstance(node.type, children = node.children.map { visitNode(it) })

        /** Fallback for every node type without a dedicated handler: a paragraph wrapping the converted children. */
        private fun defaultHandler(node: ASTNode): DocNode =
            DocNodesFromIElementFactory.getInstance(MarkdownElementTypes.PARAGRAPH, children = node.children.map { visitNode(it) })

        /** Converts [node] (and, recursively, its children) by dispatching on the node type. */
        fun visitNode(node: ASTNode): DocNode =
            when (node.type) {
                MarkdownElementTypes.ATX_1,
                MarkdownElementTypes.ATX_2,
                MarkdownElementTypes.ATX_3,
                MarkdownElementTypes.ATX_4,
                MarkdownElementTypes.ATX_5,
                MarkdownElementTypes.ATX_6 -> headersHandler(node)
                MarkdownTokenTypes.HORIZONTAL_RULE -> horizontalRulesHandler(node)
                MarkdownElementTypes.STRONG,
                MarkdownElementTypes.EMPH -> emphasisHandler(node)
                MarkdownElementTypes.FULL_REFERENCE_LINK,
                MarkdownElementTypes.SHORT_REFERENCE_LINK -> linksHandler(node)
                MarkdownElementTypes.BLOCK_QUOTE -> blockquotesHandler(node)
                MarkdownElementTypes.UNORDERED_LIST,
                MarkdownElementTypes.ORDERED_LIST -> listsHandler(node)
                MarkdownElementTypes.CODE_BLOCK -> codeBlocksHandler(node)
                MarkdownElementTypes.CODE_FENCE -> codeFencesHandler(node)
                MarkdownElementTypes.CODE_SPAN -> codeSpansHandler(node)
                MarkdownElementTypes.IMAGE -> imagesHandler(node)
                MarkdownTokenTypes.EOL -> DocNodesFromIElementFactory.getInstance(MarkdownTokenTypes.TEXT, body = "\n")
                MarkdownTokenTypes.WHITE_SPACE -> DocNodesFromIElementFactory.getInstance(MarkdownTokenTypes.TEXT, body = " ")
                MarkdownTokenTypes.CODE_FENCE_CONTENT,
                MarkdownTokenTypes.CODE_LINE,
                MarkdownTokenTypes.TEXT -> DocNodesFromIElementFactory.getInstance(
                    MarkdownTokenTypes.TEXT,
                    body = text.substring(node.startOffset, node.endOffset)
                )
                else -> defaultHandler(node)
            }
    }

    /** Builds the CommonMark AST for [text] and converts it into a [DocNode] tree. */
    private fun markdownToDocNode(text: String): DocNode {
        val flavourDescriptor = CommonMarkFlavourDescriptor()
        val markdownAstRoot: ASTNode = IntellijMarkdownParser(flavourDescriptor).buildMarkdownTreeFromString(text)

        return MarkdownVisitor(text).visitNode(markdownAstRoot)
    }

    override fun parseStringToDocNode(extractedString: String) = markdownToDocNode(extractedString)
    override fun preparse(text: String) = text

    /**
     * Builds a [DocumentationNode] from [kDocTag] and its direct [KDocTag] children.
     *
     * A tag without a known type is treated as the description. Tags that carry a
     * subject (param, throws, see, ...) fall back to an empty name when the subject
     * is missing, so a malformed tag does not abort the whole conversion.
     *
     * @param kDocTag root tag of the comment, or `null` when there is no comment
     * @return the converted documentation; empty when [kDocTag] is `null`
     */
    fun parseFromKDocTag(kDocTag: KDocTag?): DocumentationNode {
        if (kDocTag == null)
            return DocumentationNode(emptyList())

        return DocumentationNode(
            (listOf(kDocTag) + kDocTag.children).filterIsInstance<KDocTag>().map {
                when (it.knownTag) {
                    null -> Description(parseStringToDocNode(it.getContent()))
                    KDocKnownTag.AUTHOR -> Author(parseStringToDocNode(it.getContent()))
                    KDocKnownTag.THROWS -> Throws(parseStringToDocNode(it.getContent()), it.getSubjectName().orEmpty())
                    KDocKnownTag.EXCEPTION -> Throws(parseStringToDocNode(it.getContent()), it.getSubjectName().orEmpty())
                    KDocKnownTag.PARAM -> Param(parseStringToDocNode(it.getContent()), it.getSubjectName().orEmpty())
                    KDocKnownTag.RECEIVER -> Receiver(parseStringToDocNode(it.getContent()))
                    KDocKnownTag.RETURN -> Return(parseStringToDocNode(it.getContent()))
                    KDocKnownTag.SEE -> See(parseStringToDocNode(it.getContent()), it.getSubjectName().orEmpty())
                    KDocKnownTag.SINCE -> Since(parseStringToDocNode(it.getContent()))
                    KDocKnownTag.CONSTRUCTOR -> Constructor(parseStringToDocNode(it.getContent()))
                    KDocKnownTag.PROPERTY -> Property(parseStringToDocNode(it.getContent()), it.getSubjectName().orEmpty())
                    KDocKnownTag.SAMPLE -> Sample(parseStringToDocNode(it.getContent()), it.getSubjectName().orEmpty())
                    KDocKnownTag.SUPPRESS -> Suppress(parseStringToDocNode(it.getContent()))
                }
            }
        )
    }
}
// File: core/src/main/kotlin/parsers/Parser.kt (new file in this commit)
package parsers

import model.doc.*
import model.doc.Deprecated


/**
 * Base class for documentation-comment parsers.
 *
 * [parse] splits a comment into its block tags and turns each of them into a
 * [DocType]; subclasses decide how the text of a single tag becomes a [DocNode]
 * ([parseStringToDocNode]) and how the raw comment is preprocessed ([preparse]).
 */
abstract class Parser {

    /** Converts the text of a single tag into a [DocNode] tree. */
    abstract fun parseStringToDocNode(extractedString: String): DocNode

    /** Preprocesses the raw comment text before it is split into tags. */
    abstract fun preparse(text: String): String

    /**
     * Parses a whole comment.
     *
     * The text before the first block tag becomes the description. For tags that
     * carry a subject (see, param, property, throws/exception, sample) the first
     * space-separated word is the subject and the remainder is the content.
     * Unknown tags are kept as [CustomTag].
     *
     * @param text raw comment text
     * @return the documentation built from all tags, in source order
     */
    fun parse(text: String): DocumentationNode {

        val list = jkdocToListOfPairs(preparse(text))

        val mappedList: List<DocType> = list.map { (tag, content) ->
            when (tag) {
                "description" -> Description(parseStringToDocNode(content))
                "author" -> Author(parseStringToDocNode(content))
                "version" -> Version(parseStringToDocNode(content))
                "since" -> Since(parseStringToDocNode(content))
                "see" -> See(subjectDescription(content), subjectName(content))
                "param" -> Param(subjectDescription(content), subjectName(content))
                "property" -> Property(subjectDescription(content), subjectName(content))
                "return" -> Return(parseStringToDocNode(content))
                "constructor" -> Constructor(parseStringToDocNode(content))
                "receiver" -> Receiver(parseStringToDocNode(content))
                "throws", "exception" -> Throws(subjectDescription(content), subjectName(content))
                "deprecated" -> Deprecated(parseStringToDocNode(content))
                "sample" -> Sample(subjectDescription(content), subjectName(content))
                "suppress" -> Suppress(parseStringToDocNode(content))
                else -> CustomTag(parseStringToDocNode(content), tag)
            }
        }
        return DocumentationNode(mappedList)
    }

    /** Subject of a tag: everything before the first space (the whole content when there is no space). */
    private fun subjectName(content: String): String = content.substringBefore(' ')

    /**
     * Parsed text following the subject. The explicit empty default matters:
     * without it a tag consisting of a subject only would get the subject
     * repeated as its description.
     */
    private fun subjectDescription(content: String): DocNode =
        parseStringToDocNode(content.substringAfter(' ', ""))

    /**
     * Splits a comment into (tag name, tag text) pairs.
     *
     * The leading text is stored under the synthetic "description" tag; every
     * following tag starts at a line beginning with '@'. A tag without any text
     * gets an empty string rather than its own name.
     */
    private fun jkdocToListOfPairs(javadoc: String): List<Pair<String, String>> =
        "description $javadoc"
            .split("\n@")
            .map {
                it.substringBefore(' ') to it.substringAfter(' ', "")
            }
}
// File: core/src/main/kotlin/parsers/factories/DocNodesFromIElementFactory.kt (new file in this commit)
package org.jetbrains.dokka.parsers.factories

import model.doc.*
import org.intellij.markdown.IElementType
import org.intellij.markdown.MarkdownElementTypes
import org.intellij.markdown.MarkdownTokenTypes
import org.jetbrains.dokka.links.DRI
import java.lang.NullPointerException

/** Maps Markdown AST element types onto the corresponding documentation nodes. */
object DocNodesFromIElementFactory {

    /**
     * Creates the documentation node that corresponds to [type].
     *
     * @param type Markdown element or token type of the source node
     * @param children already converted child nodes
     * @param params attributes of the node (e.g. "href", "src", "lang")
     * @param body text content; required for [MarkdownTokenTypes.TEXT], unused otherwise
     * @param dri target of a reference link; a link without it becomes a plain anchor
     * @throws NullPointerException when [type] is [MarkdownTokenTypes.TEXT] and [body] is `null`
     */
    fun getInstance(
        type: IElementType,
        children: List<DocNode> = emptyList(),
        params: Map<String, String> = emptyMap(),
        body: String? = null,
        dri: DRI? = null
    ) =
        when (type) {
            // Leaf tokens
            MarkdownTokenTypes.TEXT -> Text(
                children,
                params,
                body ?: throw NullPointerException("Text body should be at least empty string passed to DocNodes factory!")
            )
            MarkdownTokenTypes.HORIZONTAL_RULE -> HorizontalRule()

            // Headers
            MarkdownElementTypes.ATX_1 -> H1(children, params)
            MarkdownElementTypes.ATX_2 -> H2(children, params)
            MarkdownElementTypes.ATX_3 -> H3(children, params)
            MarkdownElementTypes.ATX_4 -> H4(children, params)
            MarkdownElementTypes.ATX_5 -> H5(children, params)
            MarkdownElementTypes.ATX_6 -> H6(children, params)

            // Inline formatting
            MarkdownElementTypes.STRONG -> B(children, params)
            MarkdownElementTypes.EMPH -> I(children, params)

            // Links and images
            MarkdownElementTypes.FULL_REFERENCE_LINK,
            MarkdownElementTypes.SHORT_REFERENCE_LINK ->
                if (dri != null) DocumentationLink(children, params, dri) else A(children, params)
            MarkdownElementTypes.IMAGE -> Img(children, params)

            // Code
            MarkdownElementTypes.CODE_FENCE,
            MarkdownElementTypes.CODE_BLOCK,
            MarkdownElementTypes.CODE_SPAN -> Code(children, params)

            // Block structure
            MarkdownElementTypes.PARAGRAPH -> P(children, params)
            MarkdownElementTypes.BLOCK_QUOTE -> BlockQuote(children, params)
            MarkdownElementTypes.UNORDERED_LIST -> Ul(children, params)
            MarkdownElementTypes.ORDERED_LIST -> Ol(children, params)
            MarkdownElementTypes.LIST_ITEM -> Li(children, params)

            // Anything without a dedicated node type
            else -> CustomNode(children, params)
        }
}
// File: core/src/main/kotlin/parsers/factories/DocNodesFromStringFactory.kt (new file in this commit)
package org.jetbrains.dokka.parsers.factories

import model.doc.*
import org.jetbrains.dokka.links.DRI
import java.lang.NullPointerException

/** Maps HTML node names onto the corresponding documentation nodes. */
object DocNodesFromStringFactory {

    /**
     * Creates the documentation node that corresponds to the HTML node [name].
     *
     * Names are matched exactly and are expected in lower case, which is how the
     * jsoup HTML parser reports tag names.
     *
     * @param name node name, e.g. "p", "h1" or "#text"
     * @param children already converted child nodes
     * @param params attributes of the node
     * @param body text content; required for "#text", unused otherwise
     * @param dri target of a "documentationlink"; without it the link becomes a plain
     *   anchor, mirroring how DocNodesFromIElementFactory treats reference links
     * @throws NullPointerException when [name] is "#text" and [body] is `null`
     */
    fun getInstance(
        name: String,
        children: List<DocNode> = emptyList(),
        params: Map<String, String> = emptyMap(),
        body: String? = null,
        dri: DRI? = null
    ) =
        when (name) {
            "a" -> A(children, params)
            "big" -> Big(children, params)
            "b" -> B(children, params)
            "blockquote" -> BlockQuote(children, params)
            "cite" -> Cite(children, params)
            "code" -> Code(children, params)
            "dd" -> Dd(children, params)
            "dfn" -> Dfn(children, params)
            "dir" -> Dir(children, params)
            "div" -> Div(children, params)
            "dl" -> Dl(children, params)
            "dt" -> Dt(children, params)
            "em" -> Em(children, params)
            "font" -> Font(children, params)
            "footer" -> Footer(children, params)
            "frame" -> Frame(children, params)
            "frameset" -> FrameSet(children, params)
            "h1" -> H1(children, params)
            "h2" -> H2(children, params)
            "h3" -> H3(children, params)
            "h4" -> H4(children, params)
            "h5" -> H5(children, params)
            "h6" -> H6(children, params)
            "head" -> Head(children, params)
            "header" -> Header(children, params)
            "html" -> Html(children, params)
            "i" -> I(children, params)
            "iframe" -> IFrame(children, params)
            "img" -> Img(children, params)
            "input" -> Input(children, params)
            "li" -> Li(children, params)
            "link" -> Link(children, params)
            "listing" -> Listing(children, params)
            "main" -> Main(children, params)
            "menu" -> Menu(children, params)
            "meta" -> Meta(children, params)
            "nav" -> Nav(children, params)
            "noframes" -> NoFrames(children, params)
            "noscript" -> NoScript(children, params)
            "ol" -> Ol(children, params)
            "p" -> P(children, params)
            "pre" -> Pre(children, params)
            "script" -> Script(children, params)
            "section" -> Section(children, params)
            "small" -> Small(children, params)
            "span" -> Span(children, params)
            "strong" -> Strong(children, params)
            "sub" -> Sub(children, params)
            "sup" -> Sup(children, params)
            "table" -> Table(children, params)
            "#text" -> Text(
                children,
                params,
                body ?: throw NullPointerException("Text body should be at least empty string passed to DocNodes factory!")
            )
            "tbody" -> TBody(children, params)
            "td" -> Td(children, params)
            "tfoot" -> TFoot(children, params)
            "th" -> Th(children, params)
            "thead" -> THead(children, params)
            "title" -> Title(children, params)
            "tr" -> Tr(children, params)
            "tt" -> Tt(children, params)
            "u" -> U(children, params)
            "ul" -> Ul(children, params)
            "var" -> Var(children, params)
            // A link whose target was not resolved degrades to a plain anchor instead of aborting the parse.
            "documentationlink" -> if (dri == null) A(children, params) else DocumentationLink(children, params, dri)
            "hr" -> HorizontalRule()
            else -> CustomNode(children, params)
        }
}
\ No newline at end of file |