diff options
author | Linnea Gräf <nea@nea.moe> | 2024-03-24 14:35:37 +0100 |
---|---|---|
committer | Linnea Gräf <nea@nea.moe> | 2024-03-24 14:35:37 +0100 |
commit | c57016dc5a1f502b637c2f77bcf724165387ef83 (patch) | |
tree | 40f82e94e0f3229d081860d893a0433244bcba4c /src/main/kotlin/moe/nea | |
download | blog-infra-c57016dc5a1f502b637c2f77bcf724165387ef83.tar.gz blog-infra-c57016dc5a1f502b637c2f77bcf724165387ef83.tar.bz2 blog-infra-c57016dc5a1f502b637c2f77bcf724165387ef83.zip |
Add markdown parser
Diffstat (limited to 'src/main/kotlin/moe/nea')
25 files changed, 446 insertions, 0 deletions
// File: src/main/kotlin/moe/nea/blog/md/Begin.kt (package moe.nea.blog.md)
import java.io.PrintStream

/**
 * Inline token that renders as nothing.
 *
 * The parsers in this commit create a fresh [Begin] as the initial "lookback"
 * token before any real inline token has been read.
 */
class Begin : MarkdownFormat {
    /** Deliberately writes nothing to [printStream]. */
    override fun debugFormat(printStream: PrintStream) = Unit
}
// File: src/main/kotlin/moe/nea/blog/md/BlockParser.kt (package moe.nea.blog.md)

/**
 * Strategy for recognising and parsing one kind of block-level markdown element.
 */
interface BlockParser {
    /**
     * Priority of this parser. When several parsers detect the same line,
     * [MarkdownParser.findParserFor] picks the one with the highest value.
     */
    val prio: Int

    /** Returns `true` when [line] is the first line of a block this parser handles. */
    fun detect(line: String): Boolean

    /**
     * Reads the block from [parser], consuming the lines that belong to it,
     * and returns the parsed element.
     */
    fun parse(parser: MarkdownParser): MarkdownBlock
}
// File: src/main/kotlin/moe/nea/blog/md/BlogPost.kt (package moe.nea.blog.md)
import java.io.File

/**
 * Debug entry point: parses `data/test.md` with the default parsers and
 * dumps the resulting document to standard output.
 */
fun main() {
    val source = File("data/test.md").readText()
    val document = MarkdownParser(source)
        .also { it.addDefaultParsers() }
        .readDocument()
    document.debugFormat(System.out)
}
// File: src/main/kotlin/moe/nea/blog/md/Bold.kt (package moe.nea.blog.md)
import java.io.PrintStream

/** Inline element that marks [inner] as bold. */
data class Bold(val inner: MarkdownFormat) : MarkdownFormat {
    /** Writes [inner] surrounded by `<b>` … `</b>` to [printStream]. */
    override fun debugFormat(printStream: PrintStream) {
        with(printStream) {
            print("<b>")
            inner.debugFormat(this)
            print("</b>")
        }
    }
}
// Files: src/main/kotlin/moe/nea/blog/md/CodeBlock.kt and
//        src/main/kotlin/moe/nea/blog/md/CodeBlockParser.kt (package moe.nea.blog.md)
import moe.nea.blog.util.indent
import java.io.PrintStream

/**
 * A fenced code block.
 *
 * @property language text that followed the opening fence (may be empty)
 * @property lines the lines between the fences, in order
 */
data class CodeBlock(val language: String, val lines: List<String>) : MarkdownBlock {
    /**
     * Writes an opening `<code lang=…>` line, each code line and a closing
     * `</code>` line, every one of them prefixed with [indent].
     */
    override fun debugFormat(indent: Int, printStream: PrintStream) {
        fun emit(text: String) {
            printStream.indent(indent)
            printStream.println(text)
        }

        emit("<code lang=$language>")
        for (codeLine in lines) emit(codeLine)
        emit("</code>")
    }
}

/** Block parser for code blocks fenced by lines starting with three backticks. */
object CodeBlockParser : BlockParser {
    override val prio: Int
        get() = 10

    /** A code block starts on any line that begins with three backticks. */
    override fun detect(line: String): Boolean = line.startsWith("```")

    /**
     * Consumes the opening fence, then every following line up to and including
     * a line that is exactly three backticks.
     *
     * Fails with "Unfinished code block" when the input ends before the closing fence.
     */
    override fun parse(parser: MarkdownParser): MarkdownBlock {
        val language = parser.consumeLine()!!.removePrefix("```")
        // Lazily pull lines; takeWhile consumes the closing fence and then stops.
        val body = generateSequence { parser.consumeLine() ?: error("Unfinished code block") }
            .takeWhile { it != "```" }
            .toList()
        return CodeBlock(language, body)
    }
}
// File: src/main/kotlin/moe/nea/blog/md/Document.kt (package moe.nea.blog.md)
import java.io.PrintStream

/** A parsed markdown document: the ordered [list] of its top-level blocks. */
data class Document(
    val list: List<MarkdownBlock>,
) : MarkdownElement {
    /** Writes every block to [printStream] in order, each at indentation 0. */
    fun debugFormat(printStream: PrintStream) {
        for (block in list) {
            block.debugFormat(0, printStream)
        }
    }
}
// File: src/main/kotlin/moe/nea/blog/md/FormatSequence.kt (package moe.nea.blog.md)
import java.io.PrintStream

/** Inline element made of several inline elements rendered one after another. */
data class FormatSequence(val list: List<MarkdownFormat>) : MarkdownFormat {
    /** Convenience constructor taking the elements as varargs. */
    constructor(vararg list: MarkdownFormat) : this(list.toList())

    /** Writes each element of [list] to [printStream] in order, with nothing in between. */
    override fun debugFormat(printStream: PrintStream) {
        for (part in list) {
            part.debugFormat(printStream)
        }
    }
}
// File: src/main/kotlin/moe/nea/blog/md/Header.kt (package moe.nea.blog.md)
import moe.nea.blog.util.indent
import java.io.PrintStream

/**
 * A heading block.
 *
 * @property level heading level as computed by the parser
 * @property text the heading text
 */
data class Header(val level: Int, val text: String) : MarkdownBlock {
    /** Writes one indented line of the form `header level=<level>: <text>`. */
    override fun debugFormat(indent: Int, printStream: PrintStream) {
        val rendered = "header level=$level: $text"
        printStream.indent(indent)
        printStream.println(rendered)
    }
}
// File: src/main/kotlin/moe/nea/blog/md/HeaderParser.kt (package moe.nea.blog.md)

/** Block parser for ATX-style headings: one or more `#`, a space, then the heading text. */
object HeaderParser : BlockParser {
    // Compiled once; detect() runs for every candidate line.
    private val headerPattern = "#+ .*".toRegex()

    /** Returns `true` when the whole [line] is one or more `#` followed by a space and any text. */
    override fun detect(line: String): Boolean = headerPattern.matches(line)

    /**
     * Consumes one line and turns it into a [Header].
     *
     * The level is the index of the first space, which for a line accepted by
     * [detect] equals the number of leading `#` characters. The text is the
     * trimmed remainder of the line.
     */
    override fun parse(parser: MarkdownParser): MarkdownBlock {
        val line = parser.consumeLine() ?: error("Expected a header line but no line is available")
        val level = line.indexOf(" ")
        val text = line.substring(level).trim()
        return Header(level, text)
    }

    override val prio: Int
        get() = 10
}
// File: src/main/kotlin/moe/nea/blog/md/InlineParser.kt (package moe.nea.blog.md)

/**
 * Strategy for recognising and parsing one kind of inline markdown syntax.
 */
interface InlineParser {
    /**
     * Characters that belong to this parser's syntax.
     * [MarkdownParser.parseInlineTextOnce] ends a plain word at any of them.
     */
    val specialSyntax: Set<Char>

    /**
     * Returns `true` when this parser should handle the start of [rest].
     *
     * @param lookback the inline token produced immediately before [rest]
     * @param rest the text that has not been parsed yet
     */
    fun detect(lookback: MarkdownFormat, rest: String): Boolean

    /**
     * Parses one element from the start of [text].
     *
     * @return the parsed element paired with the text that remains after it
     */
    fun parse(parser: MarkdownParser, text: String): Pair<MarkdownFormat, String>
}
// File: src/main/kotlin/moe/nea/blog/md/Italics.kt (package moe.nea.blog.md)
import java.io.PrintStream

/** Inline element that marks [inner] as italic. */
data class Italics(val inner: MarkdownFormat) : MarkdownFormat {
    /** Writes [inner] surrounded by `<i>` … `</i>` to [printStream]. */
    override fun debugFormat(printStream: PrintStream) {
        with(printStream) {
            print("<i>")
            inner.debugFormat(this)
            print("</i>")
        }
    }
}
// File: src/main/kotlin/moe/nea/blog/md/ItalicsParser.kt (package moe.nea.blog.md)

/**
 * Inline parser for `*italic*`, `**bold**` and `***bold italic***` runs, including
 * the split form where a three-star opening is closed in two steps
 * (for example `***a* b**`).
 */
object ItalicsParser : InlineParser {
    // Compiled once; detect() is called for every inline token.
    private val openingPattern = "\\*+[^ ].*".toRegex()

    /** Matches after whitespace when [rest] starts with stars followed by a non-space character. */
    override fun detect(lookback: MarkdownFormat, rest: String): Boolean =
        lookback is Whitespace && openingPattern.matches(rest)

    override val specialSyntax: Set<Char>
        get() = setOf('*')

    /**
     * Parses one emphasis run from the start of [text].
     *
     * The opening run must be 1 to 3 stars. Content is read until a non-whitespace
     * token is directly followed by `*`. If that closing run has as many stars as
     * the opening run (extra stars are left unconsumed), the content is wrapped
     * once according to the opening count. Otherwise the closed part is wrapped
     * according to the partial closing count, a second stretch of content is read,
     * and the pair is wrapped according to the stars that were still open.
     *
     * @return the parsed element and the text following the last consumed star
     * @throws IllegalStateException when the star counts do not form a valid sequence
     */
    override fun parse(parser: MarkdownParser, text: String): Pair<MarkdownFormat, String> {
        val openCount = countLeadingStars(text)
        if (openCount < 1 || openCount > 3) error("Invalid italics/bold sequence")

        val (firstTokens, afterFirst) = readUntilStars(parser, text.substring(openCount))
        // Never close more stars than were opened; surplus stars stay in the remaining text.
        val firstCloseCount = countLeadingStars(afterFirst).coerceAtMost(openCount)
        if (firstCloseCount < 1) error("Invalid italics/bold sequence")

        val afterFirstClose = afterFirst.substring(firstCloseCount)
        val firstElement = parser.collapseInlineFormat(firstTokens)

        if (firstCloseCount == openCount) {
            // Fully closed in one go: wrap exactly once.
            return Pair(wrap(firstElement, openCount), afterFirstClose)
        }

        // Partially closed: the first stretch gets the style that was just closed.
        val innerElement = wrap(firstElement, firstCloseCount)
        val stillOpen = openCount - firstCloseCount

        val (secondTokens, afterSecond) = readUntilStars(parser, afterFirstClose)
        val secondCloseCount = countLeadingStars(afterSecond).coerceAtMost(stillOpen)
        if (secondCloseCount != stillOpen) {
            error("Invalid italics/bold sequence")
        }

        val secondElement = parser.collapseInlineFormat(secondTokens)
        val combined: MarkdownFormat = FormatSequence(innerElement, secondElement)
        return Pair(wrap(combined, secondCloseCount), afterSecond.substring(secondCloseCount))
    }

    /** Number of consecutive `*` characters at the start of [text]. */
    private fun countLeadingStars(text: String): Int = text.takeWhile { it == '*' }.length

    /**
     * Reads inline tokens from [text] until a non-whitespace token is directly
     * followed by `*`. Returns the tokens read and the text starting at that star.
     */
    private fun readUntilStars(parser: MarkdownParser, text: String): Pair<List<MarkdownFormat>, String> {
        val tokens = mutableListOf<MarkdownFormat>()
        var remaining = text
        var lookback: MarkdownFormat = Begin() // TODO: dedicated begin token
        while (true) {
            val (element, next) = parser.parseInlineTextOnce(lookback, remaining)
            tokens.add(element)
            remaining = next
            lookback = element
            if (element !is Whitespace && next.startsWith("*")) break
        }
        return Pair(tokens, remaining)
    }

    /** Wraps [element] for a run of [starCount] stars: 1 = italics, 2 = bold, 3 = italics around bold. */
    private fun wrap(element: MarkdownFormat, starCount: Int): MarkdownFormat {
        var wrapped = element
        if (starCount >= 2) wrapped = Bold(wrapped)
        if (starCount % 2 == 1) wrapped = Italics(wrapped)
        return wrapped
    }
}
// File: src/main/kotlin/moe/nea/blog/md/Link.kt (package moe.nea.blog.md)
import java.io.PrintStream

/**
 * Inline link.
 *
 * @property label the link's content, or `null` when there is none
 * @property target the link destination
 */
data class Link(val label: MarkdownFormat?, val target: String) : MarkdownFormat {
    /** Writes `<link target=…>`, the label if there is one, then `</link>`. */
    override fun debugFormat(printStream: PrintStream) {
        printStream.print("<link target=$target>")
        if (label != null) {
            label.debugFormat(printStream)
        }
        printStream.print("</link>")
    }
}
// File: src/main/kotlin/moe/nea/blog/md/MarkdownBlock.kt (package moe.nea.blog.md)
import java.io.PrintStream

/** A block-level markdown element. */
interface MarkdownBlock : MarkdownElement {
    /**
     * Writes a debug rendering of this block to [printStream].
     *
     * @param indent indentation amount the implementation applies to its output lines
     */
    fun debugFormat(indent: Int, printStream: PrintStream)
}
// File: src/main/kotlin/moe/nea/blog/md/MarkdownElement.kt (package moe.nea.blog.md)

/** Marker interface shared by every node of the parsed markdown tree. */
interface MarkdownElement
// File: src/main/kotlin/moe/nea/blog/md/MarkdownFormat.kt (package moe.nea.blog.md)
import java.io.PrintStream

/** An inline (text-level) markdown element. */
interface MarkdownFormat : MarkdownElement {
    /** Writes a debug rendering of this element to [printStream]. */
    fun debugFormat(printStream: PrintStream)
}
// Files: src/main/kotlin/moe/nea/blog/md/MarkdownParser.kt and
//        src/main/kotlin/moe/nea/blog/md/Paragraph.kt (package moe.nea.blog.md)
import moe.nea.blog.util.indent
import moe.nea.blog.util.indentSize
import java.io.PrintStream
import java.util.*

/**
 * Line-oriented markdown parser.
 *
 * Holds the source split into lines, a cursor into them, the current block
 * indentation, and the registered block and inline parsers.
 */
class MarkdownParser(source: String) {
    private val lines = source.lines()
    private var lineIndex = 0
    private var blockIndents = 0
    private val indentStack = Stack<Int>()

    private val blockParsers = mutableListOf<BlockParser>()
    private val inlineParsers = mutableListOf<InlineParser>()

    /** Returns the highest-priority registered block parser that detects [line], or `null` if none does. */
    fun findParserFor(line: String): BlockParser? =
        blockParsers
            .filter { it.detect(line) }
            .maxByOrNull { it.prio }

    /**
     * Parses the next block at the current indentation, falling back to
     * [ParagraphParser] when no registered parser detects the line.
     *
     * @return the parsed block, or `null` when [peekLine] has no line to offer
     */
    fun readChildBlock(): MarkdownBlock? {
        val peek = peekLine() ?: return null
        val blockParser = findParserFor(peek) ?: ParagraphParser
        return blockParser.parse(this)
    }

    /** Enters a deeper block indentation; [newIndent] must exceed the current one. */
    fun pushIndent(newIndent: Int) {
        require(newIndent > blockIndents) {
            "New indent $newIndent must be greater than current indent $blockIndents"
        }
        indentStack.push(blockIndents)
        blockIndents = newIndent
    }

    /** Restores the indentation that was active before the matching [pushIndent]. */
    fun popIndent() {
        blockIndents = indentStack.pop()
    }

    /** Like [peekLine], but also advances the cursor when a line is returned. */
    fun consumeLine(): String? = peekLine()?.also { lineIndex++ }

    /**
     * Returns the current line with the block indentation stripped, without
     * advancing the cursor.
     *
     * @return `null` at the end of input, or when the line contains a non-space
     * character before the current block indentation
     */
    fun peekLine(): String? {
        val line = lines.getOrNull(lineIndex) ?: return null
        val indent = line.indentSize()
        if (indent != null && indent < blockIndents) {
            return null
        }
        // drop() instead of substring(): a blank line may be shorter than the block indent.
        return line.drop(blockIndents)
    }

    /**
     * Parses a single inline token from the start of [text].
     *
     * The first registered inline parser whose `detect` accepts ([lookback], [text])
     * handles it. Otherwise a leading space becomes [Whitespace], and anything else
     * becomes a [Word] that runs up to the next space or special character.
     *
     * @param lookback the token produced immediately before [text]
     * @return the token and the text remaining after it
     * @throws IllegalArgumentException when [text] is empty and no inline parser claims it
     */
    fun parseInlineTextOnce(lookback: MarkdownFormat, text: String): Pair<MarkdownFormat, String> {
        val parser = inlineParsers.find { it.detect(lookback, text) }
        if (parser != null) {
            return parser.parse(this, text)
        }
        require(text.isNotEmpty()) { "Cannot read an inline token from empty text" } // TODO handle empty string
        if (text[0] == ' ') {
            return Pair(Whitespace(), text.substring(1))
        }
        // Built once per call rather than once per character.
        val specialChars = inlineParsers.flatMap { it.specialSyntax }.toSet()
        // Start at index 1: the first character always belongs to the word, even when
        // it is a special character no parser claimed, so every call makes progress.
        // With no terminator the word runs to the end of the text.
        val wordEnd = (1 until text.length)
            .firstOrNull { text[it] == ' ' || text[it] in specialChars }
            ?: text.length
        return Pair(Word(text.substring(0, wordEnd)), text.substring(wordEnd))
    }

    /** Tokenises all of [text] with [parseInlineTextOnce] and collapses the tokens into one element. */
    fun parseInlineText(text: String): MarkdownFormat {
        val seq = mutableListOf<MarkdownFormat>()
        var remaining = text
        var lastToken: MarkdownFormat = Begin()
        while (remaining.isNotEmpty()) {
            val (tok, next) = parseInlineTextOnce(lastToken, remaining)
            seq.add(tok)
            lastToken = tok
            remaining = next
        }
        return collapseInlineFormat(seq)
    }

    /** Combines [sequence] into a single inline element (currently a plain [FormatSequence]). */
    fun collapseInlineFormat(sequence: List<MarkdownFormat>): MarkdownFormat = FormatSequence(sequence)

    /** Reads blocks with [readChildBlock] until it returns `null` and wraps them in a [Document]. */
    fun readDocument(): Document {
        val list = mutableListOf<MarkdownBlock>()
        while (true) {
            val block = readChildBlock() ?: break
            list.add(block)
        }
        return Document(list)
    }

    /** Registers the built-in parsers: code blocks, headers and italics/bold. */
    fun addDefaultParsers() {
        blockParsers.add(CodeBlockParser)
        blockParsers.add(HeaderParser)
        inlineParsers.add(ItalicsParser)
    }
}

/** A paragraph block whose content is the inline element [format]. */
data class Paragraph(val format: MarkdownFormat) : MarkdownBlock {
    /** Writes the indentation, the inline content, and a line break. */
    override fun debugFormat(indent: Int, printStream: PrintStream) {
        printStream.indent(indent)
        format.debugFormat(printStream)
        printStream.println()
    }
}
// File: src/main/kotlin/moe/nea/blog/md/ParagraphParser.kt (package moe.nea.blog.md)

/**
 * Fallback block parser that gathers consecutive lines into one [Paragraph].
 */
object ParagraphParser : BlockParser {
    override val prio: Int
        get() = -10

    /** Accepts every line. */
    override fun detect(line: String): Boolean = true

    /**
     * Consumes lines until the input (at the current indentation) ends, a line
     * is detected by a registered block parser, or a second consecutive empty
     * line has been consumed.
     *
     * Each kept line is trimmed and appended with a single leading space. The
     * joined text is then parsed as inline content.
     */
    override fun parse(parser: MarkdownParser): MarkdownBlock {
        val collected = StringBuilder()
        var consecutiveBlank = 0
        while (true) {
            val upcoming = parser.peekLine() ?: break
            // Leave lines that start another block for that block's parser.
            if (parser.findParserFor(upcoming) != null) break
            val trimmed = parser.consumeLine()!!.trim()
            consecutiveBlank = if (trimmed.isEmpty()) consecutiveBlank + 1 else 0
            if (consecutiveBlank == 2) break
            collected.append(" ").append(trimmed)
        }
        return Paragraph(parser.parseInlineText(collected.toString()))
    }
}
// File: src/main/kotlin/moe/nea/blog/md/Text.kt (package moe.nea.blog.md)
import java.io.PrintStream

/** Inline element holding a run of literal [text]. */
data class Text(val text: String) : MarkdownFormat {
    /**
     * Writes [text] to [printStream].
     *
     * The argument must be [text]; passing the stream itself would print the
     * stream object's `toString()` instead of the content.
     */
    override fun debugFormat(printStream: PrintStream) {
        printStream.print(text)
    }
}
// File: src/main/kotlin/moe/nea/blog/md/Whitespace.kt (package moe.nea.blog.md)
import java.io.PrintStream

/** Inline token standing for a single space between other tokens. */
class Whitespace : MarkdownFormat {
    /** Writes one space character to [printStream]. */
    override fun debugFormat(printStream: PrintStream) {
        val space = " "
        printStream.print(space)
    }
}
// File: src/main/kotlin/moe/nea/blog/md/Word.kt (package moe.nea.blog.md)
import java.io.PrintStream

/** Inline token holding one word of plain [text]. */
data class Word(val text: String) : MarkdownFormat {
    /** Writes [text] unchanged to [printStream]. */
    override fun debugFormat(printStream: PrintStream) {
        with(printStream) { print(text) }
    }
}
// File: src/main/kotlin/moe/nea/blog/util/Indent.kt (package moe.nea.blog.util)

/**
 * Appends [indent] space characters to this [Appendable].
 *
 * @throws IllegalArgumentException when [indent] is negative
 */
fun Appendable.indent(indent: Int) {
    val padding: CharSequence = " ".repeat(indent)
    this.append(padding)
}
// File: src/main/kotlin/moe/nea/blog/util/IndentSize.kt (package moe.nea.blog.util)

/**
 * Returns the number of leading space characters before the first non-space
 * character, or `null` when the string is empty or consists only of spaces.
 */
fun String.indentSize(): Int? =
    indexOfFirst { it != ' ' }.takeIf { it >= 0 }
\ No newline at end of file |