diff options
author | Ilya Ryzhenkov <orangy@jetbrains.com> | 2014-09-25 22:20:58 +0400 |
---|---|---|
committer | Ilya Ryzhenkov <orangy@jetbrains.com> | 2014-09-25 22:20:58 +0400 |
commit | f7bab78839cea5674658a6a0298f88ef5ccca019 (patch) | |
tree | 51d17c02f8f935b26a9c9d85905fc33c18ebd6a1 /src/Markdown/markdown.leg | |
parent | a070217e942a16ed7b5ee63e98a4183788c8e660 (diff) | |
download | dokka-f7bab78839cea5674658a6a0298f88ef5ccca019.tar.gz dokka-f7bab78839cea5674658a6a0298f88ef5ccca019.tar.bz2 dokka-f7bab78839cea5674658a6a0298f88ef5ccca019.zip |
Markdown, sections, styles and lots more.
Diffstat (limited to 'src/Markdown/markdown.leg')
-rw-r--r-- | src/Markdown/markdown.leg | 781 |
1 files changed, 781 insertions, 0 deletions
diff --git a/src/Markdown/markdown.leg b/src/Markdown/markdown.leg new file mode 100644 index 00000000..ea8bc522 --- /dev/null +++ b/src/Markdown/markdown.leg @@ -0,0 +1,781 @@ +%{ +/********************************************************************** + + markdown_parser.leg - markdown parser in C using a PEG grammar. + (c) 2008 John MacFarlane (jgm at berkeley dot edu). + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License or the MIT + license. See LICENSE for details. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + ***********************************************************************/ + +#include <stdbool.h> +#include <assert.h> +#include "markdown_peg.h" +#include "utility_functions.h" + + + +/********************************************************************** + + Definitions for leg parser generator. + YY_INPUT is the function the parser calls to get new input. + We take all new input from (static) charbuf. + + ***********************************************************************/ + + + +# define YYSTYPE element * +#ifdef __DEBUG__ +# define YY_DEBUG 1 +#endif + +#define YY_INPUT(buf, result, max_size) \ +{ \ + int yyc; \ + if (charbuf && *charbuf != '\0') { \ + yyc= *charbuf++; \ + } else { \ + yyc= EOF; \ + } \ + result= (EOF == yyc) ? 0 : (*(buf)= yyc, 1); \ +} + +#define YY_RULE(T) T + + +/********************************************************************** + + PEG grammar and parser actions for markdown syntax. + + ***********************************************************************/ + +%} + +Doc = BOM? a:StartList ( Block { a = cons($$, a); } )* + { parse_result = reverse(a); } + +Block = BlankLine* + ( BlockQuote + | Verbatim + | Note + | Reference + | HorizontalRule + | Heading + | OrderedList + | BulletList + | HtmlBlock + | StyleBlock + | Para + | Plain ) + +Para = NonindentSpace a:Inlines BlankLine+ + { $$ = a; $$->key = PARA; } + +Plain = a:Inlines + { $$ = a; $$->key = PLAIN; } + +AtxInline = !Newline !(Sp '#'* Sp Newline) Inline + +AtxStart = < ( "######" | "#####" | "####" | "###" | "##" | "#" ) > + { $$ = mk_element(H1 + (strlen(yytext) - 1)); } + +AtxHeading = s:AtxStart Sp a:StartList ( AtxInline { a = cons($$, a); } )+ (Sp '#'* Sp)? Newline + { $$ = mk_list(s->key, a); + free(s); } + +SetextHeading = SetextHeading1 | SetextHeading2 + +SetextBottom1 = '='+ Newline + +SetextBottom2 = '-'+ Newline + +SetextHeading1 = &(RawLine SetextBottom1) + a:StartList ( !Endline Inline { a = cons($$, a); } )+ Sp Newline + SetextBottom1 { $$ = mk_list(H1, a); } + +SetextHeading2 = &(RawLine SetextBottom2) + a:StartList ( !Endline Inline { a = cons($$, a); } )+ Sp Newline + SetextBottom2 { $$ = mk_list(H2, a); } + +Heading = SetextHeading | AtxHeading + +BlockQuote = a:BlockQuoteRaw + { $$ = mk_element(BLOCKQUOTE); + $$->children = a; + } + +BlockQuoteRaw = a:StartList + (( '>' ' '? Line { a = cons($$, a); } ) + ( !'>' !BlankLine Line { a = cons($$, a); } )* + ( BlankLine { a = cons(mk_str("\n"), a); } )* + )+ + { $$ = mk_str_from_list(a, true); + $$->key = RAW; + } + +NonblankIndentedLine = !BlankLine IndentedLine + +VerbatimChunk = a:StartList + ( BlankLine { a = cons(mk_str("\n"), a); } )* + ( NonblankIndentedLine { a = cons($$, a); } )+ + { $$ = mk_str_from_list(a, false); } + +Verbatim = a:StartList ( VerbatimChunk { a = cons($$, a); } )+ + { $$ = mk_str_from_list(a, false); + $$->key = VERBATIM; } + +HorizontalRule = NonindentSpace + ( '*' Sp '*' Sp '*' (Sp '*')* + | '-' Sp '-' Sp '-' (Sp '-')* + | '_' Sp '_' Sp '_' (Sp '_')*) + Sp Newline BlankLine+ + { $$ = mk_element(HRULE); } + +Bullet = !HorizontalRule NonindentSpace ('+' | '*' | '-') Spacechar+ + +BulletList = &Bullet (ListTight | ListLoose) + { $$->key = BULLETLIST; } + +ListTight = a:StartList + ( ListItemTight { a = cons($$, a); } )+ + BlankLine* !(Bullet | Enumerator) + { $$ = mk_list(LIST, a); } + +ListLoose = a:StartList + ( b:ListItem BlankLine* + { element *li; + li = b->children; + li->contents.str = realloc(li->contents.str, strlen(li->contents.str) + 3); + strcat(li->contents.str, "\n\n"); /* In loose list, \n\n added to end of each element */ + a = cons(b, a); + } )+ + { $$ = mk_list(LIST, a); } + +ListItem = ( Bullet | Enumerator ) + a:StartList + ListBlock { a = cons($$, a); } + ( ListContinuationBlock { a = cons($$, a); } )* + { element *raw; + raw = mk_str_from_list(a, false); + raw->key = RAW; + $$ = mk_element(LISTITEM); + $$->children = raw; + } + +ListItemTight = + ( Bullet | Enumerator ) + a:StartList + ListBlock { a = cons($$, a); } + ( !BlankLine + ListContinuationBlock { a = cons($$, a); } )* + !ListContinuationBlock + { element *raw; + raw = mk_str_from_list(a, false); + raw->key = RAW; + $$ = mk_element(LISTITEM); + $$->children = raw; + } + +ListBlock = a:StartList + !BlankLine Line { a = cons($$, a); } + ( ListBlockLine { a = cons($$, a); } )* + { $$ = mk_str_from_list(a, false); } + +ListContinuationBlock = a:StartList + ( < BlankLine* > + { if (strlen(yytext) == 0) + a = cons(mk_str("\001"), a); /* block separator */ + else + a = cons(mk_str(yytext), a); } ) + ( Indent ListBlock { a = cons($$, a); } )+ + { $$ = mk_str_from_list(a, false); } + +Enumerator = NonindentSpace [0-9]+ '.' Spacechar+ + +OrderedList = &Enumerator (ListTight | ListLoose) + { $$->key = ORDEREDLIST; } + +ListBlockLine = !BlankLine + !( Indent? (Bullet | Enumerator) ) + !HorizontalRule + OptionallyIndentedLine + +# Parsers for different kinds of block-level HTML content. +# This is repetitive due to constraints of PEG grammar. + +HtmlBlockOpenAddress = '<' Spnl ("address" | "ADDRESS") Spnl HtmlAttribute* '>' +HtmlBlockCloseAddress = '<' Spnl '/' ("address" | "ADDRESS") Spnl '>' +HtmlBlockAddress = HtmlBlockOpenAddress (HtmlBlockAddress | !HtmlBlockCloseAddress .)* HtmlBlockCloseAddress + +HtmlBlockOpenBlockquote = '<' Spnl ("blockquote" | "BLOCKQUOTE") Spnl HtmlAttribute* '>' +HtmlBlockCloseBlockquote = '<' Spnl '/' ("blockquote" | "BLOCKQUOTE") Spnl '>' +HtmlBlockBlockquote = HtmlBlockOpenBlockquote (HtmlBlockBlockquote | !HtmlBlockCloseBlockquote .)* HtmlBlockCloseBlockquote + +HtmlBlockOpenCenter = '<' Spnl ("center" | "CENTER") Spnl HtmlAttribute* '>' +HtmlBlockCloseCenter = '<' Spnl '/' ("center" | "CENTER") Spnl '>' +HtmlBlockCenter = HtmlBlockOpenCenter (HtmlBlockCenter | !HtmlBlockCloseCenter .)* HtmlBlockCloseCenter + +HtmlBlockOpenDir = '<' Spnl ("dir" | "DIR") Spnl HtmlAttribute* '>' +HtmlBlockCloseDir = '<' Spnl '/' ("dir" | "DIR") Spnl '>' +HtmlBlockDir = HtmlBlockOpenDir (HtmlBlockDir | !HtmlBlockCloseDir .)* HtmlBlockCloseDir + +HtmlBlockOpenDiv = '<' Spnl ("div" | "DIV") Spnl HtmlAttribute* '>' +HtmlBlockCloseDiv = '<' Spnl '/' ("div" | "DIV") Spnl '>' +HtmlBlockDiv = HtmlBlockOpenDiv (HtmlBlockDiv | !HtmlBlockCloseDiv .)* HtmlBlockCloseDiv + +HtmlBlockOpenDl = '<' Spnl ("dl" | "DL") Spnl HtmlAttribute* '>' +HtmlBlockCloseDl = '<' Spnl '/' ("dl" | "DL") Spnl '>' +HtmlBlockDl = HtmlBlockOpenDl (HtmlBlockDl | !HtmlBlockCloseDl .)* HtmlBlockCloseDl + +HtmlBlockOpenFieldset = '<' Spnl ("fieldset" | "FIELDSET") Spnl HtmlAttribute* '>' +HtmlBlockCloseFieldset = '<' Spnl '/' ("fieldset" | "FIELDSET") Spnl '>' +HtmlBlockFieldset = HtmlBlockOpenFieldset (HtmlBlockFieldset | !HtmlBlockCloseFieldset .)* HtmlBlockCloseFieldset + +HtmlBlockOpenForm = '<' Spnl ("form" | "FORM") Spnl HtmlAttribute* '>' +HtmlBlockCloseForm = '<' Spnl '/' ("form" | "FORM") Spnl '>' +HtmlBlockForm = HtmlBlockOpenForm (HtmlBlockForm | !HtmlBlockCloseForm .)* HtmlBlockCloseForm + +HtmlBlockOpenH1 = '<' Spnl ("h1" | "H1") Spnl HtmlAttribute* '>' +HtmlBlockCloseH1 = '<' Spnl '/' ("h1" | "H1") Spnl '>' +HtmlBlockH1 = HtmlBlockOpenH1 (HtmlBlockH1 | !HtmlBlockCloseH1 .)* HtmlBlockCloseH1 + +HtmlBlockOpenH2 = '<' Spnl ("h2" | "H2") Spnl HtmlAttribute* '>' +HtmlBlockCloseH2 = '<' Spnl '/' ("h2" | "H2") Spnl '>' +HtmlBlockH2 = HtmlBlockOpenH2 (HtmlBlockH2 | !HtmlBlockCloseH2 .)* HtmlBlockCloseH2 + +HtmlBlockOpenH3 = '<' Spnl ("h3" | "H3") Spnl HtmlAttribute* '>' +HtmlBlockCloseH3 = '<' Spnl '/' ("h3" | "H3") Spnl '>' +HtmlBlockH3 = HtmlBlockOpenH3 (HtmlBlockH3 | !HtmlBlockCloseH3 .)* HtmlBlockCloseH3 + +HtmlBlockOpenH4 = '<' Spnl ("h4" | "H4") Spnl HtmlAttribute* '>' +HtmlBlockCloseH4 = '<' Spnl '/' ("h4" | "H4") Spnl '>' +HtmlBlockH4 = HtmlBlockOpenH4 (HtmlBlockH4 | !HtmlBlockCloseH4 .)* HtmlBlockCloseH4 + +HtmlBlockOpenH5 = '<' Spnl ("h5" | "H5") Spnl HtmlAttribute* '>' +HtmlBlockCloseH5 = '<' Spnl '/' ("h5" | "H5") Spnl '>' +HtmlBlockH5 = HtmlBlockOpenH5 (HtmlBlockH5 | !HtmlBlockCloseH5 .)* HtmlBlockCloseH5 + +HtmlBlockOpenH6 = '<' Spnl ("h6" | "H6") Spnl HtmlAttribute* '>' +HtmlBlockCloseH6 = '<' Spnl '/' ("h6" | "H6") Spnl '>' +HtmlBlockH6 = HtmlBlockOpenH6 (HtmlBlockH6 | !HtmlBlockCloseH6 .)* HtmlBlockCloseH6 + +HtmlBlockOpenMenu = '<' Spnl ("menu" | "MENU") Spnl HtmlAttribute* '>' +HtmlBlockCloseMenu = '<' Spnl '/' ("menu" | "MENU") Spnl '>' +HtmlBlockMenu = HtmlBlockOpenMenu (HtmlBlockMenu | !HtmlBlockCloseMenu .)* HtmlBlockCloseMenu + +HtmlBlockOpenNoframes = '<' Spnl ("noframes" | "NOFRAMES") Spnl HtmlAttribute* '>' +HtmlBlockCloseNoframes = '<' Spnl '/' ("noframes" | "NOFRAMES") Spnl '>' +HtmlBlockNoframes = HtmlBlockOpenNoframes (HtmlBlockNoframes | !HtmlBlockCloseNoframes .)* HtmlBlockCloseNoframes + +HtmlBlockOpenNoscript = '<' Spnl ("noscript" | "NOSCRIPT") Spnl HtmlAttribute* '>' +HtmlBlockCloseNoscript = '<' Spnl '/' ("noscript" | "NOSCRIPT") Spnl '>' +HtmlBlockNoscript = HtmlBlockOpenNoscript (HtmlBlockNoscript | !HtmlBlockCloseNoscript .)* HtmlBlockCloseNoscript + +HtmlBlockOpenOl = '<' Spnl ("ol" | "OL") Spnl HtmlAttribute* '>' +HtmlBlockCloseOl = '<' Spnl '/' ("ol" | "OL") Spnl '>' +HtmlBlockOl = HtmlBlockOpenOl (HtmlBlockOl | !HtmlBlockCloseOl .)* HtmlBlockCloseOl + +HtmlBlockOpenP = '<' Spnl ("p" | "P") Spnl HtmlAttribute* '>' +HtmlBlockCloseP = '<' Spnl '/' ("p" | "P") Spnl '>' +HtmlBlockP = HtmlBlockOpenP (HtmlBlockP | !HtmlBlockCloseP .)* HtmlBlockCloseP + +HtmlBlockOpenPre = '<' Spnl ("pre" | "PRE") Spnl HtmlAttribute* '>' +HtmlBlockClosePre = '<' Spnl '/' ("pre" | "PRE") Spnl '>' +HtmlBlockPre = HtmlBlockOpenPre (HtmlBlockPre | !HtmlBlockClosePre .)* HtmlBlockClosePre + +HtmlBlockOpenTable = '<' Spnl ("table" | "TABLE") Spnl HtmlAttribute* '>' +HtmlBlockCloseTable = '<' Spnl '/' ("table" | "TABLE") Spnl '>' +HtmlBlockTable = HtmlBlockOpenTable (HtmlBlockTable | !HtmlBlockCloseTable .)* HtmlBlockCloseTable + +HtmlBlockOpenUl = '<' Spnl ("ul" | "UL") Spnl HtmlAttribute* '>' +HtmlBlockCloseUl = '<' Spnl '/' ("ul" | "UL") Spnl '>' +HtmlBlockUl = HtmlBlockOpenUl (HtmlBlockUl | !HtmlBlockCloseUl .)* HtmlBlockCloseUl + +HtmlBlockOpenDd = '<' Spnl ("dd" | "DD") Spnl HtmlAttribute* '>' +HtmlBlockCloseDd = '<' Spnl '/' ("dd" | "DD") Spnl '>' +HtmlBlockDd = HtmlBlockOpenDd (HtmlBlockDd | !HtmlBlockCloseDd .)* HtmlBlockCloseDd + +HtmlBlockOpenDt = '<' Spnl ("dt" | "DT") Spnl HtmlAttribute* '>' +HtmlBlockCloseDt = '<' Spnl '/' ("dt" | "DT") Spnl '>' +HtmlBlockDt = HtmlBlockOpenDt (HtmlBlockDt | !HtmlBlockCloseDt .)* HtmlBlockCloseDt + +HtmlBlockOpenFrameset = '<' Spnl ("frameset" | "FRAMESET") Spnl HtmlAttribute* '>' +HtmlBlockCloseFrameset = '<' Spnl '/' ("frameset" | "FRAMESET") Spnl '>' +HtmlBlockFrameset = HtmlBlockOpenFrameset (HtmlBlockFrameset | !HtmlBlockCloseFrameset .)* HtmlBlockCloseFrameset + +HtmlBlockOpenLi = '<' Spnl ("li" | "LI") Spnl HtmlAttribute* '>' +HtmlBlockCloseLi = '<' Spnl '/' ("li" | "LI") Spnl '>' +HtmlBlockLi = HtmlBlockOpenLi (HtmlBlockLi | !HtmlBlockCloseLi .)* HtmlBlockCloseLi + +HtmlBlockOpenTbody = '<' Spnl ("tbody" | "TBODY") Spnl HtmlAttribute* '>' +HtmlBlockCloseTbody = '<' Spnl '/' ("tbody" | "TBODY") Spnl '>' +HtmlBlockTbody = HtmlBlockOpenTbody (HtmlBlockTbody | !HtmlBlockCloseTbody .)* HtmlBlockCloseTbody + +HtmlBlockOpenTd = '<' Spnl ("td" | "TD") Spnl HtmlAttribute* '>' +HtmlBlockCloseTd = '<' Spnl '/' ("td" | "TD") Spnl '>' +HtmlBlockTd = HtmlBlockOpenTd (HtmlBlockTd | !HtmlBlockCloseTd .)* HtmlBlockCloseTd + +HtmlBlockOpenTfoot = '<' Spnl ("tfoot" | "TFOOT") Spnl HtmlAttribute* '>' +HtmlBlockCloseTfoot = '<' Spnl '/' ("tfoot" | "TFOOT") Spnl '>' +HtmlBlockTfoot = HtmlBlockOpenTfoot (HtmlBlockTfoot | !HtmlBlockCloseTfoot .)* HtmlBlockCloseTfoot + +HtmlBlockOpenTh = '<' Spnl ("th" | "TH") Spnl HtmlAttribute* '>' +HtmlBlockCloseTh = '<' Spnl '/' ("th" | "TH") Spnl '>' +HtmlBlockTh = HtmlBlockOpenTh (HtmlBlockTh | !HtmlBlockCloseTh .)* HtmlBlockCloseTh + +HtmlBlockOpenThead = '<' Spnl ("thead" | "THEAD") Spnl HtmlAttribute* '>' +HtmlBlockCloseThead = '<' Spnl '/' ("thead" | "THEAD") Spnl '>' +HtmlBlockThead = HtmlBlockOpenThead (HtmlBlockThead | !HtmlBlockCloseThead .)* HtmlBlockCloseThead + +HtmlBlockOpenTr = '<' Spnl ("tr" | "TR") Spnl HtmlAttribute* '>' +HtmlBlockCloseTr = '<' Spnl '/' ("tr" | "TR") Spnl '>' +HtmlBlockTr = HtmlBlockOpenTr (HtmlBlockTr | !HtmlBlockCloseTr .)* HtmlBlockCloseTr + +HtmlBlockOpenScript = '<' Spnl ("script" | "SCRIPT") Spnl HtmlAttribute* '>' +HtmlBlockCloseScript = '<' Spnl '/' ("script" | "SCRIPT") Spnl '>' +HtmlBlockScript = HtmlBlockOpenScript (!HtmlBlockCloseScript .)* HtmlBlockCloseScript + +HtmlBlockOpenHead = '<' Spnl ("head" | "HEAD") Spnl HtmlAttribute* '>' +HtmlBlockCloseHead = '<' Spnl '/' ("head" | "HEAD") Spnl '>' +HtmlBlockHead = HtmlBlockOpenHead (!HtmlBlockCloseHead .)* HtmlBlockCloseHead + +HtmlBlockInTags = HtmlBlockAddress + | HtmlBlockBlockquote + | HtmlBlockCenter + | HtmlBlockDir + | HtmlBlockDiv + | HtmlBlockDl + | HtmlBlockFieldset + | HtmlBlockForm + | HtmlBlockH1 + | HtmlBlockH2 + | HtmlBlockH3 + | HtmlBlockH4 + | HtmlBlockH5 + | HtmlBlockH6 + | HtmlBlockMenu + | HtmlBlockNoframes + | HtmlBlockNoscript + | HtmlBlockOl + | HtmlBlockP + | HtmlBlockPre + | HtmlBlockTable + | HtmlBlockUl + | HtmlBlockDd + | HtmlBlockDt + | HtmlBlockFrameset + | HtmlBlockLi + | HtmlBlockTbody + | HtmlBlockTd + | HtmlBlockTfoot + | HtmlBlockTh + | HtmlBlockThead + | HtmlBlockTr + | HtmlBlockScript + | HtmlBlockHead + +HtmlBlock = < ( HtmlBlockInTags | HtmlComment | HtmlBlockSelfClosing ) > + BlankLine+ + { if (extension(EXT_FILTER_HTML)) { + $$ = mk_list(LIST, NULL); + } else { + $$ = mk_str(yytext); + $$->key = HTMLBLOCK; + } + } + +HtmlBlockSelfClosing = '<' Spnl HtmlBlockType Spnl HtmlAttribute* '/' Spnl '>' + +HtmlBlockType = "address" | "blockquote" | "center" | "dir" | "div" | "dl" | "fieldset" | "form" | "h1" | "h2" | "h3" | + "h4" | "h5" | "h6" | "hr" | "isindex" | "menu" | "noframes" | "noscript" | "ol" | "p" | "pre" | "table" | + "ul" | "dd" | "dt" | "frameset" | "li" | "tbody" | "td" | "tfoot" | "th" | "thead" | "tr" | "script" | + "ADDRESS" | "BLOCKQUOTE" | "CENTER" | "DIR" | "DIV" | "DL" | "FIELDSET" | "FORM" | "H1" | "H2" | "H3" | + "H4" | "H5" | "H6" | "HR" | "ISINDEX" | "MENU" | "NOFRAMES" | "NOSCRIPT" | "OL" | "P" | "PRE" | "TABLE" | + "UL" | "DD" | "DT" | "FRAMESET" | "LI" | "TBODY" | "TD" | "TFOOT" | "TH" | "THEAD" | "TR" | "SCRIPT" + +StyleOpen = '<' Spnl ("style" | "STYLE") Spnl HtmlAttribute* '>' +StyleClose = '<' Spnl '/' ("style" | "STYLE") Spnl '>' +InStyleTags = StyleOpen (!StyleClose .)* StyleClose +StyleBlock = < InStyleTags > + BlankLine* + { if (extension(EXT_FILTER_STYLES)) { + $$ = mk_list(LIST, NULL); + } else { + $$ = mk_str(yytext); + $$->key = HTMLBLOCK; + } + } + +Inlines = a:StartList ( !Endline Inline { a = cons($$, a); } + | c:Endline &Inline { a = cons(c, a); } )+ Endline? + { $$ = mk_list(LIST, a); } + +Inline = Str + | Endline + | UlOrStarLine + | Space + | Strong + | Emph + | Strike + | Image + | Link + | NoteReference + | InlineNote + | Code + | RawHtml + | Entity + | EscapedChar + | Smart + | Symbol + +Space = Spacechar+ + { $$ = mk_str(" "); + $$->key = SPACE; } + +Str = a:StartList < NormalChar+ > { a = cons(mk_str(yytext), a); } + ( StrChunk { a = cons($$, a); } )* + { if (a->next == NULL) { $$ = a; } else { $$ = mk_list(LIST, a); } } + +StrChunk = < (NormalChar | '_'+ &Alphanumeric)+ > { $$ = mk_str(yytext); } | + AposChunk + +AposChunk = &{ extension(EXT_SMART) } '\'' &Alphanumeric + { $$ = mk_element(APOSTROPHE); } + +EscapedChar = '\\' !Newline < [-\\`|*_{}[\]()#+.!><] > + { $$ = mk_str(yytext); } + +Entity = ( HexEntity | DecEntity | CharEntity ) + { $$ = mk_str(yytext); $$->key = HTML; } + +Endline = LineBreak | TerminalEndline | NormalEndline + +NormalEndline = Sp Newline !BlankLine !'>' !AtxStart + !(Line ('='+ | '-'+) Newline) + { $$ = mk_str("\n"); + $$->key = SPACE; } + +TerminalEndline = Sp Newline Eof + { $$ = NULL; } + +LineBreak = " " NormalEndline + { $$ = mk_element(LINEBREAK); } + +Symbol = < SpecialChar > + { $$ = mk_str(yytext); } + +# This keeps the parser from getting bogged down on long strings of '*' or '_', +# or strings of '*' or '_' with space on each side: +UlOrStarLine = (UlLine | StarLine) { $$ = mk_str(yytext); } +StarLine = < "****" '*'* > | < Spacechar '*'+ &Spacechar > +UlLine = < "____" '_'* > | < Spacechar '_'+ &Spacechar > + +Emph = EmphStar | EmphUl + +Whitespace = Spacechar | Newline + +EmphStar = '*' !Whitespace + a:StartList + ( !'*' b:Inline { a = cons(b, a); } + | b:StrongStar { a = cons(b, a); } + )+ + '*' + { $$ = mk_list(EMPH, a); } + +EmphUl = '_' !Whitespace + a:StartList + ( !'_' b:Inline { a = cons(b, a); } + | b:StrongUl { a = cons(b, a); } + )+ + '_' + { $$ = mk_list(EMPH, a); } + +Strong = StrongStar | StrongUl + +StrongStar = "**" !Whitespace + a:StartList + ( !"**" b:Inline { a = cons(b, a); })+ + "**" + { $$ = mk_list(STRONG, a); } + +StrongUl = "__" !Whitespace + a:StartList + ( !"__" b:Inline { a = cons(b, a); })+ + "__" + { $$ = mk_list(STRONG, a); } + +Strike = &{ extension(EXT_STRIKE) } + "~~" !Whitespace + a:StartList + ( !"~~" b:Inline { a = cons(b, a); })+ + "~~" + { $$ = mk_list(STRIKE, a); } + +Image = '!' ( ExplicitLink | ReferenceLink ) + { if ($$->key == LINK) { + $$->key = IMAGE; + } else { + element *result; + result = $$; + $$->children = cons(mk_str("!"), result->children); + } } + +Link = ExplicitLink | ReferenceLink | AutoLink + +ReferenceLink = ReferenceLinkDouble | ReferenceLinkSingle + +ReferenceLinkDouble = a:Label < Spnl > !"[]" b:Label + { link match; + if (find_reference(&match, b->children)) { + $$ = mk_link(a->children, match.url, match.title); + free(a); + free_element_list(b); + } else { + element *result; + result = mk_element(LIST); + result->children = cons(mk_str("["), cons(a, cons(mk_str("]"), cons(mk_str(yytext), + cons(mk_str("["), cons(b, mk_str("]"))))))); + $$ = result; + } + } + +ReferenceLinkSingle = a:Label < (Spnl "[]")? > + { link match; + if (find_reference(&match, a->children)) { + $$ = mk_link(a->children, match.url, match.title); + free(a); + } + else { + element *result; + result = mk_element(LIST); + result->children = cons(mk_str("["), cons(a, cons(mk_str("]"), mk_str(yytext)))); + $$ = result; + } + } + +ExplicitLink = l:Label '(' Sp s:Source Spnl t:Title Sp ')' + { $$ = mk_link(l->children, s->contents.str, t->contents.str); + free_element(s); + free_element(t); + free(l); } + +Source = ( '<' < SourceContents > '>' | < SourceContents > ) + { $$ = mk_str(yytext); } + +SourceContents = ( ( !'(' !')' !'>' Nonspacechar )+ | '(' SourceContents ')')* + +Title = ( TitleSingle | TitleDouble | < "" > ) + { $$ = mk_str(yytext); } + +TitleSingle = '\'' < ( !( '\'' Sp ( ')' | Newline ) ) . )* > '\'' + +TitleDouble = '"' < ( !( '"' Sp ( ')' | Newline ) ) . )* > '"' + +AutoLink = AutoLinkUrl | AutoLinkEmail + +AutoLinkUrl = '<' < [A-Za-z]+ "://" ( !Newline !'>' . )+ > '>' + { $$ = mk_link(mk_str(yytext), yytext, ""); } + +AutoLinkEmail = '<' ( "mailto:" )? < [-A-Za-z0-9+_./!%~$]+ '@' ( !Newline !'>' . )+ > '>' + { char *mailto = malloc(strlen(yytext) + 8); + sprintf(mailto, "mailto:%s", yytext); + $$ = mk_link(mk_str(yytext), mailto, ""); + free(mailto); + } + +Reference = NonindentSpace !"[]" l:Label ':' Spnl s:RefSrc t:RefTitle BlankLine+ + { $$ = mk_link(l->children, s->contents.str, t->contents.str); + free_element(s); + free_element(t); + free(l); + $$->key = REFERENCE; } + +Label = '[' ( !'^' &{ extension(EXT_NOTES) } | &. &{ !extension(EXT_NOTES) } ) + a:StartList + ( !']' Inline { a = cons($$, a); } )* + ']' + { $$ = mk_list(LIST, a); } + +RefSrc = < Nonspacechar+ > + { $$ = mk_str(yytext); + $$->key = HTML; } + +RefTitle = ( RefTitleSingle | RefTitleDouble | RefTitleParens | EmptyTitle ) + { $$ = mk_str(yytext); } + +EmptyTitle = < "" > + +RefTitleSingle = Spnl '\'' < ( !( '\'' Sp Newline | Newline ) . )* > '\'' + +RefTitleDouble = Spnl '"' < ( !('"' Sp Newline | Newline) . )* > '"' + +RefTitleParens = Spnl '(' < ( !(')' Sp Newline | Newline) . )* > ')' + +References = a:StartList + ( b:Reference { a = cons(b, a); } | SkipBlock )* + { references = reverse(a); } + +Ticks1 = "`" !'`' +Ticks2 = "``" !'`' +Ticks3 = "```" !'`' +Ticks4 = "````" !'`' +Ticks5 = "`````" !'`' + +Code = ( Ticks1 Sp < ( ( !'`' Nonspacechar )+ | !Ticks1 '`'+ | !( Sp Ticks1 ) ( Spacechar | Newline !BlankLine ) )+ > Sp Ticks1 + | Ticks2 Sp < ( ( !'`' Nonspacechar )+ | !Ticks2 '`'+ | !( Sp Ticks2 ) ( Spacechar | Newline !BlankLine ) )+ > Sp Ticks2 + | Ticks3 Sp < ( ( !'`' Nonspacechar )+ | !Ticks3 '`'+ | !( Sp Ticks3 ) ( Spacechar | Newline !BlankLine ) )+ > Sp Ticks3 + | Ticks4 Sp < ( ( !'`' Nonspacechar )+ | !Ticks4 '`'+ | !( Sp Ticks4 ) ( Spacechar | Newline !BlankLine ) )+ > Sp Ticks4 + | Ticks5 Sp < ( ( !'`' Nonspacechar )+ | !Ticks5 '`'+ | !( Sp Ticks5 ) ( Spacechar | Newline !BlankLine ) )+ > Sp Ticks5 + ) + { $$ = mk_str(yytext); $$->key = CODE; } + +RawHtml = < (HtmlComment | HtmlBlockScript | HtmlTag) > + { if (extension(EXT_FILTER_HTML)) { + $$ = mk_list(LIST, NULL); + } else { + $$ = mk_str(yytext); + $$->key = HTML; + } + } + +BlankLine = Sp Newline + +Quoted = '"' (!'"' .)* '"' | '\'' (!'\'' .)* '\'' +HtmlAttribute = (AlphanumericAscii | '-')+ Spnl ('=' Spnl (Quoted | (!'>' Nonspacechar)+))? Spnl +HtmlComment = "<!--" (!"-->" .)* "-->" +HtmlTag = '<' Spnl '/'? AlphanumericAscii+ Spnl HtmlAttribute* '/'? Spnl '>' +Eof = !. +Spacechar = ' ' | '\t' +Nonspacechar = !Spacechar !Newline . +Newline = '\n' | '\r' '\n'? +Sp = Spacechar* +Spnl = Sp (Newline Sp)? +SpecialChar = '~' | '*' | '_' | '`' | '&' | '[' | ']' | '(' | ')' | '<' | '!' | '#' | '\\' | '\'' | '"' | ExtendedSpecialChar +NormalChar = !( SpecialChar | Spacechar | Newline ) . +Alphanumeric = [0-9A-Za-z] | '\200' | '\201' | '\202' | '\203' | '\204' | '\205' | '\206' | '\207' | '\210' | '\211' | '\212' | '\213' | '\214' | '\215' | '\216' | '\217' | '\220' | '\221' | '\222' | '\223' | '\224' | '\225' | '\226' | '\227' | '\230' | '\231' | '\232' | '\233' | '\234' | '\235' | '\236' | '\237' | '\240' | '\241' | '\242' | '\243' | '\244' | '\245' | '\246' | '\247' | '\250' | '\251' | '\252' | '\253' | '\254' | '\255' | '\256' | '\257' | '\260' | '\261' | '\262' | '\263' | '\264' | '\265' | '\266' | '\267' | '\270' | '\271' | '\272' | '\273' | '\274' | '\275' | '\276' | '\277' | '\300' | '\301' | '\302' | '\303' | '\304' | '\305' | '\306' | '\307' | '\310' | '\311' | '\312' | '\313' | '\314' | '\315' | '\316' | '\317' | '\320' | '\321' | '\322' | '\323' | '\324' | '\325' | '\326' | '\327' | '\330' | '\331' | '\332' | '\333' | '\334' | '\335' | '\336' | '\337' | '\340' | '\341' | '\342' | '\343' | '\344' | '\345' | '\346' | '\347' | '\350' | '\351' | '\352' | '\353' | '\354' | '\355' | '\356' | '\357' | '\360' | '\361' | '\362' | '\363' | '\364' | '\365' | '\366' | '\367' | '\370' | '\371' | '\372' | '\373' | '\374' | '\375' | '\376' | '\377' +AlphanumericAscii = [A-Za-z0-9] +Digit = [0-9] +BOM = "\357\273\277" + +HexEntity = < '&' '#' [Xx] [0-9a-fA-F]+ ';' > +DecEntity = < '&' '#' [0-9]+ > ';' > +CharEntity = < '&' [A-Za-z0-9]+ ';' > + +NonindentSpace = " " | " " | " " | "" +Indent = "\t" | " " +IndentedLine = Indent Line +OptionallyIndentedLine = Indent? Line + +# StartList starts a list data structure that can be added to with cons: +StartList = &. + { $$ = NULL; } + +Line = RawLine + { $$ = mk_str(yytext); } +RawLine = ( < (!'\r' !'\n' .)* Newline > | < .+ > Eof ) + +SkipBlock = HtmlBlock + | ( !'#' !SetextBottom1 !SetextBottom2 !BlankLine RawLine )+ BlankLine* + | BlankLine+ + | RawLine + +# Syntax extensions + +ExtendedSpecialChar = &{ extension(EXT_SMART) } ('.' | '-' | '\'' | '"') + | &{ extension(EXT_NOTES) } ( '^' ) + +Smart = &{ extension(EXT_SMART) } + ( Ellipsis | Dash | SingleQuoted | DoubleQuoted | Apostrophe ) + +Apostrophe = '\'' + { $$ = mk_element(APOSTROPHE); } + +Ellipsis = ("..." | ". . .") + { $$ = mk_element(ELLIPSIS); } + +Dash = EmDash | EnDash + +EnDash = '-' &Digit + { $$ = mk_element(ENDASH); } + +EmDash = ("---" | "--") + { $$ = mk_element(EMDASH); } + +SingleQuoteStart = '\'' !(Spacechar | Newline) + +SingleQuoteEnd = '\'' !Alphanumeric + +SingleQuoted = SingleQuoteStart + a:StartList + ( !SingleQuoteEnd b:Inline { a = cons(b, a); } )+ + SingleQuoteEnd + { $$ = mk_list(SINGLEQUOTED, a); } + +DoubleQuoteStart = '"' + +DoubleQuoteEnd = '"' + +DoubleQuoted = DoubleQuoteStart + a:StartList + ( !DoubleQuoteEnd b:Inline { a = cons(b, a); } )+ + DoubleQuoteEnd + { $$ = mk_list(DOUBLEQUOTED, a); } + +NoteReference = &{ extension(EXT_NOTES) } + ref:RawNoteReference + { element *match; + if (find_note(&match, ref->contents.str)) { + $$ = mk_element(NOTE); + assert(match->children != NULL); + $$->children = match->children; + $$->contents.str = 0; + } else { + char *s; + s = malloc(strlen(ref->contents.str) + 4); + sprintf(s, "[^%s]", ref->contents.str); + $$ = mk_str(s); + free(s); + } + } + +RawNoteReference = "[^" < ( !Newline !']' . )+ > ']' + { $$ = mk_str(yytext); } + +Note = &{ extension(EXT_NOTES) } + NonindentSpace ref:RawNoteReference ':' Sp + a:StartList + ( RawNoteBlock { a = cons($$, a); } ) + ( &Indent RawNoteBlock { a = cons($$, a); } )* + { $$ = mk_list(NOTE, a); + $$->contents.str = strdup(ref->contents.str); + } + +InlineNote = &{ extension(EXT_NOTES) } + "^[" + a:StartList + ( !']' Inline { a = cons($$, a); } )+ + ']' + { $$ = mk_list(NOTE, a); + $$->contents.str = 0; } + +Notes = a:StartList + ( b:Note { a = cons(b, a); } | SkipBlock )* + { notes = reverse(a); } + +RawNoteBlock = a:StartList + ( !BlankLine OptionallyIndentedLine { a = cons($$, a); } )+ + ( < BlankLine* > { a = cons(mk_str(yytext), a); } ) + { $$ = mk_str_from_list(a, true); + $$->key = RAW; + } + +%% + |