diff options
Diffstat (limited to 'filters')
| -rwxr-xr-x | filters/about-formatting.sh | 27 | ||||
| -rwxr-xr-x | filters/html-converters/man2html | 5 | ||||
| -rwxr-xr-x | filters/html-converters/md2html | 2 | ||||
| -rwxr-xr-x | filters/html-converters/resources/markdown.pl | 1731 | ||||
| -rw-r--r-- | filters/html-converters/resources/rst-template.txt | 4 | ||||
| -rwxr-xr-x | filters/html-converters/rst2html | 2 | ||||
| -rwxr-xr-x | filters/html-converters/txt2html | 4 | ||||
| -rwxr-xr-x | filters/syntax-highlighting.py | 38 |
8 files changed, 1813 insertions, 0 deletions
diff --git a/filters/about-formatting.sh b/filters/about-formatting.sh new file mode 100755 index 0000000..313a4e6 --- /dev/null +++ b/filters/about-formatting.sh @@ -0,0 +1,27 @@ +#!/bin/sh + +# This may be used with the about-filter or repo.about-filter setting in cgitrc. +# It passes formatting of about pages to differing programs, depending on the usage. + +# Markdown support requires perl. +# RestructuredText support requires python and docutils. +# Man page support requires groff. + +# The following environment variables can be used to retrieve the configuration +# of the repository for which this script is called: +# CGIT_REPO_URL ( = repo.url setting ) +# CGIT_REPO_NAME ( = repo.name setting ) +# CGIT_REPO_PATH ( = repo.path setting ) +# CGIT_REPO_OWNER ( = repo.owner setting ) +# CGIT_REPO_DEFBRANCH ( = repo.defbranch setting ) +# CGIT_REPO_SECTION ( = section setting ) +# CGIT_REPO_CLONE_URL ( = repo.clone-url setting ) + +cd "$(dirname $0)/html-converters/" +case "$(tr '[:upper:]' '[:lower:]' <<<"$1")" in + *.md|*.mkd) exec ./md2html; ;; + *.rst) exec ./rst2html; ;; + *.[1-9]) exec ./man2html; ;; + *.htm|*.html) exec cat; ;; + *.txt|*) exec ./txt2html; ;; +esac diff --git a/filters/html-converters/man2html b/filters/html-converters/man2html new file mode 100755 index 0000000..1b28437 --- /dev/null +++ b/filters/html-converters/man2html @@ -0,0 +1,5 @@ +#!/bin/sh +echo "<div style=\"font-family: monospace\">" +groff -mandoc -T html -P -r -P -l | egrep -v '(<html>|<head>|<meta|<title>|</title>|</head>|<body>|</body>|</html>|<!DOCTYPE|"http://www.w3.org)' +echo "</div>" + diff --git a/filters/html-converters/md2html b/filters/html-converters/md2html new file mode 100755 index 0000000..5cab749 --- /dev/null +++ b/filters/html-converters/md2html @@ -0,0 +1,2 @@ +#!/bin/sh +exec "$(dirname "$0")/resources/markdown.pl" diff --git a/filters/html-converters/resources/markdown.pl b/filters/html-converters/resources/markdown.pl new file mode 100755 index 0000000..abec173 --- /dev/null +++ b/filters/html-converters/resources/markdown.pl @@ -0,0 +1,1731 @@ +#!/usr/bin/perl + +# +# Markdown -- A text-to-HTML conversion tool for web writers +# +# Copyright (c) 2004 John Gruber +# <http://daringfireball.net/projects/markdown/> +# + + +package Markdown; +require 5.006_000; +use strict; +use warnings; + +use Digest::MD5 qw(md5_hex); +use vars qw($VERSION); +$VERSION = '1.0.1'; +# Tue 14 Dec 2004 + +## Disabled; causes problems under Perl 5.6.1: +use utf8; +binmode( STDOUT, ":utf8" ); # c.f.: http://acis.openlib.org/dev/perl-unicode-struggle.html + + +# +# Global default settings: +# +my $g_empty_element_suffix = " />"; # Change to ">" for HTML output +my $g_tab_width = 4; + + +# +# Globals: +# + +# Regex to match balanced [brackets]. See Friedl's +# "Mastering Regular Expressions", 2nd Ed., pp. 328-331. +my $g_nested_brackets; +$g_nested_brackets = qr{ + (?> # Atomic matching + [^\[\]]+ # Anything other than brackets + | + \[ + (??{ $g_nested_brackets }) # Recursive set of nested brackets + \] + )* +}x; + + +# Table of hash values for escaped characters: +my %g_escape_table; +foreach my $char (split //, '\\`*_{}[]()>#+-.!') { + $g_escape_table{$char} = md5_hex($char); +} + + +# Global hashes, used by various utility routines +my %g_urls; +my %g_titles; +my %g_html_blocks; + +# Used to track when we're inside an ordered or unordered list +# (see _ProcessListItems() for details): +my $g_list_level = 0; + + +#### Blosxom plug-in interface ########################################## + +# Set $g_blosxom_use_meta to 1 to use Blosxom's meta plug-in to determine +# which posts Markdown should process, using a "meta-markup: markdown" +# header. If it's set to 0 (the default), Markdown will process all +# entries. +my $g_blosxom_use_meta = 0; + +sub start { 1; } +sub story { + my($pkg, $path, $filename, $story_ref, $title_ref, $body_ref) = @_; + + if ( (! $g_blosxom_use_meta) or + (defined($meta::markup) and ($meta::markup =~ /^\s*markdown\s*$/i)) + ){ + $$body_ref = Markdown($$body_ref); + } + 1; +} + + +#### Movable Type plug-in interface ##################################### +eval {require MT}; # Test to see if we're running in MT. +unless ($@) { + require MT; + import MT; + require MT::Template::Context; + import MT::Template::Context; + + eval {require MT::Plugin}; # Test to see if we're running >= MT 3.0. + unless ($@) { + require MT::Plugin; + import MT::Plugin; + my $plugin = new MT::Plugin({ + name => "Markdown", + description => "A plain-text-to-HTML formatting plugin. (Version: $VERSION)", + doc_link => 'http://daringfireball.net/projects/markdown/' + }); + MT->add_plugin( $plugin ); + } + + MT::Template::Context->add_container_tag(MarkdownOptions => sub { + my $ctx = shift; + my $args = shift; + my $builder = $ctx->stash('builder'); + my $tokens = $ctx->stash('tokens'); + + if (defined ($args->{'output'}) ) { + $ctx->stash('markdown_output', lc $args->{'output'}); + } + + defined (my $str = $builder->build($ctx, $tokens) ) + or return $ctx->error($builder->errstr); + $str; # return value + }); + + MT->add_text_filter('markdown' => { + label => 'Markdown', + docs => 'http://daringfireball.net/projects/markdown/', + on_format => sub { + my $text = shift; + my $ctx = shift; + my $raw = 0; + if (defined $ctx) { + my $output = $ctx->stash('markdown_output'); + if (defined $output && $output =~ m/^html/i) { + $g_empty_element_suffix = ">"; + $ctx->stash('markdown_output', ''); + } + elsif (defined $output && $output eq 'raw') { + $raw = 1; + $ctx->stash('markdown_output', ''); + } + else { + $raw = 0; + $g_empty_element_suffix = " />"; + } + } + $text = $raw ? $text : Markdown($text); + $text; + }, + }); + + # If SmartyPants is loaded, add a combo Markdown/SmartyPants text filter: + my $smartypants; + + { + no warnings "once"; + $smartypants = $MT::Template::Context::Global_filters{'smarty_pants'}; + } + + if ($smartypants) { + MT->add_text_filter('markdown_with_smartypants' => { + label => 'Markdown With SmartyPants', + docs => 'http://daringfireball.net/projects/markdown/', + on_format => sub { + my $text = shift; + my $ctx = shift; + if (defined $ctx) { + my $output = $ctx->stash('markdown_output'); + if (defined $output && $output eq 'html') { + $g_empty_element_suffix = ">"; + } + else { + $g_empty_element_suffix = " />"; + } + } + $text = Markdown($text); + $text = $smartypants->($text, '1'); + }, + }); + } +} +else { +#### BBEdit/command-line text filter interface ########################## +# Needs to be hidden from MT (and Blosxom when running in static mode). + + # We're only using $blosxom::version once; tell Perl not to warn us: + no warnings 'once'; + unless ( defined($blosxom::version) ) { + use warnings; + + #### Check for command-line switches: ################# + my %cli_opts; + use Getopt::Long; + Getopt::Long::Configure('pass_through'); + GetOptions(\%cli_opts, + 'version', + 'shortversion', + 'html4tags', + ); + if ($cli_opts{'version'}) { # Version info + print "\nThis is Markdown, version $VERSION.\n"; + print "Copyright 2004 John Gruber\n"; + print "http://daringfireball.net/projects/markdown/\n\n"; + exit 0; + } + if ($cli_opts{'shortversion'}) { # Just the version number string. + print $VERSION; + exit 0; + } + if ($cli_opts{'html4tags'}) { # Use HTML tag style instead of XHTML + $g_empty_element_suffix = ">"; + } + + + #### Process incoming text: ########################### + my $text; + { + local $/; # Slurp the whole file + $text = <>; + } + print <<'EOT'; +<style> +.markdown-body { + font-size: 14px; + line-height: 1.6; + overflow: hidden; +} +.markdown-body>*:first-child { + margin-top: 0 !important; +} +.markdown-body>*:last-child { + margin-bottom: 0 !important; +} +.markdown-body a.absent { + color: #c00; +} +.markdown-body a.anchor { + display: block; + padding-left: 30px; + margin-left: -30px; + cursor: pointer; + position: absolute; + top: 0; + left: 0; + bottom: 0; +} +.markdown-body h1, .markdown-body h2, .markdown-body h3, .markdown-body h4, .markdown-body h5, .markdown-body h6 { + margin: 20px 0 10px; + padding: 0; + font-weight: bold; + -webkit-font-smoothing: antialiased; + cursor: text; + position: relative; +} +.markdown-body h1 .mini-icon-link, .markdown-body h2 .mini-icon-link, .markdown-body h3 .mini-icon-link, .markdown-body h4 .mini-icon-link, .markdown-body h5 .mini-icon-link, .markdown-body h6 .mini-icon-link { + display: none; + color: #000; +} +.markdown-body h1:hover a.anchor, .markdown-body h2:hover a.anchor, .markdown-body h3:hover a.anchor, .markdown-body h4:hover a.anchor, .markdown-body h5:hover a.anchor, .markdown-body h6:hover a.anchor { + text-decoration: none; + line-height: 1; + padding-left: 0; + margin-left: -22px; + top: 15%} +.markdown-body h1:hover a.anchor .mini-icon-link, .markdown-body h2:hover a.anchor .mini-icon-link, .markdown-body h3:hover a.anchor .mini-icon-link, .markdown-body h4:hover a.anchor .mini-icon-link, .markdown-body h5:hover a.anchor .mini-icon-link, .markdown-body h6:hover a.anchor .mini-icon-link { + display: inline-block; +} +.markdown-body h1 tt, .markdown-body h1 code, .markdown-body h2 tt, .markdown-body h2 code, .markdown-body h3 tt, .markdown-body h3 code, .markdown-body h4 tt, .markdown-body h4 code, .markdown-body h5 tt, .markdown-body h5 code, .markdown-body h6 tt, .markdown-body h6 code { + font-size: inherit; +} +.markdown-body h1 { + font-size: 28px; + color: #000; +} +.markdown-body h2 { + font-size: 24px; + border-bottom: 1px solid #ccc; + color: #000; +} +.markdown-body h3 { + font-size: 18px; +} +.markdown-body h4 { + font-size: 16px; +} +.markdown-body h5 { + font-size: 14px; +} +.markdown-body h6 { + color: #777; + font-size: 14px; +} +.markdown-body p, .markdown-body blockquote, .markdown-body ul, .markdown-body ol, .markdown-body dl, .markdown-body table, .markdown-body pre { + margin: 15px 0; +} +.markdown-body hr { + background: transparent url("/dirty-shade.png") repeat-x 0 0; + border: 0 none; + color: #ccc; + height: 4px; + padding: 0; +} +.markdown-body>h2:first-child, .markdown-body>h1:first-child, .markdown-body>h1:first-child+h2, .markdown-body>h3:first-child, .markdown-body>h4:first-child, .markdown-body>h5:first-child, .markdown-body>h6:first-child { + margin-top: 0; + padding-top: 0; +} +.markdown-body a:first-child h1, .markdown-body a:first-child h2, .markdown-body a:first-child h3, .markdown-body a:first-child h4, .markdown-body a:first-child h5, .markdown-body a:first-child h6 { + margin-top: 0; + padding-top: 0; +} +.markdown-body h1+p, .markdown-body h2+p, .markdown-body h3+p, .markdown-body h4+p, .markdown-body h5+p, .markdown-body h6+p { + margin-top: 0; +} +.markdown-body li p.first { + display: inline-block; +} +.markdown-body ul, .markdown-body ol { + padding-left: 30px; +} +.markdown-body ul.no-list, .markdown-body ol.no-list { + list-style-type: none; + padding: 0; +} +.markdown-body ul li>:first-child, .markdown-body ul li ul:first-of-type, .markdown-body ul li ol:first-of-type, .markdown-body ol li>:first-child, .markdown-body ol li ul:first-of-type, .markdown-body ol li ol:first-of-type { + margin-top: 0px; +} +.markdown-body ul li p:last-of-type, .markdown-body ol li p:last-of-type { + margin-bottom: 0; +} +.markdown-body ul ul, .markdown-body ul ol, .markdown-body ol ol, .markdown-body ol ul { + margin-bottom: 0; +} +.markdown-body dl { + padding: 0; +} +.markdown-body dl dt { + font-size: 14px; + font-weight: bold; + font-style: italic; + padding: 0; + margin: 15px 0 5px; +} +.markdown-body dl dt:first-child { + padding: 0; +} +.markdown-body dl dt>:first-child { + margin-top: 0px; +} +.markdown-body dl dt>:last-child { + margin-bottom: 0px; +} +.markdown-body dl dd { + margin: 0 0 15px; + padding: 0 15px; +} +.markdown-body dl dd>:first-child { + margin-top: 0px; +} +.markdown-body dl dd>:last-child { + margin-bottom: 0px; +} +.markdown-body blockquote { + border-left: 4px solid #DDD; + padding: 0 15px; + color: #777; +} +.markdown-body blockquote>:first-child { + margin-top: 0px; +} +.markdown-body blockquote>:last-child { + margin-bottom: 0px; +} +.markdown-body table th { + font-weight: bold; +} +.markdown-body table th, .markdown-body table td { + border: 1px solid #ccc; + padding: 6px 13px; +} +.markdown-body table tr { + border-top: 1px solid #ccc; + background-color: #fff; +} +.markdown-body table tr:nth-child(2n) { + background-color: #f8f8f8; +} +.markdown-body img { + max-width: 100%; + -moz-box-sizing: border-box; + box-sizing: border-box; +} +.markdown-body span.frame { + display: block; + overflow: hidden; +} +.markdown-body span.frame>span { + border: 1px solid #ddd; + display: block; + float: left; + overflow: hidden; + margin: 13px 0 0; + padding: 7px; + width: auto; +} +.markdown-body span.frame span img { + display: block; + float: left; +} +.markdown-body span.frame span span { + clear: both; + color: #333; + display: block; + padding: 5px 0 0; +} +.markdown-body span.align-center { + display: block; + overflow: hidden; + clear: both; +} +.markdown-body span.align-center>span { + display: block; + overflow: hidden; + margin: 13px auto 0; + text-align: center; +} +.markdown-body span.align-center span img { + margin: 0 auto; + text-align: center; +} +.markdown-body span.align-right { + display: block; + overflow: hidden; + clear: both; +} +.markdown-body span.align-right>span { + display: block; + overflow: hidden; + margin: 13px 0 0; + text-align: right; +} +.markdown-body span.align-right span img { + margin: 0; + text-align: right; +} +.markdown-body span.float-left { + display: block; + margin-right: 13px; + overflow: hidden; + float: left; +} +.markdown-body span.float-left span { + margin: 13px 0 0; +} +.markdown-body span.float-right { + display: block; + margin-left: 13px; + overflow: hidden; + float: right; +} +.markdown-body span.float-right>span { + display: block; + overflow: hidden; + margin: 13px auto 0; + text-align: right; +} +.markdown-body code, .markdown-body tt { + margin: 0 2px; + padding: 0px 5px; + border: 1px solid #eaeaea; + background-color: #f8f8f8; + border-radius: 3px; +} +.markdown-body code { + white-space: nowrap; +} +.markdown-body pre>code { + margin: 0; + padding: 0; + white-space: pre; + border: none; + background: transparent; +} +.markdown-body .highlight pre, .markdown-body pre { + background-color: #f8f8f8; + border: 1px solid #ccc; + font-size: 13px; + line-height: 19px; + overflow: auto; + padding: 6px 10px; + border-radius: 3px; +} +.markdown-body pre code, .markdown-body pre tt { + margin: 0; + padding: 0; + background-color: transparent; + border: none; +} +</style> +EOT + print "<div class='markdown-body'>"; + print Markdown($text); + print "</div>"; + } +} + + + +sub Markdown { +# +# Main function. The order in which other subs are called here is +# essential. Link and image substitutions need to happen before +# _EscapeSpecialChars(), so that any *'s or _'s in the <a> +# and <img> tags get encoded. +# + my $text = shift; + + # Clear the global hashes. If we don't clear these, you get conflicts + # from other articles when generating a page which contains more than + # one article (e.g. an index page that shows the N most recent + # articles): + %g_urls = (); + %g_titles = (); + %g_html_blocks = (); + + + # Standardize line endings: + $text =~ s{\r\n}{\n}g; # DOS to Unix + $text =~ s{\r}{\n}g; # Mac to Unix + + # Make sure $text ends with a couple of newlines: + $text .= "\n\n"; + + # Convert all tabs to spaces. + $text = _Detab($text); + + # Strip any lines consisting only of spaces and tabs. + # This makes subsequent regexen easier to write, because we can + # match consecutive blank lines with /\n+/ instead of something + # contorted like /[ \t]*\n+/ . + $text =~ s/^[ \t]+$//mg; + + # Turn block-level HTML blocks into hash entries + $text = _HashHTMLBlocks($text); + + # Strip link definitions, store in hashes. + $text = _StripLinkDefinitions($text); + + $text = _RunBlockGamut($text); + + $text = _UnescapeSpecialChars($text); + + return $text . "\n"; +} + + +sub _StripLinkDefinitions { +# +# Strips link definitions from text, stores the URLs and titles in +# hash references. +# + my $text = shift; + my $less_than_tab = $g_tab_width - 1; + + # Link defs are in the form: ^[id]: url "optional title" + while ($text =~ s{ + ^[ ]{0,$less_than_tab}\[(.+)\]: # id = $1 + [ \t]* + \n? # maybe *one* newline + [ \t]* + <?(\S+?)>? # url = $2 + [ \t]* + \n? # maybe one newline + [ \t]* + (?: + (?<=\s) # lookbehind for whitespace + ["(] + (.+?) # title = $3 + [")] + [ \t]* + )? # title is optional + (?:\n+|\Z) + } + {}mx) { + $g_urls{lc $1} = _EncodeAmpsAndAngles( $2 ); # Link IDs are case-insensitive + if ($3) { + $g_titles{lc $1} = $3; + $g_titles{lc $1} =~ s/"/"/g; + } + } + + return $text; +} + + +sub _HashHTMLBlocks { + my $text = shift; + my $less_than_tab = $g_tab_width - 1; + + # Hashify HTML blocks: + # We only want to do this for block-level HTML tags, such as headers, + # lists, and tables. That's because we still want to wrap <p>s around + # "paragraphs" that are wrapped in non-block-level tags, such as anchors, + # phrase emphasis, and spans. The list of tags we're looking for is + # hard-coded: + my $block_tags_a = qr/p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|script|noscript|form|fieldset|iframe|math|ins|del/; + my $block_tags_b = qr/p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|script|noscript|form|fieldset|iframe|math/; + + # First, look for nested blocks, e.g.: + # <div> + # <div> + # tags for inner block must be indented. + # </div> + # </div> + # + # The outermost tags must start at the left margin for this to match, and + # the inner nested divs must be indented. + # We need to do this before the next, more liberal match, because the next + # match will start at the first `<div>` and stop at the first `</div>`. + $text =~ s{ + ( # save in $1 + ^ # start of line (with /m) + <($block_tags_a) # start tag = $2 + \b # word break + (.*\n)*? # any number of lines, minimally matching + </\2> # the matching end tag + [ \t]* # trailing spaces/tabs + (?=\n+|\Z) # followed by a newline or end of document + ) + }{ + my $key = md5_hex($1); + $g_html_blocks{$key} = $1; + "\n\n" . $key . "\n\n"; + }egmx; + + + # + # Now match more liberally, simply from `\n<tag>` to `</tag>\n` + # + $text =~ s{ + ( # save in $1 + ^ # start of line (with /m) + <($block_tags_b) # start tag = $2 + \b # word break + (.*\n)*? # any number of lines, minimally matching + .*</\2> # the matching end tag + [ \t]* # trailing spaces/tabs + (?=\n+|\Z) # followed by a newline or end of document + ) + }{ + my $key = md5_hex($1); + $g_html_blocks{$key} = $1; + "\n\n" . $key . "\n\n"; + }egmx; + # Special case just for <hr />. It was easier to make a special case than + # to make the other regex more complicated. + $text =~ s{ + (?: + (?<=\n\n) # Starting after a blank line + | # or + \A\n? # the beginning of the doc + ) + ( # save in $1 + [ ]{0,$less_than_tab} + <(hr) # start tag = $2 + \b # word break + ([^<>])*? # + /?> # the matching end tag + [ \t]* + (?=\n{2,}|\Z) # followed by a blank line or end of document + ) + }{ + my $key = md5_hex($1); + $g_html_blocks{$key} = $1; + "\n\n" . $key . "\n\n"; + }egx; + + # Special case for standalone HTML comments: + $text =~ s{ + (?: + (?<=\n\n) # Starting after a blank line + | # or + \A\n? # the beginning of the doc + ) + ( # save in $1 + [ ]{0,$less_than_tab} + (?s: + <! + (--.*?--\s*)+ + > + ) + [ \t]* + (?=\n{2,}|\Z) # followed by a blank line or end of document + ) + }{ + my $key = md5_hex($1); + $g_html_blocks{$key} = $1; + "\n\n" . $key . "\n\n"; + }egx; + + + return $text; +} + + +sub _RunBlockGamut { +# +# These are all the transformations that form block-level +# tags like paragraphs, headers, and list items. +# + my $text = shift; + + $text = _DoHeaders($text); + + # Do Horizontal Rules: + $text =~ s{^[ ]{0,2}([ ]?\*[ ]?){3,}[ \t]*$}{\n<hr$g_empty_element_suffix\n}gmx; + $text =~ s{^[ ]{0,2}([ ]? -[ ]?){3,}[ \t]*$}{\n<hr$g_empty_element_suffix\n}gmx; + $text =~ s{^[ ]{0,2}([ ]? _[ ]?){3,}[ \t]*$}{\n<hr$g_empty_element_suffix\n}gmx; + + $text = _DoLists($text); + + $text = _DoCodeBlocks($text); + + $text = _DoBlockQuotes($text); + + # We already ran _HashHTMLBlocks() before, in Markdown(), but that + # was to escape raw HTML in the original Markdown source. This time, + # we're escaping the markup we've just created, so that we don't wrap + # <p> tags around block-level tags. + $text = _HashHTMLBlocks($text); + + $text = _FormParagraphs($text); + + return $text; +} + + +sub _RunSpanGamut { +# +# These are all the transformations that occur *within* block-level +# tags like paragraphs, headers, and list items. +# + my $text = shift; + + $text = _DoCodeSpans($text); + + $text = _EscapeSpecialChars($text); + + # Process anchor and image tags. Images must come first, + # because ![foo][f] looks like an anchor. + $text = _DoImages($text); + $text = _DoAnchors($text); + + # Make links out of things like `<http://example.com/>` + # Must come after _DoAnchors(), because you can use < and > + # delimiters in inline links like [this](<url>). + $text = _DoAutoLinks($text); + + $text = _EncodeAmpsAndAngles($text); + + $text = _DoItalicsAndBold($text); + + # Do hard breaks: + $text =~ s/ {2,}\n/ <br$g_empty_element_suffix\n/g; + + return $text; +} + + +sub _EscapeSpecialChars { + my $text = shift; + my $tokens ||= _TokenizeHTML($text); + + $text = ''; # rebuild $text from the tokens +# my $in_pre = 0; # Keep track of when we're inside <pre> or <code> tags. +# my $tags_to_skip = qr!<(/?)(?:pre|code|kbd|script|math)[\s>]!; + + foreach my $cur_token (@$tokens) { + if ($cur_token->[0] eq "tag") { + # Within tags, encode * and _ so they don't conflict + # with their use in Markdown for italics and strong. + # We're replacing each such character with its + # corresponding MD5 checksum value; this is likely + # overkill, but it should prevent us from colliding + # with the escape values by accident. + $cur_token->[1] =~ s! \* !$g_escape_table{'*'}!gx; + $cur_token->[1] =~ s! _ !$g_escape_table{'_'}!gx; + $text .= $cur_token->[1]; + } else { + my $t = $cur_token->[1]; + $t = _EncodeBackslashEscapes($t); + $text .= $t; + } + } + return $text; +} + + +sub _DoAnchors { +# +# Turn Markdown link shortcuts into XHTML <a> tags. +# + my $text = shift; + + # + # First, handle reference-style links: [link text] [id] + # + $text =~ s{ + ( # wrap whole match in $1 + \[ + ($g_nested_brackets) # link text = $2 + \] + + [ ]? # one optional space + (?:\n[ ]*)? # one optional newline followed by spaces + + \[ + (.*?) # id = $3 + \] + ) + }{ + my $result; + my $whole_match = $1; + my $link_text = $2; + my $link_id = lc $3; + + if ($link_id eq "") { + $link_id = lc $link_text; # for shortcut links like [this][]. + } + + if (defined $g_urls{$link_id}) { + my $url = $g_urls{$link_id}; + $url =~ s! \* !$g_escape_table{'*'}!gx; # We've got to encode these to avoid + $url =~ s! _ !$g_escape_table{'_'}!gx; # conflicting with italics/bold. + $result = "<a href=\"$url\""; + if ( defined $g_titles{$link_id} ) { + my $title = $g_titles{$link_id}; + $title =~ s! \* !$g_escape_table{'*'}!gx; + $title =~ s! _ !$g_escape_table{'_'}!gx; + $result .= " title=\"$title\""; + } + $result .= ">$link_text</a>"; + } + else { + $result = $whole_match; + } + $result; + }xsge; + + # + # Next, inline-style links: [link text](url "optional title") + # + $text =~ s{ + ( # wrap whole match in $1 + \[ + ($g_nested_brackets) # link text = $2 + \] + \( # literal paren + [ \t]* + <?(.*?)>? # href = $3 + [ \t]* + ( # $4 + (['"]) # quote char = $5 + (.*?) # Title = $6 + \5 # matching quote + )? # title is optional + \) + ) + }{ + my $result; + my $whole_match = $1; + my $link_text = $2; + my $url = $3; + my $title = $6; + + $url =~ s! \* !$g_escape_table{'*'}!gx; # We've got to encode these to avoid + $url =~ s! _ !$g_escape_table{'_'}!gx; # conflicting with italics/bold. + $result = "<a href=\"$url\""; + + if (defined $title) { + $title =~ s/"/"/g; + $title =~ s! \* !$g_escape_table{'*'}!gx; + $title =~ s! _ !$g_escape_table{'_'}!gx; + $result .= " title=\"$title\""; + } + + $result .= ">$link_text</a>"; + + $result; + }xsge; + + return $text; +} + + +sub _DoImages { +# +# Turn Markdown image shortcuts into <img> tags. +# + my $text = shift; + + # + # First, handle reference-style labeled images: ![alt text][id] + # + $text =~ s{ + ( # wrap whole match in $1 + !\[ + (.*?) # alt text = $2 + \] + + [ ]? # one optional space + (?:\n[ ]*)? # one optional newline followed by spaces + + \[ + (.*?) # id = $3 + \] + + ) + }{ + my $result; + my $whole_match = $1; + my $alt_text = $2; + my $link_id = lc $3; + + if ($link_id eq "") { + $link_id = lc $alt_text; # for shortcut links like ![this][]. + } + + $alt_text =~ s/"/"/g; + if (defined $g_urls{$link_id}) { + my $url = $g_urls{$link_id}; + $url =~ s! \* !$g_escape_table{'*'}!gx; # We've got to encode these to avoid + $url =~ s! _ !$g_escape_table{'_'}!gx; # conflicting with italics/bold. + $result = "<img src=\"$url\" alt=\"$alt_text\""; + if (defined $g_titles{$link_id}) { + my $title = $g_titles{$link_id}; + $title =~ s! \* !$g_escape_table{'*'}!gx; + $title =~ s! _ !$g_escape_table{'_'}!gx; + $result .= " title=\"$title\""; + } + $result .= $g_empty_element_suffix; + } + else { + # If there's no such link ID, leave intact: + $result = $whole_match; + } + + $result; + }xsge; + + # + # Next, handle inline images:  + # Don't forget: encode * and _ + + $text =~ s{ + ( # wrap whole match in $1 + !\[ + (.*?) # alt text = $2 + \] + \( # literal paren + [ \t]* + <?(\S+?)>? # src url = $3 + [ \t]* + ( # $4 + (['"]) # quote char = $5 + (.*?) # title = $6 + \5 # matching quote + [ \t]* + )? # title is optional + \) + ) + }{ + my $result; + my $whole_match = $1; + my $alt_text = $2; + my $url = $3; + my $title = ''; + if (defined($6)) { + $title = $6; + } + + $alt_text =~ s/"/"/g; + $title =~ s/"/"/g; + $url =~ s! \* !$g_escape_table{'*'}!gx; # We've got to encode these to avoid + $url =~ s! _ !$g_escape_table{'_'}!gx; # conflicting with italics/bold. + $result = "<img src=\"$url\" alt=\"$alt_text\""; + if (defined $title) { + $title =~ s! \* !$g_escape_table{'*'}!gx; + $title =~ s! _ !$g_escape_table{'_'}!gx; + $result .= " title=\"$title\""; + } + $result .= $g_empty_element_suffix; + + $result; + }xsge; + + return $text; +} + + +sub _DoHeaders { + my $text = shift; + + # Setext-style headers: + # Header 1 + # ======== + # + # Header 2 + # -------- + # + $text =~ s{ ^(.+)[ \t]*\n=+[ \t]*\n+ }{ + "<h1>" . _RunSpanGamut($1) . "</h1>\n\n"; + }egmx; + + $text =~ s{ ^(.+)[ \t]*\n-+[ \t]*\n+ }{ + "<h2>" . _RunSpanGamut($1) . "</h2>\n\n"; + }egmx; + + + # atx-style headers: + # # Header 1 + # ## Header 2 + # ## Header 2 with closing hashes ## + # ... + # ###### Header 6 + # + $text =~ s{ + ^(\#{1,6}) # $1 = string of #'s + [ \t]* + (.+?) # $2 = Header text + [ \t]* + \#* # optional closing #'s (not counted) + \n+ + }{ + my $h_level = length($1); + "<h$h_level>" . _RunSpanGamut($2) . "</h$h_level>\n\n"; + }egmx; + + return $text; +} + + +sub _DoLists { +# +# Form HTML ordered (numbered) and unordered (bulleted) lists. +# + my $text = shift; + my $less_than_tab = $g_tab_width - 1; + + # Re-usable patterns to match list item bullets and number markers: + my $marker_ul = qr/[*+-]/; + my $marker_ol = qr/\d+[.]/; + my $marker_any = qr/(?:$marker_ul|$marker_ol)/; + + # Re-usable pattern to match any entirel ul or ol list: + my $whole_list = qr{ + ( # $1 = whole list + ( # $2 + [ ]{0,$less_than_tab} + (${marker_any}) # $3 = first list item marker + [ \t]+ + ) + (?s:.+?) + ( # $4 + \z + | + \n{2,} + (?=\S) + (?! # Negative lookahead for another list item marker + [ \t]* + ${marker_any}[ \t]+ + ) + ) + ) + }mx; + + # We use a different prefix before nested lists than top-level lists. + # See extended comment in _ProcessListItems(). + # + # Note: There's a bit of duplication here. My original implementation + # created a scalar regex pattern as the conditional result of the test on + # $g_list_level, and then only ran the $text =~ s{...}{...}egmx + # substitution once, using the scalar as the pattern. This worked, + # everywhere except when running under MT on my hosting account at Pair + # Networks. There, this caused all rebuilds to be killed by the reaper (or + # perhaps they crashed, but that seems incredibly unlikely given that the + # same script on the same server ran fine *except* under MT. I've spent + # more time trying to figure out why this is happening than I'd like to + # admit. My only guess, backed up by the fact that this workaround works, + # is that Perl optimizes the substition when it can figure out that the + # pattern will never change, and when this optimization isn't on, we run + # afoul of the reaper. Thus, the slightly redundant code to that uses two + # static s/// patterns rather than one conditional pattern. + + if ($g_list_level) { + $text =~ s{ + ^ + $whole_list + }{ + my $list = $1; + my $list_type = ($3 =~ m/$marker_ul/) ? "ul" : "ol"; + # Turn double returns into triple returns, so that we can make a + # paragraph for the last item in a list, if necessary: + $list =~ s/\n{2,}/\n\n\n/g; + my $result = _ProcessListItems($list, $marker_any); + $result = "<$list_type>\n" . $result . "</$list_type>\n"; + $result; + }egmx; + } + else { + $text =~ s{ + (?:(?<=\n\n)|\A\n?) + $whole_list |
