diff options
| author | Petr Mrázek <peterix@gmail.com> | 2016-04-10 15:53:05 +0200 |
|---|---|---|
| committer | Petr Mrázek <peterix@gmail.com> | 2016-05-01 00:00:14 +0200 |
| commit | b6d455a02bd338e9dc0faa09d4d8177ecd8d569a (patch) | |
| tree | 41982bca1ede50049f2f8c7109dd18edeefde6d0 /libraries/hoedown/src | |
| parent | 47e37635f50c09b4f9a9ee7699e3120bab3e4088 (diff) | |
| download | PrismLauncher-b6d455a02bd338e9dc0faa09d4d8177ecd8d569a.tar.gz PrismLauncher-b6d455a02bd338e9dc0faa09d4d8177ecd8d569a.tar.bz2 PrismLauncher-b6d455a02bd338e9dc0faa09d4d8177ecd8d569a.zip | |
NOISSUE reorganize and document libraries
Diffstat (limited to 'libraries/hoedown/src')
| -rw-r--r-- | libraries/hoedown/src/autolink.c | 281 | ||||
| -rw-r--r-- | libraries/hoedown/src/buffer.c | 308 | ||||
| -rw-r--r-- | libraries/hoedown/src/document.c | 2958 | ||||
| -rw-r--r-- | libraries/hoedown/src/escape.c | 188 | ||||
| -rw-r--r-- | libraries/hoedown/src/html.c | 754 | ||||
| -rw-r--r-- | libraries/hoedown/src/html_blocks.c | 240 | ||||
| -rw-r--r-- | libraries/hoedown/src/html_smartypants.c | 435 | ||||
| -rw-r--r-- | libraries/hoedown/src/stack.c | 79 | ||||
| -rw-r--r-- | libraries/hoedown/src/version.c | 9 |
9 files changed, 5252 insertions, 0 deletions
diff --git a/libraries/hoedown/src/autolink.c b/libraries/hoedown/src/autolink.c new file mode 100644 index 00000000..9bc7fad5 --- /dev/null +++ b/libraries/hoedown/src/autolink.c @@ -0,0 +1,281 @@ +#include "hoedown/autolink.h" + +#include <string.h> +#include <stdlib.h> +#include <stdio.h> +#include <ctype.h> + +#ifndef _MSC_VER +#include <strings.h> +#else +#define strncasecmp _strnicmp +#endif + +int +hoedown_autolink_is_safe(const uint8_t *data, size_t size) +{ + static const size_t valid_uris_count = 6; + static const char *valid_uris[] = { + "http://", "https://", "/", "#", "ftp://", "mailto:" + }; + static const size_t valid_uris_size[] = { 7, 8, 1, 1, 6, 7 }; + size_t i; + + for (i = 0; i < valid_uris_count; ++i) { + size_t len = valid_uris_size[i]; + + if (size > len && + strncasecmp((char *)data, valid_uris[i], len) == 0 && + isalnum(data[len])) + return 1; + } + + return 0; +} + +static size_t +autolink_delim(uint8_t *data, size_t link_end, size_t max_rewind, size_t size) +{ + uint8_t cclose, copen = 0; + size_t i; + + for (i = 0; i < link_end; ++i) + if (data[i] == '<') { + link_end = i; + break; + } + + while (link_end > 0) { + if (strchr("?!.,:", data[link_end - 1]) != NULL) + link_end--; + + else if (data[link_end - 1] == ';') { + size_t new_end = link_end - 2; + + while (new_end > 0 && isalpha(data[new_end])) + new_end--; + + if (new_end < link_end - 2 && data[new_end] == '&') + link_end = new_end; + else + link_end--; + } + else break; + } + + if (link_end == 0) + return 0; + + cclose = data[link_end - 1]; + + switch (cclose) { + case '"': copen = '"'; break; + case '\'': copen = '\''; break; + case ')': copen = '('; break; + case ']': copen = '['; break; + case '}': copen = '{'; break; + } + + if (copen != 0) { + size_t closing = 0; + size_t opening = 0; + size_t i = 0; + + /* Try to close the final punctuation sign in this same line; + * if we managed to close it outside of the URL, that means that it's + * not part of the URL. If it closes inside the URL, that means it + * is part of the URL. + * + * Examples: + * + * foo http://www.pokemon.com/Pikachu_(Electric) bar + * => http://www.pokemon.com/Pikachu_(Electric) + * + * foo (http://www.pokemon.com/Pikachu_(Electric)) bar + * => http://www.pokemon.com/Pikachu_(Electric) + * + * foo http://www.pokemon.com/Pikachu_(Electric)) bar + * => http://www.pokemon.com/Pikachu_(Electric)) + * + * (foo http://www.pokemon.com/Pikachu_(Electric)) bar + * => foo http://www.pokemon.com/Pikachu_(Electric) + */ + + while (i < link_end) { + if (data[i] == copen) + opening++; + else if (data[i] == cclose) + closing++; + + i++; + } + + if (closing != opening) + link_end--; + } + + return link_end; +} + +static size_t +check_domain(uint8_t *data, size_t size, int allow_short) +{ + size_t i, np = 0; + + if (!isalnum(data[0])) + return 0; + + for (i = 1; i < size - 1; ++i) { + if (strchr(".:", data[i]) != NULL) np++; + else if (!isalnum(data[i]) && data[i] != '-') break; + } + + if (allow_short) { + /* We don't need a valid domain in the strict sense (with + * least one dot; so just make sure it's composed of valid + * domain characters and return the length of the the valid + * sequence. */ + return i; + } else { + /* a valid domain needs to have at least a dot. + * that's as far as we get */ + return np ? i : 0; + } +} + +size_t +hoedown_autolink__www( + size_t *rewind_p, + hoedown_buffer *link, + uint8_t *data, + size_t max_rewind, + size_t size, + unsigned int flags) +{ + size_t link_end; + + if (max_rewind > 0 && !ispunct(data[-1]) && !isspace(data[-1])) + return 0; + + if (size < 4 || memcmp(data, "www.", strlen("www.")) != 0) + return 0; + + link_end = check_domain(data, size, 0); + + if (link_end == 0) + return 0; + + while (link_end < size && !isspace(data[link_end])) + link_end++; + + link_end = autolink_delim(data, link_end, max_rewind, size); + + if (link_end == 0) + return 0; + + hoedown_buffer_put(link, data, link_end); + *rewind_p = 0; + + return (int)link_end; +} + +size_t +hoedown_autolink__email( + size_t *rewind_p, + hoedown_buffer *link, + uint8_t *data, + size_t max_rewind, + size_t size, + unsigned int flags) +{ + size_t link_end, rewind; + int nb = 0, np = 0; + + for (rewind = 0; rewind < max_rewind; ++rewind) { + uint8_t c = data[-1 - rewind]; + + if (isalnum(c)) + continue; + + if (strchr(".+-_", c) != NULL) + continue; + + break; + } + + if (rewind == 0) + return 0; + + for (link_end = 0; link_end < size; ++link_end) { + uint8_t c = data[link_end]; + + if (isalnum(c)) + continue; + + if (c == '@') + nb++; + else if (c == '.' && link_end < size - 1) + np++; + else if (c != '-' && c != '_') + break; + } + + if (link_end < 2 || nb != 1 || np == 0 || + !isalpha(data[link_end - 1])) + return 0; + + link_end = autolink_delim(data, link_end, max_rewind, size); + + if (link_end == 0) + return 0; + + hoedown_buffer_put(link, data - rewind, link_end + rewind); + *rewind_p = rewind; + + return link_end; +} + +size_t +hoedown_autolink__url( + size_t *rewind_p, + hoedown_buffer *link, + uint8_t *data, + size_t max_rewind, + size_t size, + unsigned int flags) +{ + size_t link_end, rewind = 0, domain_len; + + if (size < 4 || data[1] != '/' || data[2] != '/') + return 0; + + while (rewind < max_rewind && isalpha(data[-1 - rewind])) + rewind++; + + if (!hoedown_autolink_is_safe(data - rewind, size + rewind)) + return 0; + + link_end = strlen("://"); + + domain_len = check_domain( + data + link_end, + size - link_end, + flags & HOEDOWN_AUTOLINK_SHORT_DOMAINS); + + if (domain_len == 0) + return 0; + + link_end += domain_len; + while (link_end < size && !isspace(data[link_end])) + link_end++; + + link_end = autolink_delim(data, link_end, max_rewind, size); + + if (link_end == 0) + return 0; + + hoedown_buffer_put(link, data - rewind, link_end + rewind); + *rewind_p = rewind; + + return link_end; +} diff --git a/libraries/hoedown/src/buffer.c b/libraries/hoedown/src/buffer.c new file mode 100644 index 00000000..1c7ba55a --- /dev/null +++ b/libraries/hoedown/src/buffer.c @@ -0,0 +1,308 @@ +#include "hoedown/buffer.h" + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <assert.h> + +void * +hoedown_malloc(size_t size) +{ + void *ret = malloc(size); + + if (!ret) { + fprintf(stderr, "Allocation failed.\n"); + abort(); + } + + return ret; +} + +void * +hoedown_calloc(size_t nmemb, size_t size) +{ + void *ret = calloc(nmemb, size); + + if (!ret) { + fprintf(stderr, "Allocation failed.\n"); + abort(); + } + + return ret; +} + +void * +hoedown_realloc(void *ptr, size_t size) +{ + void *ret = realloc(ptr, size); + + if (!ret) { + fprintf(stderr, "Allocation failed.\n"); + abort(); + } + + return ret; +} + +void +hoedown_buffer_init( + hoedown_buffer *buf, + size_t unit, + hoedown_realloc_callback data_realloc, + hoedown_free_callback data_free, + hoedown_free_callback buffer_free) +{ + assert(buf); + + buf->data = NULL; + buf->size = buf->asize = 0; + buf->unit = unit; + buf->data_realloc = data_realloc; + buf->data_free = data_free; + buf->buffer_free = buffer_free; +} + +void +hoedown_buffer_uninit(hoedown_buffer *buf) +{ + assert(buf && buf->unit); + buf->data_free(buf->data); +} + +hoedown_buffer * +hoedown_buffer_new(size_t unit) +{ + hoedown_buffer *ret = hoedown_malloc(sizeof (hoedown_buffer)); + hoedown_buffer_init(ret, unit, hoedown_realloc, free, free); + return ret; +} + +void +hoedown_buffer_free(hoedown_buffer *buf) +{ + if (!buf) return; + assert(buf && buf->unit); + + buf->data_free(buf->data); + + if (buf->buffer_free) + buf->buffer_free(buf); +} + +void +hoedown_buffer_reset(hoedown_buffer *buf) +{ + assert(buf && buf->unit); + + buf->data_free(buf->data); + buf->data = NULL; + buf->size = buf->asize = 0; +} + +void +hoedown_buffer_grow(hoedown_buffer *buf, size_t neosz) +{ + size_t neoasz; + assert(buf && buf->unit); + + if (buf->asize >= neosz) + return; + + neoasz = buf->asize + buf->unit; + while (neoasz < neosz) + neoasz += buf->unit; + + buf->data = (uint8_t *) buf->data_realloc(buf->data, neoasz); + buf->asize = neoasz; +} + +void +hoedown_buffer_put(hoedown_buffer *buf, const uint8_t *data, size_t size) +{ + assert(buf && buf->unit); + + if (buf->size + size > buf->asize) + hoedown_buffer_grow(buf, buf->size + size); + + memcpy(buf->data + buf->size, data, size); + buf->size += size; +} + +void +hoedown_buffer_puts(hoedown_buffer *buf, const char *str) +{ + hoedown_buffer_put(buf, (const uint8_t *)str, strlen(str)); +} + +void +hoedown_buffer_putc(hoedown_buffer *buf, uint8_t c) +{ + assert(buf && buf->unit); + + if (buf->size >= buf->asize) + hoedown_buffer_grow(buf, buf->size + 1); + + buf->data[buf->size] = c; + buf->size += 1; +} + +int +hoedown_buffer_putf(hoedown_buffer *buf, FILE *file) +{ + assert(buf && buf->unit); + + while (!(feof(file) || ferror(file))) { + hoedown_buffer_grow(buf, buf->size + buf->unit); + buf->size += fread(buf->data + buf->size, 1, buf->unit, file); + } + + return ferror(file); +} + +void +hoedown_buffer_set(hoedown_buffer *buf, const uint8_t *data, size_t size) +{ + assert(buf && buf->unit); + + if (size > buf->asize) + hoedown_buffer_grow(buf, size); + + memcpy(buf->data, data, size); + buf->size = size; +} + +void +hoedown_buffer_sets(hoedown_buffer *buf, const char *str) +{ + hoedown_buffer_set(buf, (const uint8_t *)str, strlen(str)); +} + +int +hoedown_buffer_eq(const hoedown_buffer *buf, const uint8_t *data, size_t size) +{ + if (buf->size != size) return 0; + return memcmp(buf->data, data, size) == 0; +} + +int +hoedown_buffer_eqs(const hoedown_buffer *buf, const char *str) +{ + return hoedown_buffer_eq(buf, (const uint8_t *)str, strlen(str)); +} + +int +hoedown_buffer_prefix(const hoedown_buffer *buf, const char *prefix) +{ + size_t i; + + for (i = 0; i < buf->size; ++i) { + if (prefix[i] == 0) + return 0; + + if (buf->data[i] != prefix[i]) + return buf->data[i] - prefix[i]; + } + + return 0; +} + +void +hoedown_buffer_slurp(hoedown_buffer *buf, size_t size) +{ + assert(buf && buf->unit); + + if (size >= buf->size) { + buf->size = 0; + return; + } + + buf->size -= size; + memmove(buf->data, buf->data + size, buf->size); +} + +const char * +hoedown_buffer_cstr(hoedown_buffer *buf) +{ + assert(buf && buf->unit); + + if (buf->size < buf->asize && buf->data[buf->size] == 0) + return (char *)buf->data; + + hoedown_buffer_grow(buf, buf->size + 1); + buf->data[buf->size] = 0; + + return (char *)buf->data; +} + +void +hoedown_buffer_printf(hoedown_buffer *buf, const char *fmt, ...) +{ + va_list ap; + int n; + + assert(buf && buf->unit); + + if (buf->size >= buf->asize) + hoedown_buffer_grow(buf, buf->size + 1); + + va_start(ap, fmt); + n = vsnprintf((char *)buf->data + buf->size, buf->asize - buf->size, fmt, ap); + va_end(ap); + + if (n < 0) { +#ifndef _MSC_VER + return; +#else + va_start(ap, fmt); + n = _vscprintf(fmt, ap); + va_end(ap); +#endif + } + + if ((size_t)n >= buf->asize - buf->size) { + hoedown_buffer_grow(buf, buf->size + n + 1); + + va_start(ap, fmt); + n = vsnprintf((char *)buf->data + buf->size, buf->asize - buf->size, fmt, ap); + va_end(ap); + } + + if (n < 0) + return; + + buf->size += n; +} + +void hoedown_buffer_put_utf8(hoedown_buffer *buf, unsigned int c) { + unsigned char unichar[4]; + + assert(buf && buf->unit); + + if (c < 0x80) { + hoedown_buffer_putc(buf, c); + } + else if (c < 0x800) { + unichar[0] = 192 + (c / 64); + unichar[1] = 128 + (c % 64); + hoedown_buffer_put(buf, unichar, 2); + } + else if (c - 0xd800u < 0x800) { + HOEDOWN_BUFPUTSL(buf, "\xef\xbf\xbd"); + } + else if (c < 0x10000) { + unichar[0] = 224 + (c / 4096); + unichar[1] = 128 + (c / 64) % 64; + unichar[2] = 128 + (c % 64); + hoedown_buffer_put(buf, unichar, 3); + } + else if (c < 0x110000) { + unichar[0] = 240 + (c / 262144); + unichar[1] = 128 + (c / 4096) % 64; + unichar[2] = 128 + (c / 64) % 64; + unichar[3] = 128 + (c % 64); + hoedown_buffer_put(buf, unichar, 4); + } + else { + HOEDOWN_BUFPUTSL(buf, "\xef\xbf\xbd"); + } +} diff --git a/libraries/hoedown/src/document.c b/libraries/hoedown/src/document.c new file mode 100644 index 00000000..8ba82e47 --- /dev/null +++ b/libraries/hoedown/src/document.c @@ -0,0 +1,2958 @@ +#include "hoedown/document.h" + +#include <assert.h> +#include <string.h> +#include <ctype.h> +#include <stdio.h> + +#include "hoedown/stack.h" + +#ifndef _MSC_VER +#include <strings.h> +#else +#define strncasecmp _strnicmp +#endif + +#define REF_TABLE_SIZE 8 + +#define BUFFER_BLOCK 0 +#define BUFFER_SPAN 1 + +#define HOEDOWN_LI_END 8 /* internal list flag */ + +const char *hoedown_find_block_tag(const char *str, unsigned int len); + +/*************** + * LOCAL TYPES * + ***************/ + +/* link_ref: reference to a link */ +struct link_ref { + unsigned int id; + + hoedown_buffer *link; + hoedown_buffer *title; + + struct link_ref *next; +}; + +/* footnote_ref: reference to a footnote */ +struct footnote_ref { + unsigned int id; + + int is_used; + unsigned int num; + + hoedown_buffer *contents; +}; + +/* footnote_item: an item in a footnote_list */ +struct footnote_item { + struct footnote_ref *ref; + struct footnote_item *next; +}; + +/* footnote_list: linked list of footnote_item */ +struct footnote_list { + unsigned int count; + struct footnote_item *head; + struct footnote_item *tail; +}; + +/* char_trigger: function pointer to render active chars */ +/* returns the number of chars taken care of */ +/* data is the pointer of the beginning of the span */ +/* offset is the number of valid chars before data */ +typedef size_t +(*char_trigger)(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t offset, size_t size); + +static size_t char_emphasis(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t offset, size_t size); +static size_t char_quote(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t offset, size_t size); +static size_t char_linebreak(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t offset, size_t size); +static size_t char_codespan(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t offset, size_t size); +static size_t char_escape(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t offset, size_t size); +static size_t char_entity(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t offset, size_t size); +static size_t char_langle_tag(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t offset, size_t size); +static size_t char_autolink_url(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t offset, size_t size); +static size_t char_autolink_email(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t offset, size_t size); +static size_t char_autolink_www(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t offset, size_t size); +static size_t char_link(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t offset, size_t size); +static size_t char_superscript(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t offset, size_t size); +static size_t char_math(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t offset, size_t size); + +enum markdown_char_t { + MD_CHAR_NONE = 0, + MD_CHAR_EMPHASIS, + MD_CHAR_CODESPAN, + MD_CHAR_LINEBREAK, + MD_CHAR_LINK, + MD_CHAR_LANGLE, + MD_CHAR_ESCAPE, + MD_CHAR_ENTITY, + MD_CHAR_AUTOLINK_URL, + MD_CHAR_AUTOLINK_EMAIL, + MD_CHAR_AUTOLINK_WWW, + MD_CHAR_SUPERSCRIPT, + MD_CHAR_QUOTE, + MD_CHAR_MATH +}; + +static char_trigger markdown_char_ptrs[] = { + NULL, + &char_emphasis, + &char_codespan, + &char_linebreak, + &char_link, + &char_langle_tag, + &char_escape, + &char_entity, + &char_autolink_url, + &char_autolink_email, + &char_autolink_www, + &char_superscript, + &char_quote, + &char_math +}; + +struct hoedown_document { + hoedown_renderer md; + hoedown_renderer_data data; + + struct link_ref *refs[REF_TABLE_SIZE]; + struct footnote_list footnotes_found; + struct footnote_list footnotes_used; + uint8_t active_char[256]; + hoedown_stack work_bufs[2]; + hoedown_extensions ext_flags; + size_t max_nesting; + int in_link_body; +}; + +/*************************** + * HELPER FUNCTIONS * + ***************************/ + +static hoedown_buffer * +newbuf(hoedown_document *doc, int type) +{ + static const size_t buf_size[2] = {256, 64}; + hoedown_buffer *work = NULL; + hoedown_stack *pool = &doc->work_bufs[type]; + + if (pool->size < pool->asize && + pool->item[pool->size] != NULL) { + work = pool->item[pool->size++]; + work->size = 0; + } else { + work = hoedown_buffer_new(buf_size[type]); + hoedown_stack_push(pool, work); + } + + return work; +} + +static void +popbuf(hoedown_document *doc, int type) +{ + doc->work_bufs[type].size--; +} + +static void +unscape_text(hoedown_buffer *ob, hoedown_buffer *src) +{ + size_t i = 0, org; + while (i < src->size) { + org = i; + while (i < src->size && src->data[i] != '\\') + i++; + + if (i > org) + hoedown_buffer_put(ob, src->data + org, i - org); + + if (i + 1 >= src->size) + break; + + hoedown_buffer_putc(ob, src->data[i + 1]); + i += 2; + } +} + +static unsigned int +hash_link_ref(const uint8_t *link_ref, size_t length) +{ + size_t i; + unsigned int hash = 0; + + for (i = 0; i < length; ++i) + hash = tolower(link_ref[i]) + (hash << 6) + (hash << 16) - hash; + + return hash; +} + +static struct link_ref * +add_link_ref( + struct link_ref **references, + const uint8_t *name, size_t name_size) +{ + struct link_ref *ref = hoedown_calloc(1, sizeof(struct link_ref)); + + ref->id = hash_link_ref(name, name_size); + ref->next = references[ref->id % REF_TABLE_SIZE]; + + references[ref->id % REF_TABLE_SIZE] = ref; + return ref; +} + +static struct link_ref * +find_link_ref(struct link_ref **references, uint8_t *name, size_t length) +{ + unsigned int hash = hash_link_ref(name, length); + struct link_ref *ref = NULL; + + ref = references[hash % REF_TABLE_SIZE]; + + while (ref != NULL) { + if (ref->id == hash) + return ref; + + ref = ref->next; + } + + return NULL; +} + +static void +free_link_refs(struct link_ref **references) +{ + size_t i; + + for (i = 0; i < REF_TABLE_SIZE; ++i) { + struct link_ref *r = references[i]; + struct link_ref *next; + + while (r) { + next = r->next; + hoedown_buffer_free(r->link); + hoedown_buffer_free(r->title); + free(r); + r = next; + } + } +} + +static struct footnote_ref * +create_footnote_ref(struct footnote_list *list, const uint8_t *name, size_t name_size) +{ + struct footnote_ref *ref = hoedown_calloc(1, sizeof(struct footnote_ref)); + + ref->id = hash_link_ref(name, name_size); + + return ref; +} + +static int +add_footnote_ref(struct footnote_list *list, struct footnote_ref *ref) +{ + struct footnote_item *item = hoedown_calloc(1, sizeof(struct footnote_item)); + if (!item) + return 0; + item->ref = ref; + + if (list->head == NULL) { + list->head = list->tail = item; + } else { + list->tail->next = item; + list->tail = item; + } + list->count++; + + return 1; +} + +static struct footnote_ref * +find_footnote_ref(struct footnote_list *list, uint8_t *name, size_t length) +{ + unsigned int hash = hash_link_ref(name, length); + struct footnote_item *item = NULL; + + item = list->head; + + while (item != NULL) { + if (item->ref->id == hash) + return item->ref; + item = item->next; + } + + return NULL; +} + +static void +free_footnote_ref(struct footnote_ref *ref) +{ + hoedown_buffer_free(ref->contents); + free(ref); +} + +static void +free_footnote_list(struct footnote_list *list, int free_refs) +{ + struct footnote_item *item = list->head; + struct footnote_item *next; + + while (item) { + next = item->next; + if (free_refs) + free_footnote_ref(item->ref); + free(item); + item = next; + } +} + + +/* + * Check whether a char is a Markdown spacing char. + + * Right now we only consider spaces the actual + * space and a newline: tabs and carriage returns + * are filtered out during the preprocessing phase. + * + * If we wanted to actually be UTF-8 compliant, we + * should instead extract an Unicode codepoint from + * this character and check for space properties. + */ +static int +_isspace(int c) +{ + return c == ' ' || c == '\n'; +} + +/* is_empty_all: verify that all the data is spacing */ +static int +is_empty_all(const uint8_t *data, size_t size) +{ + size_t i = 0; + while (i < size && _isspace(data[i])) i++; + return i == size; +} + +/* + * Replace all spacing characters in data with spaces. As a special + * case, this collapses a newline with the previous space, if possible. + */ +static void +replace_spacing(hoedown_buffer *ob, const uint8_t *data, size_t size) +{ + size_t i = 0, mark; + hoedown_buffer_grow(ob, size); + while (1) { + mark = i; + while (i < size && data[i] != '\n') i++; + hoedown_buffer_put(ob, data + mark, i - mark); + + if (i >= size) break; + + if (!(i > 0 && data[i-1] == ' ')) + hoedown_buffer_putc(ob, ' '); + i++; + } +} + +/**************************** + * INLINE PARSING FUNCTIONS * + ****************************/ + +/* is_mail_autolink • looks for the address part of a mail autolink and '>' */ +/* this is less strict than the original markdown e-mail address matching */ +static size_t +is_mail_autolink(uint8_t *data, size_t size) +{ + size_t i = 0, nb = 0; + + /* address is assumed to be: [-@._a-zA-Z0-9]+ with exactly one '@' */ + for (i = 0; i < size; ++i) { + if (isalnum(data[i])) + continue; + + switch (data[i]) { + case '@': + nb++; + + case '-': + case '.': + case '_': + break; + + case '>': + return (nb == 1) ? i + 1 : 0; + + default: + return 0; + } + } + + return 0; +} + +/* tag_length • returns the length of the given tag, or 0 is it's not valid */ +static size_t +tag_length(uint8_t *data, size_t size, hoedown_autolink_type *autolink) +{ + size_t i, j; + + /* a valid tag can't be shorter than 3 chars */ + if (size < 3) return 0; + + /* begins with a '<' optionally followed by '/', followed by letter or number */ + if (data[0] != '<') return 0; + i = (data[1] == '/') ? 2 : 1; + + if (!isalnum(data[i])) + return 0; + + /* scheme test */ + *autolink = HOEDOWN_AUTOLINK_NONE; + + /* try to find the beginning of an URI */ + while (i < size && (isalnum(data[i]) || data[i] == '.' || data[i] == '+' || data[i] == '-')) + i++; + + if (i > 1 && data[i] == '@') { + if ((j = is_mail_autolink(data + i, size - i)) != 0) { + *autolink = HOEDOWN_AUTOLINK_EMAIL; + return i + j; + } + } + + if (i > 2 && data[i] == ':') { + *autolink = HOEDOWN_AUTOLINK_NORMAL; + i++; + } + + /* completing autolink test: no spacing or ' or " */ + if (i >= size) + *autolink = HOEDOWN_AUTOLINK_NONE; + + else if (*autolink) { + j = i; + + while (i < size) { + if (data[i] == '\\') i += 2; + else if (data[i] == '>' || data[i] == '\'' || + data[i] == '"' || data[i] == ' ' || data[i] == '\n') + break; + else i++; + } + + if (i >= size) return 0; + if (i > j && data[i] == '>') return i + 1; + /* one of the forbidden chars has been found */ + *autolink = HOEDOWN_AUTOLINK_NONE; + } + + /* looking for something looking like a tag end */ + while (i < size && data[i] != '>') i++; + if (i >= size) return 0; + return i + 1; +} + +/* parse_inline • parses inline markdown elements */ +static void +parse_inline(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t size) +{ + size_t i = 0, end = 0, consumed = 0; + hoedown_buffer work = { 0, 0, 0, 0, NULL, NULL, NULL }; + uint8_t *active_char = doc->active_char; + + if (doc->work_bufs[BUFFER_SPAN].size + + doc->work_bufs[BUFFER_BLOCK].size > doc->max_nesting) + return; + + while (i < size) { + /* copying inactive chars into the output */ + while (end < size && active_char[data[end]] == 0) + end++; + + if (doc->md.normal_text) { + work.data = data + i; + work.size = end - i; + doc->md.normal_text(ob, &work, &doc->data); + } + else + hoedown_buffer_put(ob, data + i, end - i); + + if (end >= size) break; + i = end; + + end = markdown_char_ptrs[ (int)active_char[data[end]] ](ob, doc, data + i, i - consumed, size - i); + if (!end) /* no action from the callback */ + end = i + 1; + else { + i += end; + end = i; + consumed = i; + } + } +} + +/* is_escaped • returns whether special char at data[loc] is escaped by '\\' */ +static int +is_escaped(uint8_t *data, size_t loc) +{ + size_t i = loc; + while (i >= 1 && data[i - 1] == '\\') + i--; + + /* odd numbers of backslashes escapes data[loc] */ + return (loc - i) % 2; +} + +/* find_emph_char • looks for the next emph uint8_t, skipping other constructs */ +static size_t +find_emph_char(uint8_t *data, size_t size, uint8_t c) +{ + size_t i = 0; + + while (i < size) { + while (i < size && data[i] != c && data[i] != '[' && data[i] != '`') + i++; + + if (i == size) + return 0; + + /* not counting escaped chars */ + if (is_escaped(data, i)) { + i++; continue; + } + + if (data[i] == c) + return i; + + /* skipping a codespan */ + if (data[i] == '`') { + size_t span_nb = 0, bt; + size_t tmp_i = 0; + + /* counting the number of opening backticks */ + while (i < size && data[i] == '`') { + i++; span_nb++; + } + + if (i >= size) return 0; + + /* finding the matching closing sequence */ + bt = 0; + while (i < size && bt < span_nb) { + if (!tmp_i && data[i] == c) tmp_i = i; + if (data[i] == '`') bt++; + else bt = 0; + i++; + } + + /* not a well-formed codespan; use found matching emph char */ + if (i >= size) return tmp_i; + } + /* skipping a link */ + else if (data[i] == '[') { + size_t tmp_i = 0; + uint8_t cc; + + i++; + while (i < size && data[i] != ']') { + if (!tmp_i && data[i] == c) tmp_i = i; + i++; + } + + i++; + while (i < size && _isspace(data[i])) + i++; + + if (i >= size) + return tmp_i; + + switch (data[i]) { + case '[': + cc = ']'; break; + + case '(': + cc = ')'; break; + + default: + if (tmp_i) + return tmp_i; + else + continue; + } + + i++; + while (i < size && data[i] != cc) { + if (!tmp_i && data[i] == c) tmp_i = i; + i++; + } + + if (i >= size) + return tmp_i; + + i++; + } + } + + return 0; +} + +/* parse_emph1 • parsing single emphase */ +/* closed by a symbol not preceded by spacing and not followed by symbol */ +static size_t +parse_emph1(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t size, uint8_t c) |
