#include "hoedown/autolink.h"

#include <string.h>
#include <stdlib.h>
#include <stdio.h>
#include <ctype.h>

#ifndef _MSC_VER
#include <strings.h>
#else
#define strncasecmp    _strnicmp
#endif

/*
 * Report whether a link target begins with one of the accepted prefixes.
 *
 * data/size describe the candidate link (it does not have to be
 * NUL-terminated). The link is accepted when it starts, ignoring case, with
 * one of the prefixes in the table below AND the byte immediately following
 * the prefix is alphanumeric.
 *
 * Returns 1 when the link is accepted, 0 otherwise.
 */
int
hoedown_autolink_is_safe(const uint8_t *data, size_t size)
{
    /* Each prefix is stored with its length so nothing is recomputed
     * per call. */
    static const struct {
        const char *text;
        size_t length;
    } safe_prefixes[] = {
        { "http://",  7 },
        { "https://", 8 },
        { "/",        1 },
        { "#",        1 },
        { "ftp://",   6 },
        { "mailto:",  7 }
    };
    const size_t prefix_count =
        sizeof(safe_prefixes) / sizeof(safe_prefixes[0]);
    size_t k;

    for (k = 0; k < prefix_count; k++) {
        const char *text = safe_prefixes[k].text;
        const size_t length = safe_prefixes[k].length;

        /* There must be at least one byte after the prefix. */
        if (size <= length)
            continue;

        if (strncasecmp((const char *)data, text, length) != 0)
            continue;

        if (isalnum(data[length]))
            return 1;
    }

    return 0;
}

/*
 * Trim a candidate autolink so that trailing punctuation which is unlikely
 * to belong to it is left out.
 *
 * data      - start of the scanned span
 * link_end  - length of the candidate link within data
 * max_rewind, size - unused; kept so existing callers need no change
 *
 * Returns the adjusted link length, or 0 when nothing is left.
 */
static size_t
autolink_delim(uint8_t *data, size_t link_end, size_t max_rewind, size_t size)
{
    uint8_t cclose, copen = 0;
    size_t i;

    (void)max_rewind;
    (void)size;

    /* A '<' always ends the link. */
    for (i = 0; i < link_end; ++i)
        if (data[i] == '<') {
            link_end = i;
            break;
        }

    while (link_end > 0) {
        /* Note: strchr() also matches the terminator of the set string, so
         * a trailing NUL byte is stripped here as well. */
        if (strchr("?!.,:", data[link_end - 1]) != NULL)
            link_end--;

        else if (data[link_end - 1] == ';') {
            size_t new_end;

            /* A lone ';' cannot end an entity. Bail out before computing
             * link_end - 2, which would wrap around and make the scan
             * below read far outside the buffer. */
            if (link_end < 2) {
                link_end = 0;
                break;
            }

            /* Walk back over letters: if they are preceded by '&' this is
             * an HTML entity ("&amp;") and the whole entity is dropped,
             * otherwise only the ';' is. */
            new_end = link_end - 2;

            while (new_end > 0 && isalpha(data[new_end]))
                new_end--;

            if (new_end < link_end - 2 && data[new_end] == '&')
                link_end = new_end;
            else
                link_end--;
        }
        else break;
    }

    if (link_end == 0)
        return 0;

    cclose = data[link_end - 1];

    switch (cclose) {
    case '"':    copen = '"'; break;
    case '\'':    copen = '\''; break;
    case ')':    copen = '('; break;
    case ']':    copen = '['; break;
    case '}':    copen = '{'; break;
    }

    if (copen != 0) {
        size_t closing = 0;
        size_t opening = 0;

        /* The link ends with a closing delimiter. Count the opening and
         * closing delimiters inside data[0..link_end): when they do not
         * balance, the final delimiter is taken to close something outside
         * of the link and is dropped.
         *
         * Examples (the scanned span is shown in full):
         *
         *    www.pokemon.com/Pikachu_(Electric)
         *        => kept whole, the parentheses balance
         *
         *    www.pokemon.com/Pikachu_(Electric))
         *        => www.pokemon.com/Pikachu_(Electric)
         *
         * For quotes the opening and closing characters are identical, so
         * every occurrence is counted as an opening one.
         */
        for (i = 0; i < link_end; ++i) {
            if (data[i] == copen)
                opening++;
            else if (data[i] == cclose)
                closing++;
        }

        if (closing != opening)
            link_end--;
    }

    return link_end;
}

/*
 * Measure the run of domain-like characters at the start of data.
 *
 * data/size   - bytes to examine
 * allow_short - when non-zero, a domain without any '.' or ':' is accepted
 *
 * The first byte must be alphanumeric. After it, alphanumerics, '-', '.'
 * and ':' are accepted; the last byte of the input is never examined, so
 * the result is at most size - 1.
 *
 * Returns the length of the run, or 0 when the input is empty, does not
 * start with an alphanumeric byte, or (with allow_short == 0) contains no
 * '.' or ':' separator.
 */
static size_t
check_domain(uint8_t *data, size_t size, int allow_short)
{
    size_t i, np = 0;

    /* The size check keeps data[0] from being read on an empty input and
     * keeps `size - 1` below from wrapping around. */
    if (size == 0 || !isalnum(data[0]))
        return 0;

    for (i = 1; i < size - 1; ++i) {
        /* Compared explicitly rather than with strchr(): strchr() also
         * matches the string terminator, which would count a NUL byte as
         * a separator. */
        if (data[i] == '.' || data[i] == ':') np++;
        else if (!isalnum(data[i]) && data[i] != '-') break;
    }

    if (allow_short) {
        /* We don't need a valid domain in the strict sense (with at
         * least one dot), so just make sure it's composed of valid
         * domain characters and return the length of the valid
         * sequence. */
        return i;
    } else {
        /* A valid domain needs to have at least one dot.
         * That's as far as we get. */
        return np ? i : 0;
    }
}

/*
 * Try to recognise an autolink starting with "www." at data.
 *
 * rewind_p   - receives 0 on success (no bytes before data belong to the
 *              link)
 * link       - on success the link text is handed to hoedown_buffer_put()
 * data       - candidate start; data[-1] is read when max_rewind > 0
 * max_rewind - number of bytes available before data
 * size       - number of bytes available from data onwards
 * flags      - unused
 *
 * Returns the length of the link within data, or 0 when no link was
 * recognised (in which case link and *rewind_p are left untouched).
 */
size_t
hoedown_autolink__www(
    size_t *rewind_p,
    hoedown_buffer *link,
    uint8_t *data,
    size_t max_rewind,
    size_t size,
    unsigned int flags)
{
    size_t link_end;

    (void)flags;

    /* When something precedes the candidate it has to be punctuation or
     * whitespace; otherwise "www." is in the middle of a word. */
    if (max_rewind > 0 && !ispunct(data[-1]) && !isspace(data[-1]))
        return 0;

    if (size < 4 || memcmp(data, "www.", strlen("www.")) != 0)
        return 0;

    link_end = check_domain(data, size, 0);

    if (link_end == 0)
        return 0;

    /* The link runs up to the next whitespace byte (or the end of input). */
    while (link_end < size && !isspace(data[link_end]))
        link_end++;

    link_end = autolink_delim(data, link_end, max_rewind, size);

    if (link_end == 0)
        return 0;

    hoedown_buffer_put(link, data, link_end);
    *rewind_p = 0;

    /* Returned as size_t: narrowing through int would truncate the length
     * of very long links. */
    return link_end;
}

/*
 * Try to recognise an e-mail autolink around data.
 *
 * rewind_p   - on success receives the number of bytes before data that
 *              belong to the link
 * link       - on success the link text (including the rewound bytes) is
 *              handed to hoedown_buffer_put()
 * data       - scan position; presumably the '@' of the address (the caller
 *              is not visible here - confirm)
 * max_rewind - number of bytes available before data
 * size       - number of bytes available from data onwards
 * flags      - unused
 *
 * Returns the length of the link within data (not counting the rewound
 * bytes), or 0 when no address was recognised.
 */
size_t
hoedown_autolink__email(
    size_t *rewind_p,
    hoedown_buffer *link,
    uint8_t *data,
    size_t max_rewind,
    size_t size,
    unsigned int flags)
{
    size_t link_end, rewind;
    int nb = 0, np = 0;

    (void)flags;

    /* Walk backwards over the bytes allowed before data: alphanumerics
     * and ".+-_". The set is tested explicitly because strchr() would also
     * match the string terminator and let a NUL byte through; the byte is
     * addressed with pointer subtraction so no wrapped unsigned index is
     * involved. */
    for (rewind = 0; rewind < max_rewind; ++rewind) {
        uint8_t c = *(data - 1 - rewind);

        if (isalnum(c))
            continue;

        if (c == '.' || c == '+' || c == '-' || c == '_')
            continue;

        break;
    }

    if (rewind == 0)
        return 0;

    /* Walk forwards counting '@' signs and dots; a '.' in the very last
     * byte of the input ends the scan instead of being counted. */
    for (link_end = 0; link_end < size; ++link_end) {
        uint8_t c = data[link_end];

        if (isalnum(c))
            continue;

        if (c == '@')
            nb++;
        else if (c == '.' && link_end < size - 1)
            np++;
        else if (c != '-' && c != '_')
            break;
    }

    /* Exactly one '@', at least one dot, and the scanned part has to end
     * with a letter. */
    if (link_end < 2 || nb != 1 || np == 0 ||
        !isalpha(data[link_end - 1]))
        return 0;

    link_end = autolink_delim(data, link_end, max_rewind, size);

    if (link_end == 0)
        return 0;

    hoedown_buffer_put(link, data - rewind, link_end + rewind);
    *rewind_p = rewind;

    return link_end;
}

/*
 * Try to recognise a "scheme://..." autolink around data.
 *
 * rewind_p   - on success receives the number of bytes before data that
 *              belong to the link (the letters found while rewinding)
 * link       - on success the link text (including the rewound bytes) is
 *              handed to hoedown_buffer_put()
 * data       - scan position; data[1] and data[2] must both be '/'
 *              (presumably data[0] is the ':' - confirm against the caller)
 * max_rewind - number of bytes available before data
 * size       - number of bytes available from data onwards
 * flags      - HOEDOWN_AUTOLINK_SHORT_DOMAINS allows domains without a dot
 *
 * Returns the length of the link within data (not counting the rewound
 * bytes), or 0 when no link was recognised.
 */
size_t
hoedown_autolink__url(
    size_t *rewind_p,
    hoedown_buffer *link,
    uint8_t *data,
    size_t max_rewind,
    size_t size,
    unsigned int flags)
{
    size_t link_end, rewind = 0, domain_len;

    if (size < 4 || data[1] != '/' || data[2] != '/')
        return 0;

    /* Rewind over the letters in front of data. The byte is addressed with
     * pointer subtraction: `data[-1 - rewind]` would evaluate the index in
     * size_t and depend on pointer wraparound. */
    while (rewind < max_rewind && isalpha(*(data - 1 - rewind)))
        rewind++;

    /* The whole candidate, scheme included, has to pass the prefix check. */
    if (!hoedown_autolink_is_safe(data - rewind, size + rewind))
        return 0;

    link_end = strlen("://");

    domain_len = check_domain(
        data + link_end,
        size - link_end,
        flags & HOEDOWN_AUTOLINK_SHORT_DOMAINS);

    if (domain_len == 0)
        return 0;

    /* The link runs up to the next whitespace byte (or the end of input). */
    link_end += domain_len;
    while (link_end < size && !isspace(data[link_end]))
        link_end++;

    link_end = autolink_delim(data, link_end, max_rewind, size);

    if (link_end == 0)
        return 0;

    hoedown_buffer_put(link, data - rewind, link_end + rewind);
    *rewind_p = rewind;

    return link_end;
}