#if defined(__linux__)
#define _GNU_SOURCE
#endif //  defined(__linux__)

#include <libpxd/px_common.h>
#include <libpxd/px_gemini.h>
#include <libpxd/px_url.h>
#include <libpxd/px_log.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>
#include <ctype.h>
#include <stdio.h>
#include <limits.h>

// @brief initialize a url structure so that every component is unset
// @param url the url to initialize.  a null pointer is ignored
void px_url_init(struct px_url* url) {
  if (!url) // px_url_reset tolerates a null url, so do the same here
    return;
  memset(url, 0, sizeof(*url));
}

// @brief release every string owned by a url, then return it to the
//        initialized state
// @param url the url to reset.  a null pointer is ignored
void px_url_reset(struct px_url* url) {
  if (url) {
    // every component the url owns, so they can be released in one loop
    char** const owned[] = {
      &url->scheme, &url->userinfo, &url->host, &url->port,
      &url->path,   &url->query,    &url->fragment
    };
    for (size_t i = 0; i < sizeof(owned) / sizeof(owned[0]); ++i)
      free(*owned[i]);
    px_url_init(url);
  }
}

// the 'sub-delims' character set per std66, as a string literal so it can be
// concatenated with extra characters and searched with strchr
#define URL_SUBDELIMS "!$&'()*+,;="

// @brief is character a decimal digit or a letter in 'a'-'z' / 'A'-'Z'
//        (plain range checks, no locale involved)
static inline _Bool is_alnum(unsigned char c) {
  if ('0' <= c && c <= '9')
    return 1;
  if ('A' <= c && c <= 'Z')
    return 1;
  return 'a' <= c && c <= 'z';
}

// @brief is character a hexadecimal digit (0-9, a-f or A-F)
static inline _Bool is_hexdig(unsigned char c) {
  _Bool const decimal = (c >= '0' && c <= '9');
  _Bool const lower   = (c >= 'a' && c <= 'f');
  _Bool const upper   = (c >= 'A' && c <= 'F');
  return decimal || lower || upper;
}

// @brief is character in the 'unreserved' set per std66: letters, digits
//        and "-._~".  null is never unreserved
static inline _Bool is_unreserved(unsigned char c) {
  switch (c) {
    case '-' :
    case '.' :
    case '_' :
    case '~' :
      return 1;
    default :
      return is_alnum(c);
  }
}

// @brief is character in the pchar set per std66
// @param c the character to classify
// @return true for unreserved characters, '%' (so pct-encoded triplets pass;
//         their hex digits are already unreserved), sub-delims, ':' and '@'.
//         null is never a pchar
static inline _Bool is_pchar(unsigned char c) {
  // strchr() treats the terminating null of the set as searchable, so a null
  // byte must be rejected before the lookup or it would always be "found"
  return c != '\0'
         && (is_unreserved(c) || c == '%' || strchr(URL_SUBDELIMS "@:", c) != NULL);
}

// @brief is character acceptable inside a scheme per std66: letters, digits
//        and "+-."
static inline _Bool is_scheme_char(unsigned char c) {
  switch (c) {
    case '+' :
    case '-' :
    case '.' :
      return 1;
    default :
      return is_alnum(c);
  }
}

// @brief is character acceptable inside a userinfo component
// @param c the character to classify
// @return true for unreserved characters, '%' (pct-encoded triplets),
//         sub-delims and ':'.  null is never accepted
static inline _Bool is_userinfo_char(unsigned char c) {
  // strchr() would match the set's terminating null, so rule null out first
  return c != '\0'
         && (is_unreserved(c) || c == '%' || strchr(URL_SUBDELIMS ":", c) != NULL);
}

// @brief is character acceptable inside a host component
// @param c the character to classify
// @return true for unreserved characters, '%' (pct-encoded triplets),
//         sub-delims, and "[]:" which are needed for bracketed literals such
//         as [IPv6addr].  null is never accepted
static inline _Bool is_host_char(unsigned char c) {
  // strchr() would match the set's terminating null, so rule null out first
  return c != '\0'
         && (is_unreserved(c) || c == '%' || strchr(URL_SUBDELIMS "[]:", c) != NULL);
}

// @brief is character acceptable inside a port, i.e. a decimal digit
static inline _Bool is_port_char(unsigned char c) {
  return c >= '0' && c <= '9';
}

// @brief is character acceptable inside a path per std66: the segment
//        separator '/' or any pchar
static inline _Bool is_path_char(unsigned char c) {
  return c == '/' || is_pchar(c);
}

// @brief is character acceptable inside a query per std66: '/', '?' or any
//        pchar (the same set a fragment accepts)
static inline _Bool is_query_char(unsigned char c) {
  switch (c) {
    case '/' :
    case '?' :
      return 1;
    default :
      return is_pchar(c);
  }
}

// @brief is character acceptable inside a fragment per std66: '/', '?' or
//        any pchar (the same set a query accepts)
static inline _Bool is_fragment_char(unsigned char c) {
  switch (c) {
    case '/' :
    case '?' :
      return 1;
    default :
      return is_pchar(c);
  }
}

// @brief copy the characters of [srcbeg, srcend) that pass a filter into a
//        newly allocated string
// @param srcbeg first character of the source range
// @param srcend one past the last character of the source range
// @param is_allowed_char predicate choosing which characters are kept.  it is
//        never called for null bytes, which are always dropped
// @return a malloc-allocated, null-terminated string (empty when nothing is
//         kept), or NULL if allocation failed
static char* copy_allowed_chars(
    char const* srcbeg,
    char const* srcend,
    _Bool (* const is_allowed_char)(unsigned char))
{
  // first pass: size the result.  the count feeds an allocation size, so it
  // is a size_t rather than a narrower unsigned
  size_t n_kept = 0;
  for (char const* p = srcbeg; p != srcend; ++p) {
    if (*p != '\0' && is_allowed_char((unsigned char)*p)) // never accept null
      ++n_kept;
  }

  // one extra byte for the terminator, so this works even for empty results
  char* const out = calloc(n_kept + 1, sizeof(*out));
  if (!out)
    return NULL;

  // second pass: copy the characters that were counted above
  size_t out_idx = 0;
  for (char const* p = srcbeg; p != srcend; ++p) {
    if (*p != '\0' && is_allowed_char((unsigned char)*p))
      out[out_idx++] = *p;
  }
  out[out_idx] = '\0';

  return out;
}

// @brief cursor and output state handed to each of the parse_* helpers while
//        a url buffer is being parsed
struct px_url_parser {
  char const*     ppos;     ///< pointer to the current pos; helpers move it past whatever they consume
  char const*     buf;      ///< beginning of the url buffer
  char const*     buf_end;  ///< one past the end of the url buffer
  struct px_url   url;      ///< output url; components are filled in as they are parsed
};

// @brief try to parse a "scheme:" prefix at the current position
// @param pp parser state.  on success url.scheme is replaced and ppos is moved
//        past the ':'; on failure nothing is changed
// @return true if a scheme followed by ':' was found, otherwise false
static _Bool parse_scheme(struct px_url_parser* pp) {
  char const* const start = pp->ppos;
  char const* const end = pp->buf_end;

  // nothing left, or the first character is not alphabetic => no scheme
  if (start == end || !isalpha((unsigned char)*start))
    return false;

  // walk to the ':' delimiter, bailing on anything a scheme can't contain
  char const* colon = start;
  for (; colon != end && *colon != ':'; ++colon) {
    if (!is_scheme_char(*colon))
      return false;
  }

  if (colon == end) // ran out of buffer without finding the delimiter
    return false;

  // NOTE: a failed strndup leaves url.scheme NULL and is still reported as
  // success
  size_t const scheme_len = colon - start;
  free(pp->url.scheme);
  pp->url.scheme = strndup(start, scheme_len);

  pp->ppos = colon + 1; // consume the scheme and its delimiter
  return true;
}

// @brief does the unparsed input begin with the "//" that introduces an
//        authority component
// @param pp parser state (not modified)
// @return true if at least two characters remain and both are '/'.  this
//         includes a buffer that ends right after the "//", which per std66
//         is an authority with an empty host
static _Bool has_authority(struct px_url_parser* pp) {
  // compare the remaining length instead of forming ppos + 2: that pointer
  // would lie beyond one-past-the-end when a single character remains, which
  // is undefined even if it is never dereferenced
  if (pp->buf_end - pp->ppos < 2)
    return false;
  return pp->ppos[0] == '/' && pp->ppos[1] == '/';
}

static _Bool parse_authority(struct px_url_parser* pp) {
  if (!has_authority(pp))
    return false;

  // authority starts with // but we don't need them
  pp->ppos += 2;

  char const* host_start = pp->ppos;
  char const* delim = host_start;

  // we expect a userinfo first.  we can either end the authority with [/?#],
  // or switch to expecting a host with @
  _Bool has_colon = false;
  while (delim != pp->buf_end && strchr("/#?@", (unsigned char)*delim) == NULL) {
    if (!is_userinfo_char(*delim) && !is_host_char(*delim)) // unsure if we have a host or userinfo so far
      return false;
    if (*delim == ':')
      has_colon = true;
    ++delim;
  }

  if (delim != pp->buf_end) { // if we hit the end of the URL then we have a host
    switch (*delim) {
      case '@' :
        {
          // if the delimiter we hit is an @ then we need to slurp up the userinfo spec
          char const* userinfo_start = host_start;
          for (char const* p = userinfo_start; p != delim; ++p) {
            if (!is_userinfo_char(*p))
              return false;
          }
          free(pp->url.userinfo);
          pp->url.userinfo = copy_allowed_chars(userinfo_start, delim, is_userinfo_char);
          ++delim; // skip past the '@'
          host_start = delim; // host starts after the @
          pp->ppos = host_start; // mark the buffer up through the @ as consumed
        }
        break;
      case '/' :
      case '?' :
      case '#' :
        // any of these are valid delimiters for the host, we copy it in the
        // next code block below. if we hit a colon in our userinfo scan then
        // we need to reset the delimiter position so that we re-scan the host
        // portion to make sure we separate the host and port
        if (has_colon)
          delim = host_start;
        break;
      default :
        return false;
    }
  }

  // now we expect a host specification, go to the next delimiter/end of url
  while (delim != pp->buf_end && strchr("/#?:", (unsigned char)*delim) == NULL) {
    if (!is_host_char(*delim))
      return false;
    ++delim;
  }

  // copy the host string (may be empty)
  free(pp->url.host);
  pp->url.host = copy_allowed_chars(host_start, delim, is_host_char);
  pp->ppos = delim;

  // if we landed on a : then we need to copy the port
  if (pp->ppos != pp->buf_end && *pp->ppos == ':') { // a : after the host means we have a port
    ++delim;
    char const* port_start = delim;
    while (delim != pp->buf_end && strchr("/#?", (unsigned char)*delim) == NULL) {
      if (!is_port_char(*delim))
        return false;
      ++delim;
    }

    free(pp->url.port);
    pp->url.port = copy_allowed_chars(port_start, delim, is_port_char);
    pp->ppos = delim;
  }

  return true;
}

// @brief parse a path running from the current position up to the next '?',
//        '#' or the end of the buffer
// @param pp parser state.  on success url.path is replaced (possibly with an
//        empty string) and ppos is moved past the path; on failure nothing is
//        changed
// @return false if a character that is not allowed in a path is found or
//         memory could not be allocated, otherwise true
static _Bool parse_path(struct px_url_parser* pp) {
  char const* const path_start = pp->ppos;
  char const* delim = path_start;
  // strchr also matches a null byte, so an embedded null ends the scan too
  while (delim != pp->buf_end && strchr("#?", (unsigned char)*delim) == NULL) {
    if (!is_path_char(*delim)) // non-path character, whoops
      return false;
    ++delim;
  }

  // allocate before touching the parser so a failure is reported to the
  // caller instead of being returned as a successful parse with a NULL path
  char* path = copy_allowed_chars(path_start, delim, is_path_char);
  if (!path)
    return false;

  free(pp->url.path);
  pp->url.path = path;
  pp->ppos = delim; // consume the buffer
  return true;
}

// @brief parse a query: a '?' followed by everything up to the next '#' or
//        the end of the buffer
// @param pp parser state.  on success url.query is replaced (without the
//        leading '?', possibly empty) and ppos is moved past the query; on
//        failure nothing is changed
// @return false if the current character is not '?', the query holds a
//         character that is not allowed (including a null byte), or memory
//         could not be allocated; otherwise true
static _Bool parse_query(struct px_url_parser* pp) {
  if (pp->ppos == pp->buf_end || *pp->ppos != '?')
    return false;

  char const* const query_start = pp->ppos + 1; // skip the '?'
  char const* delim = query_start;
  while (delim != pp->buf_end && *delim != '#') {
    // reject null explicitly: the copy below would silently drop it, turning
    // an invalid url into a different, valid-looking one
    if (*delim == '\0' || !is_query_char(*delim))
      return false;
    ++delim;
  }

  char* query = copy_allowed_chars(query_start, delim, is_query_char);
  if (!query)
    return false;

  free(pp->url.query);
  pp->url.query = query;
  pp->ppos = delim;
  return true;
}

static _Bool parse_fragment(struct px_url_parser* pp) {
  if (pp->ppos == pp->buf_end)
    return false;
  if (*pp->ppos != '#')
    return false;
  ++pp->ppos;
  char const* delim = pp->ppos;
  while (delim != pp->buf_end) {
    if (!is_fragment_char(*delim))
      return false;
    ++delim;
  }

  free(pp->url.fragment);
  pp->url.fragment = copy_allowed_chars(pp->ppos, delim, is_fragment_char);
  pp->ppos = delim;
  return true;
}

// @brief parses a url from a buffer into its components
// @param url the url to output to.  it is overwritten on success (whatever it
//        held before is not freed) and left untouched on failure
// @param buf the characters of the url; does not need to be null-terminated
// @param buflen the number of characters in buf
// @return true if the buffer parsed as a url, false if it is malformed,
//         contains a fragment, or an argument is null
_Bool px_url_from_buffer(struct px_url* url, uint8_t const* buf, unsigned buflen) {
  // a null buffer is only acceptable when there is nothing to read from it
  if (!url || (!buf && buflen != 0))
    return false;

  struct px_url_parser pp = {
    .ppos     = (char const*)buf,
    .buf      = (char const*)buf,
    .buf_end  = (char const*)buf + buflen,
    .url = { 0 }
  };

  parse_scheme(&pp); // we don't care if we have a scheme or not - implicitly assume it's 'gemini'

  // if the next two characters are // then an authority is expected
  _Bool authority_present = has_authority(&pp);
  if (authority_present && !parse_authority(&pp))
    goto ERROR;

  while (pp.ppos != pp.buf_end) {
    switch (*pp.ppos) {
      case '/' : // need to parse a path
        // a path has to come before the query and fragment, and there can
        // only be one of it
        if (pp.url.path || pp.url.query || pp.url.fragment || !parse_path(&pp))
          goto ERROR;
        break;
      case '?' : // start of query
        if (pp.url.query || pp.url.fragment || !parse_query(&pp))
          goto ERROR;
        break;
      case '#' :
        // gemini spec forbids having fragments in the urls so fail here.  to
        // allow them later, call parse_fragment(&pp) at this point and break
        // when it succeeds
        goto ERROR;
      default :
        // if we haven't already parsed a path, then any random char /may/ be
        // for a path, so try to parse the next portion as a path.  otherwise
        // the request is goofed up.  if there's an authority section then the
        // path must be absolute or empty, so a relative path is an error
        if (pp.url.path || pp.url.query || pp.url.fragment
            || authority_present
            || !parse_path(&pp))
          goto ERROR;
        break;
    }
  }

  *url = pp.url; // ownership of the parsed strings moves to the caller
  return true;
ERROR:
  px_url_reset(&pp.url);
  return false;
}

// @brief encoding decision for one character of a scheme
// @param c the character to classify
// @return 0 (copy as-is) for every character a scheme may contain per std66,
//         including '+', so that a scheme accepted by the parser is written
//         back unchanged; 1 (percent-encode) for anything else
static int encode_scheme_char(unsigned char c) {
  return is_scheme_char(c) ? 0 : 1;
}

// @brief encoding decision for one character of a userinfo: 0 (copy as-is)
//        for unreserved characters and ':', otherwise 1 (percent-encode)
static int encode_userinfo_char(unsigned char c) {
  if (c == ':' || is_unreserved(c))
    return 0;
  return 1;
}

// @brief encoding decision for one character of a host: 0 (copy as-is) for
//        unreserved characters, otherwise 1 (percent-encode)
static int encode_host_char(unsigned char c) {
  if (is_unreserved(c))
    return 0;
  return 1;
}

// @brief encoding decision for one character of a port: 0 (copy as-is) for
//        letters and digits, otherwise 1 (percent-encode)
static int encode_port_char(unsigned char c) {
  return !is_alnum(c);
}

// @brief encoding decision for one character of a path: 0 (copy as-is) for
//        unreserved characters, '/' and ':', otherwise 1 (percent-encode)
static int encode_path_char(unsigned char c) {
  switch (c) {
    case '/' :
    case ':' :
      return 0;
    default :
      return is_unreserved(c) ? 0 : 1;
  }
}

// @brief encoding decision for one character of a query: 0 (copy as-is) for
//        unreserved characters, '&' and '=', otherwise 1 (percent-encode)
static int encode_query_char(unsigned char c) {
  switch (c) {
    case '&' :
    case '=' :
      return 0;
    default :
      return is_unreserved(c) ? 0 : 1;
  }
}

// @brief encoding decision for one character of a fragment: 0 (copy as-is)
//        for unreserved characters, '&' and '=', otherwise 1 (percent-encode)
static int encode_fragment_char(unsigned char c) {
  switch (c) {
    case '&' :
    case '=' :
      return 0;
    default :
      return is_unreserved(c) ? 0 : 1;
  }
}

// @brief percent-encode every component of a path in place
// @param path the path whose components are replaced by their encoded forms.
//        null components are left alone
// @return true if every component was encoded.  if any encoding fails the
//         whole path is reset and false is returned
static _Bool encode_path(struct px_path* path) {
  size_t idx = 0;
  while (idx < path->components_sz) {
    char* const raw = path->components[idx];
    if (raw) {
      char* const encoded = px_url_encode_str(raw, encode_path_char);
      if (!encoded) {
        px_path_reset(path);
        return false;
      }
      // swap the raw component for its encoded replacement
      free(raw);
      path->components[idx] = encoded;
    }
    ++idx;
  }
  return true;
}

// yields x, or an empty string literal when x is a null pointer, so the result
// can always be passed to a "%s" conversion.  note that x may be evaluated twice
#define ALWAYS_STR(x) ((x) ? (x) : "")

// @brief encode a url structure with proper semantics for each of the url fields
// @param out the url to output encoded data to.  it is overwritten (whatever
//        it held before is not freed); on failure it is left zeroed
// @param url the url to encode.  components that are NULL stay NULL and
//        components that are empty strings stay empty strings
// @return true if conversion completed successfully, otherwise false
_Bool px_url_encode(struct px_url* out, struct px_url const* url) {
  if (!out)
    return false;
  *out = (struct px_url) { 0 };

  if (!url)
    return false;

  struct px_url encoded_url = { 0 };

  struct srcdst { char const* src; char** dst; int (*encode_char)(unsigned char); };
  struct srcdst fields[] = { { url->scheme, &encoded_url.scheme, encode_scheme_char },
                             { url->userinfo, &encoded_url.userinfo, encode_userinfo_char },
                             { url->host, &encoded_url.host, encode_host_char },
                             { url->port, &encoded_url.port, encode_port_char },
                             // do path as a separate piece below
                             { url->query, &encoded_url.query, encode_query_char },
                             { url->fragment, &encoded_url.fragment, encode_fragment_char } };

  for (size_t i = 0; i < px_n_elements(fields); ++i) {
    char const* src = fields[i].src;
    if (!src)
      continue;

    // px_url_encode_str() returns NULL for an empty input, which would be
    // indistinguishable from a failure here.  an empty component (e.g. the
    // query of "/?" or the host of "scheme:///path") is valid and encodes to
    // an empty string, so produce that directly
    char* encoded_str = (src[0] == '\0')
                        ? calloc(1, 1)
                        : px_url_encode_str(src, fields[i].encode_char);
    if (!encoded_str)
      goto ERROR;

    *fields[i].dst = encoded_str;
  }

  // split the path into components, encode each component, then recombine them
  // this ensures the path is normalized
  if (url->path) {
    struct px_path pth = px_path_from_str(url->path);
    px_path_make_abs(&pth);

    if (!encode_path(&pth)) // encode_path resets pth itself when it fails
      goto ERROR;
    char* encoded_path_str = px_path_to_str(&pth);
    px_path_reset(&pth);
    if (!encoded_path_str)
      goto ERROR;
    encoded_url.path = encoded_path_str;
  }

  *out = encoded_url; // ownership of the encoded strings moves to the caller
  return true;
ERROR:
  px_url_reset(&encoded_url);
  return false;
}

// @brief like px_url_to_str but percent-encoding is not done.  prefer
//        px_url_to_str unless the components are already encoded
// @param url the url to convert
// @return a malloc-allocated string, or NULL if url is null or the string
//         could not be built
char* px_url_to_str_unencoded(struct px_url const* url) {
  if (!url)
    return NULL;

  char* ret = NULL;
  char const* fmtstr = "%s" // scheme
                       "%s" // scheme separator
                       "%s" // userinfo
                       "%s" // userinfo separator
                       "%s" // host
                       "%s" // port separator
                       "%s" // port
                       "%s" // path
                       "%s" // query separator
                       "%s" // query
                       "%s" // fragment separator
                       "%s"; // fragment

  // only emit the ':' before a port when something precedes it
  _Bool need_portsep = url->scheme || url->userinfo || url->host;

  int r = asprintf(&ret, fmtstr,
                   ALWAYS_STR(url->scheme),              // scheme
                   url->scheme ? "://" : "",             // scheme separator
                   ALWAYS_STR(url->userinfo),            // userinfo
                   url->userinfo ? "@" : "",             // userinfo separator
                   ALWAYS_STR(url->host),                // host
                   url->port && need_portsep ? ":" : "", // port separator
                   ALWAYS_STR(url->port),                // port
                   ALWAYS_STR(url->path),                // path
                   url->query ? "?" : "",                // query separator
                   ALWAYS_STR(url->query),               // query
                   url->fragment ? "#" : "",             // fragment separator
                   ALWAYS_STR(url->fragment));           // fragment

  // when asprintf fails the contents of ret are not guaranteed to be a valid
  // (or null) pointer, so it must not be passed to free().  nothing is left
  // allocated in that case; just report the failure
  if (r < 0)
    return NULL;

  return ret;
}

// @brief converts a url to a string, percent-encoding characters as needed
// @param url the url to convert
// @return a malloc-allocated string, or NULL if url is null, encoding failed
//         or the string could not be built
char* px_url_to_str(struct px_url const* url) {
  char* str = NULL;

  if (url) {
    struct px_url encoded;
    if (px_url_encode(&encoded, url)) {
      // the encoded copy is only needed long enough to format it
      str = px_url_to_str_unencoded(&encoded);
      px_url_reset(&encoded);
    }
  }

  return str;
}

// @brief decode two hexadecimal characters into the byte they spell
// @param in the two characters, most significant digit first
// @return the decoded value.  a character that is not a hex digit counts as 0
unsigned char decode_hexpair(char const in[2]) {
  unsigned char nibbles[2];

  for (unsigned i = 0; i < 2; ++i) {
    unsigned char const digit = (unsigned char)in[i];
    if (digit >= '0' && digit <= '9')
      nibbles[i] = (unsigned char)(digit - '0');
    else if (digit >= 'a' && digit <= 'f')
      nibbles[i] = (unsigned char)(digit - 'a' + 10);
    else if (digit >= 'A' && digit <= 'F')
      nibbles[i] = (unsigned char)(digit - 'A' + 10);
    else
      nibbles[i] = 0;
  }

  return (unsigned char)((nibbles[0] << 4) | nibbles[1]);
}

// @brief decode a percent-encoded string into raw form
// @param str the string to decode
// @return a malloc-allocated string or null on failure.  a '%' that is not
//         followed by two hex digits is copied unchanged, and an encoded null
//         ("%00") is dropped
char* px_url_decode_str(char const* str) {
  if (!str)
    return NULL;

  // the result will be maximally the same size as str
  // if every character is percent-encoded then str will be 3x larger than the result length
  size_t const str_len = strlen(str);

  char* outbuf = calloc(1, str_len + 1);
  if (!outbuf)
    return NULL;

  size_t out_idx = 0;
  // index with size_t so it has the same range as str_len
  for (size_t in_idx = 0; str[in_idx] != '\0'; ++in_idx) {
    unsigned char const c = (unsigned char)str[in_idx];
    // is_hexdig('\0') is false, so these lookups never read past the terminator
    if (c == '%'
        && is_hexdig(str[in_idx + 1])
        && is_hexdig(str[in_idx + 2]))
    {
      unsigned char const decoded = decode_hexpair(&str[in_idx + 1]);
      if (decoded != 0) // skip nulls, we don't want to put them in the middle of our strings
        outbuf[out_idx++] = (char)decoded;
      in_idx += 2; // skip the 2 decoded chars
    } else {
      // normal or reserved printable character, copy it to the buffer
      outbuf[out_idx++] = (char)c;
    }
  }

  outbuf[out_idx] = '\0';

  if (out_idx != str_len) { // optimization to remove unused portion
    // shrinking is best-effort: if realloc fails the original buffer is
    // still valid and is returned as-is
    char* trimmed = realloc(outbuf, out_idx + 1);
    if (trimmed)
      outbuf = trimmed;
  }
  return outbuf;
}

// @brief percent-encode a null-terminated string for use in a url
// @param str the string to encode
// @param encode_char per-character decision function, passed through to
//        px_url_encode_buffer
// @return whatever px_url_encode_buffer returns for the string's characters,
//         or NULL if str is null
char* px_url_encode_str(char const* str, int (* const encode_char)(unsigned char)) {
  if (!str)
    return NULL;

  size_t const len = strlen(str);
  return px_url_encode_buffer((uint8_t const*)str, len, encode_char);
}

// @brief percent-encode a buffer for use in a url
// @param buf the buffer to percent-encode
// @param buf_sz the number of characters in buf
// @param encode_char function run on each character of str.  should return:
//        < 0 character should be ignored (not copied or encoded)
//          0 character does not need to be encoded, copy it
//        > 0 character needs to be percent-encoded
// @return a pointer to a malloc-allocated null-terminated string, or NULL if
//         buf is null, buf_sz is 0, or memory could not be allocated
char* px_url_encode_buffer(uint8_t const* buf, size_t buf_sz, int (* const encode_char)(unsigned char)) {
  if (!buf || buf_sz == 0)
    return NULL;

  uint8_t const* buf_end = buf + buf_sz;

  // first pass: count what will be kept and how much of it needs encoding
  size_t n_chars = 0;
  size_t n_encodable = 0;
  for (uint8_t const* buf_itr = buf; buf_itr != buf_end; ++buf_itr) {
    int encode = encode_char(*buf_itr);
    if (encode < 0) // skip this char
      continue;

    ++n_chars;
    if (encode > 0)
      ++n_encodable; // this char needs to be encoded
  }

  // fast path: the output is a plain copy of the input.  that is only true
  // when nothing needs encoding AND nothing is being skipped
  if (n_encodable == 0 && n_chars == buf_sz) {
    char* newbuf = calloc(1, (buf_sz + 1) * sizeof(char));
    if (!newbuf)
      return newbuf;
    memcpy(newbuf, buf, buf_sz);
    newbuf[buf_sz] = '\0';
    return newbuf;
  }

  // we need 2 additional characters per encodable character (x -> %yy), plus a null
  size_t encoded_sz = n_chars + (2 * n_encodable) + 1;
  char* encoded_buf = calloc(1, encoded_sz * sizeof(char));
  if (!encoded_buf) {
    px_log_error("no memory");
    return NULL;
  }

  static char const hex_digits[] = "0123456789ABCDEF";

  char* encoded_buf_itr = encoded_buf;
  for (uint8_t const* buf_itr = buf; buf_itr != buf_end; ++buf_itr) {
    unsigned char const c = *buf_itr;
    int encode = encode_char(c);
    if (encode < 0)
      continue; // skip the character

    if (encode == 0) {
      *(encoded_buf_itr++) = (char)c; // char doesn't need to be encoded
      continue;
    }

    // character needs to be encoded as %XX (uppercase hex)
    *(encoded_buf_itr++) = '%';
    *(encoded_buf_itr++) = hex_digits[c >> 4];
    *(encoded_buf_itr++) = hex_digits[c & 0x0F];
  }
  // append a null byte
  *(encoded_buf_itr++) = '\0';
  px_log_assert(encoded_buf_itr == encoded_buf + encoded_sz, "programmer goofed his math");
  return encoded_buf;
}

// @brief decode a percent-encoded url to its raw form
// @param decoded_url the url to output values to.  must be initialized: on
//        success each of its components is freed and replaced (components that
//        are NULL in url become NULL).  on failure it is left unchanged
// @param url the url to decode.  may be the same object as decoded_url
// @return true if every component was decoded, false if an argument is null
//         or a component could not be decoded
_Bool px_url_decode(struct px_url* decoded_url, struct px_url const* url) {

  if (!url || !decoded_url)
    return false;

  struct url_field_cleaner { char const* src; char** dest; char* decoded; };

  // url-decode the components of the URL
  struct url_field_cleaner fields[] = {
    { url->scheme, &decoded_url->scheme, NULL },
    { url->userinfo, &decoded_url->userinfo, NULL },
    { url->host, &decoded_url->host, NULL },
    { url->port, &decoded_url->port, NULL },
    { url->path, &decoded_url->path, NULL },
    { url->query, &decoded_url->query, NULL },
    { url->fragment, &decoded_url->fragment, NULL } };

  size_t n_fields = px_n_elements(fields);

  // pass 1: decode everything before any destination is touched.  this keeps
  // the sources alive when decoded_url and url are the same object, and
  // means a failure part-way through leaves decoded_url as it was
  for (size_t i = 0; i < n_fields; ++i) {
    if (!fields[i].src)
      continue;

    fields[i].decoded = px_url_decode_str(fields[i].src);
    if (!fields[i].decoded) {
      for (size_t j = 0; j < i; ++j) // discard what was decoded so far
        free(fields[j].decoded);
      return false;
    }
  }

  // pass 2: nothing can fail any more, swap the new values in
  for (size_t i = 0; i < n_fields; ++i) {
    free(*fields[i].dest);
    *fields[i].dest = fields[i].decoded;
  }

  return true;
}

