#ifdef __linux__ // we need POLLRDHUP
#define _GNU_SOURCE
#endif // __linux__

#include <libpxd/px_log.h>
#include <libpxd/px_workq.h>

#include <errno.h>
#include <poll.h>
#include <limits.h>
#include <string.h>

// some systems don't have POLLRDHUP
#ifndef POLLRDHUP
#define POLLRDHUP 0
#endif // POLLRDHUP

// functions for handling the event queue and waiting for file descriptors

// Fills pfdv with one pollfd per event linked on events_head, calls poll()
// with the given timeout, and moves every event that reported activity onto
// triggered_head.  n_events must equal the number of events on events_head.
// Returns the number of events moved, 0 if poll() was interrupted by a
// signal, or -1 on allocation or poll() failure.
static int wait_for_events(struct pollfd_vec* pfdv,
                           size_t             n_events,
                           struct px_queue*   events_head,
                           struct px_queue*   triggered_head,
                           int                timeout);

// Moves every event off source_head: events with a file descriptor go to
// pollable_evs_head, all others go to unpolled_evs_head.  *timeout is lowered
// to the nearest pending event timeout measured from *now (0 if an event has
// already expired).  Returns the number of events placed on
// pollable_evs_head.
static int partition_events (struct px_queue*       source_head,
                             struct px_queue*       pollable_evs_head,
                             struct px_queue*       unpolled_evs_head,
                             int*                   timeout,
                             struct timespec const* now);

_Bool px_workq_once(struct px_workq* workq, int timeout) {

  // no events to poll, return immediately
  if (px_queue_is_singleton(&workq->events_head))
    return true;

  // grab the current time for checking event expiry
  struct timespec now = px_time_now();

  // partition our events into pollable events (fd >= 0) and non-pollable
  // events so that we can set up a vector of pollfds.  we also adjust out
  // timeout downward based on the timeouts of all of the events and the
  // provided timeout value.
  struct px_queue pollable_evs_head;
  struct px_queue unpolled_evs_head;
  px_queue_init(&pollable_evs_head);
  px_queue_init(&unpolled_evs_head);

  int n_pollable = partition_events(&workq->events_head, &pollable_evs_head, &unpolled_evs_head, &timeout, &now);
  if (n_pollable < 0) {
    px_log_error("error while finding pollable events");
    px_eventq_clear(&workq->events_head);
    px_eventq_clear(&pollable_evs_head);
    px_eventq_clear(&unpolled_evs_head);
    return false;
  }

  // some sanity checks to aid debugging
  px_log_assert(px_queue_is_singleton(&workq->events_head), "events should have been cleared");
  int is_empty = px_queue_is_singleton(&pollable_evs_head);
  px_log_assert(((!!n_pollable) == !is_empty), "wrong value for n_pollable");

  // call poll on pollable events with the provided timeout
  if (n_pollable > 0) {
    struct px_queue triggered_head;
    px_queue_init(&triggered_head);

    int r = wait_for_events(&workq->pfd_vec, n_pollable, &pollable_evs_head, &triggered_head, timeout);

    if (r < 0) {
      px_eventq_clear(&triggered_head);
      px_eventq_clear(&pollable_evs_head);
      px_eventq_clear(&unpolled_evs_head);
      px_eventq_clear(&workq->events_head);
      return false;
    }

    while (!px_queue_is_singleton(&triggered_head)) {
      struct px_event* ev = px_event_from_queue(triggered_head.next);
      px_event_dequeue(ev);
      px_event_trigger_event(ev, ev->revents);
    }

    // move un-triggered pollable events to the end of the non-pollable list.
    // they still need to be checked for timesouts
    px_queue_splice(&unpolled_evs_head, &pollable_evs_head);
  }

  while (!px_queue_is_singleton(&unpolled_evs_head)) {
    struct px_event* ev = px_event_from_queue(unpolled_evs_head.next);
    px_event_dequeue(ev);

    // an untrigged pollable event timed out, then execute its callback with
    // a timeout arg, and clean it up if it is going to be abandoned.
    // otherwise put it back onto the worker's wait queue
    if (px_event_timeout_expired(ev, &now)) {
      px_event_trigger_timeout(ev);
      continue;
    }

    if (px_event_has_timeout(ev) || px_event_has_fd(ev)) {
      px_event_enqueue(ev, &workq->events_head);
    } else {
      px_event_trigger_dequeue(ev);
    }
  }
  px_log_assert(px_queue_is_singleton(&unpolled_evs_head), "programming error, unpolled evs should be empty");
  px_log_assert(px_queue_is_singleton(&pollable_evs_head), "programming error, pollable evs should be empty");

  return true;
}

// Empties source_head, sorting every event by whether it can be polled, and
// works out how long poll() is allowed to block.
//
// source_head       - queue to drain; it is empty when this returns
// pollable_evs_head - receives the events that have a file descriptor
// unpolled_evs_head - receives the events that do not
// timeout           - in/out, milliseconds.  set to 0 if any event has
//                     already expired; otherwise lowered to the smallest
//                     remaining event timeout.  a negative incoming value
//                     is replaced by the first remaining timeout found.
// now               - reference time used for the expiry calculations
//
// Returns the number of events placed on pollable_evs_head.
int partition_events (struct px_queue*        source_head,
                      struct px_queue*        pollable_evs_head,
                      struct px_queue*        unpolled_evs_head,
                      int*                    timeout,
                      struct timespec const*  now)
{
  int fd_count = 0;

  for (;;) {
    if (px_queue_is_singleton(source_head))
      break;

    struct px_event* ev = px_event_from_queue(source_head->next);
    px_event_dequeue(ev);

    // events with a file descriptor are polled even if they have already
    // timed out, so that any pending fd activity is still picked up
    struct px_queue* dest = unpolled_evs_head;
    if (px_event_has_fd(ev)) {
      dest = pollable_evs_head;
      ++fd_count;
    }
    px_event_enqueue(ev, dest);

    // events without a timeout have no influence on the wait time
    if (!px_event_has_timeout(ev))
      continue;

    // an already expired event means poll() must not block at all
    if (px_event_timeout_expired(ev, now)) {
      *timeout = 0;
      continue;
    }

    int remaining_ms = px_time_difference_ms(now, &ev->timeout_end);
    if (remaining_ms < 0) // not expected for an unexpired event
      remaining_ms = 0;

    if (*timeout < 0 || remaining_ms < *timeout)
      *timeout = remaining_ms;
  }

  return fd_count;
}

#define ROUND_UP_TO_8(v) ((((v) + 7) / 8) * 8)

int wait_for_events(
    struct pollfd_vec*  pfdv,
    size_t              n_events,
    struct px_queue*    events_head,
    struct px_queue*    triggered_head,
    int                 timeout)
{
  // do event allocations in 8-element chunks
  const size_t min_pfds = 8;
  size_t low_watermark = ROUND_UP_TO_8((pfdv->pfds_sz > min_pfds) ? (pfdv->pfds_sz - 8) : min_pfds);
  if (pfdv->pfds_sz < min_pfds
      || n_events > pfdv->pfds_sz
      || (n_events > min_pfds && n_events < low_watermark))
  {
    size_t pfds_sz = ROUND_UP_TO_8((n_events > min_pfds) ? n_events : min_pfds);
    px_log_assert(pfds_sz > 0, "should never have fewer than 8 pfds");
    struct pollfd* newpfds = (struct pollfd*)realloc(pfdv->pfds, pfds_sz * sizeof(struct pollfd));
    if (!newpfds) {
      px_log_warn("no memory");
      return -1;
    }
    pfdv->pfds = newpfds;
    pfdv->pfds_sz = pfds_sz;
  }

  size_t event_idx = 0;
  struct px_queue* q = events_head->next;
  while (q != events_head) {
    struct px_event* ev = px_event_from_queue(q);
    q = q->next;
    px_log_assert(event_idx < n_events, "programming error, n_events was wrong");

    // reset the revents field
    ev->revents = 0;

    struct pollfd* pfd = &pfdv->pfds[event_idx];
    memset(pfd, 0, sizeof(*pfd));
    pfd->events |= POLLRDHUP;
    pfd->fd = ev->fd;
    if (ev->events & PX_EVENT_READ)
      pfd->events |= POLLIN;
    if (ev->events & PX_EVENT_WRITE)
      pfd->events |= POLLOUT;

    ++event_idx;
  }
  px_log_assert(event_idx == n_events, "programming error, n_events was wrong");

  int r = 0;
  px_log_assert(n_events > 0, "programming error, waiting on zero events");
  if (n_events > 0 || timeout >= 0) // don't just wait indefinitely
    r = poll(pfdv->pfds, (nfds_t)n_events, timeout);

  if (r < 0) {
    int e = errno;
    if (e != EINTR) // signal interrupt
      px_log_error("error on poll(): %s", strerror(e));
    errno = e;

    // being interrupted isn't an error, but we should stop what we're doing
    return e == EINTR ? 0 : -1;
  }

  struct pollfd* pfd = &pfdv->pfds[0];
  int n_handled = 0;
  q = events_head->next;
  while (n_handled < r && q != events_head) {
    struct px_event* ev = px_event_from_queue(q);;
    q = q->next;

    px_log_assert(pfd->fd == ev->fd, "ev and pfd file descriptors should match");

    if (pfd->revents != 0) {
      ++n_handled;

      px_event_enqueue(ev, triggered_head); // move to 'triggered' queue
      if (pfd->revents & POLLIN)
        ev->revents |= PX_EVENT_READ;
      if (pfd->revents & POLLOUT)
        ev->revents |= PX_EVENT_WRITE;
      //if (pfd->revents & POLLERR || pfd->revents & POLLHUP)
      //  ev->revents |= PX_EVENT_ERROR;
      if ((pfd->revents & ~(POLLIN|POLLOUT)) != 0) {
        ev->revents |= PX_EVENT_ERROR;
        if ((pfd->revents & (POLLRDHUP|POLLHUP)) != 0)
          ev->revents |= PX_EVENT_HUP;
      }
    }

    ++pfd;
  }
  return n_handled;
}

void px_workq_init(struct px_workq* workq) {
  *workq = (struct px_workq) { 0 };
  px_queue_init(&workq->events_head);
}

// Clears every event still queued on workq and releases the pollfd array,
// leaving the array pointer NULL and its recorded size 0.
void px_workq_reset(struct px_workq* workq) {
  // clear out any wait events.
  px_eventq_clear(&workq->events_head);

  // release the pollfd storage and mark the vector as empty
  free(workq->pfd_vec.pfds);
  workq->pfd_vec.pfds_sz = 0;
  workq->pfd_vec.pfds = NULL;
}

// Reports whether workq has no events queued, i.e. whether its event queue
// head is a singleton.
_Bool px_workq_is_empty(struct px_workq const* workq) {
  struct px_queue const* head = &workq->events_head;

  return px_queue_is_singleton(head);
}

// TODO remove
// Debugging aid: returns whatever px_queue_size() reports for q.
unsigned px_queue_sz_for_debugging(struct px_queue* q) {
  unsigned const n_entries = px_queue_size(q);

  return n_entries;
}
