Scott M. Mcdermott

UNIX Systems & Network Administrator
available for contract or salaried positions

dbllist.c

/*
 * Doubly linked list implementation with sentinel node.
 * Node management and iterator methods provided.
 * No locking or reentrancy.
 */

#define _GNU_SOURCE

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <assert.h>
#include <stdbool.h>

#include "dbllist.h"
#include "safe.h"
#include "log.h"

^L

dbll_head_t *
dbll_create (void)
{
        dbll_head_t *list;
        dbll_node_t *sentinel;

        list = xmalloc(sizeof(dbll_head_t));
        memset(list, 0, sizeof(dbll_head_t));
        sentinel = xmalloc(sizeof(dbll_node_t));
        sentinel->next = sentinel;
        sentinel->prev = sentinel;
        sentinel->item = NULL;
        list->sentinel = sentinel;

        return list;
};

dbll_head_t *
dbll_copy (dbll_head_t *orig,
           dbll_copy_fn_t copy_fn)
{
        dbll_head_t *copy;
        dbll_node_t *src, *new;
        unsigned i, nodes;

        copy = dbll_create();
        src = orig->sentinel->next;
        nodes = orig->nr_nodes;

        for (i = 0; i < nodes; i++) {
                new = copy_fn(src->item);
                dbll_add(copy, new);
                src = src->next;
        }

        return copy;
}


/*
 * Frees a dbll and every remaining (undeleted) node in it;
 * callers need to free allocations associated with any
 * referenced items beforehand.
 */
void
dbll_destroy (dbll_head_t *list)
{
        dbll_node_t *node;
        unsigned int i;

        assert(list);

        node = list->sentinel->next;
        for (i = 0; i < list->nr_nodes; i++) {
                node = node->next;
                free(node->prev);
        }
        free(list->sentinel);
        free(list);
}

dbll_node_t *
dbll_insert_before (dbll_node_t *node,
                    void *item)
{
        dbll_node_t *newnode;

        assert(item);
        assert(node);
        assert(node->prev);
        assert(node->next);

        newnode = xmalloc(sizeof(dbll_node_t));
        newnode->item = item;

        node->prev->next = newnode;
        newnode->next = node;
        newnode->prev = node->prev;
        node->prev = newnode;

        return newnode;
}

dbll_node_t *
dbll_insert_after (dbll_node_t *node,
                   void *item)
{
        return dbll_insert_before(node->next, item);
}

/*
 * Inserts an item before the sentinel node of a given list
 * (in other words, joins as the last node).  Returns the
 * newly allocated node with attached data item.
 */
dbll_node_t *
dbll_add (dbll_head_t *list,
          void *item)
{
        dbll_node_t *node;

        assert(list);
        assert(list->sentinel);
        assert(item);

        if ((node = dbll_insert_before(list->sentinel, item)))
                list->nr_nodes++;
        return node;
}

/*
 * Removes and frees a node from list.  Caller needs to free
 * the item referenced in the node itself.  Returns node
 * after the deleted node (new node at same FORWARD offset
 * from sentinel)
 */
dbll_node_t *
dbll_delete (dbll_head_t *list,
             dbll_node_t *node)
{
        dbll_node_t *nextnode;

        assert(list);
        assert(node);
        assert(node->next);
        assert(node->prev);

        node->prev->next = node->next;
        node->next->prev = node->prev;
        nextnode = node->next;
        free(node);
        list->nr_nodes--;

        return nextnode;
}

dbll_node_t *
dbll_first (dbll_head_t *list)
{
        return list->sentinel->next;
}

dbll_node_t *
dbll_last (dbll_head_t *list)
{
        return list->sentinel->prev;
}

dbll_node_t *
dbll_next (dbll_node_t *node)
{
        return node->next;
}

void *
dbll_item(dbll_node_t *node)
{
        return node->item;
}

/*
 * Runs given callback on each node in the list starting
 * with the node after the sentinel and ending with the node
 * just before the sentinel.  We don't use dbll_iterate()
 * because in the case of iterating through all the items we
 * can just run until node->item is NULL and can avoid some
 * testing, but note that this relies on all allocated nodes
 * referring to actual items.  Returns number of callback
 * iterations that returned true.
 *
 * XXX TODO: check out run times just doing this in a loop
 * nr_nodes times instead of testing for the sentinel.
 */
unsigned int
dbll_iterate_all (dbll_head_t *list,
                  dbll_iterate_fn_t callback)
{
        unsigned int listlen;
        dbll_node_t *node;
        int i, count = 0;

        assert(callback);
        node = dbll_first(list);
        listlen = list->nr_nodes;
        for (i = 0; i < listlen; i++) {
                if (callback(node) == false)
                        count++;
                node = node->next;
        }
        return count? false: true;
}

unsigned int
dbll_iterate_all_rev (dbll_head_t *list,
                      dbll_iterate_fn_t callback)
{
        unsigned int listlen;
        dbll_node_t *node;
        int i, count = 0;

        node = dbll_last(list);
        listlen = list->nr_nodes;
        for (i = 0; i < listlen; i++) {
                if (callback(node) == false)
                        count++;
                node = node->prev;
        }
        return count? false: true;
}

/*
 * Runs given callback on each item in the list starting
 * with the given first node and ending with the given last
 * node.  Returns number of callback iterations that
 * returned true.  If passed a NULLs for first or last,
 * defaults to sentinel's next and previous nodes,
 * respectively, but look at dbll_iterate_all() if it's
 * desired to just iterate over all nodes.  Relies on caller
 * to know that the iteration won't overlap its start node
 * before finishing.
 */
unsigned int
dbll_iterate (dbll_head_t *list,
              dbll_node_t *first,
              dbll_node_t *last,
              dbll_iterate_fn_t callback)
{
        dbll_node_t *p;
        unsigned int count = 0;

        assert(callback);

        if (!first)
                first = dbll_first(list);
        if (!last)
                last = dbll_last(list);
        for (p = first; p != last; p = p->next)
                if (callback(p))
                        count++;
        return count;
}

/*
 * Do a merge sort on the list with the user-supplied item
 * comparator.  This should have O(n log(n)) worst and
 * average case.  It has the best performance for arrays
 * that I can find, and works well with linked lists, so it
 * seems ideally suited.
 *
 * I used iteration, rather than recursion, since we already
 * keep track of how large our lists are in the list head
 * object; this keeps stack usage and calls down (which
 * contributed significant overhead when I profiled this
 * using the qsort()-backed dbll_sort_old()).
 *
 * I ended up sorting the list in-place after I found that
 * malloc()/free() overhead when using temporary lists was
 * very expensive for large numbers of iterations (I was
 * getting beat handily by dbll_sort_old()).  Fortunately,
 * we are actually maintaining two lists per dbll_head_t:
 * one in the forward direction, and one in the reverse.  So
 * we can just sort using the forward direction, and use the
 * reverse pointers to construct the merged list! Then swap
 * so reverse is forward and repeat.  When done, it is
 * necessary to rebuild all the prev pointers, since they've
 * been borrowed by the merge process and no longer have
 * referential integrity.
 *
 * UPDATE: I'm still getting beat by dbll_sort_old().  My
 * switching to doing the sort in-place, I'm now getting
 * beaten by a factor of 2, instead of 3 like before.
 * Profiling shows that the extra time is taken up largely
 * in the reversal at the end of each merge, and in the
 * steps advancing right by bucketsz.  The advantage of
 * dbll_sort_old() may simply me that it uses a temporary
 * array and thus involves less pointer dereferences.  I
 * need to spend more time on this and perhaps even
 * reimplement using an array.  I've been working on this
 * for about three days now though, and really I'm not using
 * this right now for anything where I'll noticed the 50%
 * speed difference.  Definitely something I'll want to
 * optimize later though...
 */
void
dbll_sort (dbll_head_t *list,
           comparison_fn_t comparison_fn)
{
        dbll_node_t *left, *right, *new, *previous, *node;

        unsigned nmerges;
        unsigned i, iterations;
        unsigned l, r, bucketsz, nodecount, sortlen;

        bucketsz = 1;

        do {
                nodecount = list->nr_nodes;
                sortlen = bucketsz * 2;
                iterations = nodecount / sortlen;
                if (nodecount % sortlen || sortlen > nodecount)
                        iterations++;

                nmerges = 0;
                left = right = list->sentinel->next;
                new = list->sentinel;

                while (iterations--) {

                        if (iterations || ((nodecount % sortlen) == 0)) {

                                /* won't be last iteration;
                                 * full run of sortlen
                                 * guaranteed available to
                                 * merge */
                                /* XXX TIME */
                                for (i = 0; i < bucketsz; i++)
                                        right = right->next;
                                l = r = bucketsz;

                        } else {

                                /* last iteration; may not
                                 * get a full bucketsz in
                                 * left or right so we have
                                 * to check to see where the
                                 * list ends and set the l
                                 * and r lengths accordingly */
                                unsigned seen = nmerges * sortlen;
                                unsigned remain = nodecount - seen;
                                assert(remain);

                                if (remain <= bucketsz) {
                                        r = 0;
                                        l = remain;
                                } else {
                                        l = bucketsz;
                                        r = remain - bucketsz;
                                        for (i = 0; i < l; i++)
                                                right = right->next;
                                }
                        }

                        /*
                         * Merge the buckets by using the
                         * comparator passed in by the user.
                         * Note that we do this in place by
                         * taking advantage of our doubly
                         * linked nature: we use the forward
                         * sort results obtained via next
                         * pointers (which we never disturb
                         * until done), and use the
                         * backwards ring by following
                         * previous node pointers (those get
                         * rearranged as we go to form the
                         * merge list)
                         */
                        while (l || r) {
                                if (!l) {
                                        new->prev = right;
                                        right = right->next;
                                        r--;
                                } else if (!r) {
                                        new->prev = left;
                                        left = left->next;
                                        l--;
                                } else if (comparison_fn(left->item,
                                        /* XXX TIME */   right->item) <= 0) {
                                        new->prev = left;
                                        left = left->next;
                                        l--;
                                } else {
                                        new->prev = right;
                                        right = right->next;
                                        r--;
                                }
                                new = new->prev;
                        }
                        left = right;
                        nmerges++;
                }

                /* retain our circular property! */
                new->prev = list->sentinel;

                /* now we swap the list order since we used
                 * the reverse direction to store the merged
                 * list as each sort occurred */
                /* XXX TIME */
                node = list->sentinel;
                for (i = 0; i <= nodecount; i++) {
                        previous = node->prev;
                        node->prev = node->next;
                        node->next = previous;
                        node = previous;
                }

                bucketsz *= 2;

        } while (nmerges > 1);

        /* finally now that we are done we must rebuild the
         * prev -> relationships because we overwrite it to
         * do the merges */
        /* XXX TIME */
        node = list->sentinel;
        for (i = 0; i <= nodecount; i++) {
                node->next->prev = node;
                node = node->next;
        }
}

/*
 * Old API using Quicksort with temporary array.  I didn't
 * like this API because it caused the library user to have
 * to dereference list node pointers themselves, which I
 * thought was bogus; that should be hidden behind the API.
 * Instead the new dbll_sort implements the sorting
 * algorithm itself instead of using a temporary array and
 * qsort() as done below.  This allows us to (1) use a merge
 * sort, which has O(n log(n)) worst-case instead of O(n^2)
 * like quicksort; (2) not have to use an array-oriented
 * sorting algorithm like quicksort (since we have a list
 * after all, it's easier to use a merge sort); (3) not
 * require the caller to know anything about the
 * implementation: they just pass in an item comparator,
 * instead of a node comparator as in the old implementation
 * (below)
 */
#if 0
static void
dbll_sort_old (dbll_head_t *list,
           comparison_fn_t comparison_fn)
{
        int n, i;
        dbll_node_t *node;
        dbll_node_t **nodes;

        n = list->nr_nodes;
        node = dbll_first(list);
        nodes = xmalloc(n * sizeof(node));
        for (i = 0; i < n; i++) {
                nodes[i] = node;
                node = node->next;
        }

        qsort(nodes, n, sizeof(node), comparison_fn);

        node = list->sentinel;
        for (i = 0; i < n; i++) {
                node->next = nodes[i];
                node->next->prev = node;
                node = node->next;
        }

}
#endif

/*
 * binary search of a sorted list
 *
 * XXX TODO at the place NULL is returned, node should be
 * pointing to the place where the key would be inserted and
 * would preserved the sorted property.  This could then be
 * used by dbll_add to retain the sorted property upon
 * insertion.
 */
static void *
dbll_search_node (dbll_head_t *list,
                  void *key,
                  comparison_fn_t comparison_fn)
{
        unsigned i, old, low, mid, high;
        int result, interval;
        dbll_node_t *node = NULL;

        high = list->nr_nodes;
        if (!high)
                return NULL;
        high--;

        low = mid = 0;
        node = list->sentinel->next;
        for (interval = high - low;
             interval >= 0;
             interval = high - low) {
                old = mid;
                mid = low + (high - low) / 2;
                if (mid > old)
                        for (i = old; i < mid; i++)
                                node = node->next;
                else
                        for (i = mid; i < old; i++)
                                node = node->prev;
                result = comparison_fn(key, node->item);
                if (result == 0)
                        return node;
                else if (result < 0)
                        high = mid - 1;
                else if (result > 0)
                        low = mid + 1;
        }

        return NULL;
}

static void *
dbll_search_item (dbll_head_t *list,
                  void *key,
                  comparison_fn_t comparison_fn)
{
        dbll_node_t *node;

        node = dbll_search_node(list, key, comparison_fn);

        return (node? node->item: NULL);
}

void *
dbll_search (dbll_head_t *list,
             void *key,
             comparison_fn_t comparison_fn)
{
        return dbll_search_item(list, key, comparison_fn);
}

/*
 * Replace the node matching the existing one, as determined
 * by dbll_search().  Since the comparator says they are the
 * same, there is no need to resort the list.
 *
 * If there are multiple existing keys which match, only the
 * first one will be replaced.
 *
 * The old item is deleted using the given deletion routine
 * (this is usually just free()).
 */
void
dbll_replace (dbll_head_t *list,
              void *key,
              comparison_fn_t comparison_fn,
              dbll_deletion_fn_t deletion_fn)
{
        dbll_node_t *node;

        node = dbll_search_node(list, key, comparison_fn);
        if (!node) {
                dbll_add(list, key);
                dbll_sort(list, comparison_fn);
                return;
        }
        if (deletion_fn)
                deletion_fn(node->item);

        node->item = key;
}

bool
print_node_as_string (dbll_node_t *node)
{
        assert(node);
        assert(node->item);

        printf("%s\n", (char *)node->item);

        return 0;
}

typedef struct {
        char *var;
        char *val;
} stringpair_t;

bool
print_node_as_stringpair (dbll_node_t *node)
{
        assert(node);
        assert(node->item);

        stringpair_t *pair = (stringpair_t *)node->item;

        printf("%-24s\t%s\n", pair->var, pair->val);

        return 0;
}