dbllist.c
/* * Doubly linked list implementation with sentinel node. * Node management and iterator methods provided. * No locking or reentrancy. */ #define _GNU_SOURCE #include <stdio.h> #include <stdlib.h> #include <string.h> #include <assert.h> #include <stdbool.h> #include "dbllist.h" #include "safe.h" #include "log.h" ^L dbll_head_t * dbll_create (void) { dbll_head_t *list; dbll_node_t *sentinel; list = xmalloc(sizeof(dbll_head_t)); memset(list, 0, sizeof(dbll_head_t)); sentinel = xmalloc(sizeof(dbll_node_t)); sentinel->next = sentinel; sentinel->prev = sentinel; sentinel->item = NULL; list->sentinel = sentinel; return list; }; dbll_head_t * dbll_copy (dbll_head_t *orig, dbll_copy_fn_t copy_fn) { dbll_head_t *copy; dbll_node_t *src, *new; unsigned i, nodes; copy = dbll_create(); src = orig->sentinel->next; nodes = orig->nr_nodes; for (i = 0; i < nodes; i++) { new = copy_fn(src->item); dbll_add(copy, new); src = src->next; } return copy; } /* * Frees a dbll and every remaining (undeleted) node in it; * callers need to free allocations associated with any * referenced items beforehand. */ void dbll_destroy (dbll_head_t *list) { dbll_node_t *node; unsigned int i; assert(list); node = list->sentinel->next; for (i = 0; i < list->nr_nodes; i++) { node = node->next; free(node->prev); } free(list->sentinel); free(list); } dbll_node_t * dbll_insert_before (dbll_node_t *node, void *item) { dbll_node_t *newnode; assert(item); assert(node); assert(node->prev); assert(node->next); newnode = xmalloc(sizeof(dbll_node_t)); newnode->item = item; node->prev->next = newnode; newnode->next = node; newnode->prev = node->prev; node->prev = newnode; return newnode; } dbll_node_t * dbll_insert_after (dbll_node_t *node, void *item) { return dbll_insert_before(node->next, item); } /* * Inserts an item before the sentinel node of a given list * (in other words, joins as the last node). Returns the * newly allocated node with attached data item. */ dbll_node_t * dbll_add (dbll_head_t *list, void *item) { dbll_node_t *node; assert(list); assert(list->sentinel); assert(item); if ((node = dbll_insert_before(list->sentinel, item))) list->nr_nodes++; return node; } /* * Removes and frees a node from list. Caller needs to free * the item referenced in the node itself. Returns node * after the deleted node (new node at same FORWARD offset * from sentinel) */ dbll_node_t * dbll_delete (dbll_head_t *list, dbll_node_t *node) { dbll_node_t *nextnode; assert(list); assert(node); assert(node->next); assert(node->prev); node->prev->next = node->next; node->next->prev = node->prev; nextnode = node->next; free(node); list->nr_nodes--; return nextnode; } dbll_node_t * dbll_first (dbll_head_t *list) { return list->sentinel->next; } dbll_node_t * dbll_last (dbll_head_t *list) { return list->sentinel->prev; } dbll_node_t * dbll_next (dbll_node_t *node) { return node->next; } void * dbll_item(dbll_node_t *node) { return node->item; } /* * Runs given callback on each node in the list starting * with the node after the sentinel and ending with the node * just before the sentinel. We don't use dbll_iterate() * because in the case of iterating through all the items we * can just run until node->item is NULL and can avoid some * testing, but note that this relies on all allocated nodes * referring to actual items. Returns number of callback * iterations that returned true. * * XXX TODO: check out run times just doing this in a loop * nr_nodes times instead of testing for the sentinel. */ unsigned int dbll_iterate_all (dbll_head_t *list, dbll_iterate_fn_t callback) { unsigned int listlen; dbll_node_t *node; int i, count = 0; assert(callback); node = dbll_first(list); listlen = list->nr_nodes; for (i = 0; i < listlen; i++) { if (callback(node) == false) count++; node = node->next; } return count? false: true; } unsigned int dbll_iterate_all_rev (dbll_head_t *list, dbll_iterate_fn_t callback) { unsigned int listlen; dbll_node_t *node; int i, count = 0; node = dbll_last(list); listlen = list->nr_nodes; for (i = 0; i < listlen; i++) { if (callback(node) == false) count++; node = node->prev; } return count? false: true; } /* * Runs given callback on each item in the list starting * with the given first node and ending with the given last * node. Returns number of callback iterations that * returned true. If passed a NULLs for first or last, * defaults to sentinel's next and previous nodes, * respectively, but look at dbll_iterate_all() if it's * desired to just iterate over all nodes. Relies on caller * to know that the iteration won't overlap its start node * before finishing. */ unsigned int dbll_iterate (dbll_head_t *list, dbll_node_t *first, dbll_node_t *last, dbll_iterate_fn_t callback) { dbll_node_t *p; unsigned int count = 0; assert(callback); if (!first) first = dbll_first(list); if (!last) last = dbll_last(list); for (p = first; p != last; p = p->next) if (callback(p)) count++; return count; } /* * Do a merge sort on the list with the user-supplied item * comparator. This should have O(n log(n)) worst and * average case. It has the best performance for arrays * that I can find, and works well with linked lists, so it * seems ideally suited. * * I used iteration, rather than recursion, since we already * keep track of how large our lists are in the list head * object; this keeps stack usage and calls down (which * contributed significant overhead when I profiled this * using the qsort()-backed dbll_sort_old()). * * I ended up sorting the list in-place after I found that * malloc()/free() overhead when using temporary lists was * very expensive for large numbers of iterations (I was * getting beat handily by dbll_sort_old()). Fortunately, * we are actually maintaining two lists per dbll_head_t: * one in the forward direction, and one in the reverse. So * we can just sort using the forward direction, and use the * reverse pointers to construct the merged list! Then swap * so reverse is forward and repeat. When done, it is * necessary to rebuild all the prev pointers, since they've * been borrowed by the merge process and no longer have * referential integrity. * * UPDATE: I'm still getting beat by dbll_sort_old(). My * switching to doing the sort in-place, I'm now getting * beaten by a factor of 2, instead of 3 like before. * Profiling shows that the extra time is taken up largely * in the reversal at the end of each merge, and in the * steps advancing right by bucketsz. The advantage of * dbll_sort_old() may simply me that it uses a temporary * array and thus involves less pointer dereferences. I * need to spend more time on this and perhaps even * reimplement using an array. I've been working on this * for about three days now though, and really I'm not using * this right now for anything where I'll noticed the 50% * speed difference. Definitely something I'll want to * optimize later though... */ void dbll_sort (dbll_head_t *list, comparison_fn_t comparison_fn) { dbll_node_t *left, *right, *new, *previous, *node; unsigned nmerges; unsigned i, iterations; unsigned l, r, bucketsz, nodecount, sortlen; bucketsz = 1; do { nodecount = list->nr_nodes; sortlen = bucketsz * 2; iterations = nodecount / sortlen; if (nodecount % sortlen || sortlen > nodecount) iterations++; nmerges = 0; left = right = list->sentinel->next; new = list->sentinel; while (iterations--) { if (iterations || ((nodecount % sortlen) == 0)) { /* won't be last iteration; * full run of sortlen * guaranteed available to * merge */ /* XXX TIME */ for (i = 0; i < bucketsz; i++) right = right->next; l = r = bucketsz; } else { /* last iteration; may not * get a full bucketsz in * left or right so we have * to check to see where the * list ends and set the l * and r lengths accordingly */ unsigned seen = nmerges * sortlen; unsigned remain = nodecount - seen; assert(remain); if (remain <= bucketsz) { r = 0; l = remain; } else { l = bucketsz; r = remain - bucketsz; for (i = 0; i < l; i++) right = right->next; } } /* * Merge the buckets by using the * comparator passed in by the user. * Note that we do this in place by * taking advantage of our doubly * linked nature: we use the forward * sort results obtained via next * pointers (which we never disturb * until done), and use the * backwards ring by following * previous node pointers (those get * rearranged as we go to form the * merge list) */ while (l || r) { if (!l) { new->prev = right; right = right->next; r--; } else if (!r) { new->prev = left; left = left->next; l--; } else if (comparison_fn(left->item, /* XXX TIME */ right->item) <= 0) { new->prev = left; left = left->next; l--; } else { new->prev = right; right = right->next; r--; } new = new->prev; } left = right; nmerges++; } /* retain our circular property! */ new->prev = list->sentinel; /* now we swap the list order since we used * the reverse direction to store the merged * list as each sort occurred */ /* XXX TIME */ node = list->sentinel; for (i = 0; i <= nodecount; i++) { previous = node->prev; node->prev = node->next; node->next = previous; node = previous; } bucketsz *= 2; } while (nmerges > 1); /* finally now that we are done we must rebuild the * prev -> relationships because we overwrite it to * do the merges */ /* XXX TIME */ node = list->sentinel; for (i = 0; i <= nodecount; i++) { node->next->prev = node; node = node->next; } } /* * Old API using Quicksort with temporary array. I didn't * like this API because it caused the library user to have * to dereference list node pointers themselves, which I * thought was bogus; that should be hidden behind the API. * Instead the new dbll_sort implements the sorting * algorithm itself instead of using a temporary array and * qsort() as done below. This allows us to (1) use a merge * sort, which has O(n log(n)) worst-case instead of O(n^2) * like quicksort; (2) not have to use an array-oriented * sorting algorithm like quicksort (since we have a list * after all, it's easier to use a merge sort); (3) not * require the caller to know anything about the * implementation: they just pass in an item comparator, * instead of a node comparator as in the old implementation * (below) */ #if 0 static void dbll_sort_old (dbll_head_t *list, comparison_fn_t comparison_fn) { int n, i; dbll_node_t *node; dbll_node_t **nodes; n = list->nr_nodes; node = dbll_first(list); nodes = xmalloc(n * sizeof(node)); for (i = 0; i < n; i++) { nodes[i] = node; node = node->next; } qsort(nodes, n, sizeof(node), comparison_fn); node = list->sentinel; for (i = 0; i < n; i++) { node->next = nodes[i]; node->next->prev = node; node = node->next; } } #endif /* * binary search of a sorted list * * XXX TODO at the place NULL is returned, node should be * pointing to the place where the key would be inserted and * would preserved the sorted property. This could then be * used by dbll_add to retain the sorted property upon * insertion. */ static void * dbll_search_node (dbll_head_t *list, void *key, comparison_fn_t comparison_fn) { unsigned i, old, low, mid, high; int result, interval; dbll_node_t *node = NULL; high = list->nr_nodes; if (!high) return NULL; high--; low = mid = 0; node = list->sentinel->next; for (interval = high - low; interval >= 0; interval = high - low) { old = mid; mid = low + (high - low) / 2; if (mid > old) for (i = old; i < mid; i++) node = node->next; else for (i = mid; i < old; i++) node = node->prev; result = comparison_fn(key, node->item); if (result == 0) return node; else if (result < 0) high = mid - 1; else if (result > 0) low = mid + 1; } return NULL; } static void * dbll_search_item (dbll_head_t *list, void *key, comparison_fn_t comparison_fn) { dbll_node_t *node; node = dbll_search_node(list, key, comparison_fn); return (node? node->item: NULL); } void * dbll_search (dbll_head_t *list, void *key, comparison_fn_t comparison_fn) { return dbll_search_item(list, key, comparison_fn); } /* * Replace the node matching the existing one, as determined * by dbll_search(). Since the comparator says they are the * same, there is no need to resort the list. * * If there are multiple existing keys which match, only the * first one will be replaced. * * The old item is deleted using the given deletion routine * (this is usually just free()). */ void dbll_replace (dbll_head_t *list, void *key, comparison_fn_t comparison_fn, dbll_deletion_fn_t deletion_fn) { dbll_node_t *node; node = dbll_search_node(list, key, comparison_fn); if (!node) { dbll_add(list, key); dbll_sort(list, comparison_fn); return; } if (deletion_fn) deletion_fn(node->item); node->item = key; } bool print_node_as_string (dbll_node_t *node) { assert(node); assert(node->item); printf("%s\n", (char *)node->item); return 0; } typedef struct { char *var; char *val; } stringpair_t; bool print_node_as_stringpair (dbll_node_t *node) { assert(node); assert(node->item); stringpair_t *pair = (stringpair_t *)node->item; printf("%-24s\t%s\n", pair->var, pair->val); return 0; }