Glib sort routines
- From: Charlie Brej <gtk-devel brej org>
- To: gtk-devel-list gnome org
- Subject: Glib sort routines
- Date: Tue, 29 Jun 2010 22:59:53 +0100
I have been taking a look into sorting routines recently and was
wondering if it would be reasonable to replace the glib merge sort
with something faster in cases where parts are already sorted. I
realise that it is currently not broken, so I will understand if the
answer is to leave it as is.
I was hoping to use a simplified Timsort[1] (which I call Stimsort) as
it is already used in Python. I made an initial implementation
(attached) which attempts to see if this is a good thing.
Here are some results:
Number of comparisons for an array size 5000, of random data (worst
case for stim)
merge sort: 55307
stim sort: 57009
Number of instructions when executing in callgrind:
orig sort: 2 288 149
stim sort: 1 970 813
So there is a CPU saving even though this is a worst case scenario for
stim sort (because the merge sort implementation walks the full list
in order to cut it into two).
In the best scenario of the list being already sorted, or reverse
sorted the number of comparisons in stim sort will drop to 4999, while
the merge sort should remain pretty much unchanged.
The code is rather work in progress and I will clean it up with an
explanation of the algorithm before submitting the full proposal along
with a full set of benchmarks.
Is this kind of thing desirable?
[1] http://en.wikipedia.org/wiki/Timsort
diff --git a/glib/glist.c b/glib/glist.c
index 252330a..066cf15 100644
--- a/glib/glist.c
+++ b/glib/glist.c
@@ -1084,6 +1084,250 @@ g_list_sort_real (GList *list,
user_data);
}
+static GList *
+g_list_sort_stim_merge (GList *node_a,
+ GList *node_b,
+ GCompareFunc compare_func,
+ gpointer user_data)
+{
+ GList *merged_list;
+ GList **next_element_ptr;
+ next_element_ptr = &merged_list;
+
+ while (1)
+ {
+ if (!node_a)
+ {
+ *next_element_ptr = node_b;
+ return merged_list;
+ }
+ if (!node_b)
+ {
+ *next_element_ptr = node_a;
+ return merged_list;
+ }
+
+ if (((GCompareDataFunc) compare_func) (node_a->data, node_b->data, user_data) <= 0)
+ {
+ *next_element_ptr = node_a;
+ next_element_ptr = &node_a->next;
+ node_a = node_a->next;
+ }
+ else
+ {
+ *next_element_ptr = node_b;
+ next_element_ptr = &node_b->next;
+ node_b = node_b->next;
+ }
+ }
+}
+
+static void
+g_list_sort_stim_extract (GList *node,
+ GList **reply,
+ int *node_count,
+ GList **reply_rest,
+ GCompareFunc compare_func,
+ gpointer user_data)
+{
+ if (!node)
+ {
+ *reply = NULL;
+ *reply_rest = NULL;
+ *node_count = 0;
+ return;
+ }
+ if (!node->next)
+ {
+ *reply = node;
+ *reply_rest = NULL;
+ *node_count = 1;
+ return;
+ }
+ if (((GCompareDataFunc) compare_func) (node->data, node->next->data, user_data) <= 0)
+ {
+ /* a <= b so do ascending list */
+ *reply = node;
+ node = node->next;
+ int count = 2;
+ while(1)
+ {
+ if (!node->next){
+ *reply_rest = NULL;
+ *node_count = count;
+ return;
+ }
+ if (((GCompareDataFunc) compare_func) (node->data, node->next->data, user_data) > 0)
+ {
+ *reply_rest = node->next;
+ node->next = NULL;
+ *node_count = count;
+ return;
+ }
+ count++;
+ node = node->next;
+ }
+ }
+ else
+ {
+ /* a < b so do strictly descending list */
+ GList *new_list;
+ GList *cur_node;
+ cur_node = node;
+ node = node->next;
+ cur_node->next = NULL;
+ new_list = cur_node;
+
+ cur_node = node;
+ node = node->next;
+ cur_node->next = new_list;
+ new_list = cur_node;
+
+ int count = 2;
+ while(1)
+ {
+ if (!node)
+ {
+ *reply = new_list;
+ *reply_rest = NULL;
+ *node_count = count;
+ return;
+ }
+ if (((GCompareDataFunc) compare_func) (new_list->data, node->data, user_data) <= 0)
+ {
+ *reply = new_list;
+ *reply_rest = node;
+ *node_count = count;
+ return;
+ }
+
+ count++;
+ cur_node = node;
+ node = node->next;
+ cur_node->next = new_list;
+ new_list = cur_node;
+ }
+ }
+}
+
+static void
+g_list_sort_stim_body (GList *node,
+ GList *left_list,
+ int left_count,
+ GList **reply1_list,
+ int *reply1_count,
+ GList **reply2_list,
+ int *reply2_count,
+ GList **reply_rest,
+ int want,
+ GCompareFunc compare_func,
+ gpointer user_data)
+{
+ GList *right1_list, *right2_list;
+ int right1_count, right2_count;
+ GList *rest;
+ int subwant = want - (want >> 2);
+
+ if (want < 16)
+ {
+ g_list_sort_stim_extract (node, &right1_list, &right1_count, &rest, compare_func, user_data);
+ }
+ else
+ {
+ g_list_sort_stim_body (node,
+ left_list, left_count,
+ &left_list, &left_count,
+ &right1_list, &right1_count,
+ &rest,
+ want/2,
+ compare_func,
+ user_data);
+ }
+
+ while (left_count < subwant)
+ {
+ int mywant = left_count > want / 3 ? want - left_count : left_count;
+ g_list_sort_stim_body (rest,
+ right1_list, right1_count,
+ &right1_list, &right1_count,
+ &right2_list, &right2_count,
+ &rest,
+ mywant,
+ compare_func,
+ user_data);
+
+ left_list = g_list_sort_stim_merge (left_list, right1_list, compare_func, user_data);
+ left_count = left_count + right1_count;
+ right1_list = right2_list;
+ right1_count = right2_count;
+
+ if (right1_count > subwant)
+ break;
+ if (!right1_count)
+ break;
+ }
+ *reply1_list = left_list;
+ *reply1_count = left_count;;
+ *reply2_list = right1_list;
+ *reply2_count = right1_count;
+ *reply_rest = rest;
+ return;
+}
+
+static GList*
+g_list_sort_stim_real (GList* list,
+ GCompareFunc compare_func,
+ gpointer user_data)
+{
+ GList *node, *rest;
+ GList *reply1, *reply2;
+ int count;
+ int reply1_count, reply2_count;
+ int want = g_list_length (list);
+
+ g_list_sort_stim_extract (list, &node, &count, &rest, compare_func, user_data);
+
+ g_list_sort_stim_body (rest,
+ node,
+ count,
+ &reply1,
+ &reply1_count,
+ &reply2,
+ &reply2_count,
+ &rest,
+ want,
+ compare_func,
+ user_data);
+ node = reply1;
+ GList *prev_node = NULL;
+ while (node)
+ {
+ node->prev = prev_node;
+ prev_node = node;
+ node = node->next;
+ }
+ return reply1;
+}
+
+
+GList *
+g_list_sort_stim (GList *list,
+ GCompareFunc compare_func)
+{
+ return g_list_sort_stim_real (list, (GFunc) compare_func, NULL);
+
+}
+
+
+GList *
+g_list_sort_stim_with_data (GList *list,
+ GCompareDataFunc compare_func,
+ gpointer user_data)
+{
+ return g_list_sort_stim_real (list, (GFunc) compare_func, user_data);
+}
+
+
/**
* g_list_sort:
* @list: a #GList
[
Date Prev][
Date Next] [
Thread Prev][
Thread Next]
[
Thread Index]
[
Date Index]
[
Author Index]