[libdazzle] util: add levenshtein implementation
- From: Christian Hergert <chergert src gnome org>
- To: commits-list gnome org
- Cc:
- Subject: [libdazzle] util: add levenshtein implementation
- Date: Sun, 4 Jun 2017 02:07:12 +0000 (UTC)
commit 9df806c88a6246eadfdfc89cfcab73483ad16a1e
Author: Christian Hergert <chergert redhat com>
Date: Sat Jun 3 18:53:51 2017 -0700
util: add levenshtein implementation
src/dazzle.h | 1 +
src/meson.build | 2 +
src/util/dzl-levenshtein.c | 105 ++++++++++++++++++++++++++++++++++++++++++++
src/util/dzl-levenshtein.h | 31 +++++++++++++
tests/meson.build | 6 +++
tests/test-levenshtein.c | 39 ++++++++++++++++
6 files changed, 184 insertions(+), 0 deletions(-)
---
diff --git a/src/dazzle.h b/src/dazzle.h
index c98db61..688de66 100644
--- a/src/dazzle.h
+++ b/src/dazzle.h
@@ -108,6 +108,7 @@ G_BEGIN_DECLS
#include "util/dzl-file-manager.h"
#include "util/dzl-gdk.h"
#include "util/dzl-heap.h"
+#include "util/dzl-levenshtein.h"
#include "util/dzl-pango.h"
#include "util/dzl-rgba.h"
#include "util/dzl-ring.h"
diff --git a/src/meson.build b/src/meson.build
index 2d07bbd..9f7f29b 100644
--- a/src/meson.build
+++ b/src/meson.build
@@ -121,6 +121,7 @@ libdazzle_public_headers = [
'util/dzl-file-manager.h',
'util/dzl-gdk.h',
'util/dzl-heap.h',
+ 'util/dzl-levenshtein.h',
'util/dzl-pango.h',
'util/dzl-rgba.h',
'util/dzl-ring.h',
@@ -241,6 +242,7 @@ libdazzle_public_sources = [
'util/dzl-file-manager.c',
'util/dzl-gdk.c',
'util/dzl-heap.c',
+ 'util/dzl-levenshtein.c',
'util/dzl-pango.c',
'util/dzl-rgba.c',
'util/dzl-ring.c',
diff --git a/src/util/dzl-levenshtein.c b/src/util/dzl-levenshtein.c
new file mode 100644
index 0000000..16261b0
--- /dev/null
+++ b/src/util/dzl-levenshtein.c
@@ -0,0 +1,105 @@
+/* dzl-levenshtein.c
+ *
+ * Copyright (C) 2013-2017 Christian Hergert <christian hergert me>
+ *
+ * This file is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * This file is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+ * License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <glib.h>
+#include <string.h>
+
+gint
+dzl_levenshtein (const gchar *needle,
+ const gchar *haystack)
+{
+ const gchar *s;
+ const gchar *t;
+ gunichar sc;
+ gunichar tc;
+ gint *v0;
+ gint *v1;
+ gint haystack_char_len;
+ gint cost;
+ gint i;
+ gint j;
+ gint ret;
+
+ g_return_val_if_fail (needle, G_MAXINT);
+ g_return_val_if_fail (haystack, G_MAXINT);
+
+ /*
+ * Handle degenerate cases.
+ */
+ if (!g_strcmp0(needle, haystack)) {
+ return 0;
+ } else if (!*needle) {
+ return g_utf8_strlen(haystack, -1);
+ } else if (!*haystack) {
+ return g_utf8_strlen(needle, -1);
+ }
+
+ haystack_char_len = g_utf8_strlen (haystack, -1);
+
+ /*
+ * Create two vectors to hold our states.
+ */
+ v0 = g_new0(int, haystack_char_len + 1);
+ v1 = g_new0(int, haystack_char_len + 1);
+
+ /*
+ * initialize v0 (the previous row of distances).
+ * this row is A[0][i]: edit distance for an empty needle.
+ * the distance is just the number of characters to delete from haystack.
+ */
+ for (i = 0; i < haystack_char_len + 1; i++) {
+ v0[i] = i;
+ }
+
+ for (i = 0, s = needle; s && *s; i++, s = g_utf8_next_char(s)) {
+ /*
+ * Calculate v1 (current row distances) from the previous row v0.
+ */
+
+ sc = g_utf8_get_char(s);
+
+ /*
+ * first element of v1 is A[i+1][0]
+ *
+ * edit distance is delete (i+1) chars from needle to match empty
+ * haystack.
+ */
+ v1[0] = i + 1;
+
+ /*
+ * use formula to fill in the rest of the row.
+ */
+ for (j = 0, t = haystack; t && *t; j++, t = g_utf8_next_char(t)) {
+ tc = g_utf8_get_char(t);
+ cost = (sc == tc) ? 0 : 1;
+ v1[j+1] = MIN(v1[j] + 1, MIN(v0[j+1] + 1, v0[j] + cost));
+ }
+
+ /*
+ * copy v1 (current row) to v0 (previous row) for next iteration.
+ */
+ memcpy(v0, v1, sizeof(gint) * haystack_char_len);
+ }
+
+ ret = v1[haystack_char_len];
+
+ g_free(v0);
+ g_free(v1);
+
+ return ret;
+}
diff --git a/src/util/dzl-levenshtein.h b/src/util/dzl-levenshtein.h
new file mode 100644
index 0000000..23a09a6
--- /dev/null
+++ b/src/util/dzl-levenshtein.h
@@ -0,0 +1,31 @@
+/* dzl-levenshtein.h
+ *
+ * Copyright (C) 2017 Christian Hergert <chergert redhat com>
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef DZL_LEVENSHTEIN_H
+#define DZL_LEVENSHTEIN_H
+
+#include <glib.h>
+
+G_BEGIN_DECLS
+
+gint dzl_levenshtein (const gchar *needle,
+ const gchar *haystack);
+
+G_END_DECLS
+
+#endif /* DZL_LEVENSHTEIN_H */
diff --git a/tests/meson.build b/tests/meson.build
index 90dd659..f9a5075 100644
--- a/tests/meson.build
+++ b/tests/meson.build
@@ -220,3 +220,9 @@ test_trie = executable('test-trie', 'test-trie.c',
link_args: test_link_args,
dependencies: libdazzle_deps + [libdazzle_dep],
)
+
+test_levenshtein = executable('test-levenshtein', 'test-levenshtein.c',
+ c_args: test_cflags,
+ link_args: test_link_args,
+ dependencies: libdazzle_deps + [libdazzle_dep],
+)
diff --git a/tests/test-levenshtein.c b/tests/test-levenshtein.c
new file mode 100644
index 0000000..82523b1
--- /dev/null
+++ b/tests/test-levenshtein.c
@@ -0,0 +1,39 @@
+#include <dazzle.h>
+
+typedef struct
+{
+ const gchar *needle;
+ const gchar *haystack;
+ gint expected_score;
+} WordCheck;
+
+static void
+test_levenshtein_basic (void)
+{
+ static const WordCheck check[] = {
+ { "gtk", "gkt", 2 },
+ { "LibreFreeOpen", "Cromulent", 10 },
+ { "Xorg", "Wayland", 7 },
+ { "glib", "gobject", 6 },
+ { "gbobject", "gobject", 1 },
+ { "flip", "fliiiip", 3 },
+ { "flip", "fliiiipper", 6 },
+ { NULL }
+ };
+
+ for (guint i = 0; check[i].needle != NULL; i++)
+ {
+ gint score = dzl_levenshtein (check[i].needle, check[i].haystack);
+
+ g_assert_cmpint (score, ==, check[i].expected_score);
+ }
+}
+
+gint
+main (gint argc,
+ gchar *argv[])
+{
+ g_test_init (&argc, &argv, NULL);
+ g_test_add_func ("/Dazzle/Levenshtein/basic", test_levenshtein_basic);
+ return g_test_run ();
+}
[
Date Prev][
Date Next] [
Thread Prev][
Thread Next]
[
Thread Index]
[
Date Index]
[
Author Index]