[tracker/parser-unicode-libs-review] FTS parser: added unit tests
- From: Aleksander Morgado <aleksm src gnome org>
- To: commits-list gnome org
- Cc:
- Subject: [tracker/parser-unicode-libs-review] FTS parser: added unit tests
- Date: Mon, 17 May 2010 11:25:46 +0000 (UTC)
commit 76325e8a9a4b32dcbc46ad69cab41053465f6068
Author: Aleksander Morgado <aleksander lanedo com>
Date: Mon May 17 13:24:18 2010 +0200
FTS parser: added unit tests
* Tests for normalization issues
* Tests for unaccenting issues
* Tests for casefolding issues
* Tests for output number of words issues
tests/libtracker-fts/Makefile.am | 7 +-
tests/libtracker-fts/tracker-parser-test.c | 290 ++++++++++++++++++++++++++++
2 files changed, 296 insertions(+), 1 deletions(-)
---
diff --git a/tests/libtracker-fts/Makefile.am b/tests/libtracker-fts/Makefile.am
index 6367b4d..ea5c17e 100644
--- a/tests/libtracker-fts/Makefile.am
+++ b/tests/libtracker-fts/Makefile.am
@@ -10,7 +10,8 @@ noinst_PROGRAMS = \
TEST_PROGS += \
- tracker-fts-test
+ tracker-fts-test \
+ tracker-parser-test
INCLUDES = \
-DTRACKER_COMPILATION \
@@ -43,6 +44,10 @@ tracker_fts_test_SOURCES = tracker-fts-test.c
tracker_fts_test_LDADD = $(common_ldadd)
+tracker_parser_test_SOURCES = tracker-parser-test.c
+
+tracker_parser_test_LDADD = $(common_ldadd)
+
tracker_parser_SOURCES = tracker-parser.c
tracker_parser_LDADD = $(common_ldadd)
diff --git a/tests/libtracker-fts/tracker-parser-test.c b/tests/libtracker-fts/tracker-parser-test.c
new file mode 100644
index 0000000..820173f
--- /dev/null
+++ b/tests/libtracker-fts/tracker-parser-test.c
@@ -0,0 +1,290 @@
+/*
+ * Copyright (C) 2010, Nokia <ivan frade nokia com>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this library; if not, write to the
+ * Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ */
+
+#include "config.h"
+
+#include <string.h>
+
+#include <glib.h>
+#include <gio/gio.h>
+
+#include <libtracker-fts/tracker-parser.h>
+
+/* -------------- COMMON FOR ALL TESTS ----------------- */
+
+/*
+ * Fixture type shared by every test in this file: the parser under
+ * test plus the default configuration values used to drive it.
+ */
+typedef struct TrackerParserTestFixture {
+	/* Parser instance stored by the common setup */
+	TrackerParser *parser;
+
+	/* Maximum word length handed to tracker_parser_new() */
+	gint max_word_length;
+
+	/* Default flags handed to tracker_parser_reset() */
+	gboolean delimit_words;
+	gboolean enable_stemmer;
+	gboolean enable_stop_words;
+	gboolean skip_reserved_words;
+	gboolean skip_numbers;
+} TrackerParserTestFixture;
+
+/*
+ * Common setup for all tests.
+ *
+ * Stores the default parser configuration in @fixture and creates the
+ * parser with it. If the language cannot be created a critical is
+ * logged and the fixture is left as received; if the parser cannot be
+ * created a critical is logged and fixture->parser holds the NULL
+ * result.
+ *
+ * @fixture: fixture to fill in
+ * @data: per-test data, not used by the setup
+ */
+static void
+test_common_setup (TrackerParserTestFixture *fixture,
+                   gconstpointer data)
+{
+	TrackerLanguage *language;
+
+	/* Setup language for parser */
+	language = tracker_language_new (NULL);
+	if (!language) {
+		g_critical ("Language setup failed!");
+		return;
+	}
+
+	/* Default conf parameters */
+	fixture->max_word_length = 50;
+	fixture->delimit_words = TRUE;
+	fixture->enable_stemmer = TRUE;
+	fixture->enable_stop_words = TRUE;
+	fixture->skip_reserved_words = TRUE;
+	fixture->skip_numbers = TRUE;
+
+	/* Create the parser */
+	fixture->parser = tracker_parser_new (language,
+	                                      fixture->max_word_length);
+
+	/* Release our language reference whether or not the parser was
+	 * created. Previously the failure path returned before the unref
+	 * and leaked the object. The success path already dropped the
+	 * reference right after creation, so this does not change it. */
+	g_object_unref (language);
+
+	if (!fixture->parser) {
+		g_critical ("Parser creation failed!");
+	}
+}
+
+/*
+ * Common teardown for all tests: frees the parser stored in the
+ * fixture, if there is one.
+ *
+ * @fixture: fixture whose parser is released
+ * @data: per-test data, not used by the teardown
+ */
+static void
+test_common_teardown (TrackerParserTestFixture *fixture,
+                      gconstpointer data)
+{
+	/* No parser stored in the fixture: nothing to free */
+	if (fixture->parser == NULL) {
+		return;
+	}
+
+	tracker_parser_free (fixture->parser);
+}
+
+/* -------------- EXPECTED NUMBER OF WORDS TESTS ----------------- */
+
+/* One entry of the expected-number-of-words test table */
+typedef struct TestDataExpectedNWords {
+	/* Input text given to the parser */
+	const gchar *str;
+	/* Value passed as the skip-numbers flag of tracker_parser_reset() */
+	gboolean skip_numbers;
+	/* Number of words the parser must return for this input */
+	guint expected_nwords;
+} TestDataExpectedNWords;
+
+/*
+ * Shared test body for the expected-number-of-words tests.
+ *
+ * Resets the fixture's parser with the input string of the given table
+ * entry, pulls words until tracker_parser_next() returns NULL and
+ * asserts that the number of words returned equals the entry's
+ * expected_nwords. The skip-numbers flag comes from the table entry;
+ * every other flag comes from the fixture defaults.
+ *
+ * @fixture: fixture holding the parser and the default flags
+ * @data: pointer to a TestDataExpectedNWords entry
+ */
+static void
+expected_nwords_check (TrackerParserTestFixture *fixture,
+                       gconstpointer data)
+{
+	const TestDataExpectedNWords *testdata = data;
+	guint nwords;
+	/* Output arguments of tracker_parser_next(); not inspected here */
+	gint pos;
+	gint start_offset;
+	gint end_offset;
+	gint length;
+	gboolean is_stop_word;
+
+	/* Load the test string into the parser */
+	tracker_parser_reset (fixture->parser,
+	                      testdata->str,
+	                      strlen (testdata->str),
+	                      fixture->delimit_words,
+	                      fixture->enable_stemmer,
+	                      fixture->enable_stop_words,
+	                      fixture->skip_reserved_words,
+	                      testdata->skip_numbers);
+
+	/* Count the words until the parser has no more to give */
+	nwords = 0;
+	for (;;) {
+		const gchar *next;
+
+		next = tracker_parser_next (fixture->parser,
+		                            &pos,
+		                            &start_offset,
+		                            &end_offset,
+		                            &is_stop_word,
+		                            &length);
+		if (next == NULL) {
+			break;
+		}
+		nwords++;
+	}
+
+	/* The count must match the expectation stored in the table */
+	g_assert_cmpuint (nwords, ==, testdata->expected_nwords);
+}
+
+/* -------------- EXPECTED WORD TESTS ----------------- */
+
+/* One entry of an expected-word test table */
+typedef struct TestDataExpectedWord {
+	/* Input text given to the parser */
+	const gchar *str;
+	/* Word the parser must return first for this input */
+	const gchar *expected;
+} TestDataExpectedWord;
+
+/*
+ * Shared test body for the expected-word tests.
+ *
+ * Resets the fixture's parser with the input string of the given table
+ * entry (stemming disabled, remaining flags from the fixture defaults),
+ * fetches the first word and asserts that it equals the entry's
+ * expected string.
+ *
+ * @fixture: fixture holding the parser and the default flags
+ * @data: pointer to a TestDataExpectedWord entry
+ */
+static void
+expected_word_check (TrackerParserTestFixture *fixture,
+                     gconstpointer data)
+{
+	const TestDataExpectedWord *testdata = data;
+	/* Stemming is switched off for this check */
+	const gboolean use_stemmer = FALSE;
+	const gchar *word;
+	/* Output arguments of tracker_parser_next(); not inspected here */
+	gint pos;
+	gint start_offset;
+	gint end_offset;
+	gint length;
+	gboolean is_stop_word;
+
+	/* Load the test string into the parser */
+	tracker_parser_reset (fixture->parser,
+	                      testdata->str,
+	                      strlen (testdata->str),
+	                      fixture->delimit_words,
+	                      use_stemmer,
+	                      fixture->enable_stop_words,
+	                      fixture->skip_reserved_words,
+	                      fixture->skip_numbers);
+
+	/* Only the first word returned by the parser is examined */
+	word = tracker_parser_next (fixture->parser,
+	                            &pos,
+	                            &start_offset,
+	                            &end_offset,
+	                            &is_stop_word,
+	                            &length);
+
+	/* The parser output must match the expectation stored in the table */
+	g_assert_cmpstr (word, ==, testdata->expected);
+}
+
+/* -------------- LIST OF TESTS ----------------- */
+
+/*
+ * Normalization-related tests.
+ *
+ * The first three inputs spell the accented letter as a single
+ * precomposed character. The last three spell it as a base letter
+ * followed by the bytes 0xCC 0x81, the UTF-8 encoding of U+0301
+ * COMBINING ACUTE ACCENT. Every input is expected to come out as
+ * "ecole". The table ends with a NULL entry.
+ *
+ * The "ÉCOLE" and "École" inputs had been turned into mis-encoded
+ * text ("Ã?...") and are restored here.
+ */
+static const TestDataExpectedWord test_data_normalization[] = {
+	{ .str = "école", .expected = "ecole" },
+	{ .str = "ÉCOLE", .expected = "ecole" },
+	{ .str = "École", .expected = "ecole" },
+	/* Keep the pieces as separate literals: a following 'c' or 'C'
+	 * would otherwise be read as part of the \x escape. */
+	{ .str = "e" "\xCC\x81" "cole", .expected = "ecole" },
+	{ .str = "E" "\xCC\x81" "COLE", .expected = "ecole" },
+	{ .str = "E" "\xCC\x81" "cole", .expected = "ecole" },
+	{ .str = NULL, .expected = NULL }
+};
+
+/*
+ * Unaccenting-related tests: each accented input is expected to come
+ * out of the parser as the listed unaccented, lowercase word. The
+ * table ends with a NULL entry.
+ */
+static const TestDataExpectedWord test_data_unaccent[] = {
+	{ .str = "Murciélago", .expected = "murcielago" },
+	{ .str = "camión", .expected = "camion" },
+	{ .str = "desagüe", .expected = "desague" },
+	{ .str = NULL, .expected = NULL }
+};
+
+/*
+ * Casefolding-related tests: every input, whatever its letter case, is
+ * expected to come out of the parser as "gross". The last input uses
+ * the German sharp s, which had been turned into mis-encoded text
+ * ("groÃ?") and is restored here. The table ends with a NULL entry.
+ */
+static const TestDataExpectedWord test_data_casefolding[] = {
+	{ .str = "gross", .expected = "gross" },
+	{ .str = "GROSS", .expected = "gross" },
+	{ .str = "GrOsS", .expected = "gross" },
+	{ .str = "groß", .expected = "gross" },
+	{ .str = NULL, .expected = NULL }
+};
+
+/*
+ * Number of expected words tests.
+ *
+ * Each entry gives an input text, the skip-numbers flag to use and the
+ * number of words the parser is expected to return. The first two
+ * entries share one text and differ only in the flag: the expected
+ * count is 8 with numbers skipped and 9 with numbers kept. The table
+ * ends with a NULL entry.
+ *
+ * The non-ASCII inputs (typographic apostrophe, katakana, Chinese,
+ * Russian and Vietnamese text) had been turned into mis-encoded text
+ * and are restored here as UTF-8.
+ */
+static const TestDataExpectedNWords test_data_nwords[] = {
+	{ .str = "The quick (\"brown\") fox can’t jump 32.3 feet, right?", .skip_numbers = TRUE, .expected_nwords = 8 },
+	{ .str = "The quick (\"brown\") fox can’t jump 32.3 feet, right?", .skip_numbers = FALSE, .expected_nwords = 9 },
+	{ .str = "ホモ・サピエンス", .skip_numbers = TRUE, .expected_nwords = 2 }, /* katakana */
+	{ .str = "本州最主流的风味", .skip_numbers = TRUE, .expected_nwords = 8 }, /* chinese */
+	{ .str = "Американские суда находятся в международных водах.", .skip_numbers = TRUE, .expected_nwords = 6 }, /* russian */
+	{ .str = "Bần chỉ là một anh nghèo xác", .skip_numbers = TRUE, .expected_nwords = 7 }, /* vietnamese */
+	{ .str = "ホモ・サピエンス 本州最主流的风味 katakana, chinese, english", .skip_numbers = TRUE, .expected_nwords = 13 }, /* mixed */
+	{ .str = NULL, .skip_numbers = FALSE, .expected_nwords = 0 }
+};
+
+/*
+ * Test program entry point.
+ *
+ * Initializes the GLib type, thread and test machinery, registers one
+ * test case per entry of each test table (normalization, unaccent,
+ * casefolding and number of words, in that order) and runs them. Every
+ * test case uses the common setup and teardown. The test path is the
+ * table's prefix followed by the entry index.
+ *
+ * Returns the result of g_test_run().
+ */
+int
+main (int argc, char **argv)
+{
+	const TestDataExpectedWord *word_entry;
+	const TestDataExpectedNWords *nwords_entry;
+	gchar *testpath;
+	gint i;
+
+	g_type_init ();
+	if (!g_thread_supported ()) {
+		g_thread_init (NULL);
+	}
+	g_test_init (&argc, &argv, NULL);
+
+	/* Normalization checks: walk the table up to its NULL entry */
+	for (i = 0, word_entry = test_data_normalization;
+	     word_entry->str != NULL;
+	     i++, word_entry++) {
+		testpath = g_strdup_printf ("/libtracker-fts/parser/normalization_%d", i);
+		g_test_add (testpath,
+		            TrackerParserTestFixture,
+		            word_entry,
+		            test_common_setup,
+		            expected_word_check,
+		            test_common_teardown);
+		g_free (testpath);
+	}
+
+	/* Unaccent checks */
+	for (i = 0, word_entry = test_data_unaccent;
+	     word_entry->str != NULL;
+	     i++, word_entry++) {
+		testpath = g_strdup_printf ("/libtracker-fts/parser/unaccent_%d", i);
+		g_test_add (testpath,
+		            TrackerParserTestFixture,
+		            word_entry,
+		            test_common_setup,
+		            expected_word_check,
+		            test_common_teardown);
+		g_free (testpath);
+	}
+
+	/* Casefolding checks */
+	for (i = 0, word_entry = test_data_casefolding;
+	     word_entry->str != NULL;
+	     i++, word_entry++) {
+		testpath = g_strdup_printf ("/libtracker-fts/parser/casefolding_%d", i);
+		g_test_add (testpath,
+		            TrackerParserTestFixture,
+		            word_entry,
+		            test_common_setup,
+		            expected_word_check,
+		            test_common_teardown);
+		g_free (testpath);
+	}
+
+	/* Expected number of words checks */
+	for (i = 0, nwords_entry = test_data_nwords;
+	     nwords_entry->str != NULL;
+	     i++, nwords_entry++) {
+		testpath = g_strdup_printf ("/libtracker-fts/parser/nwords_%d", i);
+		g_test_add (testpath,
+		            TrackerParserTestFixture,
+		            nwords_entry,
+		            test_common_setup,
+		            expected_nwords_check,
+		            test_common_teardown);
+		g_free (testpath);
+	}
+
+	return g_test_run ();
+}
[
Date Prev][
Date Next] [
Thread Prev][
Thread Next]
[
Thread Index]
[
Date Index]
[
Author Index]