[easytag/wip/application-window: 18/18] Improved scanner functions and added test cases



commit a343ef90ec2b3957733ecb394cae29d4c460ab45
Author: Abhinav <abhijangda hotmail com>
Date:   Wed Mar 12 01:21:42 2014 +0530

    Improved scanner functions and added test cases
    
    https://bugzilla.gnome.org/show_bug.cgi?id=726108

 Makefile.am              |   21 ++++++
 src/application_window.c |   40 +++++-----
 src/scan.c               |  162 ++++++++++++++++++-----------------------
 src/scan.h               |    8 +-
 src/scan_dialog.c        |   33 +++++++--
 tests/test-scan.c        |  180 ++++++++++++++++++++++++++++++++++++++++++++++
 6 files changed, 323 insertions(+), 121 deletions(-)
---
diff --git a/Makefile.am b/Makefile.am
index 36935d3..96e3a21 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -272,7 +272,28 @@ tests/test-desktop-file-validate.sh: Makefile tests/.dstamp
 check_SCRIPTS = \
        tests/test-desktop-file-validate.sh
 
+check_PROGRAMS = \
+       tests/test-scan
+
+tests_test_scan_CPPFLAGS = \
+       -I$(top_srcdir)/src \
+       -I$(top_builddir) \
+       $(DEPRECATED_CPPFLAGS)
+
+tests_test_scan_CFLAGS = \
+       $(WARN_CFLAGS) \
+       $(EASYTAG_CFLAGS)
+
+tests_test_scan_SOURCES = \
+       tests/test-scan.c \
+       src/scan.c
+
+tests_test_scan_LDADD = \
+       $(EASYTAG_LIBS)
+
+# TODO: Use the GLib test runner
 TESTS = \
+       $(check_PROGRAMS) \
        $(check_SCRIPTS)
 endif
 
diff --git a/src/application_window.c b/src/application_window.c
index d740653..5620823 100644
--- a/src/application_window.c
+++ b/src/application_window.c
@@ -134,31 +134,34 @@ Convert_Space_Into_Underscore (GtkWidget *entry)
 static void
 Convert_All_Uppercase (GtkWidget *entry)
 {
-    gchar *string = g_strdup (gtk_entry_get_text (GTK_ENTRY (entry)));
+    gchar *res;
+    const gchar *string = gtk_entry_get_text (GTK_ENTRY (entry));
 
-    Scan_Process_Fields_All_Uppercase (string);
-    gtk_entry_set_text (GTK_ENTRY (entry), string);
-    g_free (string);
+    res = Scan_Process_Fields_All_Uppercase (string);
+    gtk_entry_set_text (GTK_ENTRY (entry), res);
+    g_free (res);
 }
 
 static void
 Convert_All_Lowercase (GtkWidget *entry)
 {
-    gchar *string = g_strdup (gtk_entry_get_text (GTK_ENTRY (entry)));
+    gchar *res;
+    const gchar *string = gtk_entry_get_text (GTK_ENTRY (entry));
 
-    Scan_Process_Fields_All_Downcase (string);
-    gtk_entry_set_text (GTK_ENTRY (entry), string);
-    g_free (string);
+    res = Scan_Process_Fields_All_Downcase (string);
+    gtk_entry_set_text (GTK_ENTRY (entry), res);
+    g_free (res);
 }
 
 static void
 Convert_Letter_Uppercase (GtkWidget *entry)
 {
-    gchar *string = g_strdup (gtk_entry_get_text (GTK_ENTRY (entry)));
+    gchar *res;
+    const gchar *string = gtk_entry_get_text (GTK_ENTRY (entry));
 
-    Scan_Process_Fields_Letter_Uppercase (string);
-    gtk_entry_set_text (GTK_ENTRY (entry), string);
-    g_free (string);
+    res = Scan_Process_Fields_Letter_Uppercase (string);
+    gtk_entry_set_text (GTK_ENTRY (entry), res);
+    g_free (res);
 }
 
 static void
@@ -188,15 +191,12 @@ Convert_Remove_Space (GtkWidget *entry)
 static void
 Convert_Insert_Space (GtkWidget *entry)
 {
-    // FIX ME : we suppose that it will not grow more than 2 times its size...
-    gsize string_length = 2 * strlen (gtk_entry_get_text (GTK_ENTRY (entry)));
-    gchar *string = g_malloc (string_length + 1);
-    strncpy (string, gtk_entry_get_text (GTK_ENTRY (entry)), string_length);
-    string[string_length] = '\0';
+    gchar *res;
+    const gchar *string = (gtk_entry_get_text (GTK_ENTRY (entry)));
 
-    Scan_Process_Fields_Insert_Space (&string);
-    gtk_entry_set_text (GTK_ENTRY (entry), string);
-    g_free (string);
+    res = Scan_Process_Fields_Insert_Space (string);
+    gtk_entry_set_text (GTK_ENTRY (entry), res);
+    g_free (res);
 }
 
 static void
diff --git a/src/scan.c b/src/scan.c
index 44f069f..e74b8c8 100644
--- a/src/scan.c
+++ b/src/scan.c
@@ -8,9 +8,9 @@
 void
 Scan_Convert_Underscore_Into_Space (gchar *string)
 {
-    gchar *tmp;
+    gchar *tmp = string;
 
-    while ((tmp = strchr (string, '_')) != NULL)
+    while ((tmp = strchr (tmp, '_')) != NULL)
     {
         *tmp = ' ';
     }
@@ -40,9 +40,9 @@ Scan_Convert_P20_Into_Space (gchar *string)
 void
 Scan_Convert_Space_Into_Underscore (gchar *string)
 {
-    gchar *tmp;
+    gchar *tmp = string;
 
-    while ((tmp = strchr (string, ' ')) != NULL)
+    while ((tmp = strchr (tmp, ' ')) != NULL)
     {
         *tmp = '_';
     }
@@ -66,39 +66,36 @@ Scan_Process_Fields_Remove_Space (gchar *string)
 }
 
 /*
- * The function inserts a space before an uppercase letter
- * It is needed to realloc the memory!
+ * Scan_Process_Fields_Insert_Space:
+ * @string: Input string
+ *
+ * This function will insert space before every uppercase character.
+ *
+ * Returns: A newly allocated string.
  */
-void
-Scan_Process_Fields_Insert_Space (gchar **string)
+gchar *
+Scan_Process_Fields_Insert_Space (const gchar *string)
 {
     gchar *iter;
     gunichar c;
-    gint j;
-    guint string_length;
-    gchar *string1;
-
-    // FIX ME : we suppose that it will not grow more than 2 times its size...
-    string_length = 2 * strlen(*string);
-    //string1 = g_realloc(*string, string_length+1);
-    string1       = g_malloc(string_length+1);
-    strncpy(string1,*string,string_length);
-    string1[string_length]='\0';
-    g_free(*string);
-    *string = string1;
-
-    for (iter = g_utf8_next_char(*string); *iter; iter = g_utf8_next_char(iter)) // At start : g_utf8_next_char to not consider first "uppercase" letter
+    GString *string1;
+
+    string1 = g_string_new ("");
+    g_string_append_c (string1, *string);
+
+    for (iter = g_utf8_next_char (string); *iter; iter = g_utf8_next_char (iter))
     {
-        c = g_utf8_get_char(iter);
+        c = g_utf8_get_char (iter);
 
-        if (g_unichar_isupper(c))
+        if (g_unichar_isupper (c))
         {
-            for (j = strlen(iter); j > 0; j--)
-                *(iter + j) = *(iter + j - 1);
-            *iter = ' ';
-            iter++;
+            g_string_append_c (string1, ' ');
         }
+
+        g_string_append_unichar (string1, c);
     }
+
+    return g_string_free (string1, FALSE);
 }
 
 /*
@@ -148,88 +145,69 @@ Scan_Remove_Spaces (gchar *string)
   }
 }
 
-void
-Scan_Process_Fields_All_Uppercase (gchar *string)
+/* Returns a newly-allocated string. */
+gchar *
+Scan_Process_Fields_All_Uppercase (const gchar *string)
 {
-    gchar *temp;
-    gchar temp2[6]; // Must have at least 6 bytes of space
-    gunichar c;
-
-    for (temp = string; *temp; temp = g_utf8_next_char(temp))
-    {
-        c = g_utf8_get_char(temp);
-        if (g_unichar_islower(c))
-            strncpy(temp, temp2, g_unichar_to_utf8(g_unichar_toupper(c), temp2));
-    }
+    return g_utf8_strup (string, -1);
 }
 
-void
-Scan_Process_Fields_All_Downcase (gchar *string)
+/* Returns a newly-allocated string. */
+gchar *
+Scan_Process_Fields_All_Downcase (const gchar *string)
 {
-    gchar *temp;
-    gchar temp2[6];
-    gunichar c;
-
-    for (temp = string; *temp; temp = g_utf8_next_char(temp))
-    {
-        c = g_utf8_get_char(temp);
-        if (g_unichar_isupper(c))
-            strncpy(temp, temp2, g_unichar_to_utf8(g_unichar_tolower(c), temp2));
-    }
+    return g_utf8_strdown (string, -1);
 }
 
-void
-Scan_Process_Fields_Letter_Uppercase (gchar *string)
+/* Returns a newly-allocated string. */
+gchar *
+Scan_Process_Fields_Letter_Uppercase (const gchar *string)
 {
-    gchar *temp;
+    const gchar *temp;
     gchar temp2[6];
     gboolean set_to_upper_case = TRUE;
     gunichar c;
-    gchar utf8_character[6];
-    gchar *word, *word1, *word2;
+    GString *string1;
+
+    string1 = g_string_new ("");
 
-    for (temp = string; *temp; temp = g_utf8_next_char(temp))
+    for (temp = string; *temp; temp = g_utf8_next_char (temp))
     {
-        c = g_utf8_get_char(temp);
-        if (set_to_upper_case && g_unichar_islower(c))
-            strncpy(temp, temp2, g_unichar_to_utf8(g_unichar_toupper(c), temp2));
-        else if (!set_to_upper_case && g_unichar_isupper(c))
-            strncpy(temp, temp2, g_unichar_to_utf8(g_unichar_tolower(c), temp2));
-        set_to_upper_case = FALSE; // After the first time, all will be down case
-    }
+        gchar *temp3;
+        int l;
 
-    temp = string;
+        c = g_utf8_get_char (temp);
+        l = g_unichar_to_utf8 (c, temp2);
 
-    // Uppercase again the word 'I' in english
-    while ( temp )
-    {
-        word = temp; // Needed if there is only one word
-        word1 = g_utf8_strchr(temp,-1,' ');
-        word2 = g_utf8_strchr(temp,-1,'_');
-
-        // Take the first string found (near beginning of string)
-        if (word1 && word2)
-            word = MIN(word1,word2);
-        else if (word1)
-            word = word1;
-        else if (word2)
-            word = word2;
+        if (set_to_upper_case && g_unichar_islower(c))
+        {
+            temp3 = g_utf8_strup (temp2, l);
+            g_string_append (string1, temp3);
+            g_free (temp3);
+        }
+        else if (!set_to_upper_case && g_unichar_isupper(c))
+        {
+            temp3 = g_utf8_strdown (temp2, l);
+            g_string_append (string1, temp3);
+            g_free (temp3);
+        }
         else
-            // Last word of the string
-            break;
-
-        // Go to first character of the word (char. after ' ' or '_')
-        word = word+1;
+        {
+            g_string_append_len (string1, temp2, l);
+        }
 
-        // Set uppercase word 'I'
-        if (g_ascii_strncasecmp("I ", word, strlen("I ")) == 0)
+        /* Uppercase the word 'I' in english */
+        if (!set_to_upper_case &&
+            (*(temp - 1) == ' ' || *(temp - 1) == '_') &&
+            (*temp == 'i' || *temp == 'I') &&
+            (*(temp + 1) == ' ' || *(temp + 1) == '_'))
         {
-            c = g_utf8_get_char(word);
-            strncpy(word, utf8_character, g_unichar_to_utf8(g_unichar_toupper(c), utf8_character));
+            string1->str [string1->len - 1] = 'I';
         }
 
-        temp = word;
+        /* After the first time, all will be lower case. */
+        set_to_upper_case = FALSE;
     }
-}
-
 
+    return g_string_free (string1, FALSE);
+}
diff --git a/src/scan.h b/src/scan.h
index 4ad7b63..b5a9c3d 100644
--- a/src/scan.h
+++ b/src/scan.h
@@ -9,12 +9,12 @@ void Scan_Convert_Underscore_Into_Space (gchar *string);
 void Scan_Convert_P20_Into_Space (gchar *string);
 void Scan_Convert_Space_Into_Underscore (gchar *string);
 void Scan_Process_Fields_Remove_Space (gchar *string);
-void Scan_Process_Fields_Insert_Space (gchar **string);
+gchar* Scan_Process_Fields_Insert_Space (const gchar *string);
 void Scan_Process_Fields_Keep_One_Space (gchar *string);
 void Scan_Remove_Spaces (gchar *string);
-void Scan_Process_Fields_All_Uppercase (gchar *string);
-void Scan_Process_Fields_All_Downcase (gchar *string);
-void Scan_Process_Fields_Letter_Uppercase (gchar *string);
+gchar* Scan_Process_Fields_All_Uppercase (const gchar *string);
+gchar* Scan_Process_Fields_All_Downcase (const gchar *string);
+gchar* Scan_Process_Fields_Letter_Uppercase (const gchar *string);
 
 G_END_DECLS
 
diff --git a/src/scan_dialog.c b/src/scan_dialog.c
index f02f193..9a7fe4c 100644
--- a/src/scan_dialog.c
+++ b/src/scan_dialog.c
@@ -1138,7 +1138,12 @@ Scan_Process_Fields_Functions (EtScanDialog *self, gchar **string)
         Scan_Convert_Space_Into_Underscore (*string);
 
     if (gtk_toggle_button_get_active(GTK_TOGGLE_BUTTON(priv->process_insert_space_toggle)))
-        Scan_Process_Fields_Insert_Space(string);
+    {
+        gchar *res;
+        res = Scan_Process_Fields_Insert_Space (*string);
+        g_free (*string);
+        *string = res;
+    }
 
     if (gtk_toggle_button_get_active(GTK_TOGGLE_BUTTON(priv->process_insert_one_space_toggle)))
         Scan_Process_Fields_Keep_One_Space(*string);
@@ -1147,13 +1152,28 @@ Scan_Process_Fields_Functions (EtScanDialog *self, gchar **string)
         Scan_Convert_Character (self, string);
 
     if (gtk_toggle_button_get_active(GTK_TOGGLE_BUTTON(priv->process_all_uppercase_toggle)))
-        Scan_Process_Fields_All_Uppercase(*string);
+    {
+        gchar *res;
+        res = Scan_Process_Fields_All_Uppercase (*string);
+        g_free (*string);
+        *string = res;
+    }
 
     if (gtk_toggle_button_get_active(GTK_TOGGLE_BUTTON(priv->process_all_lowercase_toggle)))
-        Scan_Process_Fields_All_Downcase(*string);
+    {
+        gchar *res;
+        res = Scan_Process_Fields_All_Downcase (*string);
+        g_free (*string);
+        *string = res;
+    }
 
     if (gtk_toggle_button_get_active(GTK_TOGGLE_BUTTON(priv->process_first_uppercase_toggle)))
-         Scan_Process_Fields_Letter_Uppercase(*string);
+    {
+        gchar *res;
+        res = Scan_Process_Fields_Letter_Uppercase (*string);
+        g_free (*string);
+        *string = res;
+    }
 
     if (gtk_toggle_button_get_active(GTK_TOGGLE_BUTTON(priv->process_first_style_uppercase_toggle)))
         Scan_Process_Fields_First_Letters_Uppercase (self, *string);
@@ -1900,7 +1920,10 @@ Scan_Process_Fields_First_Letters_Uppercase (EtScanDialog *self, gchar *string)
     {
         exempt[0] = NULL;
     }
-    Scan_Process_Fields_All_Downcase(string);
+
+    temp = Scan_Process_Fields_All_Downcase (string);
+    g_free (string);
+    string = temp;
 
     if (!g_utf8_validate(string,-1,NULL))
     {
diff --git a/tests/test-scan.c b/tests/test-scan.c
new file mode 100644
index 0000000..b870e83
--- /dev/null
+++ b/tests/test-scan.c
@@ -0,0 +1,180 @@
+#include "scan.h"
+
+/* TODO: Add more test strings, and possibly some performance tests. */
+
+static void
+check_string (gchar *cases, gchar *result)
+{
+    gchar *string1, *string2;
+
+    string1 = g_utf8_normalize (cases, -1, G_NORMALIZE_ALL);
+    string2 = g_utf8_normalize (result, -1, G_NORMALIZE_ALL);
+
+    g_assert_cmpstr (string1, ==, string2);
+
+    g_free (string1);
+    g_free (string2);
+}
+
+static void
+scan_underscore_to_space (void)
+{
+    gsize i;
+    gchar *cases[] = {" ်0STRING ်0_A_B"};
+    gchar *results[] = {" ်0STRING ်0 A B"};
+
+    for (i = 0; i < G_N_ELEMENTS (cases); i++)
+    {
+        gchar *string;
+
+        string = g_strdup (cases[i]);
+        Scan_Convert_Underscore_Into_Space (string);
+        check_string (string, results[i]);
+
+        g_free (string);
+    }
+}
+
+static void
+scan_remove_space (void)
+{
+    gsize i;
+    gchar *cases[] = { " STR ING A   B " };
+    gchar *results[] = { "STRINGAB" };
+
+    for (i = 0; i < G_N_ELEMENTS (cases); i++)
+    {
+        gchar *string;
+
+        string = g_strdup (cases[i]);
+        Scan_Process_Fields_Remove_Space (string);
+        check_string (string, results[i]);
+
+        g_free (string);
+    }
+}
+
+static void
+scan_p20_to_space (void)
+{
+    gsize i;
+    gchar *cases[] = { "S%20T%20R%20", "%20ă b  %20c", "STЂR%20ING%20A%20B" };
+    gchar *results[] = { "S T R ", " ă b   c", "STЂR ING A B" };
+
+    for (i = 0; i < G_N_ELEMENTS (cases); i++)
+    {
+        gchar *string;
+
+        string = g_strdup (cases[i]);
+        Scan_Convert_P20_Into_Space (string);
+        check_string (string, results[i]);
+
+        g_free (string);
+    }
+}
+
+static void
+scan_insert_space (void)
+{
+    gsize i;
+    gchar *cases[] = { "STRINGAB", "StRiNgAb", "tRßiNgAb", "AՄՆ", "bՄԵ", "cՄԻ",
+                       "dՎՆ", "eՄԽ", "fꜲ"};
+    gchar *results[] = { "S T R I N G A B", "St Ri Ng Ab", "t Rßi Ng Ab",
+                         "A Մ Ն", "b Մ Ե", "c Մ Ի", "d Վ Ն", "e Մ Խ", "f Ꜳ" };
+
+    for (i = 0; i < G_N_ELEMENTS (cases); i++)
+    {
+        gchar *string, *res;
+
+        string = g_strdup (cases[i]);
+        res = Scan_Process_Fields_Insert_Space (string);
+        check_string (res, results[i]);
+
+        g_free (string);
+        g_free (res);
+    }
+}
+
+static void
+scan_all_uppercase (void)
+{
+    gsize i;
+    gchar *cases[] = { "stringab", "tRßiNgAb", "aʼnbcd", "lowΐer", "uppΰer",
+                       "sTRINGև", "ᾖᾀ", "pᾖp", "sAfflAs" };
+    gchar *results[] = { "STRINGAB", "TRSSINGAB", "AʼNBCD", "LOWΪ́ER", "UPPΫ́ER",
+                         "STRINGԵՒ", "ἮΙἈΙ", "PἮΙP", "SAFFLAS" };
+
+    for (i = 0; i < G_N_ELEMENTS (cases); i++)
+    {
+        gchar *string, *res;
+
+        string = g_strdup (cases[i]);
+        res = Scan_Process_Fields_All_Uppercase (string);
+        check_string (res, results[i]);
+
+        g_free (string);
+        g_free (res);
+    }
+}
+
+static void
+scan_all_lowercase (void)
+{
+    gsize i;
+    gchar *cases[] = { "STRINGAB", "tRßiNgAb", "SMALLß", "AAAԵՒBB", "ʼN",
+                       "PΪ́E", "ἮΙ", "Ϋ́E" };
+    gchar *results[] = { "stringab", "trßingab", "smallß", "aaaեւbb", "ʼn",
+                         "pΐe", "ἦι", "ΰe" };
+
+    for (i = 0; i < G_N_ELEMENTS (cases); i++)
+    {
+        gchar *string, *res;
+
+        string = g_strdup (cases[i]);
+        res = Scan_Process_Fields_All_Downcase (string);
+        check_string (res, results[i]);
+
+        g_free (string);
+        g_free (res);
+    }
+}
+
+static void
+scan_letter_uppercase (void)
+{
+    gsize i;
+    gchar *cases[] = { "st ri ng in ab", "tr ßi ng ab", "ßr ßi ng ab",
+                       "ßr i ng ab", "ßr mi ng ab", "I I ng ab", "ß I ng ab",
+                       "ßi ng ab" };
+    gchar *results[] = { "St ri ng in ab", "Tr ßi ng ab", "SSr ßi ng ab",
+                         "SSr I ng ab", "SSr mi ng ab", "I I ng ab",
+                         "SS I ng ab", "SSi ng ab" };
+
+    for (i = 0; i < G_N_ELEMENTS (cases); i++)
+    {
+        gchar *string, *res;
+
+        string = g_strdup (cases [i]);
+        res = Scan_Process_Fields_Letter_Uppercase (string);
+        check_string (res, results [i]);
+
+        g_free (string);
+        g_free (res);
+    }
+}
+
+int
+main (int argc, char** argv)
+{
+    g_test_init (&argc, &argv, NULL);
+
+    g_test_add_func ("/scan/underscore-to-space", scan_underscore_to_space);
+    g_test_add_func ("/scan/remove-space", scan_remove_space);
+    g_test_add_func ("/scan/P20-to-space", scan_p20_to_space);
+    g_test_add_func ("/scan/insert-space", scan_insert_space);
+    g_test_add_func ("/scan/all-uppercase", scan_all_uppercase);
+    g_test_add_func ("/scan/all-lowercase", scan_all_lowercase);
+    g_test_add_func ("/scan/letter-uppercase", scan_letter_uppercase);
+
+    return g_test_run ();
+}


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]