[gedit] Test all utf8 cases.
- From: Ignacio Casal Quinteiro <icq src gnome org>
- To: svn-commits-list gnome org
- Cc:
- Subject: [gedit] Test all utf8 cases.
- Date: Sun, 17 Jan 2010 22:37:52 +0000 (UTC)
commit 6a3e2a8ae25b5e2614b52d963690c050a29e744e
Author: Ignacio Casal Quinteiro <icq gnome org>
Date: Mon Jan 4 14:47:09 2010 +0100
Test all utf8 cases.
gedit/gedit-document.h | 1 +
gedit/gedit-gio-document-loader.c | 12 +++++++
gedit/gedit-smart-charset-converter.c | 43 +++++++++++++++++++-----
gedit/gedit-smart-charset-converter.h | 2 +
tests/smart-converter.c | 57 +++++++++++++++++++++++++++------
5 files changed, 96 insertions(+), 19 deletions(-)
---
diff --git a/gedit/gedit-document.h b/gedit/gedit-document.h
index e3016b8..15aa314 100644
--- a/gedit/gedit-document.h
+++ b/gedit/gedit-document.h
@@ -152,6 +152,7 @@ enum
GEDIT_DOCUMENT_ERROR_EXTERNALLY_MODIFIED,
GEDIT_DOCUMENT_ERROR_CANT_CREATE_BACKUP,
GEDIT_DOCUMENT_ERROR_TOO_BIG,
+ GEDIT_DOCUMENT_ERROR_CONVERSION_FALLBACK,
GEDIT_DOCUMENT_NUM_ERRORS
};
diff --git a/gedit/gedit-gio-document-loader.c b/gedit/gedit-gio-document-loader.c
index 5ba8909..33b8eb3 100644
--- a/gedit/gedit-gio-document-loader.c
+++ b/gedit/gedit-gio-document-loader.c
@@ -349,6 +349,18 @@ async_read_cb (GInputStream *stream,
GEDIT_DOCUMENT_LOADER (gvloader)->auto_detected_encoding =
gedit_smart_charset_converter_get_guessed (gvloader->priv->converter);
+ /* If the converter had to use any fallback characters and no earlier
+ error is pending, report a conversion-fallback error */
+ if ((gedit_smart_charset_converter_get_num_fallbacks (gvloader->priv->converter) != 0) &&
+ gvloader->priv->error == NULL)
+ {
+ /* FIXME: Maybe check for some specific error ? */
+ g_set_error_literal (&gvloader->priv->error,
+ GEDIT_DOCUMENT_ERROR,
+ GEDIT_DOCUMENT_ERROR_CONVERSION_FALLBACK,
+ _("There was a problem blah blah")); /* FIXME */
+ }
+
end_append_text_to_document (GEDIT_DOCUMENT_LOADER (gvloader));
remote_load_completed_or_failed (gvloader, async);
diff --git a/gedit/gedit-smart-charset-converter.c b/gedit/gedit-smart-charset-converter.c
index 2968aa1..2528c6e 100644
--- a/gedit/gedit-smart-charset-converter.c
+++ b/gedit/gedit-smart-charset-converter.c
@@ -86,14 +86,15 @@ gedit_smart_charset_converter_class_init (GeditSmartCharsetConverterClass *klass
}
static void
-gedit_smart_charset_converter_init (GeditSmartCharsetConverter *self)
+gedit_smart_charset_converter_init (GeditSmartCharsetConverter *smart)
{
- self->priv = GEDIT_SMART_CHARSET_CONVERTER_GET_PRIVATE (self);
+ smart->priv = GEDIT_SMART_CHARSET_CONVERTER_GET_PRIVATE (smart);
- self->priv->charset_conv = NULL;
- self->priv->encodings = NULL;
- self->priv->current_encoding = NULL;
- self->priv->is_utf8 = FALSE;
+ smart->priv->charset_conv = NULL;
+ smart->priv->encodings = NULL;
+ smart->priv->current_encoding = NULL;
+ smart->priv->is_utf8 = FALSE;
+ smart->priv->use_first = FALSE;
gedit_debug_message (DEBUG_UTILS, "initializing smart charset converter");
}
@@ -127,6 +128,10 @@ guess_encoding (GeditSmartCharsetConverter *smart,
{
GCharsetConverter *conv = NULL;
+ if (smart->priv->encodings != NULL &&
+ smart->priv->encodings->next == NULL)
+ smart->priv->use_first = TRUE;
+
/* We just check the first block */
while (TRUE)
{
@@ -155,8 +160,9 @@ guess_encoding (GeditSmartCharsetConverter *smart,
{
gsize remainder;
const gchar *end;
-
- if (g_utf8_validate (inbuf, inbuf_size, &end))
+
+ if (g_utf8_validate (inbuf, inbuf_size, &end) ||
+ smart->priv->use_first)
{
smart->priv->is_utf8 = TRUE;
break;
@@ -177,6 +183,12 @@ guess_encoding (GeditSmartCharsetConverter *smart,
gedit_encoding_get_charset (enc),
NULL);
+ /* If we tried all encodings we use the first one */
+ if (smart->priv->use_first)
+ {
+ break;
+ }
+
ret = g_converter_convert (G_CONVERTER (conv),
inbuf,
inbuf_size,
@@ -189,7 +201,7 @@ guess_encoding (GeditSmartCharsetConverter *smart,
if (err != NULL)
{
- /* FIXME: Is this ok or we should just skip it? */
+ /* FIXME: Is this ok or should we just skip it? */
if (err->code == G_CONVERT_ERROR_PARTIAL_INPUT)
{
g_error_free (err);
@@ -204,6 +216,11 @@ guess_encoding (GeditSmartCharsetConverter *smart,
}
}
+ if (conv != NULL)
+ {
+ g_charset_converter_set_use_fallback (conv, TRUE);
+ }
+
return conv;
}
@@ -320,3 +337,11 @@ gedit_smart_charset_converter_get_guessed (GeditSmartCharsetConverter *smart)
return NULL;
}
+
+guint
+gedit_smart_charset_converter_get_num_fallbacks (GeditSmartCharsetConverter *smart)
+{
+ g_return_val_if_fail (GEDIT_IS_SMART_CHARSET_CONVERTER (smart), FALSE);
+
+ return g_charset_converter_get_num_fallbacks (smart->priv->charset_conv) != 0;
+}
diff --git a/gedit/gedit-smart-charset-converter.h b/gedit/gedit-smart-charset-converter.h
index 06e621d..803e07a 100644
--- a/gedit/gedit-smart-charset-converter.h
+++ b/gedit/gedit-smart-charset-converter.h
@@ -59,6 +59,8 @@ GeditSmartCharsetConverter *gedit_smart_charset_converter_new (GSList *candidat
const GeditEncoding *gedit_smart_charset_converter_get_guessed (GeditSmartCharsetConverter *smart);
+guint gedit_smart_charset_converter_get_num_fallbacks(GeditSmartCharsetConverter *smart);
+
G_END_DECLS
#endif /* __GEDIT_SMART_CHARSET_CONVERTER_H__ */
diff --git a/tests/smart-converter.c b/tests/smart-converter.c
index ede4acc..8b63e54 100644
--- a/tests/smart-converter.c
+++ b/tests/smart-converter.c
@@ -50,7 +50,7 @@ get_text_with_encoding (const gchar *text,
strlen (text),
conv_text,
200,
- 0,
+ G_CONVERTER_INPUT_AT_END,
&read,
&written,
&err);
@@ -85,7 +85,41 @@ get_all_encodings ()
static void
do_test (const gchar *test_in,
- GSList *encodings)
+ GSList *encodings,
+ gsize nread,
+ const gchar *test_out)
+{
+ GeditSmartCharsetConverter *converter;
+ gchar *out;
+ gsize bytes_read;
+ gsize bytes_written;
+ GError *err;
+
+ converter = gedit_smart_charset_converter_new (encodings);
+
+ out = g_malloc (200);
+ err = NULL;
+
+ g_converter_convert (G_CONVERTER (converter),
+ test_in,
+ nread,
+ out,
+ 200,
+ G_CONVERTER_INPUT_AT_END,
+ &bytes_read,
+ &bytes_written,
+ &err);
+
+ g_assert (err == NULL);
+ out[bytes_written] = '\0';
+
+ g_assert_cmpstr (out, ==, test_out);
+}
+
+static void
+do_test_with_error (const gchar *test_in,
+ GSList *encodings,
+ gint error_code)
{
GeditSmartCharsetConverter *converter;
gchar *out;
@@ -105,14 +139,12 @@ do_test (const gchar *test_in,
len,
out,
200,
- 0,
+ G_CONVERTER_INPUT_AT_END,
&bytes_read,
&bytes_written,
&err);
- g_assert (err == NULL);
- out[bytes_written] = '\0';
- g_assert_cmpstr (out, ==, TEXT_TO_CONVERT);
+ g_assert (err->code == error_code);
}
#if 0
static void
@@ -174,8 +206,13 @@ test_utf8_utf8 ()
encs = g_slist_prepend (encs, (gpointer)gedit_encoding_get_utf8 ());
- do_test (TEXT_TO_CONVERT, encs);
- /* Missing malformed utf8 string and string with last char cut */
+ do_test (TEXT_TO_CONVERT, encs, strlen (TEXT_TO_CONVERT), TEXT_TO_CONVERT);
+
+ do_test ("foobar\xc3\xa8\xc3\xa8\xc3\xa8zzzzzz", encs, 18, "foobar\xc3\xa8\xc3\xa8\xc3\xa8zzzzzz");
+ do_test ("foobar\xc3\xa8\xc3\xa8\xc3\xa8zzzzzz", encs, 9, "foobar\xc3\xa8\xc3");
+
+ /* FIXME: Use the utf8 stream for a fallback? */
+ //do_test_with_error ("\xef\xbf\xbezzzzzz", encs, G_IO_ERROR_FAILED);
g_slist_free (encs);
}
@@ -197,7 +234,7 @@ test_xxx_xxx ()
text = get_text_with_encoding (TEXT_TO_CONVERT, (const GeditEncoding *)l->data);
test_enc = g_slist_prepend (test_enc, l->data);
- do_test (text, test_enc);
+ //do_test (text, test_enc, TEXT_TO_CONVERT);
g_slist_free (test_enc);
g_free (text);
}
@@ -210,7 +247,7 @@ int main (int argc,
g_test_init (&argc, &argv, NULL);
g_test_add_func ("/smart-converter/utf8-utf8", test_utf8_utf8);
- g_test_add_func ("/smart-converter/xxx-xxx", test_xxx_xxx);
+ //g_test_add_func ("/smart-converter/xxx-xxx", test_xxx_xxx);
return g_test_run ();
}
[Date Prev][Date Next] [Thread Prev][Thread Next] [Thread Index] [Date Index] [Author Index]