[shotwell/shotwell-0.24] Guess convert when UTF-8 validation fails
- From: Jens Georg <jensgeorg src gnome org>
- To: commits-list gnome org
- Cc:
- Subject: [shotwell/shotwell-0.24] Guess convert when UTF-8 validation fails
- Date: Wed, 4 Jan 2017 18:26:14 +0000 (UTC)
commit f55dd52e117883d8f539f0eb985993b6e2f51322
Author: Jens Georg <mail jensge org>
Date: Thu Dec 15 08:57:01 2016 +0100
Guess convert when UTF-8 validation fails
If validation of a text is requested and it fails, try to convert from
the current locale's charset if that is not UTF-8; otherwise fall back
to windows-1252 and then iso-8859-1.
Signed-off-by: Jens Georg <mail jensge org>
https://bugzilla.gnome.org/show_bug.cgi?id=718107
src/util/string.vala | 52 +++++++++++++++++++++++++++++++++++++++++++++----
1 files changed, 47 insertions(+), 5 deletions(-)
---
diff --git a/src/util/string.vala b/src/util/string.vala
index 7331780..bf7e605 100644
--- a/src/util/string.vala
+++ b/src/util/string.vala
@@ -91,15 +91,57 @@ public enum PrepareInputTextOptions {
DEFAULT = EMPTY_IS_NULL | VALIDATE | INVALID_IS_NULL | STRIP_CRLF | STRIP | NORMALIZE;
}
+private string? guess_convert(string text) { // Best-effort conversion of text that failed UTF-8 validation; returns the converted string, or null if no attempt consumed the whole input.
+ string? output = null;
+ size_t bytes_read = 0; // number of input bytes consumed by the most recent conversion attempt
+ unowned string charset = null; // source charset name; only used in the debug messages below
+ debug ("CONVERT: Text did not validate as UTF-8, trying conversion");
+
+ // Try with locale. GLib.get_charset() returns true when the locale charset is UTF-8, so this branch only runs for non-UTF-8 locales.
+ if (!GLib.get_charset(out charset)) {
+ output = text.locale_to_utf8(text.length, out bytes_read, null, null); // bytes_written and error are not requested
+ if (bytes_read == text.length) { // accept the result only if every input byte was consumed
+ debug ("CONVERT: Locale is not UTF-8, convert from %s", charset);
+ return output;
+ }
+ }
+ // Locale attempt was skipped or did not consume the whole input: try WINDOWS-1252 next.
+ try {
+ output = GLib.convert (text, text.length, "UTF-8", "WINDOWS-1252", out bytes_read);
+ charset = "WINDOWS-1252";
+ } catch (ConvertError error) { // ISO-8859-1 is tried only for NO_CONVERSION; any other ConvertError gets no further attempt
+ if (error is ConvertError.NO_CONVERSION) {
+ try {
+ output = GLib.convert (text, text.length, "UTF-8", "ISO-8859-1", out bytes_read);
+ charset = "ISO-8859-1";
+ } catch (Error error) { /* do nothing */ }
+ }
+ }
+ // bytes_read now reflects the last attempt made above; a full read is treated as a successful guess.
+ if (bytes_read == text.length) {
+ debug ("CONVERT: Guessed conversion from %s", charset);
+
+ return output;
+ }
+ // No attempt consumed the whole input.
+ return null;
+}
+
public string? prepare_input_text(string? text, PrepareInputTextOptions options, int dest_length) {
if (text == null)
return null;
- if ((options & PrepareInputTextOptions.VALIDATE) != 0 && !text.validate())
- return (options & PrepareInputTextOptions.INVALID_IS_NULL) != 0 ? null : "";
-
- string prepped = text;
-
+ string? prepped = text;
+ if (PrepareInputTextOptions.VALIDATE in options) {
+ if (!text.validate()) {
+ prepped = guess_convert (text);
+
+ if (prepped == null) {
+ return (options & PrepareInputTextOptions.INVALID_IS_NULL) != 0 ? null : "";
+ }
+ }
+ }
+
// Using composed form rather than GLib's default (decomposed) as NFC is the preferred form in
// Linux and WWW. More importantly, Pango seems to have serious problems displaying decomposed
// forms of Korean language glyphs (and perhaps others). See:
[
Date Prev][
Date Next] [
Thread Prev][
Thread Next]
[
Thread Index]
[
Date Index]
[
Author Index]