[glib/glib-2-54] glib-mkenums: best effort attempt on non-utf8 encoded files.
- From: Philip Withnall <pwithnall src gnome org>
- To: commits-list gnome org
- Cc:
- Subject: [glib/glib-2-54] glib-mkenums: best effort attempt on non-utf8 encoded files.
- Date: Wed, 13 Dec 2017 13:44:41 +0000 (UTC)
commit 2fc3948ba777d650833fc5ae3a9ee5f0ddb4e173
Author: Patrick Welche <prlw1 cam ac uk>
Date: Mon Oct 23 13:59:58 2017 +0100
glib-mkenums: best effort attempt on non-utf8 encoded files.
Some source files aren't valid UTF-8; they contain, for example,
ISO-8859-1 accented characters in authors' names.
To keep things working, replace invalid data with a '?' replacement
character and print a warning.
Based on a patch from Christoph Reiter in
https://bugzilla.gnome.org/show_bug.cgi?id=785113#c20
gobject/glib-mkenums.in | 39 ++++++++++++++++++++++++++++-----------
1 files changed, 28 insertions(+), 11 deletions(-)
---
diff --git a/gobject/glib-mkenums.in b/gobject/glib-mkenums.in
index e8124b8..d551cdc 100755
--- a/gobject/glib-mkenums.in
+++ b/gobject/glib-mkenums.in
@@ -26,14 +26,6 @@ the GNU General Public License which can be found in the
GLib source package. Sources, examples and contact
information are available at http://www.gtk.org'''
-# Python 2 defaults to ASCII in case stdout is redirected.
-# This should make it match Python 3, which uses the locale encoding.
-if sys.stdout.encoding is None:
- output_stream = codecs.getwriter(
- locale.getpreferredencoding())(sys.stdout)
-else:
- output_stream = sys.stdout
-
# pylint: disable=too-few-public-methods
class Color:
'''ANSI Terminal colors'''
@@ -81,6 +73,29 @@ def write_output(output):
global output_stream
print(output, file=output_stream)
+
+# Python 2 defaults to ASCII in case stdout is redirected.
+# This should make it match Python 3, which uses the locale encoding.
+if sys.stdout.encoding is None:
+ output_stream = codecs.getwriter(
+ locale.getpreferredencoding())(sys.stdout)
+else:
+ output_stream = sys.stdout
+
+
+# Some source files aren't UTF-8 and the old perl version didn't care.
+# Replace invalid data with a replacement character to keep things working.
+# https://bugzilla.gnome.org/show_bug.cgi?id=785113#c20
+def replace_and_warn(err):
+ # 7 characters of context either side of the offending character
+ print_warning('UnicodeWarning: {} at {} ({})'.format(
+ err.reason, err.start,
+ err.object[err.start - 7:err.end + 7]))
+ return ('?', err.end)
+
+codecs.register_error('replace_and_warn', replace_and_warn)
+
+
# glib-mkenums.py
# Information about the current enumeration
flags = None # Is enumeration a bitmask?
@@ -157,7 +172,8 @@ def parse_entries(file, file_name):
m = re.match(r'\#include\s*<([^>]*)>', line)
if m:
newfilename = os.path.join("..", m.group(1))
- newfile = io.open(newfilename, encoding="utf-8")
+ newfile = io.open(newfilename, encoding="utf-8",
+ errors="replace_and_warn")
if not parse_entries(newfile, newfilename):
return False
@@ -253,7 +269,7 @@ def read_template_file(file):
}
in_ = 'junk'
- ifile = io.open(file, encoding="utf-8")
+ ifile = io.open(file, encoding="utf-8", errors="replace_and_warn")
for line in ifile:
m = re.match(r'\/\*\*\*\s+(BEGIN|END)\s+([\w-]+)\s+\*\*\*\/', line)
if m:
@@ -413,7 +429,8 @@ def process_file(curfilename):
firstenum = True
try:
- curfile = io.open(curfilename, encoding="utf-8")
+ curfile = io.open(curfilename, encoding="utf-8",
+ errors="replace_and_warn")
except IOError as e:
if e.errno == errno.ENOENT:
print_warning('No file "{}" found.'.format(curfilename))
[Date Prev][Date Next] [Thread Prev][Thread Next]
[Thread Index] [Date Index] [Author Index]