[cantarell-fonts] Add script to test font coverage against character sets.
- From: Nikolaus Waxweiler <nwaxweiler src gnome org>
- To: commits-list gnome org
- Cc:
- Subject: [cantarell-fonts] Add script to test font coverage against character sets.
- Date: Fri, 15 Jan 2016 16:37:41 +0000 (UTC)
commit 091805588902f8f14c08df3ea84c245d1c1c9d2c
Author: Nikolaus Waxweiler <madigens gmail com>
Date: Fri Jan 15 17:36:47 2016 +0100
Add script to test font coverage against character sets.
scripts/Makefile.am | 3 +-
scripts/test-coverage.py | 64 ++++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 66 insertions(+), 1 deletions(-)
---
diff --git a/scripts/Makefile.am b/scripts/Makefile.am
index 332ad03..cfb5dd8 100644
--- a/scripts/Makefile.am
+++ b/scripts/Makefile.am
@@ -1,5 +1,6 @@
noinst_DATA = \
- generate.sh
+ generate.sh \
+ test-coverage.py
EXTRA_DIST = $(noinst_DATA)
diff --git a/scripts/test-coverage.py b/scripts/test-coverage.py
new file mode 100755
index 0000000..e4a25c8
--- /dev/null
+++ b/scripts/test-coverage.py
@@ -0,0 +1,64 @@
+#!/usr/bin/env python3
+#
+# Test the Unicode coverage of one or more font files against character set
+# lists fetched over HTTPS: Adobe Latin 4 (precomposed) and Adobe Cyrillic 2.
+#
+# https://adobe-type-tools.github.io/adobe-latin-charsets/
+# https://adobe-type-tools.github.io/adobe-cyrillic-charsets/
+
+import os
+import argparse
+from fontTools.ttLib import TTFont
+from urllib.request import urlopen
+
+parser = argparse.ArgumentParser()
+parser.add_argument("fonts", nargs='+',
+ help="One or more font files (.otf/.ttf) you want to test for coverage.")
+args = parser.parse_args()
+
+charset_list = [
+"https://adobe-type-tools.github.io/adobe-latin-charsets/adobe-latin-4-precomposed.txt",
+"https://adobe-type-tools.github.io/adobe-cyrillic-charsets/adobe-cyrillic-2.txt"
+]
+
+for charset in charset_list:
+ charset_table = {}
+
+ # Download the charset file and parse it into charset_table (code point -> name).
+ with urlopen(charset) as c:
+ # Split the decoded text into lines by hand; [1:] drops the first (header) line.
+ raw_table = c.read().decode().split("\n")[1:]
+
+ # Only two tab-separated fields are used: field 0, the hex code point, and
+ # field 3 (the fourth column), the plain English description of that code
+ # point. The hex string is converted to an int, e.g. "20AE" becomes
+ # 8366.
+ for raw_line in raw_table:
+ if raw_line: # Skip empty lines.
+ sliced_line = raw_line.split("\t")
+ charset_table[int(sliced_line[0], 16)] = sliced_line[3] # { 8366: "TUGRIK SIGN", ... }
+
+ charset_set = frozenset(charset_table.keys())
+
+ # Compare every font given on the command line against this charset.
+ for font_file in args.fonts:
+ font = TTFont(font_file)
+
+ # A font can contain multiple cmap subtables that map Unicode code points
+ # (U+0020) to glyph names ("space"); collect the code points of all Unicode
+ # subtables and flatten them into one set of unique values.
+ codepoints = [[y[0] for y in x.cmap.items()]
+ for x in font['cmap'].tables if x.isUnicode()]
+ codepoints_set = frozenset([item for sublist in codepoints
+ for item in sublist])
+ missing_codepoints = charset_set.difference(codepoints_set)
+
+ if missing_codepoints:
+ font_filename = os.path.basename(font_file)
+ charset_filename = charset.rpartition("/")[-1]
+ print("\n" + font_filename + " is missing from " + charset_filename + ":")
+
+ for m in missing_codepoints: # NOTE(review): frozenset iteration, so output is not sorted by code point
+ print("U+" + format(m, "04X") + " " + charset_table[m])
+
+ font.close()
[Date Prev][Date Next] [Thread Prev][Thread Next] [Thread Index] [Date Index] [Author Index]