[pango/log-attr-tweaks: 16/19] Add api to validate log attrs




commit d00bdb0c1fe95a5cb2caf1778ad9139a4849c742
Author: Matthias Clasen <mclasen redhat com>
Date:   Sat Aug 21 15:41:39 2021 -0400

    Add api to validate log attrs
    
    This is based on code that previously lived in
    tests/testboundaries.c.
    
    Fixes: #129

 pango/break.c       | 472 +++++++++++++++++++++++++++++++++++++++++++++++++++-
 pango/pango-break.h |  22 +++
 2 files changed, 493 insertions(+), 1 deletion(-)
---
diff --git a/pango/break.c b/pango/break.c
index 48465ec2..767f6264 100644
--- a/pango/break.c
+++ b/pango/break.c
@@ -1711,6 +1711,410 @@ tailor_break (const char    *text,
   return res;
 }
 
+/* }}} */
+/* {{{ Validation */
+
+G_DEFINE_QUARK(pango-validate-error-quark, pango_validate_error)
+
+/* Callback type used by log_attr_foreach().
+ *
+ * It is called with the index of a character, the character itself and
+ * its neighbours (0 where there is none), the GUnicodeType of each, and
+ * pointers to the PangoLogAttr at that index and at the adjacent indices
+ * (NULL where none is passed). Returning FALSE stops the iteration; the
+ * FALSE is handed back to the caller of log_attr_foreach() together with
+ * whatever the callback stored in @error.
+ */
+typedef gboolean (* CharForeachFunc) (int                  pos,
+                                      gunichar             wc,
+                                      gunichar             prev_wc,
+                                      gunichar             next_wc,
+                                      GUnicodeType         type,
+                                      GUnicodeType         prev_type,
+                                      GUnicodeType         next_type,
+                                      const PangoLogAttr  *attr,
+                                      const PangoLogAttr  *prev_attr,
+                                      const PangoLogAttr  *next_attr,
+                                      GError             **error);
+
+/*
+ * log_attr_foreach:
+ * @text: UTF-8 text to walk
+ * @length: length of @text in bytes, or a negative value if @text is
+ *   nul-terminated
+ * @attrs: attribute array, indexed by character position
+ * @attrs_len: number of entries in @attrs
+ * @func: callback to invoke for every character
+ * @error: return location for an error, handed through to @func
+ *
+ * Calls @func once per character of @text, in order. The walk stops at
+ * the first nul character or, for a non-negative @length, once @length
+ * bytes have been consumed.
+ *
+ * @attrs must hold at least one entry per character. The entry that
+ * follows the current character is passed as next_attr whenever
+ * @attrs_len says it exists, which includes the end-of-text position
+ * after the last character.
+ *
+ * Returns: %FALSE as soon as @func returns %FALSE, %TRUE otherwise
+ */
+static gboolean
+log_attr_foreach (const char          *text,
+                  int                  length,
+                  const PangoLogAttr  *attrs,
+                  int                  attrs_len,
+                  CharForeachFunc      func,
+                  GError             **error)
+{
+  const gchar *next = text;
+  /* With a negative length there is no end pointer to compare against;
+   * forming text + length would point in front of the buffer.
+   */
+  const gchar *end = length < 0 ? NULL : text + length;
+  gint i = 0;
+  gunichar prev_wc = 0;
+  gunichar next_wc;
+  GUnicodeType prev_type = (GUnicodeType) -1;
+  GUnicodeType next_type;
+
+  if (next == end)
+    return TRUE;
+
+  next_wc = g_utf8_get_char (next);
+  next_type = g_unichar_type (next_wc);
+
+  while (next_wc != 0)
+    {
+      GUnicodeType type = next_type;
+      gunichar wc = next_wc;
+
+      next = g_utf8_next_char (next);
+
+      if (end != NULL && next >= end)
+        next_wc = 0;
+      else
+        next_wc = g_utf8_get_char (next);
+
+      /* Use the same "no character" marker as prev_type at the start,
+       * instead of leaving the type of the current character in place.
+       */
+      if (next_wc != 0)
+        next_type = g_unichar_type (next_wc);
+      else
+        next_type = (GUnicodeType) -1;
+
+      if (!func (i,
+                 wc, prev_wc, next_wc,
+                 type, prev_type, next_type,
+                 &attrs[i],
+                 i != 0 ? &attrs[i - 1] : NULL,
+                 i + 1 < attrs_len ? &attrs[i + 1] : NULL,
+                 error))
+        return FALSE;
+
+      prev_type = type;
+      prev_wc = wc;
+      i++;
+    }
+
+  return TRUE;
+}
+
+/*
+ * check_line_char:
+ * @pos: index of the character, used in error messages
+ * @wc: the character at @pos
+ * @prev_wc: the preceding character, or 0 if @wc is the first one
+ * @next_wc: the following character, or 0 if @wc is the last one
+ * @type: unused
+ * @prev_type: unused
+ * @next_type: unused
+ * @attr: attributes at @pos; its is_line_break flag is treated as
+ *   "a line may be broken in front of @wc"
+ * @prev_attr: attributes at @pos - 1, or %NULL for the first character
+ * @next_attr: attributes at @pos + 1, or %NULL if not provided
+ * @error: return location for a %PANGO_VALIDATE_ERROR_BREAK error
+ *
+ * Checks the line break flags around one character against a subset of
+ * the UAX #14 rules named in the error messages.
+ *
+ * Returns: %TRUE if no violation was found, %FALSE with @error set
+ *   otherwise
+ */
+static gboolean
+check_line_char (int                  pos,
+                 gunichar             wc,
+                 gunichar             prev_wc,
+                 gunichar             next_wc,
+                 GUnicodeType         type,
+                 GUnicodeType         prev_type,
+                 GUnicodeType         next_type,
+                 const PangoLogAttr  *attr,
+                 const PangoLogAttr  *prev_attr,
+                 const PangoLogAttr  *next_attr,
+                 GError             **error)
+{
+  GUnicodeBreakType break_type;
+  GUnicodeBreakType prev_break_type;
+  gboolean inside_crlf;
+  gboolean after_hard_break;
+
+  break_type = g_unichar_break_type (wc);
+  if (prev_wc)
+    prev_break_type = g_unichar_break_type (prev_wc);
+  else
+    prev_break_type = G_UNICODE_BREAK_UNKNOWN;
+
+  /* \r\n forms a single hard break, so the \r does not count as a hard
+   * break of its own when a \n follows it.
+   */
+  inside_crlf = (wc == '\n' && prev_wc == '\r');
+
+  after_hard_break = !inside_crlf &&
+                     (prev_break_type == G_UNICODE_BREAK_MANDATORY ||
+                      prev_break_type == G_UNICODE_BREAK_CARRIAGE_RETURN ||
+                      prev_break_type == G_UNICODE_BREAK_LINE_FEED ||
+                      prev_break_type == G_UNICODE_BREAK_NEXT_LINE);
+
+  if (inside_crlf)
+    {
+      if (attr->is_line_break)
+        {
+          g_set_error (error,
+                       PANGO_VALIDATE_ERROR, PANGO_VALIDATE_ERROR_BREAK,
+                       "char %d: Do not break between \\r and \\n", pos);
+          return FALSE;
+        }
+    }
+
+  if (prev_wc == 0)
+    {
+      if (attr->is_line_break)
+        {
+          g_set_error (error,
+                       PANGO_VALIDATE_ERROR, PANGO_VALIDATE_ERROR_BREAK,
+                       "char %d: Do not break before first char (LB2)", pos);
+          return FALSE;
+        }
+    }
+
+  /* LB3 is about the position after the last char. That is @next_attr,
+   * not @attr, which describes the break in front of the last char.
+   * The check is skipped when the caller does not supply @next_attr.
+   */
+  if (next_wc == 0 && next_attr != NULL)
+    {
+      if (!next_attr->is_line_break)
+        {
+          g_set_error (error,
+                       PANGO_VALIDATE_ERROR, PANGO_VALIDATE_ERROR_BREAK,
+                       "char %d: Always break after the last char (LB3)", pos);
+          return FALSE;
+        }
+    }
+
+  if (after_hard_break)
+    {
+      if (!attr->is_mandatory_break)
+        {
+          g_set_error (error,
+                       PANGO_VALIDATE_ERROR, PANGO_VALIDATE_ERROR_BREAK,
+                       "char %d: Always break after hard line breaks (LB4, LB5)", pos);
+          return FALSE;
+        }
+    }
+
+  /* A hard break that directly follows another hard break (e.g. an empty
+   * line) must have a break in front of it, so LB6 only applies when
+   * the previous char did not force one.
+   */
+  if (!after_hard_break &&
+      (break_type == G_UNICODE_BREAK_MANDATORY ||
+       break_type == G_UNICODE_BREAK_CARRIAGE_RETURN ||
+       break_type == G_UNICODE_BREAK_LINE_FEED ||
+       break_type == G_UNICODE_BREAK_NEXT_LINE))
+    {
+      if (attr->is_line_break)
+        {
+          g_set_error (error,
+                       PANGO_VALIDATE_ERROR, PANGO_VALIDATE_ERROR_BREAK,
+                       "char %d: Do not break before hard line breaks (LB6)", pos);
+          return FALSE;
+        }
+    }
+
+  if (break_type == G_UNICODE_BREAK_SPACE ||
+      break_type == G_UNICODE_BREAK_ZERO_WIDTH_SPACE)
+    {
+      if (attr->is_line_break && prev_attr != NULL &&
+          !attr->is_mandatory_break &&
+          !(next_wc && g_unichar_break_type (next_wc) == G_UNICODE_BREAK_COMBINING_MARK))
+        {
+          g_set_error (error,
+                       PANGO_VALIDATE_ERROR, PANGO_VALIDATE_ERROR_BREAK,
+                       "char %d: Can't break before a space unless mandatory precedes or combining mark follows (LB7)", pos);
+          return FALSE;
+        }
+    }
+
+  /* TODO: check LB8 */
+
+  /* "After ZWJ" means the ZWJ is the previous char */
+  if (prev_break_type == G_UNICODE_BREAK_ZERO_WIDTH_JOINER)
+    {
+      if (attr->is_line_break)
+        {
+          g_set_error (error,
+                       PANGO_VALIDATE_ERROR, PANGO_VALIDATE_ERROR_BREAK,
+                       "char %d: Do not break after ZWJ (LB8a)", pos);
+          return FALSE;
+        }
+    }
+
+  /* TODO: check LB9 */
+
+  /* A WJ right after a hard break still gets the mandatory break */
+  if (!after_hard_break &&
+      (prev_break_type == G_UNICODE_BREAK_WORD_JOINER ||
+       break_type == G_UNICODE_BREAK_WORD_JOINER))
+    {
+      if (attr->is_line_break)
+        {
+          g_set_error (error,
+                       PANGO_VALIDATE_ERROR, PANGO_VALIDATE_ERROR_BREAK,
+                       "char %d: Do not break before or after WJ (LB11)", pos);
+          return FALSE;
+        }
+    }
+
+  if (prev_break_type == G_UNICODE_BREAK_NON_BREAKING_GLUE)
+    {
+      /* Only an actual break after GL is an error, not GL itself */
+      if (attr->is_line_break)
+        {
+          g_set_error (error,
+                       PANGO_VALIDATE_ERROR, PANGO_VALIDATE_ERROR_BREAK,
+                       "char %d: Do not break after GL (LB12)", pos);
+          return FALSE;
+        }
+    }
+
+  if (break_type == G_UNICODE_BREAK_OPEN_PUNCTUATION &&
+      prev_break_type == G_UNICODE_BREAK_OPEN_PUNCTUATION &&
+      attr->is_line_break && !attr->is_mandatory_break)
+    {
+      g_set_error (error,
+                   PANGO_VALIDATE_ERROR, PANGO_VALIDATE_ERROR_BREAK,
+                   "char %d: Can't break between two open punctuation chars", pos);
+      return FALSE;
+    }
+
+  if (break_type == G_UNICODE_BREAK_CLOSE_PUNCTUATION &&
+      prev_break_type == G_UNICODE_BREAK_CLOSE_PUNCTUATION &&
+      attr->is_line_break && !attr->is_mandatory_break)
+    {
+      g_set_error (error,
+                   PANGO_VALIDATE_ERROR, PANGO_VALIDATE_ERROR_BREAK,
+                   "char %d: Can't break between two close punctuation chars", pos);
+      return FALSE;
+    }
+
+  if (break_type == G_UNICODE_BREAK_QUOTATION &&
+      prev_break_type == G_UNICODE_BREAK_ALPHABETIC &&
+      attr->is_line_break && !attr->is_mandatory_break)
+    {
+      g_set_error (error,
+                   PANGO_VALIDATE_ERROR, PANGO_VALIDATE_ERROR_BREAK,
+                   "char %d: Can't break a letter-quotemark sequence", pos);
+      return FALSE;
+    }
+
+  /* internal consistency */
+
+  if (attr->is_mandatory_break && !attr->is_line_break)
+    {
+      g_set_error (error,
+                   PANGO_VALIDATE_ERROR, PANGO_VALIDATE_ERROR_BREAK,
+                   "char %d: Mandatory breaks must also be marked as regular breaks", pos);
+      return FALSE;
+    }
+
+  return TRUE;
+}
+
+/* Runs check_line_char() on every character of @text via
+ * log_attr_foreach().
+ *
+ * Returns TRUE if no character was rejected; otherwise FALSE, with
+ * @error as left by check_line_char().
+ */
+static gboolean
+check_line_invariants (const char          *text,
+                       int                  length,
+                       const PangoLogAttr  *log_attrs,
+                       int                  attrs_len,
+                       GError             **error)
+{
+  return log_attr_foreach (text, length,
+                           log_attrs, attrs_len,
+                           check_line_char, error);
+}
+
+/* Placeholder for grapheme boundary checks.
+ *
+ * No checks are implemented: all arguments are ignored, @error is never
+ * set and the result is always TRUE.
+ */
+static gboolean
+check_grapheme_invariants (const char          *text,
+                           int                  length,
+                           const PangoLogAttr  *log_attrs,
+                           int                  attrs_len,
+                           GError             **error)
+{
+  return TRUE;
+}
+
+/* Verifies that word starts and word ends alternate in @log_attrs.
+ *
+ * Walking the array from the front, a word end is only accepted while a
+ * word is open, and a word start only while none is. A position flagged
+ * as both start and end is accepted when no word is open and leaves
+ * none open. @text and @length are not used.
+ *
+ * Returns TRUE if the flags alternate; otherwise FALSE with a
+ * PANGO_VALIDATE_ERROR_WORD error stored in @error.
+ */
+static gboolean
+check_word_invariants (const char          *text,
+                       int                  length,
+                       const PangoLogAttr  *log_attrs,
+                       int                  attrs_len,
+                       GError             **error)
+{
+  gboolean in_word = FALSE;
+  int pos;
+
+  for (pos = 0; pos < attrs_len; pos++)
+    {
+      const PangoLogAttr *la = &log_attrs[pos];
+
+      if (in_word)
+        {
+          if (la->is_word_start)
+            {
+              g_set_error (error,
+                           PANGO_VALIDATE_ERROR, PANGO_VALIDATE_ERROR_WORD,
+                           "char %d: Unexpected word start", pos);
+              return FALSE;
+            }
+
+          if (la->is_word_end)
+            in_word = FALSE;
+
+          continue;
+        }
+
+      if (la->is_word_start)
+        {
+          /* start and end on the same position leave no word open */
+          in_word = !la->is_word_end;
+        }
+      else if (la->is_word_end)
+        {
+          g_set_error (error,
+                       PANGO_VALIDATE_ERROR, PANGO_VALIDATE_ERROR_WORD,
+                       "char %d: Unexpected word end", pos);
+          return FALSE;
+        }
+    }
+
+  return TRUE;
+}
+
+/* Verifies that sentence starts and sentence ends alternate in
+ * @log_attrs.
+ *
+ * Walking the array from the front, a sentence end is only accepted
+ * while a sentence is open, and a sentence start only while none is. A
+ * position flagged as both start and end is accepted when no sentence
+ * is open and leaves none open. @text and @length are not used.
+ *
+ * Returns TRUE if the flags alternate; otherwise FALSE with a
+ * PANGO_VALIDATE_ERROR_SENTENCE error stored in @error.
+ */
+static gboolean
+check_sentence_invariants (const char          *text,
+                           int                  length,
+                           const PangoLogAttr  *log_attrs,
+                           int                  attrs_len,
+                           GError             **error)
+{
+  gboolean in_sentence = FALSE;
+  int pos;
+
+  for (pos = 0; pos < attrs_len; pos++)
+    {
+      const PangoLogAttr *la = &log_attrs[pos];
+
+      if (in_sentence)
+        {
+          if (la->is_sentence_start)
+            {
+              g_set_error (error,
+                           PANGO_VALIDATE_ERROR, PANGO_VALIDATE_ERROR_SENTENCE,
+                           "char %d: Unexpected sentence start", pos);
+              return FALSE;
+            }
+
+          if (la->is_sentence_end)
+            in_sentence = FALSE;
+
+          continue;
+        }
+
+      if (la->is_sentence_start)
+        {
+          /* start and end on the same position leave no sentence open */
+          in_sentence = !la->is_sentence_end;
+        }
+      else if (la->is_sentence_end)
+        {
+          g_set_error (error,
+                       PANGO_VALIDATE_ERROR, PANGO_VALIDATE_ERROR_SENTENCE,
+                       "char %d: Unexpected sentence end", pos);
+          return FALSE;
+        }
+    }
+
+  return TRUE;
+}
+
+/* Verifies that every entry of @log_attrs flagged as an expandable
+ * space is also flagged as white space. @text and @length are not used.
+ *
+ * Returns TRUE if that holds; otherwise FALSE with a
+ * PANGO_VALIDATE_ERROR_SPACE error for the first offending entry.
+ */
+static gboolean
+check_space_invariants (const char          *text,
+                        int                  length,
+                        const PangoLogAttr  *log_attrs,
+                        int                  attrs_len,
+                        GError             **error)
+{
+  int pos;
+
+  for (pos = 0; pos < attrs_len; pos++)
+    {
+      const PangoLogAttr *la = &log_attrs[pos];
+
+      /* nothing to complain about unless expandable but not white */
+      if (!la->is_expandable_space || la->is_white)
+        continue;
+
+      g_set_error (error,
+                   PANGO_VALIDATE_ERROR, PANGO_VALIDATE_ERROR_SPACE,
+                   "char %d: Expandable space must be space", pos);
+      return FALSE;
+    }
+
+  return TRUE;
+}
+
 /* }}} */
 /* {{{ Public API */
 
@@ -1927,6 +2331,72 @@ pango_get_log_attrs (const char    *text,
                attrs_len);
 }
 
- /* }}} */
+/**
+ * pango_validate_log_attrs:
+ * @text: text to which @log_attrs belong
+ * @length: length of @text in bytes
+ * @log_attrs: `PangoLogAttr` array to validate
+ * @attrs_len: length of @log_attrs
+ * @error: return location for an error describing the first
+ *   violation that was found
+ *
+ * Apply sanity checks to @log_attrs.
+ *
+ * This function checks some conditions that Pango
+ * relies on. It is not guaranteed to be an exhaustive
+ * validity test. Currently, it checks that
+ *
+ * - @attrs_len is one more than the number of characters in @text
+ * - There's no break before the first char
+ * - Mandatory breaks are line breaks
+ * - Line breaks are char breaks (not enforced yet: the grapheme
+ *   check is a placeholder that accepts everything)
+ * - Lines aren't broken between \\r and \\n
+ * - Lines aren't broken before a space (unless the break
+ *   is mandatory, or the space precedes a combining mark)
+ * - Lines aren't broken between two open punctuation
+ *   or between two close punctuation characters
+ * - Lines aren't broken between a letter and a quotation mark
+ * - Word starts and ends alternate
+ * - Sentence starts and ends alternate
+ * - Expandable spaces are spaces
+ *
+ * The checks run in the order line, grapheme, word, sentence, space
+ * and stop at the first failure.
+ *
+ * Returns: %TRUE if @log_attrs are valid, %FALSE with @error set
+ *   otherwise
+ *
+ * Since: 1.50
+ */
+gboolean
+pango_validate_log_attrs (const char          *text,
+                          int                  length,
+                          const PangoLogAttr  *log_attrs,
+                          int                  attrs_len,
+                          GError             **error)
+{
+  int n_chars;
+
+  /* the array needs one entry per character plus one for the end position */
+  n_chars = g_utf8_strlen (text, length);
+  if (attrs_len != n_chars + 1)
+    {
+      g_set_error_literal (error,
+                           PANGO_VALIDATE_ERROR, PANGO_VALIDATE_ERROR_FAILED,
+                           "Array has wrong length");
+      return FALSE;
+    }
+
+  if (!check_line_invariants (text, length, log_attrs, attrs_len, error))
+    return FALSE;
+
+  if (!check_grapheme_invariants (text, length, log_attrs, attrs_len, error))
+    return FALSE;
+
+  if (!check_word_invariants (text, length, log_attrs, attrs_len, error))
+    return FALSE;
+
+  if (!check_sentence_invariants (text, length, log_attrs, attrs_len, error))
+    return FALSE;
+
+  if (!check_space_invariants (text, length, log_attrs, attrs_len, error))
+    return FALSE;
+
+  return TRUE;
+}
+
+/* }}} */
 
 /* vim:set foldmethod=marker expandtab: */
diff --git a/pango/pango-break.h b/pango/pango-break.h
index a8e6c5b9..91090c70 100644
--- a/pango/pango-break.h
+++ b/pango/pango-break.h
@@ -131,6 +131,28 @@ void                    pango_attr_break        (const char    *text,
                                                  PangoLogAttr  *attrs,
                                                  int            attrs_len);
 
+#define PANGO_VALIDATE_ERROR (pango_validate_error_quark ())
+
+/**
+ * PangoValidateError:
+ * @PANGO_VALIDATE_ERROR_FAILED: general failure, e.g. the attribute
+ *   array has the wrong length
+ * @PANGO_VALIDATE_ERROR_BREAK: a line break check failed
+ * @PANGO_VALIDATE_ERROR_GRAPHEME: a grapheme check failed
+ * @PANGO_VALIDATE_ERROR_WORD: word starts and ends do not alternate
+ * @PANGO_VALIDATE_ERROR_SENTENCE: sentence starts and ends do not
+ *   alternate
+ * @PANGO_VALIDATE_ERROR_SPACE: an expandable space is not marked as
+ *   white space
+ *
+ * Error codes in the %PANGO_VALIDATE_ERROR domain, as reported by
+ * pango_validate_log_attrs().
+ */
+typedef enum
+{
+  PANGO_VALIDATE_ERROR_FAILED,
+  PANGO_VALIDATE_ERROR_BREAK,
+  PANGO_VALIDATE_ERROR_GRAPHEME,
+  PANGO_VALIDATE_ERROR_WORD,
+  PANGO_VALIDATE_ERROR_SENTENCE,
+  PANGO_VALIDATE_ERROR_SPACE
+} PangoValidateError;
+
+PANGO_AVAILABLE_IN_1_50
+GQuark                 pango_validate_error_quark (void);
+
+PANGO_AVAILABLE_IN_1_50
+gboolean               pango_validate_log_attrs (const char          *text,
+                                                 int                  length,
+                                                 const PangoLogAttr  *log_attrs,
+                                                 int                  attrs_len,
+                                                 GError             **error);
+
 G_END_DECLS
 
 #endif /* __PANGO_BREAK_H__ */


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]