[pango/break-tailoring: 19/19] Add segmentation attributes
- From: Matthias Clasen <matthiasc src gnome org>
- To: commits-list gnome org
- Cc:
- Subject: [pango/break-tailoring: 19/19] Add segmentation attributes
- Date: Tue, 24 Aug 2021 01:29:50 +0000 (UTC)
commit 84cd9a305af6e6d91e83320f12c48e0f281b7cd4
Author: Matthias Clasen <mclasen redhat com>
Date: Sat Aug 21 21:01:29 2021 -0400
Add segmentation attributes
Add attributes that let us override word and
sentence boundaries.
docs/pango_markup.md | 6 ++
pango/break.c | 175 +++++++++++++++++++++++++++++++++++++++++++++++
pango/pango-attributes.c | 56 +++++++++++++++
pango/pango-attributes.h | 9 +++
pango/pango-layout.c | 2 +
pango/pango-markup.c | 21 ++++++
tests/test-common.c | 2 +
tests/testattributes.c | 6 +-
8 files changed, 276 insertions(+), 1 deletion(-)
---
diff --git a/docs/pango_markup.md b/docs/pango_markup.md
index 6c421795..9187c1a9 100644
--- a/docs/pango_markup.md
+++ b/docs/pango_markup.md
@@ -209,6 +209,12 @@ text_transform
'none', 'lowercase', 'uppercase' or 'capitalize'. Support for text transformation
was added in Pango 1.50.
+segment
+: Overrides word or sentence boundaries. The value can be 'word' or 'sentence',
+ to indicate that the span should be treated as a single word or sentence.
+ Overlapping segments will be split to allow this.
+ Available since Pango 1.50.
+
## Convenience Tags
`<b>`
diff --git a/pango/break.c b/pango/break.c
index 053fb329..8ecb34ef 100644
--- a/pango/break.c
+++ b/pango/break.c
@@ -1634,11 +1634,15 @@ break_attrs (const char *text,
{
PangoAttrList allow_breaks;
PangoAttrList line_breaks;
+ PangoAttrList words;
+ PangoAttrList sentences;
GSList *l;
gboolean tailored = FALSE;
_pango_attr_list_init (&allow_breaks);
_pango_attr_list_init (&line_breaks);
+ _pango_attr_list_init (&words);
+ _pango_attr_list_init (&sentences);
for (l = attributes; l; l = l->next)
{
@@ -1659,6 +1663,10 @@ break_attrs (const char *text,
pango_attr_list_insert (&allow_breaks, pango_attribute_copy (attr));
else if (attr->klass->type == PANGO_ATTR_LINE_BREAK)
pango_attr_list_insert (&line_breaks, pango_attribute_copy (attr));
+ else if (attr->klass->type == PANGO_ATTR_WORD)
+ pango_attr_list_insert (&words, pango_attribute_copy (attr));
+ else if (attr->klass->type == PANGO_ATTR_SENTENCE)
+ pango_attr_list_insert (&sentences, pango_attribute_copy (attr));
}
if (_pango_attr_list_has_attributes (&allow_breaks))
@@ -1756,8 +1764,175 @@ break_attrs (const char *text,
_pango_attr_iterator_destroy (&iter);
}
+ if (_pango_attr_list_has_attributes (&words))
+ {
+ PangoAttrIterator iter;
+
+ _pango_attr_list_get_iterator (&words, &iter);
+ do
+ {
+ const PangoAttribute *attr = pango_attr_iterator_get (&iter, PANGO_ATTR_WORD);
+ int start, end;
+ int start_pos, end_pos;
+ int pos;
+
+ if (!attr)
+ continue;
+
+ start = attr->start_index;
+ end = attr->end_index;
+ if (start < offset)
+ start_pos = 0;
+ else
+ start_pos = g_utf8_pointer_to_offset (text, text + start - offset);
+ if (end >= offset + length)
+ end_pos = log_attrs_len;
+ else
+ end_pos = g_utf8_pointer_to_offset (text, text + end - offset);
+
+ if ((start >= offset && !log_attrs[start_pos].is_cursor_position) ||
+ (end < offset + length && !log_attrs[end_pos].is_cursor_position))
+ {
+ g_warning ("Can't place word boundary on non-grapheme break");
+ continue;
+ }
+
+ for (pos = start_pos + 1; pos < end_pos; pos++)
+ {
+ log_attrs[pos].is_word_start = FALSE;
+ log_attrs[pos].is_word_end = FALSE;
+ log_attrs[pos].is_word_boundary = FALSE;
+ log_attrs[pos].is_sentence_start = FALSE;
+ log_attrs[pos].is_sentence_end = FALSE;
+ log_attrs[pos].is_sentence_boundary = FALSE;
+
+ tailored = TRUE;
+ }
+ if (start >= offset)
+ {
+ gboolean in_word = FALSE;
+ for (pos = start - 1; pos >= offset; pos--)
+ {
+ if (log_attrs[pos].is_word_end)
+ break;
+ if (log_attrs[pos].is_word_start)
+ {
+ in_word = TRUE;
+ break;
+ }
+ }
+ log_attrs[start_pos].is_word_start = TRUE;
+ log_attrs[start_pos].is_word_end = in_word;
+ log_attrs[start_pos].is_word_boundary = TRUE;
+ }
+ if (end < offset + length)
+ {
+ gboolean in_word = FALSE;
+ for (pos = end + 1; pos < offset + length; pos++)
+ {
+ if (log_attrs[pos].is_word_start)
+ break;
+ if (log_attrs[pos].is_word_end)
+ {
+ in_word = TRUE;
+ break;
+ }
+ }
+ log_attrs[end_pos].is_word_start = in_word;
+ log_attrs[end_pos].is_word_end = TRUE;
+ log_attrs[end_pos].is_word_boundary = TRUE;
+ }
+ }
+ while (pango_attr_iterator_next (&iter));
+
+ _pango_attr_iterator_destroy (&iter);
+ }
+
+ if (_pango_attr_list_has_attributes (&sentences))
+ {
+ PangoAttrIterator iter;
+
+ _pango_attr_list_get_iterator (&sentences, &iter);
+ do
+ {
+ const PangoAttribute *attr = pango_attr_iterator_get (&iter, PANGO_ATTR_SENTENCE);
+ int start, end;
+ int start_pos, end_pos;
+ int pos;
+
+ if (!attr)
+ continue;
+
+ start = attr->start_index;
+ end = attr->end_index;
+ if (start < offset)
+ start_pos = 0;
+ else
+ start_pos = g_utf8_pointer_to_offset (text, text + start - offset);
+ if (end >= offset + length)
+ end_pos = log_attrs_len;
+ else
+ end_pos = g_utf8_pointer_to_offset (text, text + end - offset);
+
+ if ((start >= offset && !log_attrs[start_pos].is_word_boundary) ||
+ (end < offset + length && !log_attrs[end_pos].is_word_boundary))
+ {
+ g_warning ("Can't place sentence boundary on non-word boundary");
+ continue;
+ }
+
+ for (pos = start_pos + 1; pos < end_pos; pos++)
+ {
+ log_attrs[pos].is_sentence_start = FALSE;
+ log_attrs[pos].is_sentence_end = FALSE;
+ log_attrs[pos].is_sentence_boundary = FALSE;
+
+ tailored = TRUE;
+ }
+ if (start >= offset)
+ {
+ gboolean in_sentence = FALSE;
+ for (pos = start - 1; pos >= offset; pos--)
+ {
+ if (log_attrs[pos].is_sentence_end)
+ break;
+ if (log_attrs[pos].is_sentence_start)
+ {
+ in_sentence = TRUE;
+ break;
+ }
+ }
+ log_attrs[start_pos].is_sentence_start = TRUE;
+ log_attrs[start_pos].is_sentence_end = in_sentence;
+ log_attrs[start_pos].is_sentence_boundary = TRUE;
+ }
+ if (end < offset + length)
+ {
+ gboolean in_sentence = FALSE;
+ for (pos = end + 1; pos < offset + length; pos++)
+ {
+ if (log_attrs[pos].is_sentence_start)
+ break;
+ if (log_attrs[pos].is_sentence_end)
+ {
+ in_sentence = TRUE;
+ break;
+ }
+ }
+ log_attrs[end_pos].is_sentence_start = in_sentence;
+ log_attrs[end_pos].is_sentence_end = TRUE;
+ log_attrs[end_pos].is_sentence_boundary = TRUE;
+ }
+ }
+ while (pango_attr_iterator_next (&iter));
+
+ _pango_attr_iterator_destroy (&iter);
+ }
+
_pango_attr_list_destroy (&allow_breaks);
_pango_attr_list_destroy (&line_breaks);
+ _pango_attr_list_destroy (&words);
+ _pango_attr_list_destroy (&sentences);
return tailored;
}
diff --git a/pango/pango-attributes.c b/pango/pango-attributes.c
index b2dce858..2831c4fd 100644
--- a/pango/pango-attributes.c
+++ b/pango/pango-attributes.c
@@ -1391,6 +1391,60 @@ pango_attr_line_break_new (PangoLineBreak before,
return pango_attr_int_new (&klass, before | (after << 16));
}
+/**
+ * pango_attr_word_new:
+ *
+ * Marks the range of the attribute as a single word.
+ *
+ * Note that this may require adjustments to word and
+ * sentence classification around the range.
+ *
+ * Return value: (transfer full): the newly allocated
+ * `PangoAttribute`, which should be freed with
+ * [method@Pango.Attribute.destroy]
+ *
+ * Since: 1.50
+ */
+PangoAttribute *
+pango_attr_word_new (void)
+{
+ static const PangoAttrClass klass = {
+ PANGO_ATTR_WORD,
+ pango_attr_int_copy,
+ pango_attr_int_destroy,
+ pango_attr_int_equal,
+ };
+
+ return pango_attr_int_new (&klass, 0);
+}
+
+/**
+ * pango_attr_sentence_new:
+ *
+ * Marks the range of the attribute as a single sentence.
+ *
+ * Note that this may require adjustments to word and
+ * sentence classification around the range.
+ *
+ * Return value: (transfer full): the newly allocated
+ * `PangoAttribute`, which should be freed with
+ * [method@Pango.Attribute.destroy]
+ *
+ * Since: 1.50
+ */
+PangoAttribute *
+pango_attr_sentence_new (void)
+{
+ static const PangoAttrClass klass = {
+ PANGO_ATTR_SENTENCE,
+ pango_attr_int_copy,
+ pango_attr_int_destroy,
+ pango_attr_int_equal,
+ };
+
+ return pango_attr_int_new (&klass, 0);
+}
+
/**
* pango_attr_overline_new:
* @overline: the overline style
@@ -1568,6 +1622,8 @@ pango_attribute_as_int (PangoAttribute *attr)
case PANGO_ATTR_OVERLINE:
case PANGO_ATTR_ABSOLUTE_LINE_HEIGHT:
case PANGO_ATTR_TEXT_TRANSFORM:
+ case PANGO_ATTR_WORD:
+ case PANGO_ATTR_SENTENCE:
return (PangoAttrInt *)attr;
default:
diff --git a/pango/pango-attributes.h b/pango/pango-attributes.h
index 9180e960..8e99e5f0 100644
--- a/pango/pango-attributes.h
+++ b/pango/pango-attributes.h
@@ -79,6 +79,8 @@ typedef struct _PangoAttrFontFeatures PangoAttrFontFeatures;
* @PANGO_ATTR_ABSOLUTE_LINE_HEIGHT: line height ([struct@Pango.AttrInt]). Since: 1.50
* @PANGO_ATTR_LINE_BREAK: override line breaks at the ends of the range ([struct@Pango.AttrInt]). Since 1.50
* @PANGO_ATTR_ALLOW_LINE_BREAKS: what algorithmically determined line breaks to allow ([struct@Pango.AttrInt]). Since 1.50
+ * @PANGO_ATTR_WORD: override segmentation to classify the range of the attribute as a single word ([struct@Pango.AttrInt]). Since 1.50
+ * @PANGO_ATTR_SENTENCE: override segmentation to classify the range of the attribute as a single sentence ([struct@Pango.AttrInt]). Since 1.50
*
* The `PangoAttrType` distinguishes between different types of attributes.
*
@@ -125,6 +127,8 @@ typedef enum
PANGO_ATTR_TEXT_TRANSFORM, /* PangoAttrInt */
PANGO_ATTR_LINE_BREAK, /* PangoAttrInt */
PANGO_ATTR_ALLOW_LINE_BREAKS, /* PangoAttrInt */
+ PANGO_ATTR_WORD, /* PangoAttrInt */
+ PANGO_ATTR_SENTENCE, /* PangoAttrInt */
} PangoAttrType;
/**
@@ -570,6 +574,11 @@ PANGO_AVAILABLE_IN_1_50
PangoAttribute * pango_attr_line_break_new (PangoLineBreak before,
PangoLineBreak after);
+PANGO_AVAILABLE_IN_1_50
+PangoAttribute * pango_attr_word_new (void);
+PANGO_AVAILABLE_IN_1_50
+PangoAttribute * pango_attr_sentence_new (void);
+
PANGO_AVAILABLE_IN_1_44
PangoAttribute * pango_attr_insert_hyphens_new (gboolean
insert_hyphens);
PANGO_AVAILABLE_IN_1_46
diff --git a/pango/pango-layout.c b/pango/pango-layout.c
index f9005b72..14a6066d 100644
--- a/pango/pango-layout.c
+++ b/pango/pango-layout.c
@@ -4326,6 +4326,8 @@ affects_break_or_shape (PangoAttribute *attr,
case PANGO_ATTR_ALLOW_BREAKS:
case PANGO_ATTR_ALLOW_LINE_BREAKS:
case PANGO_ATTR_LINE_BREAK:
+ case PANGO_ATTR_WORD:
+ case PANGO_ATTR_SENTENCE:
/* Affects shaping */
case PANGO_ATTR_INSERT_HYPHENS:
case PANGO_ATTR_FONT_FEATURES:
diff --git a/pango/pango-markup.c b/pango/pango-markup.c
index ab0a9487..d62d95ab 100644
--- a/pango/pango-markup.c
+++ b/pango/pango-markup.c
@@ -1232,6 +1232,7 @@ span_parse_func (MarkupData *md G_GNUC_UNUSED,
const char *text_transform = NULL;
const char *break_before = NULL;
const char *break_after = NULL;
+ const char *segment = NULL;
g_markup_parse_context_get_position (context,
&line_number, &char_number);
@@ -1301,6 +1302,7 @@ span_parse_func (MarkupData *md G_GNUC_UNUSED,
CHECK_ATTRIBUTE (strikethrough);
CHECK_ATTRIBUTE (strikethrough_color);
CHECK_ATTRIBUTE (style);
+ CHECK_ATTRIBUTE (segment);
break;
case 't':
CHECK_ATTRIBUTE (text_transform);
@@ -1760,6 +1762,25 @@ span_parse_func (MarkupData *md G_GNUC_UNUSED,
add_attribute (tag, pango_attr_insert_hyphens_new (b));
}
+ if (G_UNLIKELY (segment))
+ {
+ if (strcmp (segment, "word") == 0)
+ add_attribute (tag, pango_attr_word_new ());
+ else if (strcmp (segment, "sentence") == 0)
+ add_attribute (tag, pango_attr_sentence_new ());
+ else
+ {
+ g_set_error (error,
+ G_MARKUP_ERROR,
+ G_MARKUP_ERROR_INVALID_CONTENT,
+ _("Value of 'segment' attribute on <span> tag on line %d "
+ "could not be parsed; should be one of 'word' or "
+ "'sentence', not '%s'"),
+ line_number, segment);
+ goto error;
+ }
+ }
+
return TRUE;
error:
diff --git a/tests/test-common.c b/tests/test-common.c
index c93197eb..1a9c1154 100644
--- a/tests/test-common.c
+++ b/tests/test-common.c
@@ -145,6 +145,8 @@ print_attribute (PangoAttribute *attr, GString *string)
case PANGO_ATTR_SHOW:
case PANGO_ATTR_TEXT_TRANSFORM:
case PANGO_ATTR_ABSOLUTE_LINE_HEIGHT:
+ case PANGO_ATTR_WORD:
+ case PANGO_ATTR_SENTENCE:
g_string_append_printf (string, "%d", ((PangoAttrInt *)attr)->value);
break;
case PANGO_ATTR_LINE_BREAK:
diff --git a/tests/testattributes.c b/tests/testattributes.c
index d491a380..fd05aea0 100644
--- a/tests/testattributes.c
+++ b/tests/testattributes.c
@@ -77,6 +77,8 @@ test_attributes_basic (void)
test_copy (pango_attr_line_height_new_absolute (3000));
test_copy (pango_attr_line_break_new (PANGO_LINE_BREAK_CHAR, PANGO_LINE_BREAK_MANDATORY));
test_copy (pango_attr_allow_line_breaks_new (PANGO_LINE_BREAK_LINE));
+ test_copy (pango_attr_word_new ());
+ test_copy (pango_attr_sentence_new ());
}
static void
@@ -127,7 +129,7 @@ test_binding (PangoAttribute *attr)
INVALID, LANGUAGE, STRING, INT, INT, INT, INT, SIZE, FONT_DESC, COLOR,
COLOR, INT, INT, INT, SHAPE, FLOAT, INT, INT, COLOR, COLOR, SIZE,
INT, INT, FONT_FEATURES, INT, INT, INT, INT, INT, INT, COLOR, FLOAT,
- INT, INT, INT, INT
+ INT, INT, INT, INT, INT, INT
};
switch (attr_base[attr->klass->type])
@@ -211,6 +213,8 @@ test_binding_helpers (void)
test_binding (pango_attr_line_height_new_absolute (3000));
test_binding (pango_attr_line_break_new (PANGO_LINE_BREAK_CHAR, PANGO_LINE_BREAK_MANDATORY));
test_binding (pango_attr_allow_line_breaks_new (PANGO_LINE_BREAK_LINE));
+ test_binding (pango_attr_word_new ());
+ test_binding (pango_attr_sentence_new ());
}
static void
[Date Prev][Date Next] [Thread Prev][Thread Next] [Thread Index] [Date Index] [Author Index]