[glib: 1/2] uri: add GUriParamsIter
- From: Philip Withnall <pwithnall src gnome org>
- To: commits-list gnome org
- Cc:
- Subject: [glib: 1/2] uri: add GUriParamsIter
- Date: Wed, 5 Aug 2020 16:07:43 +0000 (UTC)
commit 5767eef895357ec9e4212d0c3657991531f55b5f
Author: Marc-André Lureau <marcandre lureau redhat com>
Date: Sun Jul 12 22:26:46 2020 +0400
uri: add GUriParamsIter
See also:
https://gitlab.gnome.org/GNOME/glib/-/merge_requests/1328#note_863735
docs/reference/glib/glib-sections.txt | 3 +
glib/guri.c | 227 +++++++++++++++++++++++++++-------
glib/guri.h | 23 ++++
glib/tests/uri.c | 158 +++++++++++++++++------
4 files changed, 325 insertions(+), 86 deletions(-)
---
diff --git a/docs/reference/glib/glib-sections.txt b/docs/reference/glib/glib-sections.txt
index 3cefbe010..d38fccf40 100644
--- a/docs/reference/glib/glib-sections.txt
+++ b/docs/reference/glib/glib-sections.txt
@@ -3373,7 +3373,10 @@ g_uri_get_query
g_uri_get_fragment
g_uri_get_flags
<SUBSECTION>
+GUriParamsIter
GUriParamsFlags
+g_uri_params_iter_init
+g_uri_params_iter_next
g_uri_parse_params
<SUBSECTION>
G_URI_RESERVED_CHARS_ALLOWED_IN_PATH
diff --git a/glib/guri.c b/glib/guri.c
index 9dfebcf07..18028e3cc 100644
--- a/glib/guri.c
+++ b/glib/guri.c
@@ -1757,6 +1757,172 @@ str_ascii_case_equal (gconstpointer v1,
return g_ascii_strcasecmp (string1, string2) == 0;
}
+/**
+ * GUriParamsIter:
+ *
+ * Many URI schemes include one or more attribute/value pairs as part of the URI
+ * value (for example "scheme://server/path?query=string&is=there" has two
+ * attributes "query=string" and "is=there" in its query part).
+ *
+ * A #GUriParamsIter structure represents an iterator that can be used to
+ * iterate over the attribute/value pairs of a URI query string. #GUriParamsIter
+ * structures are typically allocated on the stack and then initialized with
+ * g_uri_params_iter_init(). See the documentation for g_uri_params_iter_init()
+ * for a usage example.
+ *
+ * Since: 2.66
+ */
+typedef struct
+{
+ GUriParamsFlags flags;
+ const gchar *attr;
+ const gchar *end;
+ guint8 sep_table[256]; /* 1 = index is a separator; 0 otherwise */
+} RealIter;
+
+G_STATIC_ASSERT (sizeof (GUriParamsIter) == sizeof (RealIter));
+G_STATIC_ASSERT (G_ALIGNOF (GUriParamsIter) >= G_ALIGNOF (RealIter));
+
+/**
+ * g_uri_params_iter_init:
+ * @iter: an uninitialized #GUriParamsIter
+ * @params: a `%`-encoded string containing "attribute=value"
+ * parameters
+ * @length: the length of @params, or -1 if it is NUL-terminated
+ * @separators: the separator byte character set between parameters (usually
+ * "&", but sometimes ";" or both "&;"). Note that this function works on
+ * bytes not characters, so it can't be used to delimit UTF-8 strings for
+ * anything but ASCII characters. You may pass an empty set, in which case
+ * no splitting will occur.
+ * @flags: flags to modify the way the parameters are handled.
+ *
+ * Initializes an attribute/value pair iterator. The iterator keeps references
+ * to the @params and @separators arguments; those variables must thus outlive
+ * the iterator and not be modified during the iteration.
+ *
+ * |[<!-- language="C" -->
+ * GUriParamsIter iter;
+ * GError *error = NULL;
+ * gchar *attr, *value;
+ *
+ * g_uri_params_iter_init (&iter, "foo=bar&baz=bar", -1, "&", G_URI_PARAMS_NONE);
+ * while (g_uri_params_iter_next (&iter, &attr, &value, &error))
+ * {
+ * // do something with attr and value
+ * g_free (attr);
+ * g_free (value);
+ * }
+ * if (error)
+ * // handle parsing error
+ * ]|
+ *
+ * Since: 2.66
+ */
+void
+g_uri_params_iter_init (GUriParamsIter *iter,
+ const gchar *params,
+ gssize length,
+ const gchar *separators,
+ GUriParamsFlags flags)
+{
+ RealIter *ri = (RealIter *)iter;
+ const gchar *s;
+
+ g_return_if_fail (iter != NULL);
+ g_return_if_fail (length == 0 || params != NULL);
+ g_return_if_fail (length >= -1);
+ g_return_if_fail (separators != NULL);
+
+ ri->flags = flags;
+
+ if (length == -1)
+ ri->end = params + strlen (params);
+ else
+ ri->end = params + length;
+
+ memset (ri->sep_table, FALSE, sizeof (ri->sep_table));
+ for (s = separators; *s != '\0'; ++s)
+ ri->sep_table[*(guchar *)s] = TRUE;
+
+ ri->attr = params;
+}
+
+/**
+ * g_uri_params_iter_next:
+ * @iter: an initialized #GUriParamsIter
+ * @attribute: (out) (nullable) (optional) (transfer full): on return, contains
+ * the attribute, or %NULL.
+ * @value: (out) (nullable) (optional) (transfer full): on return, contains
+ * the value, or %NULL.
+ * @error: #GError for error reporting, or %NULL to ignore.
+ *
+ * Advances @iter and retrieves the next attribute/value. If %FALSE is returned,
+ * @attribute and @value are not set, and the iterator becomes invalid. Note
+ * that the same @attribute may be returned multiple times, since URIs
+ * allow repeated attributes.
+ *
+ * Returns: %FALSE if the end of the parameters has been reached or an error was
+ * encountered.
+ *
+ * Since: 2.66
+ */
+gboolean
+g_uri_params_iter_next (GUriParamsIter *iter,
+ gchar **attribute,
+ gchar **value,
+ GError **error)
+{
+ RealIter *ri = (RealIter *)iter;
+ const gchar *attr_end, *val, *val_end;
+ gchar *decoded_attr, *decoded_value;
+ gboolean www_form = ri->flags & G_URI_PARAMS_WWW_FORM;
+
+ g_return_val_if_fail (iter != NULL, FALSE);
+ g_return_val_if_fail (error == NULL || *error == NULL, FALSE);
+
+ if (ri->attr >= ri->end)
+ return FALSE;
+
+ /* Check if each character in @attr is a separator, by indexing by the
+ * character value into the @sep_table, which has value 1 stored at an
+ * index if that index is a separator. */
+ for (val_end = ri->attr; val_end < ri->end; val_end++)
+ if (ri->sep_table[*(guchar *)val_end])
+ break;
+
+ attr_end = memchr (ri->attr, '=', val_end - ri->attr);
+ if (!attr_end)
+ {
+ g_set_error_literal (error, G_URI_ERROR, G_URI_ERROR_MISC,
+ _("Missing '=' and parameter value"));
+ return FALSE;
+ }
+ if (!uri_decode (&decoded_attr, NULL, ri->attr, attr_end - ri->attr,
+ www_form, G_URI_FLAGS_NONE, G_URI_ERROR_MISC, error))
+ {
+ return FALSE;
+ }
+
+ val = attr_end + 1;
+ if (!uri_decode (&decoded_value, NULL, val, val_end - val,
+ www_form, G_URI_FLAGS_NONE, G_URI_ERROR_MISC, error))
+ {
+ g_free (decoded_attr);
+ return FALSE;
+ }
+
+ if (attribute)
+ *attribute = g_steal_pointer (&decoded_attr);
+ if (value)
+ *value = g_steal_pointer (&decoded_value);
+
+ g_free (decoded_attr);
+ g_free (decoded_value);
+
+ ri->attr = val_end + 1;
+ return TRUE;
+}
+
/**
* g_uri_parse_params:
* @params: a `%`-encoded string containing "attribute=value"
@@ -1771,7 +1937,10 @@ str_ascii_case_equal (gconstpointer v1,
* @error: #GError for error reporting, or %NULL to ignore.
*
* Many URI schemes include one or more attribute/value pairs as part of the URI
- * value. This method can be used to parse them into a hash table.
+ * value. This method can be used to parse them into a hash table. When an
+ * attribute has multiple occurrences, the last value is the final returned
+ * value. If you need to handle repeated attributes differently, use
+ * #GUriParamsIter.
*
* The @params string is assumed to still be `%`-encoded, but the returned
* values will be fully decoded. (Thus it is possible that the returned values
@@ -1797,10 +1966,9 @@ g_uri_parse_params (const gchar *params,
GError **error)
{
GHashTable *hash;
- const gchar *end, *attr, *attr_end, *value, *value_end, *s;
- gchar *decoded_attr, *decoded_value;
- guint8 sep_table[256]; /* 1 = index is a separator; 0 otherwise */
- gboolean www_form = flags & G_URI_PARAMS_WWW_FORM;
+ GUriParamsIter iter;
+ gchar *attribute, *value;
+ GError *err = NULL;
g_return_val_if_fail (length == 0 || params != NULL, NULL);
g_return_val_if_fail (length >= -1, NULL);
@@ -1819,51 +1987,16 @@ g_uri_parse_params (const gchar *params,
g_free, g_free);
}
- if (length == -1)
- end = params + strlen (params);
- else
- end = params + length;
+ g_uri_params_iter_init (&iter, params, length, separators, flags);
- memset (sep_table, FALSE, sizeof (sep_table));
- for (s = separators; *s != '\0'; ++s)
- sep_table[*(guchar *)s] = TRUE;
+ while (g_uri_params_iter_next (&iter, &attribute, &value, &err))
+ g_hash_table_insert (hash, attribute, value);
- attr = params;
- while (attr < end)
+ if (err)
{
- /* Check if each character in @attr is a separator, by indexing by the
- * character value into the @sep_table, which has value 1 stored at an
- * index if that index is a separator. */
- for (value_end = attr; value_end < end; value_end++)
- if (sep_table[*(guchar *)value_end])
- break;
-
- attr_end = memchr (attr, '=', value_end - attr);
- if (!attr_end)
- {
- g_hash_table_destroy (hash);
- g_set_error_literal (error, G_URI_ERROR, G_URI_ERROR_MISC,
- _("Missing '=' and parameter value"));
- return NULL;
- }
- if (!uri_decode (&decoded_attr, NULL, attr, attr_end - attr,
- www_form, G_URI_FLAGS_NONE, G_URI_ERROR_MISC, error))
- {
- g_hash_table_destroy (hash);
- return NULL;
- }
-
- value = attr_end + 1;
- if (!uri_decode (&decoded_value, NULL, value, value_end - value,
- www_form, G_URI_FLAGS_NONE, G_URI_ERROR_MISC, error))
- {
- g_free (decoded_attr);
- g_hash_table_destroy (hash);
- return NULL;
- }
-
- g_hash_table_insert (hash, decoded_attr, decoded_value);
- attr = value_end + 1;
+ g_propagate_error (error, g_steal_pointer (&err));
+ g_hash_table_destroy (hash);
+ return NULL;
}
return hash;
diff --git a/glib/guri.h b/glib/guri.h
index da0bc9bc4..b3c7a9015 100644
--- a/glib/guri.h
+++ b/glib/guri.h
@@ -254,6 +254,29 @@ GHashTable *g_uri_parse_params (const gchar *params,
GUriParamsFlags flags,
GError **error);
+typedef struct _GUriParamsIter GUriParamsIter;
+
+struct _GUriParamsIter
+{
+ /*< private >*/
+ gint dummy0;
+ gpointer dummy1;
+ gpointer dummy2;
+ guint8 dummy3[256];
+};
+
+GLIB_AVAILABLE_IN_2_66
+void g_uri_params_iter_init (GUriParamsIter *iter,
+ const gchar *params,
+ gssize length,
+ const gchar *separators,
+ GUriParamsFlags flags);
+
+GLIB_AVAILABLE_IN_2_66
+gboolean g_uri_params_iter_next (GUriParamsIter *iter,
+ gchar **attribute,
+ gchar **value,
+ GError **error);
/**
* G_URI_ERROR:
*
diff --git a/glib/tests/uri.c b/glib/tests/uri.c
index 99845c4d0..72c6dbbcf 100644
--- a/glib/tests/uri.c
+++ b/glib/tests/uri.c
@@ -1341,50 +1341,128 @@ test_uri_is_valid (void)
g_clear_error (&error);
}
+static const struct
+{
+ /* Inputs */
+ const gchar *uri;
+ gchar *separators;
+ GUriParamsFlags flags;
+ /* Outputs */
+ /* key, value, key, value, …, limited to length 2*expected_n_params */
+ gssize expected_n_iter; /* -1 => error expected */
+ const gchar *expected_iter_key_values[6];
+ gssize expected_n_params; /* -1 => error expected */
+ const gchar *expected_param_key_values[6];
+} params_tests[] =
+ {
+ { "p1=foo&p2=bar;p3=baz", "&;", G_URI_PARAMS_NONE,
+ 3, { "p1", "foo", "p2", "bar", "p3", "baz" },
+ 3, { "p1", "foo", "p2", "bar", "p3", "baz" }},
+ { "p1=foo&p2=bar", "", G_URI_PARAMS_NONE,
+ 1, { "p1", "foo&p2=bar" },
+ 1, { "p1", "foo&p2=bar" }},
+ { "p1=foo&&P1=bar", "&", G_URI_PARAMS_NONE,
+ 1, { "p1", "foo" },
+ -1, { NULL, }},
+ { "%00=foo", "&", G_URI_PARAMS_NONE,
+ 0, { NULL, },
+ -1, { NULL, }},
+ { "p1=%00", "&", G_URI_PARAMS_NONE,
+ 0, { NULL, },
+ -1, { NULL, }},
+ { "p1=foo&p1=bar", "&", G_URI_PARAMS_NONE,
+ 2, { "p1", "foo", "p1", "bar" },
+ 1, { "p1", "bar", NULL, }},
+ { "p1=foo&P1=bar", "&", G_URI_PARAMS_CASE_INSENSITIVE,
+ 2, { "p1", "foo", "P1", "bar" },
+ 1, { "p1", "bar", NULL, }},
+ { "=%", "&", G_URI_PARAMS_NONE,
+ 1, { "", "%", NULL, },
+ 1, { "", "%", NULL, }},
+ { "=", "&", G_URI_PARAMS_NONE,
+ 1, { "", "", NULL, },
+ 1, { "", "", NULL, }},
+ { "foo", "&", G_URI_PARAMS_NONE,
+ 0, { NULL, },
+ -1, { NULL, }},
+ { "foo=bar+%26+baz&saisons=%C3%89t%C3%A9%2Bhiver", "&", G_URI_PARAMS_WWW_FORM,
+ 2, { "foo", "bar & baz", "saisons", "Été+hiver", NULL, },
+ 2, { "foo", "bar & baz", "saisons", "Été+hiver", NULL, }},
+ { "foo=bar+%26+baz&saisons=%C3%89t%C3%A9%2Bhiver", "&", G_URI_PARAMS_NONE,
+ 2, { "foo", "bar+&+baz", "saisons", "Été+hiver", NULL, },
+ 2, { "foo", "bar+&+baz", "saisons", "Été+hiver", NULL, }},
+ };
+
static void
-test_uri_parse_params (gconstpointer test_data)
+test_uri_iter_params (gconstpointer test_data)
{
GError *err = NULL;
gboolean use_nul_terminated = GPOINTER_TO_INT (test_data);
- const struct
+ gsize i, n;
+
+ for (i = 0; i < G_N_ELEMENTS (params_tests); i++)
{
- /* Inputs */
- const gchar *uri;
- gchar *separators;
- GUriParamsFlags flags;
- /* Outputs */
- gssize expected_n_params; /* -1 => error expected */
- /* key, value, key, value, …, limited to length 2*expected_n_params */
- const gchar *expected_param_key_values[6];
+ GUriParamsIter iter;
+ gchar *uri, *attr, *value;
+ gssize uri_len;
+
+ g_test_message ("URI %" G_GSIZE_FORMAT ": %s", i, params_tests[i].uri);
+
+ g_assert (params_tests[i].expected_n_params < 0 ||
+ params_tests[i].expected_n_params <= G_N_ELEMENTS
(params_tests[i].expected_param_key_values) / 2);
+
+ /* The tests get run twice: once with the length unspecified, using a
+ * nul-terminated string; and once with the length specified and a copy of
+ * the string with the trailing nul explicitly removed (to help catch
+ * buffer overflows). */
+ if (use_nul_terminated)
+ {
+ uri_len = -1;
+ uri = g_strdup (params_tests[i].uri);
+ }
+ else
+ {
+ uri_len = strlen (params_tests[i].uri); /* no trailing nul */
+ uri = g_memdup (params_tests[i].uri, uri_len);
+ }
+
+ n = 0;
+ g_uri_params_iter_init (&iter, params_tests[i].uri, -1, params_tests[i].separators,
params_tests[i].flags);
+ while (g_uri_params_iter_next (&iter, &attr, &value, &err))
+ {
+ g_assert_cmpstr (attr, ==, params_tests[i].expected_iter_key_values[n * 2]);
+ g_assert_cmpstr (value, ==, params_tests[i].expected_iter_key_values[n * 2 + 1]);
+ n++;
+ g_free (attr);
+ g_free (value);
+ }
+ g_assert_cmpint (n, ==, params_tests[i].expected_n_iter);
+ if (err)
+ {
+ g_assert_error (err, G_URI_ERROR, G_URI_ERROR_MISC);
+ g_clear_error (&err);
+ }
+ g_free (uri);
}
- tests[] =
- {
- { "p1=foo&p2=bar;p3=baz", "&;", G_URI_PARAMS_NONE, 3, { "p1", "foo", "p2", "bar", "p3", "baz" }},
- { "p1=foo&p2=bar", "", G_URI_PARAMS_NONE, 1, { "p1", "foo&p2=bar" }},
- { "p1=foo&&P1=bar", "&", G_URI_PARAMS_NONE, -1, { NULL, }},
- { "%00=foo", "&", G_URI_PARAMS_NONE, -1, { NULL, }},
- { "p1=%00", "&", G_URI_PARAMS_NONE, -1, { NULL, }},
- { "p1=foo&P1=bar", "&", G_URI_PARAMS_CASE_INSENSITIVE, 1, { "p1", "bar", NULL, }},
- { "=%", "&", G_URI_PARAMS_NONE, 1, { "", "%", NULL, }},
- { "=", "&", G_URI_PARAMS_NONE, 1, { "", "", NULL, }},
- { "foo", "&", G_URI_PARAMS_NONE, -1, { NULL, }},
- { "foo=bar+%26+baz&saisons=%C3%89t%C3%A9%2Bhiver", "&", G_URI_PARAMS_WWW_FORM,
- 2, { "foo", "bar & baz", "saisons", "Été+hiver", NULL, }},
- { "foo=bar+%26+baz&saisons=%C3%89t%C3%A9%2Bhiver", "&", G_URI_PARAMS_NONE,
- 2, { "foo", "bar+&+baz", "saisons", "Été+hiver", NULL, }},
- };
+}
+
+static void
+test_uri_parse_params (gconstpointer test_data)
+{
+ GError *err = NULL;
+ gboolean use_nul_terminated = GPOINTER_TO_INT (test_data);
gsize i;
- for (i = 0; i < G_N_ELEMENTS (tests); i++)
+ for (i = 0; i < G_N_ELEMENTS (params_tests); i++)
{
GHashTable *params;
gchar *uri = NULL;
gssize uri_len;
- g_test_message ("URI %" G_GSIZE_FORMAT ": %s", i, tests[i].uri);
+ g_test_message ("URI %" G_GSIZE_FORMAT ": %s", i, params_tests[i].uri);
- g_assert (tests[i].expected_n_params < 0 ||
- tests[i].expected_n_params <= G_N_ELEMENTS (tests[i].expected_param_key_values) / 2);
+ g_assert (params_tests[i].expected_n_params < 0 ||
+ params_tests[i].expected_n_params <= G_N_ELEMENTS
(params_tests[i].expected_param_key_values) / 2);
/* The tests get run twice: once with the length unspecified, using a
* nul-terminated string; and once with the length specified and a copy of
@@ -1393,17 +1471,17 @@ test_uri_parse_params (gconstpointer test_data)
if (use_nul_terminated)
{
uri_len = -1;
- uri = g_strdup (tests[i].uri);
+ uri = g_strdup (params_tests[i].uri);
}
else
{
- uri_len = strlen (tests[i].uri); /* no trailing nul */
- uri = g_memdup (tests[i].uri, uri_len);
+ uri_len = strlen (params_tests[i].uri); /* no trailing nul */
+ uri = g_memdup (params_tests[i].uri, uri_len);
}
- params = g_uri_parse_params (uri, uri_len, tests[i].separators, tests[i].flags, &err);
+ params = g_uri_parse_params (uri, uri_len, params_tests[i].separators, params_tests[i].flags, &err);
- if (tests[i].expected_n_params < 0)
+ if (params_tests[i].expected_n_params < 0)
{
g_assert_null (params);
g_assert_error (err, G_URI_ERROR, G_URI_ERROR_MISC);
@@ -1414,11 +1492,11 @@ test_uri_parse_params (gconstpointer test_data)
gsize j;
g_assert_no_error (err);
- g_assert_cmpint (g_hash_table_size (params), ==, tests[i].expected_n_params);
+ g_assert_cmpint (g_hash_table_size (params), ==, params_tests[i].expected_n_params);
- for (j = 0; j < tests[i].expected_n_params; j += 2)
- g_assert_cmpstr (g_hash_table_lookup (params, tests[i].expected_param_key_values[j]), ==,
- tests[i].expected_param_key_values[j + 1]);
+ for (j = 0; j < params_tests[i].expected_n_params; j += 2)
+ g_assert_cmpstr (g_hash_table_lookup (params, params_tests[i].expected_param_key_values[j]), ==,
+ params_tests[i].expected_param_key_values[j + 1]);
}
g_clear_pointer (¶ms, g_hash_table_unref);
@@ -1480,6 +1558,8 @@ main (int argc,
g_test_add_func ("/uri/is_valid", test_uri_is_valid);
g_test_add_func ("/uri/to-string", test_uri_to_string);
g_test_add_func ("/uri/join", test_uri_join);
+ g_test_add_data_func ("/uri/iter-params/nul-terminated", GINT_TO_POINTER (TRUE), test_uri_iter_params);
+ g_test_add_data_func ("/uri/iter-params/length", GINT_TO_POINTER (FALSE), test_uri_iter_params);
g_test_add_data_func ("/uri/parse-params/nul-terminated", GINT_TO_POINTER (TRUE), test_uri_parse_params);
g_test_add_data_func ("/uri/parse-params/length", GINT_TO_POINTER (FALSE), test_uri_parse_params);
[
Date Prev][
Date Next] [
Thread Prev][
Thread Next]
[
Thread Index]
[
Date Index]
[
Author Index]